@@ -146,8 +146,8 @@ def find_imgs(string):
146146
147147 def split_text (self , text : str ) -> List [str ]:
148148 content_dict , masked_text = self .mask_urls_and_imgs (text )
149- start_tags = [self ._table_tags ["table_open" ] "<tabular>" ]
150- end_tags = [self ._table_tags ["table_close" ] "</tabular>" ]
149+ start_tags = [self ._table_tags ["table_open" ], "<tabular>" ]
150+ end_tags = [self ._table_tags ["table_close" ], "</tabular>" ]
151151 splits = masked_text
152152 for start_tag in start_tags :
153153 splits = splits .split (start_tag )
@@ -165,13 +165,12 @@ def split_text(self, text: str) -> List[str]:
165165 minitables = self .chunk_table (table , table_caption_prefix )
166166 final_chunks .extend (minitables )
167167
168- if rest .strip ()!= "" :
169- text_minichunks = self .chunk_rest (rest )
170- final_chunks .extend (text_minichunks )
171- table_caption_prefix = self .extract_caption (text_minichunks [- 1 ])
172- else :
173- table_caption_prefix = ""
174- break
168+ if rest .strip ()!= "" :
169+ text_minichunks = self .chunk_rest (rest )
170+ final_chunks .extend (text_minichunks )
171+ table_caption_prefix = self .extract_caption (text_minichunks [- 1 ])
172+ else :
173+ table_caption_prefix = ""
175174
176175 final_final_chunks = [chunk for chunk , chunk_size in merge_chunks_serially (final_chunks , self ._chunk_size , content_dict )]
177176
0 commit comments