Skip to content

Commit 9b313b1

Browse files
Fix issue 7834
1 parent 579c996 commit 9b313b1

File tree

1 file changed

+8
-9
lines changed

1 file changed

+8
-9
lines changed

scripts/data_utils.py

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -146,8 +146,8 @@ def find_imgs(string):
146146

147147
def split_text(self, text: str) -> List[str]:
148148
content_dict, masked_text = self.mask_urls_and_imgs(text)
149-
start_tags = [self._table_tags["table_open"] "<tabular>"]
150-
end_tags = [self._table_tags["table_close"] "</tabular>"]
149+
start_tags = [self._table_tags["table_open"], "<tabular>"]
150+
end_tags = [self._table_tags["table_close"], "</tabular>"]
151151
splits = masked_text
152152
for start_tag in start_tags:
153153
splits = splits.split(start_tag)
@@ -165,13 +165,12 @@ def split_text(self, text: str) -> List[str]:
165165
minitables = self.chunk_table(table, table_caption_prefix)
166166
final_chunks.extend(minitables)
167167

168-
if rest.strip()!="":
169-
text_minichunks = self.chunk_rest(rest)
170-
final_chunks.extend(text_minichunks)
171-
table_caption_prefix = self.extract_caption(text_minichunks[-1])
172-
else:
173-
table_caption_prefix = ""
174-
break
168+
if rest.strip()!="":
169+
text_minichunks = self.chunk_rest(rest)
170+
final_chunks.extend(text_minichunks)
171+
table_caption_prefix = self.extract_caption(text_minichunks[-1])
172+
else:
173+
table_caption_prefix = ""
175174

176175
final_final_chunks = [chunk for chunk, chunk_size in merge_chunks_serially(final_chunks, self._chunk_size, content_dict)]
177176

0 commit comments

Comments
 (0)