diff --git a/unstructured/partition/html/transformations.py b/unstructured/partition/html/transformations.py
index 6006cf9905..60437e3ee4 100644
--- a/unstructured/partition/html/transformations.py
+++ b/unstructured/partition/html/transformations.py
@@ -15,6 +15,21 @@
     ONTOLOGY_CLASS_TO_UNSTRUCTURED_ELEMENT_TYPE,
 )
 
+# Classes that `is_text_element` accepts; a tuple so `isinstance` can take it directly.
+text_classes = (
+    ontology.NarrativeText,
+    ontology.Quote,
+    ontology.Paragraph,
+    ontology.Footnote,
+    ontology.FootnoteReference,
+    ontology.Citation,
+    ontology.Bibliography,
+    ontology.Glossary,
+)
+
+# Element categories that `is_text_element` accepts regardless of the element's class.
+text_categories = (ontology.ElementTypeEnum.metadata,)
+
 RECURSION_LIMIT = 50
 
 
@@ -186,22 +201,10 @@ def can_unstructured_elements_be_merged(
 def is_text_element(ontology_element: ontology.OntologyElement) -> bool:
     """Categories or classes that we want to combine with inline text"""
 
-    text_classes = [
-        ontology.NarrativeText,
-        ontology.Quote,
-        ontology.Paragraph,
-        ontology.Footnote,
-        ontology.FootnoteReference,
-        ontology.Citation,
-        ontology.Bibliography,
-        ontology.Glossary,
-    ]
-    text_categories = [ontology.ElementTypeEnum.metadata]
-
-    if any(isinstance(ontology_element, class_) for class_ in text_classes):
+    if isinstance(ontology_element, text_classes):
         return True
 
-    return any(ontology_element.elementType == category for category in text_categories)
+    return ontology_element.elementType in text_categories
 
 
 def is_inline_element(ontology_element: ontology.OntologyElement) -> bool: