diff --git a/unstructured/partition/html/transformations.py b/unstructured/partition/html/transformations.py
index 6006cf9905..dd92c758e7 100644
--- a/unstructured/partition/html/transformations.py
+++ b/unstructured/partition/html/transformations.py
@@ -15,6 +15,13 @@
     ONTOLOGY_CLASS_TO_UNSTRUCTURED_ELEMENT_TYPE,
 )
 
+inline_classes = (ontology.Hyperlink,)
+
+inline_categories = (
+    ontology.ElementTypeEnum.specialized_text,
+    ontology.ElementTypeEnum.annotation,
+)
+
 RECURSION_LIMIT = 50
 
 
@@ -207,16 +214,10 @@ def is_text_element(ontology_element: ontology.OntologyElement) -> bool:
 
 def is_inline_element(ontology_element: ontology.OntologyElement) -> bool:
     """Categories or classes that we want to combine with text elements"""
-    inline_classes = [ontology.Hyperlink]
-    inline_categories = [
-        ontology.ElementTypeEnum.specialized_text,
-        ontology.ElementTypeEnum.annotation,
-    ]
-
-    if any(isinstance(ontology_element, class_) for class_ in inline_classes):
+    if isinstance(ontology_element, inline_classes):
         return True
 
-    return any(ontology_element.elementType == category for category in inline_categories)
+    return ontology_element.elementType in inline_categories
 
 
 def unstructured_elements_to_ontology(