1313)
1414
1515from .test_data_gen_flag import GEN_TEST_DATA
16- from .test_docling_doc import _construct_doc
1716
1817
1918def _assert_json_like_equal (a : Any , b : Any , eps : float = 1e-3 , path : str = "$" ) -> None :
@@ -99,25 +98,24 @@ def test_azure_serialize_activities_doc():
9998 _verify_json (exp_file = src .with_suffix (".gt.azure.json" ), actual_json = actual_json )
10099
101100
102- def test_azure_serialize_construct_doc_minimal_prov ():
101+ def test_azure_serialize_construct_doc_minimal_prov (sample_doc : DoclingDocument ):
103102 """Serialize a constructed document with minimal provenance to Azure JSON.
104103
105- The _construct_doc() builder does not attach provenance or pages; here we add a
104+ The sample_doc fixture does not attach provenance or pages; here we add a
106105 single page and minimal bounding boxes to a subset of items to allow Azure JSON
107106 output to include paragraphs/tables/pictures with boundingRegions.
108107 """
109- doc = _construct_doc ()
110108
111109 # Ensure at least one page is present
112- if not doc .pages :
113- doc .add_page (page_no = 1 , size = Size (width = 600.0 , height = 800.0 ), image = None )
110+ if not sample_doc .pages :
111+ sample_doc .add_page (page_no = 1 , size = Size (width = 600.0 , height = 800.0 ), image = None )
114112
115113 # Helper to add a simple TOPLEFT bbox provenance if missing
116114 def _ensure_prov (item , l = 10.0 , t = 10.0 , r = 200.0 , b = 40.0 ):
117115 if not item .prov :
118116 item .prov = [
119117 ProvenanceItem (
120- page_no = min (doc .pages .keys ()),
118+ page_no = min (sample_doc .pages .keys ()),
121119 bbox = BoundingBox (
122120 l = l , t = t , r = r , b = b , coord_origin = CoordOrigin .TOPLEFT
123121 ),
@@ -126,7 +124,7 @@ def _ensure_prov(item, l=10.0, t=10.0, r=200.0, b=40.0):
126124 ]
127125
128126 # Add provenance for the title and a couple of paragraphs if present
129- for it in doc .texts [:3 ]:
127+ for it in sample_doc .texts [:3 ]:
130128 if it .label in {
131129 DocItemLabel .TITLE ,
132130 DocItemLabel .TEXT ,
@@ -135,14 +133,14 @@ def _ensure_prov(item, l=10.0, t=10.0, r=200.0, b=40.0):
135133 _ensure_prov (it )
136134
137135 # Add provenance for the first table if present
138- if doc .tables :
139- _ensure_prov (doc .tables [0 ], l = 20.0 , t = 80.0 , r = 300.0 , b = 200.0 )
136+ if sample_doc .tables :
137+ _ensure_prov (sample_doc .tables [0 ], l = 20.0 , t = 80.0 , r = 300.0 , b = 200.0 )
140138
141139 # Add provenance for the first picture if present
142- if doc .pictures :
143- _ensure_prov (doc .pictures [0 ], l = 320.0 , t = 80.0 , r = 500.0 , b = 220.0 )
140+ if sample_doc .pictures :
141+ _ensure_prov (sample_doc .pictures [0 ], l = 320.0 , t = 80.0 , r = 500.0 , b = 220.0 )
144142
145- ser = AzureDocSerializer (doc = doc , params = AzureParams (indent = 2 ))
143+ ser = AzureDocSerializer (doc = sample_doc , params = AzureParams (indent = 2 ))
146144 actual_json = ser .serialize ().text
147145
148146 # Basic structure check
0 commit comments