Skip to content

Commit edda70b

Browse files
authored
chore: use sample_doc fixture (#442)
use sample_doc fixture

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
1 parent aa5c668 commit edda70b

File tree

1 file changed

+11
-13
lines changed

1 file changed

+11
-13
lines changed

test/test_azure_serializer.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
)
1414

1515
from .test_data_gen_flag import GEN_TEST_DATA
16-
from .test_docling_doc import _construct_doc
1716

1817

1918
def _assert_json_like_equal(a: Any, b: Any, eps: float = 1e-3, path: str = "$") -> None:
@@ -99,25 +98,24 @@ def test_azure_serialize_activities_doc():
9998
_verify_json(exp_file=src.with_suffix(".gt.azure.json"), actual_json=actual_json)
10099

101100

102-
def test_azure_serialize_construct_doc_minimal_prov():
101+
def test_azure_serialize_construct_doc_minimal_prov(sample_doc: DoclingDocument):
103102
"""Serialize a constructed document with minimal provenance to Azure JSON.
104103
105-
The _construct_doc() builder does not attach provenance or pages; here we add a
104+
The sample_doc fixture does not attach provenance or pages; here we add a
106105
single page and minimal bounding boxes to a subset of items to allow Azure JSON
107106
output to include paragraphs/tables/pictures with boundingRegions.
108107
"""
109-
doc = _construct_doc()
110108

111109
# Ensure at least one page is present
112-
if not doc.pages:
113-
doc.add_page(page_no=1, size=Size(width=600.0, height=800.0), image=None)
110+
if not sample_doc.pages:
111+
sample_doc.add_page(page_no=1, size=Size(width=600.0, height=800.0), image=None)
114112

115113
# Helper to add a simple TOPLEFT bbox provenance if missing
116114
def _ensure_prov(item, l=10.0, t=10.0, r=200.0, b=40.0):
117115
if not item.prov:
118116
item.prov = [
119117
ProvenanceItem(
120-
page_no=min(doc.pages.keys()),
118+
page_no=min(sample_doc.pages.keys()),
121119
bbox=BoundingBox(
122120
l=l, t=t, r=r, b=b, coord_origin=CoordOrigin.TOPLEFT
123121
),
@@ -126,7 +124,7 @@ def _ensure_prov(item, l=10.0, t=10.0, r=200.0, b=40.0):
126124
]
127125

128126
# Add provenance for the title and a couple of paragraphs if present
129-
for it in doc.texts[:3]:
127+
for it in sample_doc.texts[:3]:
130128
if it.label in {
131129
DocItemLabel.TITLE,
132130
DocItemLabel.TEXT,
@@ -135,14 +133,14 @@ def _ensure_prov(item, l=10.0, t=10.0, r=200.0, b=40.0):
135133
_ensure_prov(it)
136134

137135
# Add provenance for the first table if present
138-
if doc.tables:
139-
_ensure_prov(doc.tables[0], l=20.0, t=80.0, r=300.0, b=200.0)
136+
if sample_doc.tables:
137+
_ensure_prov(sample_doc.tables[0], l=20.0, t=80.0, r=300.0, b=200.0)
140138

141139
# Add provenance for the first picture if present
142-
if doc.pictures:
143-
_ensure_prov(doc.pictures[0], l=320.0, t=80.0, r=500.0, b=220.0)
140+
if sample_doc.pictures:
141+
_ensure_prov(sample_doc.pictures[0], l=320.0, t=80.0, r=500.0, b=220.0)
144142

145-
ser = AzureDocSerializer(doc=doc, params=AzureParams(indent=2))
143+
ser = AzureDocSerializer(doc=sample_doc, params=AzureParams(indent=2))
146144
actual_json = ser.serialize().text
147145

148146
# Basic structure check

0 commit comments

Comments
 (0)