Skip to content

Commit 4a30e9b

Browse files
authored
add vlm test (#224)
Add `vlm` strategy tests covering the `pdf`, `jpg`, and `ppt` file types. Providers and their models: `openai`: `gpt-4o`; `bedrock`: `us.amazon.nova-pro-v1:0`, `us.amazon.nova-lite-v1:0`, `us.anthropic.claude-3-5-sonnet-20241022-v2:0`, `us.anthropic.claude-3-opus-20240229-v1:0`, `us.anthropic.claude-3-haiku-20240307-v1:0`, `us.anthropic.claude-3-sonnet-20240229-v1:0`, `us.meta.llama3-2-90b-instruct-v1:0`, `us.meta.llama3-2-11b-instruct-v1:0`; `anthropic`: `claude-3-5-sonnet-20241022`.
1 parent 6ee0bbb commit 4a30e9b

File tree

3 files changed

+121
-0
lines changed

3 files changed

+121
-0
lines changed
553 KB
Loading

_sample_docs/fake-power-point.ppt

594 KB
Binary file not shown.

_test_unstructured_client/integration/test_integration.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,3 +227,124 @@ async def call_api():
227227
uvloop.install()
228228
elements = asyncio.run(call_api())
229229
assert len(elements) > 0
230+
231+
232+
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize("vlm_model", ["gpt-4o"])
@pytest.mark.parametrize("vlm_model_provider", ["openai"])
@pytest.mark.parametrize(
    "filename",
    ["layout-parser-paper-fast.pdf", "fake-power-point.ppt", "embedded-images-tables.jpg"],
)
def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
    """Partition a sample document with the ``vlm`` strategy through the OpenAI provider.

    Runs once per combination of ``split_pdf``, ``vlm_model``,
    ``vlm_model_provider`` and ``filename``. ``client`` and ``doc_path`` are
    presumably pytest fixtures defined elsewhere (not visible in this diff).

    The test passes when the call returns HTTP 200, yields at least one
    element, and the first element's metadata reports ``vlm_partition`` as
    its partitioner type.
    """
    # Load the whole sample document into memory before building the request.
    with open(doc_path / filename, "rb") as sample:
        payload = sample.read()

    upload = shared.Files(content=payload, file_name=filename)
    parameters = shared.PartitionParameters(
        files=upload,
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    partition_request = operations.PartitionRequest(partition_parameters=parameters)

    result = client.general.partition(request=partition_request)

    assert result.status_code == 200
    assert len(result.elements) > 0
    first_element = result.elements[0]
    assert first_element["metadata"]["partitioner_type"] == "vlm_partition"
267+
268+
269+
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize(
    "vlm_model",
    [
        "us.amazon.nova-pro-v1:0",
        "us.amazon.nova-lite-v1:0",
        "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
        "us.anthropic.claude-3-opus-20240229-v1:0",
        "us.anthropic.claude-3-haiku-20240307-v1:0",
        "us.anthropic.claude-3-sonnet-20240229-v1:0",
        "us.meta.llama3-2-90b-instruct-v1:0",
        "us.meta.llama3-2-11b-instruct-v1:0",
    ],
)
@pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
@pytest.mark.parametrize(
    "filename",
    ["layout-parser-paper-fast.pdf", "fake-power-point.ppt", "embedded-images-tables.jpg"],
)
def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
    """Partition a sample document with the ``vlm`` strategy through the Bedrock provider.

    Runs once per combination of ``split_pdf``, each listed Bedrock model id,
    and ``filename``. ``client`` and ``doc_path`` are presumably pytest
    fixtures defined elsewhere (not visible in this diff).

    The test passes when the call returns HTTP 200, yields at least one
    element, and the first element's metadata reports ``vlm_partition`` as
    its partitioner type.
    """
    # Load the whole sample document into memory before building the request.
    with open(doc_path / filename, "rb") as sample:
        payload = sample.read()

    upload = shared.Files(content=payload, file_name=filename)
    parameters = shared.PartitionParameters(
        files=upload,
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    partition_request = operations.PartitionRequest(partition_parameters=parameters)

    result = client.general.partition(request=partition_request)

    assert result.status_code == 200
    assert len(result.elements) > 0
    first_element = result.elements[0]
    assert first_element["metadata"]["partitioner_type"] == "vlm_partition"
315+
316+
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022"])
@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
@pytest.mark.parametrize(
    "filename",
    ["layout-parser-paper-fast.pdf", "fake-power-point.ppt", "embedded-images-tables.jpg"],
)
def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
    """Partition a sample document with the ``vlm`` strategy through the Anthropic provider.

    Runs once per combination of ``split_pdf``, ``vlm_model``,
    ``vlm_model_provider`` and ``filename``. ``client`` and ``doc_path`` are
    presumably pytest fixtures defined elsewhere (not visible in this diff).

    The test passes when the call returns HTTP 200, yields at least one
    element, and the first element's metadata reports ``vlm_partition`` as
    its partitioner type.
    """
    # Load the whole sample document into memory before building the request.
    with open(doc_path / filename, "rb") as sample:
        payload = sample.read()

    upload = shared.Files(content=payload, file_name=filename)
    parameters = shared.PartitionParameters(
        files=upload,
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    partition_request = operations.PartitionRequest(partition_parameters=parameters)

    result = client.general.partition(request=partition_request)

    assert result.status_code == 200
    assert len(result.elements) > 0
    first_element = result.elements[0]
    assert first_element["metadata"]["partitioner_type"] == "vlm_partition"

0 commit comments

Comments
 (0)