@@ -227,3 +227,124 @@ async def call_api():
227227 uvloop .install ()
228228 elements = asyncio .run (call_api ())
229229 assert len (elements ) > 0
230+
231+
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize("vlm_model", ["gpt-4o"])
@pytest.mark.parametrize("vlm_model_provider", ["openai"])
@pytest.mark.parametrize(
    "filename",
    ["layout-parser-paper-fast.pdf", "fake-power-point.ppt", "embedded-images-tables.jpg"],
)
def test_partition_strategy_vlm_openai(
    split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename
):
    """Partition a sample document with strategy "vlm" through the "openai" provider.

    Runs once per combination of ``split_pdf`` (True/False) and the three
    sample filenames. ``client`` and ``doc_path`` are not defined in this
    block -- presumably pytest fixtures provided elsewhere (TODO confirm).

    The test passes when the response has status code 200, contains at least
    one element, and the first element's metadata reports the
    "vlm_partition" partitioner type.
    """
    # Read the whole sample up front; the handle is not needed afterwards.
    with open(doc_path / filename, "rb") as sample:
        raw_bytes = sample.read()

    upload = shared.Files(content=raw_bytes, file_name=filename)
    params = shared.PartitionParameters(
        files=upload,
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    result = client.general.partition(
        request=operations.PartitionRequest(partition_parameters=params)
    )

    assert result.status_code == 200
    assert len(result.elements) > 0
    first_element = result.elements[0]
    assert first_element["metadata"]["partitioner_type"] == "vlm_partition"
267+
268+
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize(
    "vlm_model",
    [
        "us.amazon.nova-pro-v1:0",
        "us.amazon.nova-lite-v1:0",
        "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
        "us.anthropic.claude-3-opus-20240229-v1:0",
        "us.anthropic.claude-3-haiku-20240307-v1:0",
        "us.anthropic.claude-3-sonnet-20240229-v1:0",
        "us.meta.llama3-2-90b-instruct-v1:0",
        "us.meta.llama3-2-11b-instruct-v1:0",
    ],
)
@pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
@pytest.mark.parametrize(
    "filename",
    ["layout-parser-paper-fast.pdf", "fake-power-point.ppt", "embedded-images-tables.jpg"],
)
def test_partition_strategy_vlm_bedrock(
    split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename
):
    """Partition a sample document with strategy "vlm" through the "bedrock" provider.

    Runs once per combination of ``split_pdf`` (True/False), each of the
    eight listed model identifiers, and the three sample filenames.
    ``client`` and ``doc_path`` are not defined in this block -- presumably
    pytest fixtures provided elsewhere (TODO confirm).

    The test passes when the response has status code 200, contains at least
    one element, and the first element's metadata reports the
    "vlm_partition" partitioner type.
    """
    # Read the whole sample up front; the handle is not needed afterwards.
    with open(doc_path / filename, "rb") as sample:
        raw_bytes = sample.read()

    upload = shared.Files(content=raw_bytes, file_name=filename)
    params = shared.PartitionParameters(
        files=upload,
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    result = client.general.partition(
        request=operations.PartitionRequest(partition_parameters=params)
    )

    assert result.status_code == 200
    assert len(result.elements) > 0
    first_element = result.elements[0]
    assert first_element["metadata"]["partitioner_type"] == "vlm_partition"
315+
@pytest.mark.parametrize("split_pdf", [True, False])
@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022"])
@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
@pytest.mark.parametrize(
    "filename",
    ["layout-parser-paper-fast.pdf", "fake-power-point.ppt", "embedded-images-tables.jpg"],
)
def test_partition_strategy_vlm_anthropic(
    split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename
):
    """Partition a sample document with strategy "vlm" through the "anthropic" provider.

    Runs once per combination of ``split_pdf`` (True/False) and the three
    sample filenames. ``client`` and ``doc_path`` are not defined in this
    block -- presumably pytest fixtures provided elsewhere (TODO confirm).

    The test passes when the response has status code 200, contains at least
    one element, and the first element's metadata reports the
    "vlm_partition" partitioner type.
    """
    # Read the whole sample up front; the handle is not needed afterwards.
    with open(doc_path / filename, "rb") as sample:
        raw_bytes = sample.read()

    upload = shared.Files(content=raw_bytes, file_name=filename)
    params = shared.PartitionParameters(
        files=upload,
        strategy="vlm",
        vlm_model=vlm_model,
        vlm_model_provider=vlm_model_provider,
        languages=["eng"],
        split_pdf_page=split_pdf,
    )
    result = client.general.partition(
        request=operations.PartitionRequest(partition_parameters=params)
    )

    assert result.status_code == 200
    assert len(result.elements) > 0
    first_element = result.elements[0]
    assert first_element["metadata"]["partitioner_type"] == "vlm_partition"
0 commit comments