@@ -465,7 +465,7 @@ async def imageInference(
465465 if requestImage .providerSettings :
466466 provider_data = requestImage .providerSettings .to_request_dict ()
467467 request_object .update (provider_data )
468-
468+
469469 return await asyncRetry (
470470 lambda : self ._requestImages (
471471 request_object = request_object ,
@@ -513,6 +513,7 @@ async def _requestImages(
513513 "numberResults" : image_remaining ,
514514 }
515515 }
516+
516517 await self .send (new_request_object )
517518
518519 let_lis = await self .listenToImages (
@@ -550,33 +551,69 @@ async def imageCaption(self, requestImageToText: IImageCaption) -> IImageToText:
550551 async def _requestImageToText (
551552 self , requestImageToText : IImageCaption
552553 ) -> IImageToText :
553- inputImage = requestImageToText .inputImage
554-
555- image_uploaded = await self .uploadImage (inputImage )
556-
557- if not image_uploaded or not image_uploaded .imageUUID :
558- return None
554+ # Prepare image list - inputImages is primary, inputImage is convenience
555+ if requestImageToText .inputImages is not None :
556+ images_to_process = requestImageToText .inputImages
557+ elif requestImageToText .inputImage is not None :
558+ # Single image provided via inputImage - convert to array
559+ images_to_process = [requestImageToText .inputImage ]
560+ else :
561+ raise ValueError ("Either inputImages or inputImage must be provided" )
562+
563+ # Resolve a local primary image: the explicit inputImage, else the first entry of the image list (the request object itself is not modified)
564+ actual_input_image = requestImageToText .inputImage
565+ if actual_input_image is None and images_to_process :
566+ actual_input_image = images_to_process [0 ]
567+
568+ # Upload all images
569+ uploaded_images = []
570+ for image in images_to_process :
571+ image_uploaded = await self .uploadImage (image )
572+ if not image_uploaded or not image_uploaded .imageUUID :
573+ return None
574+ uploaded_images .append (image_uploaded .imageUUID )
559575
560576 taskUUID = getUUID ()
561577
562578 # Create a dictionary with mandatory parameters
563579 task_params = {
564580 "taskType" : ETaskType .IMAGE_CAPTION .value ,
565581 "taskUUID" : taskUUID ,
566- "inputImage" : image_uploaded .imageUUID ,
567582 }
583+
584+ # Add either inputImage or inputImages, but not both (API requirement)
585+ if len (uploaded_images ) == 1 :
586+ # Single image - use inputImage parameter
587+ task_params ["inputImage" ] = uploaded_images [0 ]
588+ else :
589+ # Multiple images - use inputImages parameter
590+ task_params ["inputImages" ] = uploaded_images
591+
592+ # Add model parameter only if specified - backend handles default
593+ if requestImageToText .model is not None :
594+ task_params ["model" ] = requestImageToText .model
595+
596+ # Add template parameter if specified
597+ if requestImageToText .template is not None :
598+ task_params ["template" ] = requestImageToText .template
599+ # When using template, do NOT include prompt parameter
600+ else :
601+ # Use the provided prompt when no template
602+ task_params ["prompt" ] = requestImageToText .prompt
568603
569604 # Add optional parameters if they are provided
570605 if requestImageToText .includeCost :
571606 task_params ["includeCost" ] = requestImageToText .includeCost
572607
608+
573609 # Send the task with all applicable parameters
574610 await self .send ([task_params ])
575611
576612 lis = self .globalListener (
577613 taskUUID = taskUUID ,
578614 )
579615
616+
580617 def check (resolve : callable , reject : callable , * args : Any ) -> bool :
581618 response = self ._globalMessages .get (taskUUID )
582619 # TODO: Check why I need a conversion here?
@@ -599,6 +636,7 @@ def check(resolve: callable, reject: callable, *args: Any) -> bool:
599636 check , debugKey = "image-to-text" , timeOutDuration = self ._timeout
600637 )
601638
639+
602640 lis ["destroy" ]()
603641
604642 if "code" in response :
0 commit comments