Skip to content

Commit 4e750f9

Browse files
authored
Merge pull request #152 from Runware/feature-bytedanceSupport-imageCaptionUpdates
Add ByteDance support and update IImageCaption support
2 parents 775b1df + be45a61 commit 4e750f9

3 files changed

Lines changed: 57 additions & 10 deletions

File tree

runware/base.py

Lines changed: 46 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -465,7 +465,7 @@ async def imageInference(
465465
if requestImage.providerSettings:
466466
provider_data = requestImage.providerSettings.to_request_dict()
467467
request_object.update(provider_data)
468-
468+
469469
return await asyncRetry(
470470
lambda: self._requestImages(
471471
request_object=request_object,
@@ -513,6 +513,7 @@ async def _requestImages(
513513
"numberResults": image_remaining,
514514
}
515515
}
516+
516517
await self.send(new_request_object)
517518

518519
let_lis = await self.listenToImages(
@@ -550,33 +551,69 @@ async def imageCaption(self, requestImageToText: IImageCaption) -> IImageToText:
550551
async def _requestImageToText(
551552
self, requestImageToText: IImageCaption
552553
) -> IImageToText:
553-
inputImage = requestImageToText.inputImage
554-
555-
image_uploaded = await self.uploadImage(inputImage)
556-
557-
if not image_uploaded or not image_uploaded.imageUUID:
558-
return None
554+
# Prepare image list - inputImages is primary, inputImage is convenience
555+
if requestImageToText.inputImages is not None:
556+
images_to_process = requestImageToText.inputImages
557+
elif requestImageToText.inputImage is not None:
558+
# Single image provided via inputImage - convert to array
559+
images_to_process = [requestImageToText.inputImage]
560+
else:
561+
raise ValueError("Either inputImages or inputImage must be provided")
562+
563+
# Set inputImage to inputImages[0] if not already provided
564+
actual_input_image = requestImageToText.inputImage
565+
if actual_input_image is None and images_to_process:
566+
actual_input_image = images_to_process[0]
567+
568+
# Upload all images
569+
uploaded_images = []
570+
for image in images_to_process:
571+
image_uploaded = await self.uploadImage(image)
572+
if not image_uploaded or not image_uploaded.imageUUID:
573+
return None
574+
uploaded_images.append(image_uploaded.imageUUID)
559575

560576
taskUUID = getUUID()
561577

562578
# Create a dictionary with mandatory parameters
563579
task_params = {
564580
"taskType": ETaskType.IMAGE_CAPTION.value,
565581
"taskUUID": taskUUID,
566-
"inputImage": image_uploaded.imageUUID,
567582
}
583+
584+
# Add either inputImage or inputImages, but not both (API requirement)
585+
if len(uploaded_images) == 1:
586+
# Single image - use inputImage parameter
587+
task_params["inputImage"] = uploaded_images[0]
588+
else:
589+
# Multiple images - use inputImages parameter
590+
task_params["inputImages"] = uploaded_images
591+
592+
# Add model parameter only if specified - backend handles default
593+
if requestImageToText.model is not None:
594+
task_params["model"] = requestImageToText.model
595+
596+
# Add template parameter if specified
597+
if requestImageToText.template is not None:
598+
task_params["template"] = requestImageToText.template
599+
# When using template, do NOT include prompt parameter
600+
else:
601+
# Use the provided prompt when no template
602+
task_params["prompt"] = requestImageToText.prompt
568603

569604
# Add optional parameters if they are provided
570605
if requestImageToText.includeCost:
571606
task_params["includeCost"] = requestImageToText.includeCost
572607

608+
573609
# Send the task with all applicable parameters
574610
await self.send([task_params])
575611

576612
lis = self.globalListener(
577613
taskUUID=taskUUID,
578614
)
579615

616+
580617
def check(resolve: callable, reject: callable, *args: Any) -> bool:
581618
response = self._globalMessages.get(taskUUID)
582619
# TODO: Check why I need a conversion here?
@@ -599,6 +636,7 @@ def check(resolve: callable, reject: callable, *args: Any) -> bool:
599636
check, debugKey="image-to-text", timeOutDuration=self._timeout
600637
)
601638

639+
602640
lis["destroy"]()
603641

604642
if "code" in response:

runware/types.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,8 +473,12 @@ class IImageInference:
473473

474474
@dataclass
475475
class IImageCaption:
476-
inputImage: Optional[Union[File, str]] = None
476+
inputImages: Optional[List[Union[File, str]]] = None # Primary: array of images (UUIDs, URLs, base64, dataURI)
477+
inputImage: Optional[Union[File, str]] = None # Convenience: single image, defaults to inputImages[0] if not provided
478+
prompt: List[str] = field(default_factory=lambda: ["Describe this image in detail"]) # Array of prompts with default
479+
model: Optional[str] = None # Optional: AIR ID (runware:150@1, runware:150@2) - backend handles default
477480
includeCost: bool = False
481+
template: Optional[str] = None
478482

479483

480484
@dataclass
@@ -672,6 +676,7 @@ def provider_key(self) -> str:
672676
@dataclass
673677
class IBytedanceProviderSettings(BaseProviderSettings):
674678
cameraFixed: Optional[bool] = None
679+
maxSequentialImages: Optional[int] = None # Min: 1, Max: 15 - Maximum number of sequential images to generate
675680

676681
@property
677682
def provider_key(self) -> str:

runware/utils.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -654,7 +654,11 @@ def safe_reject(error):
654654
nonlocal iteration_resolved, iteration_error
655655
if not iteration_resolved:
656656
iteration_resolved = True
657-
iteration_error = error
657+
# Ensure error is a proper exception; fixes "TypeError: exceptions must derive from BaseException"
658+
if isinstance(error, BaseException):
659+
iteration_error = error
660+
else:
661+
iteration_error = Exception(str(error))
658662

659663
try:
660664
callback_returned = callback(safe_resolve, safe_reject, interval_handle)

0 commit comments

Comments
 (0)