diff --git a/README.md b/README.md index 21fac2b..54fa4e4 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Main utility function that creates a complete story with title and image. - `title`: Generated story title - `content`: The story content - `temperature`: Temperature value used for generation -- `image`: URL to the generated image (Note: this link expires, so download it) +- `image`: Image data URL for the generated image (base64 for GPT image models) **Example:** ```typescript @@ -86,13 +86,13 @@ Generates a title for the story. #### `story.generateImage(size?, model?)` -Generates an image for the story using DALL-E. +Generates an image for the story using OpenAI's GPT image models. **Parameters:** -- `size` (ImageSize, optional): Image size. Defaults to `"1024x1024"`. Options: `'256x256' | '512x512' | '1024x1024' | '1792x1024' | '1024x1792'` -- `model` (Model, optional): DALL-E model. Defaults to `"dall-e-3"`. Options: `'dall-e-2' | 'dall-e-3'` +- `size` (ImageSize, optional): Image size. Defaults to `"1024x1024"`. Options: `'1024x1024' | '1536x1024' | '1024x1536'` +- `model` (Model, optional): GPT image model. Defaults to `"gpt-image-1-mini"` -**Returns:** `Promise` - URL to the generated image +**Returns:** `Promise` - PNG data URL to the generated image ### `verifyPrompt(prompt, openai, chatModel?)` @@ -123,7 +123,7 @@ if (result.validStory) { ### `ImageGenerator` Class -Class for generating images using OpenAI's DALL-E. +Class for generating images using OpenAI's GPT image models. #### Constructor: `new ImageGenerator(openai, logger)` @@ -137,10 +137,10 @@ Generates a single image from a text prompt. **Parameters:** - `prompt` (string): The text prompt describing the image -- `size` (ImageSize, optional): Image size. Defaults to `"512x512"` -- `model` (Model, optional): DALL-E model. Defaults to `"dall-e-3"` +- `size` (ImageSize, optional): Image size. Defaults to `"1024x1024"` +- `model` (Model, optional): GPT image model. 
Defaults to `"gpt-image-1-mini"` -**Returns:** `Promise` - URL to the generated image +**Returns:** `Promise` - PNG data URL to the generated image **Example:** ```typescript @@ -149,7 +149,7 @@ import { OpenAI } from "openai"; const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); const imageGen = new ImageGenerator(openai, console); -const imageUrl = await imageGen.generateImage("A sunset over mountains", "1024x1024", "dall-e-3"); +const imageUrl = await imageGen.generateImage("A sunset over mountains", "1024x1024", "gpt-image-1-mini"); ``` #### `imageGenerator.generateImages(prompt, numberOfImages, size?, model?)` @@ -159,10 +159,10 @@ Generates multiple images from a text prompt. **Parameters:** - `prompt` (string): The text prompt describing the images - `numberOfImages` (number): Number of images to generate (1-5) -- `size` (ImageSize, optional): Image size. Defaults to `"512x512"` -- `model` (Model, optional): DALL-E model. Defaults to `"dall-e-3"` +- `size` (ImageSize, optional): Image size. Defaults to `"1024x1024"` +- `model` (Model, optional): GPT image model. Defaults to `"gpt-image-1-mini"` -**Returns:** `Promise` - Array of URLs to the generated images +**Returns:** `Promise` - Array of PNG data URLs to the generated images ### `ChatAssistant` Class @@ -202,7 +202,7 @@ console.log("Argentina size is:", followUp.answer.content); ## Important Notes - **Temperature with gpt-5-mini**: The `gpt-5-mini` model only supports the default temperature value (1). When using this model, the library automatically omits the temperature parameter to avoid API errors. -- **Image URLs**: Generated image URLs are temporary and will expire. Make sure to download the images if you need to persist them. +- **Image data URLs**: GPT image models return base64 data URLs. If you need a persistent file, decode and save the image data. - **API Key**: You need a valid OpenAI API key to use this library. 
Set it as an environment variable or pass it directly to the OpenAI client. ## Development diff --git a/e2e-test.mjs b/e2e-test.mjs index f1068f6..34b5bb3 100644 --- a/e2e-test.mjs +++ b/e2e-test.mjs @@ -116,11 +116,16 @@ async function testImageGeneration() { const imageUrl = await imageGen.generateImage( "A simple illustration of a sunset over mountains", "1024x1024", - "dall-e-3" + "gpt-image-1-mini" ); - - if (!imageUrl || typeof imageUrl !== "string" || !imageUrl.startsWith("http")) { - throw new Error("Invalid image URL"); + + // Check the type before calling string methods: startsWith on a non-string would throw a TypeError and mask the validation error below. + const isString = typeof imageUrl === "string"; + const isHttpUrl = isString && imageUrl.startsWith("http"); + const isDataUrl = isString && imageUrl.startsWith("data:image/"); + + if (!isHttpUrl && !isDataUrl) { + throw new Error("Invalid image URL or data URL"); } console.log(`✅ ImageGenerator.generateImage passed`); diff --git a/package-lock.json b/package-lock.json index 8e320aa..b61dcd1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "0.0.0-updated-via-gh-releases", "license": "MIT", "dependencies": { - "openai": "^6.6.0" + "openai": "^6.37.0" }, "devDependencies": { "@eslint/js": "^9.2.0", @@ -1224,9 +1224,9 @@ } }, "node_modules/openai": { - "version": "6.6.0", - "resolved": "https://registry.npmjs.org/openai/-/openai-6.6.0.tgz", - "integrity": "sha512-1yWk4cBsHF5Bq9TreHYOHY7pbqdlT74COnm8vPx7WKn36StS+Hyk8DdAitnLaw67a5Cudkz5EmlFQjSrNnrA2w==", + "version": "6.37.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.37.0.tgz", + "integrity": "sha512-0H5dEGFmmLv6KSd0W1w2nyL8WsLkX6yoLeQpU+dZAOuGcany5qkYQMmj35ZrKgb6yiyYqpUzFOpR8mZQkgqeEQ==", "license": "Apache-2.0", "bin": { "openai": "bin/cli" diff --git a/package.json b/package.json index 0e01261..3541d7f 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,6 @@ "typescript-eslint": "^7.9.0" }, "dependencies": { - "openai": "^6.6.0" + "openai": "^6.37.0" } } diff --git a/src/image.ts b/src/image.ts index 58557e8..a28b2f9 100644 --- a/src/image.ts +++ 
b/src/image.ts @@ -1,11 +1,12 @@ import type { OpenAI } from "openai"; +import type { ImageModel } from "openai/resources/images"; import type { ILogger } from "./types"; -export type ImageSize = '256x256' | '512x512' | '1024x1024' | '1792x1024' | '1024x1792' -export type Model = 'dall-e-2' | 'dall-e-3'; +export type ImageSize = '1024x1024' | '1536x1024' | '1024x1536'; +export type Model = Extract; /** - * Class used to generate images using OpenAI's DALL-E + * Class used to generate images using OpenAI's GPT image models */ export class ImageGenerator { /** @@ -19,11 +20,11 @@ export class ImageGenerator { /** * Generates a single image from a text prompt * @param prompt The text prompt describing the image to generate - * @param size Optional, the size of the image. Defaults to "512x512" - * @param model Optional, the DALL-E model to use. Defaults to "dall-e-3" - * @returns A Promise that resolves to the URL of the generated image + * @param size Optional, the size of the image. Defaults to "1024x1024" + * @param model Optional, the GPT image model to use. Defaults to "gpt-image-1-mini" + * @returns A Promise that resolves to the PNG data URL of the generated image */ - public async generateImage(prompt: string, size: ImageSize = "512x512", model: Model = "dall-e-3"): Promise { + public async generateImage(prompt: string, size: ImageSize = "1024x1024", model: Model = "gpt-image-1-mini"): Promise { const image = await this.generateImages(prompt, 1, size, model); return image[0]!; } @@ -32,19 +33,22 @@ export class ImageGenerator { * Generates multiple images from a text prompt * @param prompt The text prompt describing the images to generate * @param numberOfImages The number of images to generate (1-5) - * @param size Optional, the size of the images. Defaults to "512x512" - * @param model Optional, the DALL-E model to use. 
Defaults to "dall-e-3" - * @returns A Promise that resolves to an array of URLs for the generated images + * @param size Optional, the size of the images. Defaults to "1024x1024" + * @param model Optional, the GPT image model to use. Defaults to "gpt-image-1-mini" + * @returns A Promise that resolves to an array of PNG data URLs for the generated images */ - public async generateImages(prompt: string, numberOfImages: 1 | 2 | 3 | 4 | 5, size: ImageSize = "512x512", model: Model = "dall-e-3"): Promise { + public async generateImages(prompt: string, numberOfImages: 1 | 2 | 3 | 4 | 5, size: ImageSize = "1024x1024", model: Model = "gpt-image-1-mini"): Promise { + const outputFormat = "png" as const; const response = await this.openai.images.generate({ model, prompt, n: numberOfImages, size, + output_format: outputFormat, }); const { data } = response; + const mimeType = `image/${outputFormat}`; this.logger.log("Got image!", data); @@ -55,11 +59,13 @@ export class ImageGenerator { const imageUrls = new Array(numberOfImages); for (let i = 0; i < data.length; i++) { - const url = data[i]?.url; - if (!url) { - throw new Error("Image URL is null"); + const imageData = data[i]; + const base64 = imageData?.b64_json; + if (!base64) { + throw new Error("Image data is missing"); } - imageUrls[i] = url; + + imageUrls[i] = `data:${mimeType};base64,${base64}`; } return imageUrls; diff --git a/src/index.ts b/src/index.ts index 913c89b..920dbdd 100644 --- a/src/index.ts +++ b/src/index.ts @@ -12,8 +12,8 @@ export type StoryPayload = { content: string; /** Temperature used to generate the story */ temperature: number; - /** URL for the story image. - * This link expires so be sure to download it */ + /** Image data URL for the story image. + * For GPT image models, this is a base64 data URL. 
*/ image: string; } diff --git a/src/story.ts b/src/story.ts index 74fca3d..b87c077 100644 --- a/src/story.ts +++ b/src/story.ts @@ -82,14 +82,14 @@ export class Story { } /** - * Generates an image for the story using DALL-E + * Generates an image for the story using OpenAI's GPT image models * @param size Optional, the size of the image. Defaults to "1024x1024" - * @param model Optional, the DALL-E model to use. Defaults to "dall-e-3" - * @returns A Promise that resolves to the URL of the generated image + * @param model Optional, the GPT image model to use. Defaults to "gpt-image-1-mini" + * @returns A Promise that resolves to the data URL of the generated image */ - async generateImage(size: ImageSize = "1024x1024", model: Model = "dall-e-3"): Promise { + async generateImage(size: ImageSize = "1024x1024", model: Model = "gpt-image-1-mini"): Promise { this.logger.log("Generating image prompts"); - const imgPrompt = "Based on the previous story, write a prompt for an image generation service Dall-E. " + + const imgPrompt = "Based on the previous story, write a prompt for an OpenAI image generation model. " + "Keep the prompt detailed and tell the system to use a particular art style referring to a particular artist/painter. " + "Make the prompt be less than 400 characters. " + "Respond only with the prompt. No other text is needed.";