diff --git a/CodenameOne/src/com/codename1/ai/AnthropicClient.java b/CodenameOne/src/com/codename1/ai/AnthropicClient.java index f01735444c..c28584f777 100644 --- a/CodenameOne/src/com/codename1/ai/AnthropicClient.java +++ b/CodenameOne/src/com/codename1/ai/AnthropicClient.java @@ -24,22 +24,25 @@ import com.codename1.util.AsyncResource; -/// Anthropic /v1/messages client. Wire format differs from OpenAI in -/// three important ways: system messages live in a top-level `system` -/// string rather than a role; image parts use `{type:"image", source: -/// {type:"base64", media_type, data}}`; tool calls stream argument -/// JSON via `input_json_delta` events. +/// Anthropic Messages client. /// -/// This is currently a scaffold -- the full request/response mapping -/// is tracked as a follow-up. The class compiles and registers under -/// `LlmClient.anthropic(...)` so app code using the API can be built; -/// runtime calls throw a clear `UnsupportedOperationException`. -class AnthropicClient extends LlmClient { - private final String apiKey; +/// Talks to `https://api.anthropic.com/v1/chat/completions`, the +/// official OpenAI-compatible Messages shim. The wire format on that +/// endpoint is the same `/v1/chat/completions` shape that OpenAI and +/// every OpenAI-compatible provider speaks, so this client inherits the +/// full streaming, tool-call, response-format and image-attachment +/// implementation from `OpenAiClient`. Only the provider name, default +/// model, and the embeddings shape (Anthropic does not publish a +/// first-party embeddings endpoint) are overridden here. +/// +/// Authentication uses `Authorization: Bearer sk-ant-...` -- identical +/// header layout to OpenAI -- which is why the inherited request +/// configuration works without modification. 
+class AnthropicClient extends OpenAiClient { AnthropicClient(String apiKey, String baseUrl) { - super(baseUrl); - this.apiKey = apiKey; + super(apiKey, baseUrl); + setDefaultModel("claude-sonnet-4-5"); } @Override @@ -47,30 +50,14 @@ public String getProvider() { return "anthropic"; } - @Override - public AsyncResource chat(ChatRequest req) { - AsyncResource r = new AsyncResource(); - r.error(new UnsupportedOperationException( - "AnthropicClient is not yet implemented in this release. " - + "Use LlmClient.openai(...) or run the model behind an OpenAI-compatible proxy.")); - return r; - } - - @Override - public AsyncResource chatStream(ChatRequest req, StreamingListener listener) { - return chat(req); - } - @Override public AsyncResource embed(EmbeddingRequest req) { AsyncResource r = new AsyncResource(); r.error(new UnsupportedOperationException( "Anthropic does not publish a first-party embeddings endpoint. " - + "Use a Voyage AI key via LlmClient.localOpenAiCompatible(\"https://api.voyageai.com/v1\", key, model).")); + + "Use a Voyage AI key via LlmClient.localOpenAiCompatible(" + + "\"https://api.voyageai.com/v1\", key, \"voyage-3\") or " + + "LlmClient.openai(...) with text-embedding-3-small.")); return r; } - - String getApiKey() { - return apiKey; - } } diff --git a/CodenameOne/src/com/codename1/ai/GeminiClient.java b/CodenameOne/src/com/codename1/ai/GeminiClient.java index 930aad4f8c..87c823b745 100644 --- a/CodenameOne/src/com/codename1/ai/GeminiClient.java +++ b/CodenameOne/src/com/codename1/ai/GeminiClient.java @@ -22,56 +22,29 @@ */ package com.codename1.ai; -import com.codename1.util.AsyncResource; - -/// Google Gemini client. The native wire format diverges from OpenAI's: -/// system messages live in `systemInstruction`, content is split into -/// `parts` with `inline_data` / `text`, tool calls arrive atomically -/// at stream end rather than fragment-by-fragment. +/// Google Gemini client. 
+/// +/// Talks to `https://generativelanguage.googleapis.com/v1beta/openai/`, +/// Google's official OpenAI-compatible shim. The endpoint accepts the +/// standard `/chat/completions` and `/embeddings` shape -- including +/// streaming, tool calls, multi-modal image parts, and structured +/// JSON output -- so this client inherits the full +/// implementation from `OpenAiClient` and only overrides the provider +/// name and default model. /// -/// Google publishes an OpenAI-compatibility endpoint at -/// `https://generativelanguage.googleapis.com/v1beta/openai/` that -/// works with [LlmClient#localOpenAiCompatible] today; this dedicated -/// client (which handles the native shape end-to-end) is a follow-up. -class GeminiClient extends LlmClient { - private final String apiKey; +/// Authentication uses `Authorization: Bearer <api-key>`, +/// identical to the OpenAI header layout. Models are addressed by +/// their public identifiers (`gemini-2.0-flash`, `gemini-2.5-pro`, +/// `gemini-2.5-flash`, ...). +class GeminiClient extends OpenAiClient { GeminiClient(String apiKey, String baseUrl) { - super(baseUrl); - this.apiKey = apiKey; + super(apiKey, baseUrl); + setDefaultModel("gemini-2.0-flash"); } @Override public String getProvider() { return "gemini"; } - - @Override - public AsyncResource chat(ChatRequest req) { - AsyncResource r = new AsyncResource(); - r.error(new UnsupportedOperationException( - "GeminiClient (native) is not yet implemented in this release. 
" - + "Use LlmClient.localOpenAiCompatible(" - + "\"https://generativelanguage.googleapis.com/v1beta/openai\", apiKey, model) " - + "to reach Gemini through Google's OpenAI-compatible shim.")); - return r; - } - - @Override - public AsyncResource chatStream(ChatRequest req, StreamingListener listener) { - return chat(req); - } - - @Override - public AsyncResource embed(EmbeddingRequest req) { - AsyncResource r = new AsyncResource(); - r.error(new UnsupportedOperationException( - "GeminiClient.embed is not yet implemented. Use the OpenAI-compatible shim " - + "or LlmClient.openai(...) with text-embedding-3-small.")); - return r; - } - - String getApiKey() { - return apiKey; - } } diff --git a/CodenameOne/src/com/codename1/ai/LlmClient.java b/CodenameOne/src/com/codename1/ai/LlmClient.java index 73333c86c3..fea905929a 100644 --- a/CodenameOne/src/com/codename1/ai/LlmClient.java +++ b/CodenameOne/src/com/codename1/ai/LlmClient.java @@ -58,15 +58,16 @@ public abstract class LlmClient { // uses. // // Versions reflect the providers' production REST shapes as of - // mid-2026: - // - OpenAI Chat Completions -- /v1 (stable) - // - Anthropic Messages -- /v1 (stable) - // - Google Gemini (native) -- /v1beta (only path - // that exposes streaming generateContent and tool calls today) - // - Ollama OpenAI-compat shim -- /v1 + // mid-2026. Anthropic and Gemini both publish an OpenAI-compatible + // `/chat/completions` endpoint, which is what this client targets + // -- one shared wire format, three providers. 
+ // - OpenAI Chat Completions -- /v1 + // - Anthropic Messages compat -- /v1 + // - Google Gemini OpenAI compat -- /v1beta/openai + // - Ollama OpenAI-compat shim -- /v1 public static final String DEFAULT_OPENAI_URL = "https://api.openai.com/v1"; public static final String DEFAULT_ANTHROPIC_URL = "https://api.anthropic.com/v1"; - public static final String DEFAULT_GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta"; + public static final String DEFAULT_GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/openai"; public static final String DEFAULT_OLLAMA_URL = "http://localhost:11434/v1"; private String baseUrl; diff --git a/docs/developer-guide/Ai-And-Speech.asciidoc b/docs/developer-guide/Ai-And-Speech.asciidoc new file mode 100644 index 0000000000..3aa719bc89 --- /dev/null +++ b/docs/developer-guide/Ai-And-Speech.asciidoc @@ -0,0 +1,610 @@ +== AI, Chat UI, and Speech + +[[ai-and-speech-section,AI And Speech Section]] +Codename One ships a portable LLM client, a streaming chat component, +speech-to-text and text-to-speech APIs, and a family of cn1lib bridges +that wire ML Kit into iOS and Android builds. All the public surface +lives next to the rest of the framework: the simulator, the cloud +builder, and your CI pipeline all run the same code. + +This chapter introduces every piece in turn: + +* `com.codename1.ai` -- provider-agnostic chat, embeddings, tool calls, + and image generation. +* `com.codename1.components.ChatView` -- a theme-aware chat surface that + streams tokens in place. +* `com.codename1.media.SpeechRecognizer` and + `com.codename1.media.TextToSpeech` -- on-device speech-to-text and + speech synthesis. +* `com.codename1.security.SecureStorage` non-prompting overloads -- + silent reads for secrets the network layer needs on every call, such + as LLM API keys. +* The `cn1-ai-mlkit-*` cn1libs -- ML Kit barcode, document, and face + detection bridges with the native build-time scanner already wired up. 
+ +NOTE: Each async call in this chapter returns an +`com.codename1.util.AsyncResource`. Use `.ready(...)` for the success +path, `.except(...)` for errors, and `.cancel()` to abort an in-flight +request. The streaming chat call also dispatches deltas through a +`StreamingListener`; both listener callbacks and `ready` callbacks fire +on the EDT. + +=== The `com.codename1.ai` package + +The `LlmClient` class is the single entry point. Static factories return +a configured client for each supported provider. Every client speaks the +same `ChatRequest` and `ChatResponse` value types, so the call site does +not change when you swap providers: + +[source,java] +---- +import com.codename1.ai.*; + +// OpenAI (also drives Ollama, vLLM, llama.cpp, and other +// OpenAI-compatible endpoints). +LlmClient openai = LlmClient.openai(apiKey); + +// Local Ollama on the default port (http://localhost:11434). +LlmClient ollama = LlmClient.ollama("llama3.2"); + +// Any OpenAI-compatible endpoint (Together, Groq, Fireworks, vLLM, ...). +LlmClient together = LlmClient.localOpenAiCompatible( + "https://api.together.xyz/v1", + apiKey, + "meta-llama/Llama-3.3-70B-Instruct-Turbo"); + +// Anthropic and Google Gemini, both via their OpenAI-compatible +// endpoints. The wire format is identical; only the base URL, +// default model, and auth differ. +LlmClient anthropic = LlmClient.anthropic(apiKey); +LlmClient gemini = LlmClient.gemini(apiKey); +---- + +All five factories return the same `LlmClient` API. OpenAI, Ollama, +vLLM, and `llama.cpp` share the canonical wire format. Anthropic's +`https://api.anthropic.com/v1/chat/completions` and Google's +`https://generativelanguage.googleapis.com/v1beta/openai/chat/completions` +implement the same shape, so the framework speaks to all three through +one network layer. 
Default models pick a sensible production-grade +target per provider (`gpt-4o-mini`, `claude-sonnet-4-5`, +`gemini-2.0-flash`, `llama3.2`); override per request with +`ChatRequest.builder().model(...)`. + +==== A first chat + +`ChatRequest` is a builder. The bare minimum is a model identifier and +one user message: + +[source,java] +---- +ChatRequest req = ChatRequest.builder() + .model("gpt-4o-mini") + .addMessage(ChatMessage.system("Reply in haiku.")) + .addMessage(ChatMessage.user("Describe a Codename One app.")) + .temperature(0.7f) + .maxTokens(200) + .build(); + +openai.chat(req).ready(resp -> { + Dialog.show("Reply", resp.getText(), "OK", null); +}).except(err -> { + Log.e(err); +}); +---- + +`ChatResponse.getText()` concatenates every text part of the assistant +message. `getFinishReason()` returns one of `"stop"`, `"length"`, +`"tool_calls"`, `"content_filter"`, or `"error"`. `getUsage()` returns +prompt, completion, and total token counts when the provider reports +them; the fields return -1 when the provider omits them. + +==== Streaming tokens + +`chatStream(...)` opens an SSE connection and dispatches deltas through +a `StreamingListener` on the EDT. The returned `AsyncResource` resolves +to the final aggregated `ChatResponse` when the stream completes; +calling `cancel()` on the resource closes the underlying socket: + +[source,java] +---- +StringBuilder buffer = new StringBuilder(); + +openai.chatStream(req, new StreamingListener.Adapter() { + @Override + public void onContentDelta(String delta) { + buffer.append(delta); + chatView.appendToLastMessage(delta); + } + + @Override + public void onError(Throwable t) { + Log.e(t); + } +}).ready(resp -> { + Log.p("Total tokens: " + resp.getUsage().getTotalTokens()); +}); +---- + +The decoder reassembles fragmented SSE deltas before invoking the +listener, so `onContentDelta` always receives a complete token chunk. 
+Tool-call fragments are reassembled the same way and surfaced through +`onToolCallDelta(index, id, name, argumentsFragment)`; `name` is non-null +on the first fragment and `id` is present on the first call. + +==== Tool calling + +`Tool` describes a callable function with a JSON schema. Pass a +`ToolHandler` to make it executable; the model can then call the tool +and the handler returns the JSON result that gets fed back into the +conversation: + +[source,java] +---- +Tool weather = new Tool( + "get_weather", + "Return the current weather for a city.", + "{\"type\":\"object\",\"properties\":{" + + "\"city\":{\"type\":\"string\"}}," + + "\"required\":[\"city\"]}", + argumentsJson -> { + Map args = JsonHelper.parseObject(argumentsJson); + return "{\"tempC\": 22, \"city\": \"" + args.get("city") + "\"}"; + }); + +ChatRequest req = ChatRequest.builder() + .model("gpt-4o-mini") + .addMessage(ChatMessage.user("What is the weather in Tel Aviv?")) + .tools(Collections.singletonList(weather)) + .toolChoice(ToolChoice.AUTO) + .build(); + +openai.chat(req).ready(resp -> { + for (ToolCall call : resp.getToolCalls()) { + String result = call.execute(Collections.singletonList(weather)); + // Feed the tool result back as a new turn and call chat() again. + } +}); +---- + +`ToolChoice` covers the four standard modes: + +* `ToolChoice.AUTO` -- the model picks at will (default). +* `ToolChoice.NONE` -- the model must not call any tool. +* `ToolChoice.REQUIRED` -- the model must call at least one tool. +* `ToolChoice.named("get_weather")` -- force a specific tool. + +==== Structured output + +Constrain a response to JSON with `ResponseFormat.JSON_OBJECT`. 
The +client adds the provider-specific flag, and `ChatResponse.getText()` +returns a string the runtime can hand to `JSONParser`: + +[source,java] +---- +ChatRequest req = ChatRequest.builder() + .model("gpt-4o-mini") + .responseFormat(ResponseFormat.JSON_OBJECT) + .addMessage(ChatMessage.system( + "Return a JSON object with keys city and population.")) + .addMessage(ChatMessage.user("Tel Aviv")) + .build(); +---- + +==== Multi-modal messages + +Attach an image to a user message by adding an `ImagePart`. The part +accepts either inline bytes plus a MIME type or a remote HTTPS URL: + +[source,java] +---- +ImagePart photo = new ImagePart(receiptBytes, "image/jpeg"); +ChatMessage msg = ChatMessage.userWithImage( + "Extract the line items from this receipt.", photo); + +ChatRequest req = ChatRequest.builder() + .model("gpt-4o") + .addMessage(msg) + .build(); +---- + +==== Embeddings + +`embed(EmbeddingRequest)` returns one `Embedding` vector per input. Use +the vectors for semantic search, deduplication, or downstream +clustering: + +[source,java] +---- +EmbeddingRequest req = EmbeddingRequest.builder() + .model("text-embedding-3-small") + .addInput("a cat sat on the mat") + .addInput("a feline rested on the rug") + .build(); + +openai.embed(req).ready(resp -> { + float[] v0 = resp.getData().get(0).getVector(); + float[] v1 = resp.getData().get(1).getVector(); + // Compute cosine similarity, persist to Storage, etc. +}); +---- + +==== Image generation + +`ImageGenerator` mirrors the LLM client shape. 
The OpenAI factory drives +DALL-E 3 today; the on-device factory routes through the optional +`cn1-ai-stablediffusion` cn1lib (when present in the consumer project): + +[source,java] +---- +ImageGenerator gen = ImageGenerator.openai(apiKey); +GenerateImageRequest req = new GenerateImageRequest( + "A pastel watercolor of a Tel Aviv beach at sunset"); +req.setSize("1024x1024"); +req.setQuality("hd"); + +gen.generate(req).ready(image -> { + Label preview = new Label(image); + Display.getInstance().getCurrent().add(preview).revalidate(); +}); +---- + +DALL-E 3 supports `count = 1` only. Larger batches require a different +underlying model; pass it via `setModel(...)`. + +==== Conversation persistence + +`ConversationStore` wraps `Storage` to serialize a list of +`ChatMessage` values to JSON under a named key: + +[source,java] +---- +ConversationStore store = new ConversationStore("chat-history"); +List history = store.load(); // empty list on first call + +history.add(ChatMessage.user("Hello")); +ChatResponse resp = openai.chat( + ChatRequest.builder().model("gpt-4o-mini").messages(history).build()) + .get(); // blocking helper, EDT-safe +history.add(resp.getAssistantMessage()); +store.save(history); +---- + +`Tokenizer.estimateMessages(history)` returns a rough token count so you +can trim the oldest turns before the conversation outgrows the model's +context window. + +==== Prompt templates + +`PromptTemplate` does simple `{placeholder}` substitution. Unknown +placeholders pass through unchanged so partially-filled templates are +safe to log: + +[source,java] +---- +PromptTemplate t = PromptTemplate.of( + "Translate the following from {source} to {target}: {text}"); +t.put("source", "English"); +t.put("target", "French"); +t.put("text", "good morning"); + +ChatMessage user = t.asUser(); // wraps the rendered string +---- + +==== Retry policy + +`RetryPolicy.exponentialBackoff()` returns a sensible default (four +attempts, 500ms initial delay, 30s cap, jitter). 
Wrap any chat call in a +retry loop yourself, or attach the policy to a higher-level wrapper. +`LlmException.getRetryAfterSeconds()` returns the provider's +`Retry-After` header value when present, or -1, so the policy can honor +rate-limit hints. + +==== Simulator redirect for offline development + +The JavaSE simulator pings `localhost:11434` at startup. When Ollama is +running, the simulator exports the flag `cn1.ai.ollamaDetected=true`. A +second system property, `cn1.ai.simulatorRedirect`, controls whether the +simulator transparently routes `LlmClient.openai(...)` calls to the +local Ollama endpoint instead: + +[cols="1,3", options="header"] +|=== +| Value | Behavior + +| `disabled` | The default on a device. Calls go to the configured +provider, even in the simulator. +| `auto` | The default in the simulator. If Ollama is reachable, route +OpenAI calls through it. Otherwise behave like `disabled`. +| `ollama` | Force redirect to Ollama. Use this in offline-only +development environments. +|=== + +The redirect target is also configurable: `cn1.ai.ollamaUrl` defaults to +`http://localhost:11434/v1`, and `cn1.ai.ollamaModel` defaults to +`llama3.2`. Unchanged production code therefore runs offline against a +local model without any conditional wiring at the call site. + +==== Storing the API key + +WARNING: Never hard-code a provider API key in source, ship it in a +bundled resource, or commit it to git. Mobile binaries are trivially +reverse-engineered; any key embedded in the app is a key your users +can extract. 
Fetch the key from a server endpoint that the user +authenticates against, then cache it locally with the non-prompting +`SecureStorage` overloads: + +[source,java] +---- +SecureStorage store = SecureStorage.getInstance(); +store.set("openai.key", apiKeyFromServer); // returns false when unsupported + +String key = store.get("openai.key"); // returns null when absent +LlmClient client = LlmClient.openai(key); +---- + +=== The `ChatView` component + +`ChatView` is a scrollable, theme-aware chat surface that handles the +list of bubbles, the streaming append, the typing indicator, and the +input strip in one component. Drop it into a `Form` with a single +`BorderLayout.CENTER`: + +.`ChatView` rendered in the JavaSE simulator under the iOS Modern theme +image::img/chat-view.png[ChatView in the JavaSE simulator, 320] + +The component exposes thread-safe `addMessage`, `appendToLastMessage`, +and `setTypingIndicatorVisible` methods, so background callbacks from +`chatStream(...)` can mutate the view directly: + +[source,java] +---- +Form chat = new Form("Assistant", new BorderLayout()); +ChatView view = new ChatView(); +chat.add(BorderLayout.CENTER, view); + +view.addMessage(ChatMessage.assistant("How can I help?")); + +view.setOnSend(e -> { + String text = view.getInput().getText(); + view.getInput().clear(); + view.addMessage(ChatMessage.user(text)); + view.setTypingIndicatorVisible(true); + + ChatBubble streaming = view.beginAssistantStream(); + ChatRequest req = ChatRequest.builder() + .model("gpt-4o-mini") + .messages(view.getHistory()) + .build(); + + LlmClient.openai(apiKey).chatStream(req, new StreamingListener.Adapter() { + @Override public void onContentDelta(String d) { + view.appendToLastMessage(d); + } + }).ready(resp -> view.setTypingIndicatorVisible(false)); +}); +chat.show(); +---- + +`ChatBubble` and `ChatInput` are public, so subclass them for custom +rendering. 
Override `ChatView.createBubble(message)` to swap in a +custom subclass when the view builds the message list. + +==== Theming + +`ChatView` exposes the following UIIDs out of the box: + +[cols="1,2", options="header"] +|=== +| UIID | Applies to +| `ChatView` | The outer container. +| `ChatViewMessages` | The scrollable message column. +| `ChatBubbleUser` | The container for a user message. +| `ChatBubbleAssistant` | The container for an assistant message. +| `ChatBubbleSystem` | The container for a system message. +| `ChatBubbleText` | The inner `TextArea` of every bubble. +| `ChatTypingIndicator` | The animated typing dots. +| `ChatInput` | The input strip. +| `ChatInputField` | The text field inside the strip. +| `ChatSendButton` | The send button. +| `ChatAttachButton` | The attach button. +| `ChatVoiceButton` | The voice button. +|=== + +Style them from `theme.css` to align with the rest of the app. Hide the +voice or attach button by leaving its listener unset; the corresponding +`Button` instance is hidden when no `ActionListener` is registered. + +==== One-call binding to an LLM + +`LlmChatBinding.bind(view, client, baseRequest)` wires the input bar +directly to `chatStream(...)`. Every send replays the conversation +history, dispatches the response into the view, and updates the typing +indicator. Use it for prototypes, or as a reference implementation when +you need a custom send pipeline: + +[source,java] +---- +LlmChatBinding.bind(view, + LlmClient.openai(apiKey), + ChatRequest.builder().model("gpt-4o-mini").build()); +---- + +=== Speech recognition and TextToSpeech + +The new media APIs route through `Display` and into the implementation +hooks on `CodenameOneImplementation`, so the call surface is identical +on every platform. The default implementation returns `false` from +`isSupported()` and is a no-op for `recognize`/`speak`. 
+ +==== `SpeechRecognizer` + +[source,java] +---- +import com.codename1.media.*; + +if (!SpeechRecognizer.isSupported()) { + Dialog.show("Unavailable", "Speech is not supported on this device.", + "OK", null); + return; +} + +RecognitionOptions opts = new RecognitionOptions() + .setLanguageTag("en-US") + .setPartialResults(true) + .setContinuous(false) + .setMaxResults(3); + +SpeechRecognizer.recognize(opts, new RecognitionCallback.Adapter() { + @Override public void onPartialResult(String transcript) { + chatView.getInput().setText(transcript); + } + @Override public void onResult(String transcript, float confidence, + String[] alternatives) { + chatView.addMessage(ChatMessage.user(transcript)); + } +}); +---- + +iOS uses `SFSpeechRecognizer`, Android uses `android.speech.SpeechRecognizer`, +and the JavaSE simulator stays unsupported unless the optional +`cn1-ai-whisper` cn1lib is on the classpath. Call `stop()` to end an +active session early; partial results stop firing immediately. + +==== `TextToSpeech` + +[source,java] +---- +if (TextToSpeech.isSupported()) { + TtsOptions opts = new TtsOptions() + .setLanguageTag("fr-FR") + .setRate(1.0f); + TextToSpeech.speak("Bonjour", opts); +} +---- + +iOS uses `AVSpeechSynthesizer`, Android uses +`android.speech.tts.TextToSpeech`, and the JavaSE simulator falls back +to `say` on macOS, `espeak` on Linux, and the platform `SAPI` bridge on +Windows. +`getAvailableVoices()` returns the platform-specific voice identifiers +when the OS exposes them; `setVoiceId(...)` accepts any of those +strings, or `null` to use the default voice for the configured +language. + +=== ML Kit cn1libs + +Three cn1libs ship with the framework today, each backed by ML Kit on +both platforms. The build-time scanner picks up the `com.codename1.ai.*` +class references in your code and injects the matching Pods, Swift +Packages, Gradle dependencies, `Info.plist` usage strings, and Android +permissions; you don't edit build hints by hand. 
+ +[cols="1,1,2", options="header"] +|=== +| cn1lib | Public API | Provides + +| `cn1-ai-mlkit-barcode` | `BarcodeScanner.scan(byte[])` | Decode QR, +EAN, Code 128, and the other ML Kit-supported barcode formats from an +image. +| `cn1-ai-mlkit-docscan` | `DocumentScanner.scanToFile(byte[])` | +Capture and crop document photos. On iOS this routes through `VisionKit` +and on Android through the Google Play Services document scanner. +| `cn1-ai-mlkit-face` | `FaceDetector.detect(byte[])` | Detect faces and +return packed `int[]` bounding rectangles (four ints per face: x, y, +width, height). +|=== + +==== Adding a cn1lib + +Add the dependency to `common/pom.xml` with `<type>pom</type>` so Maven +pulls in the per-platform classifier jars: + +[source,xml] +---- +<dependency> + <groupId>com.codenameone</groupId> + <artifactId>cn1-ai-mlkit-barcode-lib</artifactId> + <version>${cn1.version}</version> + <type>pom</type> +</dependency> +---- + +==== Example: Scanning a barcode + +[source,java] +---- +import com.codename1.ai.mlkit.barcode.BarcodeScanner; + +Capture.capturePhoto(new ActionListener() { + @Override public void actionPerformed(ActionEvent evt) { + String path = (String) evt.getSource(); + try (InputStream in = FileSystemStorage.getInstance().openInputStream(path)) { + byte[] bytes = Util.readInputStream(in); + BarcodeScanner.scan(bytes).ready(values -> { + for (String v : values) { + Log.p("Detected: " + v); + } + }); + } catch (IOException ex) { + Log.e(ex); + } + } +}); +---- + +==== Example: Face detection + +[source,java] +---- +import com.codename1.ai.mlkit.face.FaceDetector; + +FaceDetector.detect(jpegBytes).ready(rects -> { + for (int i = 0; i < rects.length; i += 4) { + Log.p("Face at (" + rects[i] + "," + rects[i + 1] + + ") size " + rects[i + 2] + "x" + rects[i + 3]); + } +}); +---- + +NOTE: ML Kit barcode and face detection ship as small Pods or Gradle +dependencies that add a few megabytes to the binary. The document +scanner depends on Google Play Services on Android; on devices without +Play Services the call returns an error through `AsyncResource.except`. 
+ +==== Putting it all together + +The pieces compose well. The following loop captures a photo, +extracts text with the multi-modal `gpt-4o` model, speaks the result, +and streams the same text into a `ChatView`: + +[source,java] +---- +Capture.capturePhoto(evt -> { + String path = (String) evt.getSource(); + byte[] bytes = readAllBytes(path); + ImagePart img = new ImagePart(bytes, "image/jpeg"); + + ChatRequest req = ChatRequest.builder() + .model("gpt-4o") + .addMessage(ChatMessage.userWithImage( + "Describe the photo in one sentence.", img)) + .build(); + + chatView.addMessage(req.getMessages().get(0)); + ChatBubble streaming = chatView.beginAssistantStream(); + StringBuilder full = new StringBuilder(); + + LlmClient.openai(apiKey).chatStream(req, new StreamingListener.Adapter() { + @Override public void onContentDelta(String d) { + full.append(d); + chatView.appendToLastMessage(d); + } + }).ready(resp -> { + TextToSpeech.speak(full.toString()); + }); +}); +---- + +The same code runs unchanged in the simulator when Ollama is detected, +on Android with the multi-modal native client, and on iOS through the +cloud build pipeline. 
diff --git a/docs/developer-guide/developer-guide.asciidoc b/docs/developer-guide/developer-guide.asciidoc index 22a3fdab94..b6d1faf3a5 100644 --- a/docs/developer-guide/developer-guide.asciidoc +++ b/docs/developer-guide/developer-guide.asciidoc @@ -96,6 +96,8 @@ include::Annotation-Component-Binding.asciidoc[] include::Annotation-SQLite-ORM.asciidoc[] +include::Ai-And-Speech.asciidoc[] + include::Near-Field-Communication.asciidoc[] include::Network-Connectivity.asciidoc[] diff --git a/docs/developer-guide/img/chat-view.png b/docs/developer-guide/img/chat-view.png new file mode 100644 index 0000000000..2037a05e51 Binary files /dev/null and b/docs/developer-guide/img/chat-view.png differ diff --git a/docs/developer-guide/languagetool-accept.txt b/docs/developer-guide/languagetool-accept.txt index fb2bb66105..b221666bbc 100644 --- a/docs/developer-guide/languagetool-accept.txt +++ b/docs/developer-guide/languagetool-accept.txt @@ -530,3 +530,15 @@ webauthn # dictionary. [Dd]ao [Dd]aos + +# ----------------------------------------------------------------------------- +# AI / LLM provider, product, and model names used in the AI, Chat UI, and +# Speech chapter. LanguageTool's en_US dictionary either doesn't recognise +# these or mis-spells them. +# ----------------------------------------------------------------------------- +Ollama +[Vv]LLM +[Ll]lama +[Aa]nthropic +SAPI +espeak diff --git a/scripts/hellocodenameone/common/src/test/java/com/codenameone/examples/hellocodenameone/ChatViewDevGuideScreenshotTest.java b/scripts/hellocodenameone/common/src/test/java/com/codenameone/examples/hellocodenameone/ChatViewDevGuideScreenshotTest.java new file mode 100644 index 0000000000..66e3de74d0 --- /dev/null +++ b/scripts/hellocodenameone/common/src/test/java/com/codenameone/examples/hellocodenameone/ChatViewDevGuideScreenshotTest.java @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2026, Codename One and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Codename One designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Codename One through http://www.codenameone.com/ if you + * need additional information or have any questions. + */ +package com.codenameone.examples.hellocodenameone; + +import com.codename1.ai.ChatMessage; +import com.codename1.components.ChatView; +import com.codename1.io.Storage; +import com.codename1.io.Util; +import com.codename1.testing.AbstractTest; +import com.codename1.testing.TestUtils; +import com.codename1.ui.Display; +import com.codename1.ui.Form; +import com.codename1.ui.Image; +import com.codename1.ui.layouts.BorderLayout; +import com.codename1.ui.plaf.UIManager; +import com.codename1.ui.util.ImageIO; +import com.codename1.ui.util.Resources; + +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Builds the ChatView screenshot embedded in the developer guide's "AI, + * Chat UI, and Speech" chapter. + * + * The conversation is hard-coded so the capture is reproducible. 
Stored + * under the override name `chat-view.png` -- the developer-guide build + * picks it up from Storage / `~/.cn1` and copies the result into + * `docs/developer-guide/img/`. + */ +public class ChatViewDevGuideScreenshotTest extends AbstractTest { + private static final String STORAGE_KEY = "chat-view.png"; + private static final long FORM_TIMEOUT_MS = 5000L; + + @Override + public boolean shouldExecuteOnEDT() { + return true; + } + + @Override + public boolean runTest() throws Exception { + installModernTheme(); + + Form chat = new Form("Assistant", new BorderLayout()); + ChatView view = new ChatView(); + chat.add(BorderLayout.CENTER, view); + + view.addMessage(ChatMessage.system("AI Travel Assistant")); + view.addMessage(ChatMessage.user( + "Plan a 3-day Lisbon trip in November under 900 euro.")); + view.addMessage(ChatMessage.assistant( + "Sure! Day 1: Alfama walking tour + fado dinner. " + + "Day 2: Belem + LX Factory. Day 3: Sintra day trip. " + + "Estimated total: 820 euro (flights from Madrid).")); + view.addMessage(ChatMessage.user("Any vegetarian dinner spots?")); + view.addMessage(ChatMessage.assistant( + "Yes - in Alfama try Boi-Cavalo (modern Portuguese, " + + "vegan menu) and Ao 26 (vegan classics).")); + view.setTypingIndicatorVisible(true); + + chat.show(); + TestUtils.waitForFormTitle("Assistant", FORM_TIMEOUT_MS); + + // Let the layout finish a frame before grabbing pixels. 
+ TestUtils.waitFor(200); + + return saveScreenshot(chat); + } + + private void installModernTheme() throws java.io.IOException { + String platform = Display.getInstance().getPlatformName(); + String resourceName; + if ("ios".equals(platform)) { + resourceName = "/iOSModernTheme.res"; + } else if ("and".equals(platform)) { + resourceName = "/AndroidMaterialTheme.res"; + } else { + return; + } + InputStream in = openResource(resourceName); + if (in == null) { + return; + } + try { + Resources r = Resources.open(in); + String[] names = r.getThemeResourceNames(); + if (names == null || names.length == 0) { + return; + } + UIManager.getInstance().setThemeProps(r.getTheme(names[0])); + UIManager.getInstance().refreshTheme(); + } finally { + Util.cleanup(in); + } + } + + private InputStream openResource(String resourceName) { + InputStream in = Display.getInstance().getResourceAsStream(getClass(), resourceName); + if (in != null) { + return in; + } + return ChatViewDevGuideScreenshotTest.class.getResourceAsStream(resourceName); + } + + private boolean saveScreenshot(Form chat) throws Exception { + Image screenshot = Image.createImage(chat.getWidth(), chat.getHeight()); + chat.paintComponent(screenshot.getGraphics(), true); + + Storage storage = Storage.getInstance(); + if (storage.exists(STORAGE_KEY)) { + storage.deleteStorageFile(STORAGE_KEY); + } + ImageIO io = ImageIO.getImageIO(); + assertNotNull(io, "PNG image support is required for the dev-guide screenshot."); + OutputStream out = null; + try { + out = storage.createOutputStream(STORAGE_KEY); + io.save(screenshot, out, ImageIO.FORMAT_PNG, 1); + } finally { + Util.cleanup(out); + } + return true; + } +} diff --git a/scripts/initializr/common/src/main/resources/skill/SKILL.md b/scripts/initializr/common/src/main/resources/skill/SKILL.md index d6f730b088..447d5ad114 100644 --- a/scripts/initializr/common/src/main/resources/skill/SKILL.md +++ b/scripts/initializr/common/src/main/resources/skill/SKILL.md @@ -33,6 +33,7 
@@ This skill teaches you how to write code for a Codename One (CN1) cross-platform - `references/mobile-adaptability.md` — Density-independent units (mm), `convertToPixels`, `LayeredLayout` for responsive design, `Display.isTablet()`, font scaling. - `references/native-interfaces.md` — Authoring native interfaces for iOS/Android/JavaScript/Desktop with `cn1:generate-native-interfaces` and platform callbacks. - `references/cn1libs.md` — Creating, packaging, and consuming Codename One libraries (Maven and legacy `.cn1lib`). +- `references/ai-and-speech.md` — LLM client (`com.codename1.ai`), `ChatView`, `SpeechRecognizer`, `TextToSpeech`, non-prompting `SecureStorage` overloads, the ML Kit cn1libs, and the simulator's offline Ollama redirect. Read this when the user asks for chat, voice, embeddings, image generation, barcode/document/face detection, or wants to store an LLM API key. - `references/snapshot-builds.md` — Edge case: compiling against a Codename One SNAPSHOT from git. - `references/debugging.md` — `jdb`-attach workflow for an agent: start the simulator paused, set breakpoints, dump locals, drive the session non-interactively from a script. - `tools/` — runnable Java 17 single-file utilities. `tools/IsApiSupported.java` answers "is this `java.*` class in the CN1 subset?"; `tools/IsCssValid.java` answers "does this `theme.css` compile?". Run with `java tools/.java `. @@ -292,6 +293,10 @@ If you cannot run the simulator (e.g. 
headless environment), **say so explicitly | "Why does the compliance check fail" / Java/IO/networking | `references/java-api-subset.md` | | "I need to call a native iOS/Android/JS/desktop API" | `references/native-interfaces.md` | | "How do I create / consume a cn1lib" | `references/cn1libs.md` | +| "Add a chatbot" / "Integrate OpenAI/Ollama/Anthropic" / "Stream LLM tokens" / "Generate an image" / "Embed text" | `references/ai-and-speech.md` | +| "Read voice input" / "Speak text aloud" / "Add a voice button to my chat" | `references/ai-and-speech.md` | +| "Scan a barcode" / "Detect a face" / "Crop a document photo" via ML Kit | `references/ai-and-speech.md` | +| "Store an LLM API key" / non-prompting SecureStorage | `references/ai-and-speech.md` | | "Build against a Codename One SNAPSHOT from git" | `references/snapshot-builds.md` | | "Debug a faulty screen — attach `jdb` to the simulator" | `references/debugging.md` | | Quick yes/no check: "is this `java.*` class supported", "does my `theme.css` compile" | `tools/` directory — `java tools/IsApiSupported.java ` / `java tools/IsCssValid.java ` | diff --git a/scripts/initializr/common/src/main/resources/skill/references/ai-and-speech.md b/scripts/initializr/common/src/main/resources/skill/references/ai-and-speech.md new file mode 100644 index 0000000000..330d62b3b5 --- /dev/null +++ b/scripts/initializr/common/src/main/resources/skill/references/ai-and-speech.md @@ -0,0 +1,423 @@ +# AI, Chat UI, and Speech Reference + +Codename One ships a portable LLM client, a streaming chat component, speech recognition, text-to-speech, and ML Kit cn1lib bridges. All of it sits in the cross-platform `common/` module — the same call site runs on iOS, Android, JavaSE, and (where the backend supports it) JavaScript. + +**Read this reference when** the user asks to integrate an LLM, build a chat UI, add voice input or voice output, generate images, compute embeddings, scan barcodes, faces, or documents on-device, or store an API key. 
+ +## Core APIs at a glance + +| Concern | Class | Module | +| --- | --- | --- | +| Chat / embeddings / image generation | `com.codename1.ai.LlmClient` | core (built-in) | +| Streaming-aware chat UI | `com.codename1.components.ChatView` | core (built-in) | +| Speech-to-text | `com.codename1.media.SpeechRecognizer` | core (built-in) | +| Text-to-speech | `com.codename1.media.TextToSpeech` | core (built-in) | +| Silent secret storage (LLM API keys, etc.) | Single-arg overloads on `com.codename1.security.SecureStorage` | core (built-in) | +| Barcode scanning | `com.codename1.ai.mlkit.barcode.BarcodeScanner` | cn1lib `cn1-ai-mlkit-barcode` | +| Document scanning | `com.codename1.ai.mlkit.docscan.DocumentScanner` | cn1lib `cn1-ai-mlkit-docscan` | +| Face detection | `com.codename1.ai.mlkit.face.FaceDetector` | cn1lib `cn1-ai-mlkit-face` | + +The build-time scanner in the Codename One Maven plugin (`AiDependencyTable`) picks up references to any `com.codename1.ai.*` or `com.codename1.media.{Speech,Tts}*` class and automatically wires Pods (iOS), Swift Packages (iOS SPM), Gradle dependencies (Android), `Info.plist` usage strings, and Android permissions. You don't edit `codenameone_settings.properties` build hints for these classes. + +## LlmClient — chat, embeddings, image generation + +```java +import com.codename1.ai.*; + +// OpenAI. Also drives Ollama, vLLM, llama.cpp, Together, Groq, +// Fireworks etc. via shared wire format. +LlmClient client = LlmClient.openai(apiKey); + +// Local Ollama on http://localhost:11434 +LlmClient local = LlmClient.ollama("llama3.2"); + +// Any OpenAI-compatible endpoint +LlmClient together = LlmClient.localOpenAiCompatible( + "https://api.together.xyz/v1", apiKey, "meta-llama/Llama-3.3-70B-Instruct-Turbo"); + +// Anthropic + Gemini route through their OpenAI-compatible endpoints +// (Anthropic at /v1/chat/completions, Gemini at /v1beta/openai/ +// chat/completions). 
Same ChatRequest / ChatResponse value types, +// same streaming, same tool-call API. +LlmClient claude = LlmClient.anthropic(apiKey); // default model: claude-sonnet-4-5 +LlmClient gemini = LlmClient.gemini(apiKey); // default model: gemini-2.0-flash +``` + +Default models per provider when `ChatRequest.builder().model(...)` is not called: + +| Provider | Default model | Override via | +| --- | --- | --- | +| OpenAI | `gpt-4o-mini` | `ChatRequest.builder().model("gpt-4o")` etc. | +| Anthropic | `claude-sonnet-4-5` | `ChatRequest.builder().model("claude-opus-4-1")` etc. | +| Gemini | `gemini-2.0-flash` | `ChatRequest.builder().model("gemini-2.5-pro")` etc. | +| Ollama | `llama3.2` | `LlmClient.ollama("qwen2.5-7b")` or per request | + +`ChatRequest` is a fluent builder. `chat()` returns the response, `chatStream()` streams deltas: + +```java +ChatRequest req = ChatRequest.builder() + .model("gpt-4o-mini") + .addMessage(ChatMessage.system("Reply in haiku.")) + .addMessage(ChatMessage.user("Describe a Codename One app.")) + .temperature(0.7f) + .maxTokens(200) + .build(); + +client.chat(req).ready(resp -> Log.p(resp.getText())); + +// Streaming +client.chatStream(req, new StreamingListener.Adapter() { + @Override public void onContentDelta(String delta) { + chatView.appendToLastMessage(delta); + } +}).ready(resp -> Log.p("usage=" + resp.getUsage().getTotalTokens())); +``` + +All callbacks fire on the EDT. `AsyncResource.cancel()` closes the socket on a streaming call. 
+ +### Roles, message parts, multi-modal + +```java +ChatMessage.system("You are a tour guide."); +ChatMessage.user("Describe Paris."); +ChatMessage.assistant("Paris is…"); + +// Multi-modal: attach an image +ImagePart photo = new ImagePart(jpegBytes, "image/jpeg"); // inline +ImagePart byUrl = new ImagePart("https://example.com/x.png"); // remote +ChatMessage withImg = ChatMessage.userWithImage("Describe the photo.", photo); +``` + +### Tool calling + +```java +Tool weather = new Tool( + "get_weather", + "Return the current weather for a city.", + "{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}," + + "\"required\":[\"city\"]}", + argsJson -> "{\"tempC\":22}"); // ToolHandler + +ChatRequest req = ChatRequest.builder() + .model("gpt-4o-mini") + .addMessage(ChatMessage.user("Weather in Paris?")) + .tools(Collections.singletonList(weather)) + .toolChoice(ToolChoice.AUTO) // .NONE | .REQUIRED | .named("x") + .build(); + +client.chat(req).ready(resp -> { + for (ToolCall call : resp.getToolCalls()) { + String result = call.execute(Collections.singletonList(weather)); + // Feed result back as ChatMessage.toolResult(call.getId(), result) + // and call chat() again. + } +}); +``` + +### Structured output + +```java +ChatRequest req = ChatRequest.builder() + .model("gpt-4o-mini") + .responseFormat(ResponseFormat.JSON_OBJECT) + .addMessage(ChatMessage.system("Return JSON with keys city, population.")) + .addMessage(ChatMessage.user("Tel Aviv")) + .build(); +``` + +### Embeddings + +```java +EmbeddingRequest req = EmbeddingRequest.builder() + .model("text-embedding-3-small") + .addInput("a cat sat on the mat") + .build(); + +client.embed(req).ready(resp -> { + float[] v = resp.getData().get(0).getVector(); + // store, compare cosine similarity, etc. 
+}); +``` + +### Image generation + +```java +ImageGenerator gen = ImageGenerator.openai(apiKey); // DALL-E 3 +GenerateImageRequest req = new GenerateImageRequest("Watercolor of a beach at sunset"); +req.setSize("1024x1024"); +gen.generate(req).ready(image -> form.add(new Label(image)).revalidate()); +``` + +`ImageGenerator.onDevice()` routes to the optional `cn1-ai-stablediffusion` cn1lib for on-device Stable Diffusion. That cn1lib carries multi-GB native blobs; the build server flips `cn1.ai.requiresBigUpload` and asks you to build locally if your project bundles it. + +### Conversation persistence, retries, prompts, tokens + +| Class | Purpose | +| --- | --- | +| `ConversationStore(key)` | JSON-serialize a `List` to/from `Storage` | +| `PromptTemplate.of("Translate {text} to {lang}")` | Trivial `{placeholder}` substitution | +| `Tokenizer.estimate(text)` / `Tokenizer.estimateMessages(history)` | Approximate token count | +| `RetryPolicy.exponentialBackoff()` | 4 attempts, 500ms→30s, jitter; honors `Retry-After` from `LlmException.getRetryAfterSeconds()` | +| `SafetyFilter.check(messages)` | Returns `null` to allow, non-null reason to block; pre-flight gate | + +### LlmException + +A single checked exception that extends `IOException`. Use `LlmException.getType()` to switch on: + +``` +AUTH, RATE_LIMIT, INVALID_REQUEST, CONTEXT_LENGTH, +MODEL_OVERLOADED, SERVER, NETWORK, UNKNOWN +``` + +`getHttpStatus()`, `getProviderErrorCode()`, `getRawBody()`, `getRetryAfterSeconds()` are also exposed for logging and retry decisions. + +### Simulator redirect (offline Ollama) + +The JavaSE simulator probes `localhost:11434` at startup. The following system properties configure the redirect or report its state: + +| Property | Default | Effect | +| --- | --- | --- | +| `cn1.ai.simulatorRedirect` | `auto` in simulator, `disabled` on device | `auto` redirects OpenAI calls to local Ollama when Ollama is reachable. `ollama` forces redirect. `disabled` always hits the configured provider. 
| +| `cn1.ai.ollamaUrl` | `http://localhost:11434/v1` | Override the Ollama endpoint URL | +| `cn1.ai.ollamaModel` | `llama3.2` | Override the local model name | +| `cn1.ai.ollamaDetected` | (read-only) | `true` if the startup probe found Ollama | + +Production code calling `LlmClient.openai(...)` runs unchanged in the simulator against the local model. No API charges, no network round-trip. + +## ChatView — streaming chat surface + +```java +import com.codename1.components.*; + +Form chat = new Form("Assistant", new BorderLayout()); +ChatView view = new ChatView(); +chat.add(BorderLayout.CENTER, view); + +view.addMessage(ChatMessage.assistant("How can I help?")); + +view.setOnSend(e -> { + String text = view.getInput().getText(); + view.getInput().clear(); + view.addMessage(ChatMessage.user(text)); + view.setTypingIndicatorVisible(true); + ChatBubble streaming = view.beginAssistantStream(); + + ChatRequest req = ChatRequest.builder() + .model("gpt-4o-mini").messages(view.getHistory()).build(); + + LlmClient.openai(apiKey).chatStream(req, new StreamingListener.Adapter() { + @Override public void onContentDelta(String d) { + view.appendToLastMessage(d); + } + }).ready(resp -> view.setTypingIndicatorVisible(false)); +}); +chat.show(); +``` + +The component is thread-safe: `addMessage`, `appendToLastMessage`, and `setTypingIndicatorVisible` marshal through `Display.callSerially` internally. + +### One-line wiring + +```java +LlmChatBinding.bind(view, + LlmClient.openai(apiKey), + ChatRequest.builder().model("gpt-4o-mini").build()); +``` + +This wires the input bar to `chatStream(...)` and replays the conversation history on every turn — fine for prototypes, replace with a custom send pipeline when you need tool calls, structured output, or analytics. 
+ +### Theming + +CSS-style UIIDs: + +``` +ChatView — outer container +ChatViewMessages — scrollable column +ChatBubbleUser — user bubble container +ChatBubbleAssistant — assistant bubble container +ChatBubbleSystem — system bubble container +ChatBubbleText — TextArea inside every bubble +ChatTypingIndicator — animated dots +ChatInput — input strip +ChatInputField — text field +ChatSendButton — send button +ChatAttachButton — attach button (hidden when setOnAttach not called) +ChatVoiceButton — voice button (hidden when setOnVoice not called) +``` + +Override `ChatView.createBubble(message)` to plug in a `ChatBubble` subclass with custom rendering. + +## SpeechRecognizer + +iOS uses `SFSpeechRecognizer`, Android uses `android.speech.SpeechRecognizer`. JavaSE is unsupported unless the optional `cn1-ai-whisper` cn1lib is on the classpath. + +```java +import com.codename1.media.*; + +if (!SpeechRecognizer.isSupported()) { /* degrade gracefully */ return; } + +RecognitionOptions opts = new RecognitionOptions() + .setLanguageTag("en-US") // BCP-47 + .setPartialResults(true) + .setContinuous(false) + .setMaxResults(3); + +SpeechRecognizer.recognize(opts, new RecognitionCallback.Adapter() { + @Override public void onPartialResult(String t) { chatView.getInput().setText(t); } + @Override public void onResult(String t, float confidence, String[] alternatives) { + chatView.addMessage(ChatMessage.user(t)); + } +}); + +// SpeechRecognizer.stop() // end the active session +``` + +The build-time scanner adds the `NSSpeechRecognitionUsageDescription` and `NSMicrophoneUsageDescription` `Info.plist` strings, and Android `RECORD_AUDIO` permission, automatically. + +## TextToSpeech + +iOS uses `AVSpeechSynthesizer`, Android uses `android.speech.tts.TextToSpeech`, JavaSE falls back to `say` on macOS, `espeak` on Linux, and SAPI on Windows. 
+ +```java +import com.codename1.media.*; + +if (TextToSpeech.isSupported()) { + TtsOptions opts = new TtsOptions() + .setLanguageTag("fr-FR") + .setRate(1.0f) + .setPitch(1.0f) + .setVolume(1.0f); + TextToSpeech.speak("Bonjour", opts); +} + +// TextToSpeech.stop(); // cancel current utterance +// TextToSpeech.getAvailableVoices();// platform voice identifiers +``` + +## SecureStorage — non-prompting overloads for LLM keys + +**Never hard-code a provider API key in source, ship it in a bundled resource, or commit it to git.** Mobile binaries are trivially reverse-engineered; any key embedded in the app is a key your users can extract. Fetch the key from a server endpoint the user authenticates against, then cache it locally with the non-prompting `SecureStorage` overloads: + +```java +SecureStorage store = SecureStorage.getInstance(); +store.set("openai.key", apiKeyFromServer); // returns false when unsupported +String key = store.get("openai.key"); // returns null when absent +LlmClient client = LlmClient.openai(key); +``` + +The base class returns `null` / `false` on platforms without an implementation, so you can wire this in without a platform check. + +## ML Kit cn1libs + +Three cn1libs, each backed by ML Kit on iOS and Android. Drop the `pom` dependency in `common/pom.xml` and the build-time scanner takes care of Pods, Gradle deps, permissions, and usage strings. + +```xml +<dependency> + <groupId>com.codenameone</groupId> + <artifactId>cn1-ai-mlkit-barcode-lib</artifactId> + <version>${cn1.version}</version> + <type>pom</type> +</dependency> +``` + +### Barcode scanner + +```java +import com.codename1.ai.mlkit.barcode.BarcodeScanner; + +Capture.capturePhoto(evt -> { + String path = (String) evt.getSource(); + byte[] bytes = readAllBytes(path); + BarcodeScanner.scan(bytes).ready(values -> { + for (String v : values) Log.p("Detected: " + v); + }); +}); +``` + +Decodes QR, EAN, Code 128, and the other ML Kit-supported formats. 
+ +### Document scanner + +```java +import com.codename1.ai.mlkit.docscan.DocumentScanner; + +DocumentScanner.scanToFile(jpegBytes).ready(filePath -> { + // filePath points to the cropped, corrected document image +}); +``` + +iOS uses `VisionKit`. Android uses the Google Play Services document scanner — on devices without Play Services, the call resolves through `AsyncResource.except(...)`. + +### Face detector + +```java +import com.codename1.ai.mlkit.face.FaceDetector; + +FaceDetector.detect(jpegBytes).ready(rects -> { + for (int i = 0; i < rects.length; i += 4) { + int x = rects[i], y = rects[i + 1], w = rects[i + 2], h = rects[i + 3]; + // draw rectangle, crop, etc. + } +}); +``` + +Returns a packed `int[]` — four ints per face. + +## Common patterns + +### Capture photo → describe via multi-modal LLM → speak the result + +```java +Capture.capturePhoto(evt -> { + String path = (String) evt.getSource(); + byte[] bytes = readAllBytes(path); + ImagePart img = new ImagePart(bytes, "image/jpeg"); + + ChatRequest req = ChatRequest.builder() + .model("gpt-4o") + .addMessage(ChatMessage.userWithImage("Describe the photo.", img)) + .build(); + + chatView.addMessage(req.getMessages().get(0)); + chatView.beginAssistantStream(); + StringBuilder full = new StringBuilder(); + + LlmClient.openai(apiKey).chatStream(req, new StreamingListener.Adapter() { + @Override public void onContentDelta(String d) { + full.append(d); + chatView.appendToLastMessage(d); + } + }).ready(resp -> TextToSpeech.speak(full.toString())); +}); +``` + +### Voice-driven turn + +```java +view.setOnVoice(e -> SpeechRecognizer.recognize( + new RecognitionOptions().setPartialResults(true), + new RecognitionCallback.Adapter() { + @Override public void onPartialResult(String t) { + view.getInput().setText(t); + } + @Override public void onResult(String t, float c, String[] alts) { + view.getInput().setText(t); + // Trigger setOnSend manually if you want auto-send + } + })); +``` + +### Offline development 
against Ollama + +1. `brew install ollama && ollama pull llama3.2` +2. `ollama serve` (default port `11434`) +3. Run the simulator. Production code calling `LlmClient.openai(...)` is automatically redirected. Override with `-Dcn1.ai.simulatorRedirect=ollama -Dcn1.ai.ollamaModel=llama3.2`. + +## What NOT to do + +- **Don't reach for `Class.forName(...)` to discover providers.** Obfuscation renames classes in shipped builds; reflective name lookups work in the simulator but fail in production. The factory methods on `LlmClient` and `ImageGenerator` already give you the indirection you need. +- **Don't store an API key in source.** Use `SecureStorage.get("openai.key")` (single-arg overload) or pull it from a server-side proxy. Hard-coded keys leak through reverse-engineered binaries. +- **Don't call `chatStream` from a tight UI loop.** A streaming call holds an HTTP connection until the response completes; one per user turn is correct, one per keystroke is a bug. +- **Don't mutate `ChatView` on a non-EDT thread without going through the documented mutators.** `addMessage`, `appendToLastMessage`, and `setTypingIndicatorVisible` are thread-safe; arbitrary `view.add(...)` calls are not. +- **Don't assume the document scanner works on every Android device.** It requires Google Play Services. Wrap the call and fall back to `Capture.capturePhoto(...)` if the scanner returns an error. +- **Don't ship a project that bundles `cn1-ai-stablediffusion` to the cloud build server without checking.** The cn1lib carries multi-GB native blobs; the build will reject the upload with a `cn1.ai.requiresBigUpload` hint. Build locally for those projects. 
diff --git a/scripts/initializr/common/src/test/java/com/codename1/initializr/model/GeneratorModelMatrixTest.java b/scripts/initializr/common/src/test/java/com/codename1/initializr/model/GeneratorModelMatrixTest.java index 76b848c134..004665d483 100644 --- a/scripts/initializr/common/src/test/java/com/codename1/initializr/model/GeneratorModelMatrixTest.java +++ b/scripts/initializr/common/src/test/java/com/codename1/initializr/model/GeneratorModelMatrixTest.java @@ -144,6 +144,7 @@ private void validateClaudeSkillBundled() throws Exception { ".agent-skills/codename-one/references/cn1libs.md", ".agent-skills/codename-one/references/snapshot-builds.md", ".agent-skills/codename-one/references/debugging.md", + ".agent-skills/codename-one/references/ai-and-speech.md", ".agent-skills/codename-one/tools/README.md", ".agent-skills/codename-one/tools/IsApiSupported.java", ".agent-skills/codename-one/tools/IsCssValid.java"