From 3c7bcedf0aa8458251c5abbb8abd00ab2b987360 Mon Sep 17 00:00:00 2001
From: woody_lin
Date: Wed, 7 Jan 2026 10:56:58 +0800
Subject: [PATCH] docs: update LLM model demo to remove OpenAI API key requirement and simplify prerequisites

---
 src/content/docs/demos/04-llm-model.mdx | 212 ++----------------------
 1 file changed, 12 insertions(+), 200 deletions(-)

diff --git a/src/content/docs/demos/04-llm-model.mdx b/src/content/docs/demos/04-llm-model.mdx
index 91a4109..c487bc1 100644
--- a/src/content/docs/demos/04-llm-model.mdx
+++ b/src/content/docs/demos/04-llm-model.mdx
@@ -12,11 +12,10 @@ This guide demonstrates how to deploy a Large Language Model (LLM) in your OtterScale
 Ensure you have the following:
 
 - **Python 3.8+**: For running the test scripts
--- **OpenAI API Key**: Obtain from [OpenAI Platform](https://platform.openai.com/api-keys)
--- **Python Libraries**: `requests` and `openai`
+- **Python Libraries**: `requests`
 
 ```bash
-pip install requests openai
+pip install requests
 ```
 
 ## Deploy LLM Model
@@ -60,26 +59,20 @@ The model deployment may take several minutes depending on the model size and available resources.
 Once your LLM model is deployed and ready, you can test it using Python with OpenAI API integration.
 
 ### Connection Information
-Before running the test scripts, you'll need:
-- **OpenAI API Key**: Your API key from OpenAI
-- **Model Name**: The model you created (e.g., `llm-demo`)
-- **API Base URL**: Optional, if using a custom endpoint
-
-
-
+Before running the test scripts, you'll need the following information from the `/scope//models/llm` page:
+- **Service URL**: The URL shown on the Service card
+- **Name**: The `name` field in the model table
+- **Model Name**: The `Model Name` field in the model table
-
 ```python
 import requests
@@ -87,18 +80,18 @@ import json
 
 # Configuration
 SERVICE_URL = "" # e.g., http://localhost:8000
-MODEL_NAME = "" # e.g., llm-demo
-MODEL_ID = "RedHatAI/Llama-3.2-1B-Instruct-FP8"
+NAME = "" # e.g., llm-demo
+MODEL_NAME = "" # e.g., RedHatAI/Llama-3.2-1B-Instruct-FP8
 
 def ask_question(question):
     """Send a simple question to the LLM and get a response."""
     headers = {
-        "OtterScale-Model-Name": MODEL_NAME,
+        "OtterScale-Model-Name": NAME,
         "Content-Type": "application/json"
     }
 
     payload = {
-        "model": MODEL_ID,
+        "model": MODEL_NAME,
         "prompt": question
     }
 
@@ -120,184 +113,3 @@ answer = ask_question(question)
 print(f"Q: {question}")
 print(f"A: {answer}")
 ```
-
-
-
-
-
-```python
-import requests
-import json
-
-# Configuration
-SERVICE_URL = "" # e.g., http://localhost:8000
-MODEL_NAME = "" # e.g., llm-demo
-MODEL_ID = "RedHatAI/Llama-3.2-1B-Instruct-FP8"
-
-class LLMChat:
-    def __init__(self, service_url, model_name, model_id):
-        self.service_url = service_url
-        self.model_name = model_name
-        self.model_id = model_id
-        self.conversation = []
-
-    def add_message(self, role, content):
-        """Add a message to the conversation history."""
-        self.conversation.append({"role": role, "content": content})
-
-    def send_message(self, user_message):
-        """Send a user message and get a response."""
-        self.add_message("user", user_message)
-
-        headers = {
-            "OtterScale-Model-Name": self.model_name,
-            "Content-Type": "application/json"
-        }
-
-        # Build context from conversation history
-        context = "\n".join([f"{msg['role']}: {msg['content']}" for msg in self.conversation])
-
-        payload = {
-            "model": self.model_id,
-            "prompt": context
-        }
-
-        try:
-            response = requests.post(
-                f"{self.service_url}/v1/chat",
-                headers=headers,
-                json=payload
-            )
-            response.raise_for_status()
-            result = response.json()
-            assistant_response = result.get("response", str(result))
-            self.add_message("assistant", assistant_response)
-            return assistant_response
-        except Exception as e:
-            return f"✗ Error: {str(e)}"
-
-    def clear_history(self):
-        """Clear conversation history."""
-        self.conversation = []
-
-# Test
-chat = LLMChat(service_url=SERVICE_URL, model_name=MODEL_NAME, model_id=MODEL_ID)
-
-# First message
-response1 = chat.send_message("What are the three main colors of the French flag?")
-print(f"Q: What are the three main colors of the French flag?")
-print(f"A: {response1}\n")
-
-# Follow-up message (maintains context)
-response2 = chat.send_message("Which one represents liberty?")
-print(f"Q: Which one represents liberty?")
-print(f"A: {response2}\n")
-```
-
-
-
-
-
-```python
-import requests
-import json
-
-# Configuration
-SERVICE_URL = "" # e.g., http://localhost:8000
-MODEL_NAME = "" # e.g., llm-demo
-MODEL_ID = "RedHatAI/Llama-3.2-1B-Instruct-FP8"
-
-class LLMDemo:
-    def __init__(self, service_url, model_name, model_id):
-        self.service_url = service_url
-        self.model_name = model_name
-        self.model_id = model_id
-        self.conversation = []
-
-    def test_connection(self):
-        """Test if the service is accessible."""
-        try:
-            headers = {
-                "OtterScale-Model-Name": self.model_name,
-                "Content-Type": "application/json"
-            }
-            payload = {
-                "model": self.model_id,
-                "prompt": "Hello"
-            }
-            response = requests.post(
-                f"{self.service_url}/v1/chat",
-                headers=headers,
-                json=payload,
-                timeout=10
-            )
-            response.raise_for_status()
-            print("✓ Connection successful!")
-            return True
-        except Exception as e:
-            print(f"✗ Connection failed: {str(e)}")
-            return False
-
-    def send_message(self, user_message):
-        """Send a message and get a response."""
-        self.conversation.append({"role": "user", "content": user_message})
-
-        headers = {
-            "OtterScale-Model-Name": self.model_name,
-            "Content-Type": "application/json"
-        }
-
-        # Build context from conversation history
-        context = "\n".join([f"{msg['role']}: {msg['content']}" for msg in self.conversation])
-
-        payload = {
-            "model": self.model_id,
-            "prompt": context
-        }
-
-        try:
-            response = requests.post(
-                f"{self.service_url}/v1/chat",
-                headers=headers,
-                json=payload
-            )
-            response.raise_for_status()
-            result = response.json()
-            assistant_response = result.get("response", str(result))
-            self.conversation.append({"role": "assistant", "content": assistant_response})
-            return assistant_response
-        except Exception as e:
-            return f"✗ Error: {str(e)}"
-
-    def clear_history(self):
-        """Clear conversation history."""
-        self.conversation = []
-
-# Usage
-if __name__ == "__main__":
-    demo = LLMDemo(service_url=SERVICE_URL, model_name=MODEL_NAME, model_id=MODEL_ID)
-
-    # Test connection
-    demo.test_connection()
-
-    # Start conversation
-    response1 = demo.send_message("Tell me about artificial intelligence in 2 sentences.")
-    print(f"Q: Tell me about artificial intelligence in 2 sentences.")
-    print(f"A: {response1}\n")
-
-    # Follow-up questions
-    response2 = demo.send_message("What are the main applications?")
-    print(f"Q: What are the main applications?")
-    print(f"A: {response2}\n")
-
-    response3 = demo.send_message("How does machine learning fit into this?")
-    print(f"Q: How does machine learning fit into this?")
-    print(f"A: {response3}")
-```
-
-
-
-
\ No newline at end of file
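
Taken together, the hunks above leave the page with a single simple-question example, but its pieces are spread across three hunks and the request/response plumbing sits in unchanged lines this diff does not show. The sketch below reassembles the post-patch script into one runnable file; the `requests.post` call, the response handling, and the sample question are filled in from the pattern of the code deleted in the last hunk (`/v1/chat` endpoint, `response` field) and should be treated as assumptions, not the file's exact contents.

```python
import requests

# Configuration: values come from the /scope//models/llm page described above
SERVICE_URL = ""  # URL shown on the Service card, e.g., http://localhost:8000
NAME = ""         # `name` field in the model table, e.g., llm-demo
MODEL_NAME = ""   # `Model Name` field in the model table

def ask_question(question):
    """Send a simple question to the LLM and get a response."""
    headers = {
        # Routing header carries the model's `name`, per the diff's renames
        "OtterScale-Model-Name": NAME,
        "Content-Type": "application/json"
    }
    payload = {
        # Payload `model` field carries the `Model Name` from the table
        "model": MODEL_NAME,
        "prompt": question
    }
    # Endpoint and response handling mirror the deleted examples (assumed)
    response = requests.post(f"{SERVICE_URL}/v1/chat", headers=headers, json=payload)
    response.raise_for_status()
    result = response.json()
    return result.get("response", str(result))

question = "Hello"  # placeholder; the file's actual sample question is not shown in this diff
answer = ask_question(question)
print(f"Q: {question}")
print(f"A: {answer}")
```

Note the split this patch introduces: routing happens via the `OtterScale-Model-Name` header (the model's `name`), while the `model` field in the payload carries the `Model Name` from the table.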
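The second deleted example (the `LLMDemo` class) also removed a useful `test_connection` probe. If that check is still wanted alongside the simplified example, here is a minimal sketch of the same probe rewritten for the new `NAME`/`MODEL_NAME` configuration; the 10-second timeout and the "Hello" prompt are carried over from the deleted method, and the endpoint is the same assumed `/v1/chat`.

```python
import requests

SERVICE_URL = ""  # URL shown on the Service card
NAME = ""         # `name` field in the model table
MODEL_NAME = ""   # `Model Name` field in the model table

def test_connection():
    """Check that the deployed model answers a trivial prompt."""
    headers = {
        "OtterScale-Model-Name": NAME,
        "Content-Type": "application/json"
    }
    payload = {"model": MODEL_NAME, "prompt": "Hello"}
    try:
        response = requests.post(
            f"{SERVICE_URL}/v1/chat",
            headers=headers,
            json=payload,
            timeout=10  # carried over from the deleted test_connection
        )
        response.raise_for_status()
        print("✓ Connection successful!")
        return True
    except Exception as e:
        print(f"✗ Connection failed: {e}")
        return False

if __name__ == "__main__":
    test_connection()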