From b488cfc23ecccdaed91ac5f0fb9111d2aa9937b4 Mon Sep 17 00:00:00 2001
From: Daniel Garman <danielrgarman@gmail.com>
Date: Wed, 16 Jul 2025 14:04:09 +0000
Subject: [PATCH 1/3] switch references to the GitHub Models API

---
 .env-sample                                      |  1 +
 .../python/llamaindex/rag_getting_started.ipynb  |  2 +-
 samples/js/azure_ai_inference/embeddings.js      |  8 ++++----
 samples/js/openai/embeddings.js                  | 16 ++++++++--------
 samples/python/azure_ai_inference/embeddings.py  |  4 ++--
 .../azure_ai_inference/getting_started.ipynb     |  4 ++--
 samples/python/mistralai/getting_started.ipynb   |  4 ++--
 samples/python/openai/embeddings.py              |  2 +-
 .../openai/embeddings_getting_started.ipynb      |  6 +++---
 9 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/.env-sample b/.env-sample
index d7dfb66..b7b3ffe 100644
--- a/.env-sample
+++ b/.env-sample
@@ -1,2 +1,3 @@
 # get your pat token from: https://github.com/settings/tokens?type=beta
+# if creating a new token, ensure it has `models: read` permissions
 GITHUB_TOKEN="github_pat_****"
diff --git a/cookbooks/python/llamaindex/rag_getting_started.ipynb b/cookbooks/python/llamaindex/rag_getting_started.ipynb
index 829a079..9d5647f 100644
--- a/cookbooks/python/llamaindex/rag_getting_started.ipynb
+++ b/cookbooks/python/llamaindex/rag_getting_started.ipynb
@@ -60,7 +60,7 @@
     "    raise ValueError(\"GITHUB_TOKEN is not set\")\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"GITHUB_TOKEN\")\n",
-    "os.environ[\"OPENAI_BASE_URL\"] = \"https://models.inference.ai.azure.com/\""
+    "os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/\""
    ]
   },
   {
diff --git a/samples/js/azure_ai_inference/embeddings.js b/samples/js/azure_ai_inference/embeddings.js
index c988d78..caf6815 100644
--- a/samples/js/azure_ai_inference/embeddings.js
+++ b/samples/js/azure_ai_inference/embeddings.js
@@ -3,7 +3,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureKeyCredential } from "@azure/core-auth";
 
 const token = process.env["GITHUB_TOKEN"];
-const endpoint = "https://models.inference.ai.azure.com";
+const endpoint = "https://models.github.ai";
 
 /* By using the Azure AI Inference SDK, you can easily experiment with different models
    by modifying the value of `modelName` in the code below. For this code sample
@@ -32,9 +32,9 @@ export async function main() {
   for (const item of response.body.data) {
     let length = item.embedding.length;
     console.log(
-	  `data[${item.index}]: length=${length}, ` +
-	  `[${item.embedding[0]}, ${item.embedding[1]}, ` +
-	  `..., ${item.embedding[length - 2]}, ${item.embedding[length -1]}]`);
+      `data[${item.index}]: length=${length}, ` +
+      `[${item.embedding[0]}, ${item.embedding[1]}, ` +
+      `..., ${item.embedding[length - 2]}, ${item.embedding[length - 1]}]`);
   }
   console.log(response.body.usage);
 }
diff --git a/samples/js/openai/embeddings.js b/samples/js/openai/embeddings.js
index 2b8ecf4..fa7062d 100644
--- a/samples/js/openai/embeddings.js
+++ b/samples/js/openai/embeddings.js
@@ -1,7 +1,7 @@
 import OpenAI from "openai";
 
 const token = process.env["GITHUB_TOKEN"];
-const endpoint = "https://models.inference.ai.azure.com";
+const endpoint = "https://models.github.ai";
 
 /* Pick one of the OpenAI embeddings models from the GitHub Models service */
 const modelName = "text-embedding-3-small";
@@ -11,16 +11,16 @@ export async function main() {
   const client = new OpenAI({ baseURL: endpoint, apiKey: token });
 
   const response = await client.embeddings.create({
-	input: ["first phrase", "second phrase", "third phrase"],
-	model: modelName     
+    input: ["first phrase", "second phrase", "third phrase"],
+    model: modelName
   });
 
   for (const item of response.data) {
-	let length = item.embedding.length;
-	console.log(
-		`data[${item.index}]: length=${length}, ` +
-		`[${item.embedding[0]}, ${item.embedding[1]}, ` +
-		`..., ${item.embedding[length - 2]}, ${item.embedding[length -1]}]`);
+    let length = item.embedding.length;
+    console.log(
+      `data[${item.index}]: length=${length}, ` +
+      `[${item.embedding[0]}, ${item.embedding[1]}, ` +
+      `..., ${item.embedding[length - 2]}, ${item.embedding[length - 1]}]`);
   }
   console.log(response.usage);
 }
diff --git a/samples/python/azure_ai_inference/embeddings.py b/samples/python/azure_ai_inference/embeddings.py
index a0c6f46..3166897 100644
--- a/samples/python/azure_ai_inference/embeddings.py
+++ b/samples/python/azure_ai_inference/embeddings.py
@@ -4,13 +4,13 @@
 from azure.core.credentials import AzureKeyCredential
 
 token = os.environ["GITHUB_TOKEN"]
-endpoint = "https://models.inference.ai.azure.com"
+endpoint = "https://models.github.ai"
 
 # By using the Azure AI Inference SDK, you can easily experiment with different models
 # by modifying the value of `modelName` in the code below. For this code sample
 # you need an embedding model. The following embedding models are
 # available in the GitHub Models service:
-# 
+#
 # Cohere: Cohere-embed-v3-english, Cohere-embed-v3-multilingual
 # Azure OpenAI: text-embedding-3-small, text-embedding-3-large
 model_name = "text-embedding-3-small"
diff --git a/samples/python/azure_ai_inference/getting_started.ipynb b/samples/python/azure_ai_inference/getting_started.ipynb
index 3469ff0..356e565 100644
--- a/samples/python/azure_ai_inference/getting_started.ipynb
+++ b/samples/python/azure_ai_inference/getting_started.ipynb
@@ -58,7 +58,7 @@
     "    raise ValueError(\"GITHUB_TOKEN is not set\")\n",
     "\n",
     "github_token = os.environ[\"GITHUB_TOKEN\"]\n",
-    "endpoint = \"https://models.inference.ai.azure.com\"\n",
+    "endpoint = \"https://models.github.ai\"\n",
     "\n",
     "\n",
     "# Create a client\n",
@@ -117,7 +117,7 @@
     "    # Optional parameters\n",
     "    temperature=1.,\n",
     "    max_tokens=1000,\n",
-    "    top_p=1.    \n",
+    "    top_p=1.\n",
     ")\n",
     "\n",
     "print(response.choices[0].message.content)"
diff --git a/samples/python/mistralai/getting_started.ipynb b/samples/python/mistralai/getting_started.ipynb
index 021c6ce..b0f7863 100644
--- a/samples/python/mistralai/getting_started.ipynb
+++ b/samples/python/mistralai/getting_started.ipynb
@@ -58,7 +58,7 @@
     "    raise ValueError(\"GITHUB_TOKEN is not set\")\n",
     "\n",
     "github_token = os.environ[\"GITHUB_TOKEN\"]\n",
-    "endpoint = \"https://models.inference.ai.azure.com\"\n",
+    "endpoint = \"https://models.github.ai\"\n",
     "\n",
     "# Pick one of the Mistral models from the GitHub Models service\n",
     "model_name = \"Mistral-large\"\n",
@@ -99,7 +99,7 @@
     "    # Optional parameters\n",
     "    temperature=1.,\n",
     "    max_tokens=1000,\n",
-    "    top_p=1.    \n",
+    "    top_p=1.\n",
     ")\n",
     "\n",
     "print(response.choices[0].message.content)"
diff --git a/samples/python/openai/embeddings.py b/samples/python/openai/embeddings.py
index d00db43..05ee717 100644
--- a/samples/python/openai/embeddings.py
+++ b/samples/python/openai/embeddings.py
@@ -2,7 +2,7 @@
 from openai import OpenAI
 
 token = os.environ["GITHUB_TOKEN"]
-endpoint = "https://models.inference.ai.azure.com"
+endpoint = "https://models.github.ai"
 
 # Pick one of the OpenAI embeddings models from the GitHub Models service
 model_name = "text-embedding-3-small"
diff --git a/samples/python/openai/embeddings_getting_started.ipynb b/samples/python/openai/embeddings_getting_started.ipynb
index 6aa103e..66bcced 100644
--- a/samples/python/openai/embeddings_getting_started.ipynb
+++ b/samples/python/openai/embeddings_getting_started.ipynb
@@ -55,7 +55,7 @@
     "    raise ValueError(\"GITHUB_TOKEN is not set\")\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"GITHUB_TOKEN\")\n",
-    "os.environ[\"OPENAI_BASE_URL\"] = \"https://models.inference.ai.azure.com/\"\n",
+    "os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/\"\n",
     "\n",
     "client = OpenAI()\n"
    ]
@@ -77,7 +77,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model_name = \"text-embedding-3-small\" \n",
+    "model_name = \"text-embedding-3-small\"\n",
     "\n",
     "response = client.embeddings.create(\n",
     "    model=model_name,\n",
@@ -105,7 +105,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model_name = \"text-embedding-3-small\" \n",
+    "model_name = \"text-embedding-3-small\"\n",
     "inputs = [\"Hello, world!\", \"How are you?\", \"What's the weather like?\"]\n",
     "\n",
     "response = client.embeddings.create(\n",

From ae28b9e2b30b85b838246801f066582b09e57678 Mon Sep 17 00:00:00 2001
From: Daniel Garman <danielrgarman@gmail.com>
Date: Wed, 16 Jul 2025 18:49:35 +0000
Subject: [PATCH 2/3] fix the sample endpoints to use the /inference path

---
 samples/js/azure_ai_inference/embeddings.js             | 2 +-
 samples/js/openai/embeddings.js                         | 2 +-
 samples/python/azure_ai_inference/embeddings.py         | 2 +-
 samples/python/azure_ai_inference/getting_started.ipynb | 2 +-
 samples/python/mistralai/getting_started.ipynb          | 2 +-
 samples/python/openai/embeddings.py                     | 2 +-
 samples/python/openai/embeddings_getting_started.ipynb  | 2 +-
 samples/python/openai/multi_turn.py                     | 5 ++++-
 8 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/samples/js/azure_ai_inference/embeddings.js b/samples/js/azure_ai_inference/embeddings.js
index caf6815..8199fbe 100644
--- a/samples/js/azure_ai_inference/embeddings.js
+++ b/samples/js/azure_ai_inference/embeddings.js
@@ -3,7 +3,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
 import { AzureKeyCredential } from "@azure/core-auth";
 
 const token = process.env["GITHUB_TOKEN"];
-const endpoint = "https://models.github.ai";
+const endpoint = "https://models.github.ai/inference";
 
 /* By using the Azure AI Inference SDK, you can easily experiment with different models
    by modifying the value of `modelName` in the code below. For this code sample
diff --git a/samples/js/openai/embeddings.js b/samples/js/openai/embeddings.js
index fa7062d..4911cdc 100644
--- a/samples/js/openai/embeddings.js
+++ b/samples/js/openai/embeddings.js
@@ -1,7 +1,7 @@
 import OpenAI from "openai";
 
 const token = process.env["GITHUB_TOKEN"];
-const endpoint = "https://models.github.ai";
+const endpoint = "https://models.github.ai/inference";
 
 /* Pick one of the OpenAI embeddings models from the GitHub Models service */
 const modelName = "text-embedding-3-small";
diff --git a/samples/python/azure_ai_inference/embeddings.py b/samples/python/azure_ai_inference/embeddings.py
index 3166897..e04e7dd 100644
--- a/samples/python/azure_ai_inference/embeddings.py
+++ b/samples/python/azure_ai_inference/embeddings.py
@@ -4,7 +4,7 @@
 from azure.core.credentials import AzureKeyCredential
 
 token = os.environ["GITHUB_TOKEN"]
-endpoint = "https://models.github.ai"
+endpoint = "https://models.github.ai/inference"
 
 # By using the Azure AI Inference SDK, you can easily experiment with different models
 # by modifying the value of `modelName` in the code below. For this code sample
diff --git a/samples/python/azure_ai_inference/getting_started.ipynb b/samples/python/azure_ai_inference/getting_started.ipynb
index 356e565..3bc4c1a 100644
--- a/samples/python/azure_ai_inference/getting_started.ipynb
+++ b/samples/python/azure_ai_inference/getting_started.ipynb
@@ -58,7 +58,7 @@
     "    raise ValueError(\"GITHUB_TOKEN is not set\")\n",
     "\n",
     "github_token = os.environ[\"GITHUB_TOKEN\"]\n",
-    "endpoint = \"https://models.github.ai\"\n",
+    "endpoint = \"https://models.github.ai/inference\"\n",
     "\n",
     "\n",
     "# Create a client\n",
diff --git a/samples/python/mistralai/getting_started.ipynb b/samples/python/mistralai/getting_started.ipynb
index b0f7863..b0fd08a 100644
--- a/samples/python/mistralai/getting_started.ipynb
+++ b/samples/python/mistralai/getting_started.ipynb
@@ -58,7 +58,7 @@
     "    raise ValueError(\"GITHUB_TOKEN is not set\")\n",
     "\n",
     "github_token = os.environ[\"GITHUB_TOKEN\"]\n",
-    "endpoint = \"https://models.github.ai\"\n",
+    "endpoint = \"https://models.github.ai/inference\"\n",
     "\n",
     "# Pick one of the Mistral models from the GitHub Models service\n",
     "model_name = \"Mistral-large\"\n",
diff --git a/samples/python/openai/embeddings.py b/samples/python/openai/embeddings.py
index 05ee717..533aa39 100644
--- a/samples/python/openai/embeddings.py
+++ b/samples/python/openai/embeddings.py
@@ -2,7 +2,7 @@
 from openai import OpenAI
 
 token = os.environ["GITHUB_TOKEN"]
-endpoint = "https://models.github.ai"
+endpoint = "https://models.github.ai/inference"
 
 # Pick one of the OpenAI embeddings models from the GitHub Models service
 model_name = "text-embedding-3-small"
diff --git a/samples/python/openai/embeddings_getting_started.ipynb b/samples/python/openai/embeddings_getting_started.ipynb
index 66bcced..03d04a8 100644
--- a/samples/python/openai/embeddings_getting_started.ipynb
+++ b/samples/python/openai/embeddings_getting_started.ipynb
@@ -55,7 +55,7 @@
     "    raise ValueError(\"GITHUB_TOKEN is not set\")\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"GITHUB_TOKEN\")\n",
-    "os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/\"\n",
+    "os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/inference\"\n",
     "\n",
     "client = OpenAI()\n"
    ]
diff --git a/samples/python/openai/multi_turn.py b/samples/python/openai/multi_turn.py
index edcef4d..46cbeeb 100644
--- a/samples/python/openai/multi_turn.py
+++ b/samples/python/openai/multi_turn.py
@@ -16,6 +16,9 @@
 client = OpenAI(
     base_url=endpoint,
     api_key=token,
+    default_headers={
+        "x-ms-useragent": "github-models-sample",
+    }
 )
 
 # Call the chat completion API
@@ -42,4 +45,4 @@
 )
 
 # Print the response
-print(response.choices[0].message.content)
\ No newline at end of file
+print(response.choices[0].message.content)

From 393fceeed08b2858c7c11a5e8e9cf5224b5efa6c Mon Sep 17 00:00:00 2001
From: Daniel Garman <garman@github.com>
Date: Wed, 16 Jul 2025 15:20:19 -0400
Subject: [PATCH 3/3] Update
 cookbooks/python/llamaindex/rag_getting_started.ipynb

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 cookbooks/python/llamaindex/rag_getting_started.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cookbooks/python/llamaindex/rag_getting_started.ipynb b/cookbooks/python/llamaindex/rag_getting_started.ipynb
index 9d5647f..76bb5b9 100644
--- a/cookbooks/python/llamaindex/rag_getting_started.ipynb
+++ b/cookbooks/python/llamaindex/rag_getting_started.ipynb
@@ -60,7 +60,7 @@
     "    raise ValueError(\"GITHUB_TOKEN is not set\")\n",
     "\n",
     "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"GITHUB_TOKEN\")\n",
-    "os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/\""
+    "os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/inference\""
    ]
   },
   {