feat: add embeddings package (#9)

jpoehnelt · web-flow · commit 2f6afbbcfe8a · 2025-03-28T13:17:30.000-06:00
diff --git a/packages/embeddings/README.md b/packages/embeddings/README.md
@@ -0,0 +1,3 @@
+# Embeddings
+
+This package provides functions for generating embeddings using Vertex AI and calculating similarity between embeddings in Apps Script.
diff --git a/packages/embeddings/package.json b/packages/embeddings/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "@repository/embeddings",
+  "version": "0.1.0",
+  "scripts": {
+    "check": "tsc --noEmit",
+    "test": "vitest"
+  },
+  "author": "Justin Poehnelt <jpoehnelt@google.com>",
+  "license": "Apache-2.0",
+  "devDependencies": {
+    "@types/google-apps-script": "^1.0.97",
+    "vitest": "^3.0.9"
+  },
+  "type": "module",
+  "private": true,
+  "main": "./src/index.ts",
+  "types": "./src/index.ts"
+}
diff --git a/packages/embeddings/src/index.test.ts b/packages/embeddings/src/index.test.ts
@@ -0,0 +1,120 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { getTextEmbeddings, similarity, similarityEmoji } from "./index.js";
+
+// Stub the Apps Script globals that ./index.js reads (ScriptApp,
+// PropertiesService and UrlFetchApp) so the module can run under vitest.
+// Only the members the module actually calls are provided.
+global.ScriptApp = {
+	getOAuthToken: vi.fn().mockReturnValue("mock-token"),
+} as unknown as typeof ScriptApp;
+global.PropertiesService = {
+	getScriptProperties: vi.fn().mockReturnValue({
+		getProperty: vi
+			.fn()
+			.mockImplementation((key) =>
+				key === "PROJECT_ID" ? "mock-project-id" : null, // every other key reads as unset
+			),
+	}),
+} as unknown as typeof PropertiesService;
+
+// Kept as a named mock so individual tests can set its return value and
+// inspect the requests it was called with.
+const fetchAll = vi.fn();
+global.UrlFetchApp = { fetchAll } as unknown as typeof UrlFetchApp;
+
+describe("similarity", () => {
+	it("calculates cosine similarity correctly", () => {
+		// Each row: [first vector, second vector, expected cosine similarity].
+		const cases: [number[], number[], number][] = [
+			[[1, 2, 3], [2, 4, 6], 1.0], // parallel
+			[[1, 0, 0], [0, 1, 0], 0.0], // orthogonal
+			[[1, 2, 3], [-1, -2, -3], -1.0], // opposite
+		];
+
+		for (const [first, second, expected] of cases) {
+			expect(similarity(first, second)).toBeCloseTo(expected);
+		}
+	});
+
+	it("throws an error when vectors have different lengths", () => {
+		const compareMismatched = () => similarity([1, 2, 3, 4], [1, 2, 3]);
+
+		expect(compareMismatched).toThrow("Vectors must have the same length");
+	});
+});
+
+describe("similarityEmoji", () => {
+	it("returns the correct emoji based on similarity value", () => {
+		expect(similarityEmoji(1.0)).toBe("🔥"); // Very high (>=0.9)
+		expect(similarityEmoji(0.8)).toBe("✅"); // High (>=0.7 and <0.9)
+		expect(similarityEmoji(0.6)).toBe("👍"); // Medium (>=0.5 and <0.7)
+		expect(similarityEmoji(0.4)).toBe("🤔"); // Low (>=0.3 and <0.5)
+		expect(similarityEmoji(0.2)).toBe("❌"); // Very low (<0.3)
+	});
+
+	// The tiers are closed at their lower bound; pin that so a `>` / `>=`
+	// slip in the implementation is caught.
+	it("treats each threshold as inclusive", () => {
+		expect(similarityEmoji(0.9)).toBe("🔥");
+		expect(similarityEmoji(0.7)).toBe("✅");
+		expect(similarityEmoji(0.5)).toBe("👍");
+		expect(similarityEmoji(0.3)).toBe("🤔");
+	});
+
+	// Cosine similarity can be negative; those values belong to the lowest tier.
+	it("returns the lowest tier for zero and negative values", () => {
+		expect(similarityEmoji(0)).toBe("❌");
+		expect(similarityEmoji(-1)).toBe("❌");
+	});
+});
+
+describe("getTextEmbeddings", () => {
+	// Stand-in for an Apps Script HTTPResponse exposing the two methods the
+	// module calls on each response.
+	const mockResponse = (code: number, body: string) => ({
+		getResponseCode: vi.fn().mockReturnValue(code),
+		getContentText: vi.fn().mockReturnValue(body),
+	});
+
+	// A 200 response carrying one prediction per embedding passed in.
+	const okResponse = (...embeddings: number[][]) =>
+		mockResponse(
+			200,
+			JSON.stringify({
+				predictions: embeddings.map((values) => ({ embeddings: { values } })),
+			}),
+		);
+
+	beforeEach(() => {
+		vi.clearAllMocks();
+		fetchAll.mockReturnValue([okResponse([0.1, 0.2, 0.3])]);
+	});
+
+	it("handles single string input", () => {
+		const result = getTextEmbeddings("test text");
+
+		expect(fetchAll).toHaveBeenCalledTimes(1);
+		const requests = fetchAll.mock.calls[0][0];
+		expect(requests).toHaveLength(1);
+
+		const payload = JSON.parse(requests[0].payload);
+		expect(payload.instances[0].content).toBe("test text");
+
+		expect(result).toEqual([[0.1, 0.2, 0.3]]);
+	});
+
+	it("handles array of strings input", () => {
+		// Two inputs fit in one chunk, so the module sends a single request and
+		// reads both predictions from a single response.
+		fetchAll.mockReturnValue([okResponse([0.1, 0.2, 0.3], [0.4, 0.5, 0.6])]);
+
+		const result = getTextEmbeddings(["text1", "text2"]);
+
+		const requests = fetchAll.mock.calls[0][0];
+		expect(requests).toHaveLength(1);
+		expect(JSON.parse(requests[0].payload).instances).toEqual([
+			{ content: "text1" },
+			{ content: "text2" },
+		]);
+		expect(result).toEqual([
+			[0.1, 0.2, 0.3],
+			[0.4, 0.5, 0.6],
+		]);
+	});
+
+	it("splits more than five inputs into multiple requests", () => {
+		fetchAll.mockReturnValue([
+			okResponse([1], [2], [3], [4], [5]),
+			okResponse([6]),
+		]);
+
+		const result = getTextEmbeddings(["a", "b", "c", "d", "e", "f"]);
+
+		expect(fetchAll).toHaveBeenCalledTimes(1);
+		const requests = fetchAll.mock.calls[0][0];
+		expect(requests).toHaveLength(2);
+		expect(JSON.parse(requests[0].payload).instances).toHaveLength(5);
+		expect(JSON.parse(requests[1].payload).instances).toEqual([
+			{ content: "f" },
+		]);
+		// Embeddings come back in input order across chunks.
+		expect(result).toEqual([[1], [2], [3], [4], [5], [6]]);
+	});
+
+	it("throws the response body when a request fails", () => {
+		fetchAll.mockReturnValue([mockResponse(500, "boom")]);
+
+		expect(() => getTextEmbeddings("test")).toThrow("boom");
+	});
+
+	it("uses custom parameters", () => {
+		getTextEmbeddings("test", {
+			model: "custom-model",
+			parameters: {},
+			projectId: "custom-project",
+			region: "custom-region",
+			token: "custom-token",
+		});
+
+		const requests = fetchAll.mock.calls[0][0];
+		expect(requests[0].url).toContain("custom-region");
+		expect(requests[0].url).toContain("custom-model");
+		expect(requests[0].url).toContain("custom-project");
+		expect(requests[0].headers.Authorization).toBe("Bearer custom-token");
+	});
+});
diff --git a/packages/embeddings/src/index.ts b/packages/embeddings/src/index.ts
@@ -0,0 +1,199 @@
+// Model used when `Options.model` is not supplied.
+const MODEL_ID = "text-embedding-005";
+// Region used when `Options.region` is not supplied.
+const REGION = "us-central1";
+
+/**
+ * Model parameters, sent unchanged as the `parameters` field of every
+ * predict request body.
+ *
+ * Note: within this module the name shadows TypeScript's built-in
+ * `Parameters<F>` utility type.
+ *
+ * NOTE(review): the meaning of each field is defined by the Vertex AI text
+ * embeddings API, not by this file — confirm against the API reference.
+ */
+interface Parameters {
+	autoTruncate?: boolean;
+	outputDimensionality?: number;
+}
+
+/**
+ * One element of the `instances` array in a predict request body.
+ * `getTextEmbeddings` builds these with only `content` set.
+ *
+ * NOTE(review): `task_type` and `title` are passed through as-is; their
+ * accepted values and effect are defined by the Vertex AI API — confirm there.
+ */
+interface Instance {
+	task_type?:
+		| "RETRIEVAL_DOCUMENT"
+		| "RETRIEVAL_QUERY"
+		| "SEMANTIC_SIMILARITY"
+		| "CLASSIFICATION"
+		| "CLUSTERING"
+		| "QUESTION_ANSWERING"
+		| "FACT_VERIFICATION"
+		| "CODE_RETRIEVAL_QUERY";
+	title?: string;
+	/** The text to embed. */
+	content: string;
+}
+
+/**
+ * Options for generating embeddings.
+ *
+ * Every field is optional; a missing field falls back to the default noted
+ * on it, applied where `getBatchedEmbeddings` destructures its options.
+ */
+interface Options {
+	/**
+	 * The project ID that the model is in. When omitted it is read from the
+	 * `PROJECT_ID` script property, and an error is thrown if that property
+	 * is not set.
+	 * @default `PropertiesService.getScriptProperties().getProperty("PROJECT_ID")`
+	 */
+	projectId?: string;
+
+	/**
+	 * The ID of the model to use.
+	 * @default 'text-embedding-005'
+	 */
+	model?: string;
+
+	/**
+	 * Additional parameters to pass to the model.
+	 * @default {}
+	 */
+	parameters?: Parameters;
+
+	/**
+	 * The region that the model is in. Used for both the API hostname and
+	 * the `locations/` segment of the request URL.
+	 * @default 'us-central1'
+	 */
+	region?: string;
+
+	/**
+	 * The OAuth token to use to authenticate the request, sent as a
+	 * `Bearer` token in the `Authorization` header.
+	 * @default `ScriptApp.getOAuthToken()`
+	 */
+	token?: string;
+}
+
+/**
+ * Looks up the `PROJECT_ID` script property.
+ *
+ * @returns The stored project ID.
+ * @throws Error if the property is missing or empty.
+ */
+const getProjectId = (): string => {
+	const scriptProperties = PropertiesService.getScriptProperties();
+	const storedId = scriptProperties.getProperty("PROJECT_ID");
+	if (storedId) {
+		return storedId;
+	}
+
+	throw new Error("PROJECT_ID not found in script properties");
+};
+
+/**
+ * Generate embeddings for one or more pieces of text.
+ *
+ * A single string is treated as a one-element list. Each text becomes an
+ * instance with only `content` set and is handed to `getBatchedEmbeddings`.
+ *
+ * @param contentOrContentArray - A single text, or an array of texts.
+ * @param options - Options for the embeddings generation.
+ * @returns One embedding vector per input text, in input order.
+ *
+ * @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api
+ */
+export function getTextEmbeddings(
+	contentOrContentArray: string | string[],
+	options: Options = {},
+): number[][] {
+	let texts: string[];
+	if (Array.isArray(contentOrContentArray)) {
+		texts = contentOrContentArray;
+	} else {
+		texts = [contentOrContentArray];
+	}
+
+	const instances = texts.map((text): Instance => ({ content: text }));
+	return getBatchedEmbeddings(instances, options);
+}
+
+/**
+ * Generate embeddings for the given instances in parallel UrlFetchApp requests.
+ *
+ * Instances are split into chunks of five; each chunk becomes one `:predict`
+ * request and all requests are sent through a single `UrlFetchApp.fetchAll`
+ * call. Embeddings are returned in the order of `instances`.
+ *
+ * @param instances - The instances to generate embeddings for.
+ * @param options - Options for the embeddings generation.
+ * @returns The generated embeddings; an empty array when `instances` is empty.
+ * @throws Error carrying the response body when a response is not HTTP 200,
+ *   or describing the response when it lacks the expected
+ *   `predictions[].embeddings.values` shape.
+ *
+ * @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api
+ */
+export function getBatchedEmbeddings(
+	instances: Instance[],
+	{
+		parameters = {},
+		model = MODEL_ID,
+		projectId = getProjectId(),
+		region = REGION,
+		token = ScriptApp.getOAuthToken(),
+	}: Options = {},
+): number[][] {
+	// Nothing to embed: skip the network round trip entirely.
+	if (instances.length === 0) {
+		return [];
+	}
+
+	// Every chunk is posted to the same endpoint.
+	const url = `https://${region}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${region}/publishers/google/models/${model}:predict`;
+	const requests = chunkArray(instances, 5).map((chunk) => ({
+		url,
+		method: "post" as const,
+		headers: {
+			Authorization: `Bearer ${token}`,
+			"Content-Type": "application/json",
+		},
+		// Failed requests come back as responses so the status check below can
+		// surface the server's error body.
+		muteHttpExceptions: true,
+		contentType: "application/json",
+		payload: JSON.stringify({
+			instances: chunk,
+			parameters,
+		}),
+	}));
+
+	const responses = UrlFetchApp.fetchAll(requests);
+
+	return responses.flatMap((response) => {
+		const status = response.getResponseCode();
+		const text = response.getContentText();
+		if (status !== 200) {
+			// The body is the message, as before; fall back to the status code
+			// so an empty body does not produce an empty error.
+			throw new Error(text || `Request failed with HTTP ${status}`);
+		}
+
+		const body: unknown = JSON.parse(text);
+		const predictions = (body as { predictions?: unknown } | null)
+			?.predictions;
+		if (!Array.isArray(predictions)) {
+			throw new Error(`Unexpected embeddings response: ${text}`);
+		}
+
+		return predictions.map((prediction: unknown): number[] => {
+			const values = (
+				prediction as { embeddings?: { values?: unknown } } | null
+			)?.embeddings?.values;
+			if (!Array.isArray(values)) {
+				throw new Error(`Unexpected embeddings response: ${text}`);
+			}
+			return values as number[];
+		});
+	});
+}
+
+/**
+ * Computes the dot product of two vectors.
+ *
+ * Only the overlapping prefix is used: if the vectors differ in length, the
+ * extra components of the longer one are ignored.
+ *
+ * @param x - The first vector.
+ * @param y - The second vector.
+ * @returns The sum of the pairwise products.
+ */
+function dotProduct_(x: number[], y: number[]): number {
+	const sharedLength = Math.min(x.length, y.length);
+	let sum = 0;
+	let index = 0;
+	while (index < sharedLength) {
+		sum += x[index] * y[index];
+		index += 1;
+	}
+	return sum;
+}
+
+/**
+ * Computes the Euclidean length of a vector.
+ *
+ * @param x - The vector.
+ * @returns The square root of the sum of squared components (0 for an empty vector).
+ */
+function magnitude(x: number[]): number {
+	let sumOfSquares = 0;
+	for (const component of x) {
+		sumOfSquares += component ** 2;
+	}
+	return Math.sqrt(sumOfSquares);
+}
+
+/**
+ * Calculates the cosine similarity between two vectors.
+ *
+ * @param x - The first vector.
+ * @param y - The second vector.
+ * @returns The cosine similarity value between -1 and 1. If either vector has
+ *   zero magnitude (including two empty vectors) the result is 0, because the
+ *   angle between them is undefined.
+ * @throws Error if the vectors have different lengths.
+ */
+export function similarity(x: number[], y: number[]): number {
+	if (x.length !== y.length) {
+		throw new Error("Vectors must have the same length");
+	}
+
+	const denominator = magnitude(x) * magnitude(y);
+	// A zero-magnitude vector has no direction; dividing would yield NaN.
+	if (denominator === 0) {
+		return 0;
+	}
+
+	// Rounding can push the quotient just past ±1 (e.g. 1.0000000000000002);
+	// clamp so the documented range holds.
+	return Math.max(-1, Math.min(1, dotProduct_(x, y) / denominator));
+}
+
+/**
+ * Maps a similarity value to an emoji tier.
+ *
+ * Tiers are checked from highest to lowest and each lower bound is
+ * inclusive; anything below the last bound gets the lowest tier.
+ *
+ * @param value - The similarity value.
+ * @returns The emoji for the first tier whose lower bound `value` reaches.
+ */
+export const similarityEmoji = (value: number): string => {
+	// [inclusive lower bound, emoji], ordered from highest tier to lowest.
+	const tiers: [number, string][] = [
+		[0.9, "🔥"], // Very high similarity
+		[0.7, "✅"], // High similarity
+		[0.5, "👍"], // Medium similarity
+		[0.3, "🤔"], // Low similarity
+	];
+
+	const match = tiers.find(([lowerBound]) => value >= lowerBound);
+	return match ? match[1] : "❌"; // Very low similarity
+};
+
+/**
+ * Splits an array into consecutive chunks of at most `size` elements.
+ *
+ * @param array - The array to split; it is not modified.
+ * @param size - Maximum chunk length; must be a positive integer.
+ * @returns The chunks in order. The last chunk may be shorter; an empty
+ *   input yields an empty list.
+ * @throws RangeError if `size` is not a positive integer.
+ */
+function chunkArray<T>(array: T[], size: number): T[][] {
+	// A zero or negative step would never reach the end of the array, and a
+	// fractional or NaN step would produce malformed chunks.
+	if (!Number.isInteger(size) || size <= 0) {
+		throw new RangeError("size must be a positive integer");
+	}
+
+	const chunks: T[][] = [];
+	for (let start = 0; start < array.length; start += size) {
+		chunks.push(array.slice(start, start + size));
+	}
+	return chunks;
+}
diff --git a/packages/embeddings/tsconfig.json b/packages/embeddings/tsconfig.json
@@ -0,0 +1,14 @@
+{
+	"compilerOptions": {
+		"module": "NodeNext",
+		"target": "ES2022",
+		"lib": ["esnext"],
+		"strict": true,
+		"esModuleInterop": true,
+		"skipLibCheck": true,
+		"types": ["@types/google-apps-script"],
+		"experimentalDecorators": true
+	},
+	"include": ["src/**/*.ts"],
+	"exclude": ["node_modules", "dist"]
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Embeddings`
	`2`	`+`
	`3`	`+This package provides functions for generating embeddings using Vertex AI and calculating similarity between embeddings in Apps Script.`