diff --git a/README.md b/README.md index 95a814f..f4a7fb3 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ After the test completes, you can run `npx playwright show-report` to see a deta - **Core Execution** — `runSteps()` and `runUserFlow()` for flexible test orchestration in natural language, with smart caching and auto-healing - **Multi-Model Assertion Engine** — Consensus-based validation using Claude and Gemini, with an arbiter model to resolve disagreements -- **Redis-Based Step Caching** — Cache-first execution with AI fallback and automatic self-healing when cached steps fail +- **Pluggable Step Caching** — Cache-first execution with AI fallback and automatic self-healing. Supports Redis, file-based, or custom cache backends. - **Configurable AI Models** — 8 dedicated model slots for step execution, assertions, extraction, and more - **AI Gateway Support** — Route requests through Vercel AI Gateway, OpenRouter, Cloudflare AI Gateway, or connect directly to provider SDKs - **Dynamic Placeholders** — Inject values at runtime with `{{run.*}}`, `{{global.*}}`, `{{data.*}}`, and `{{email.*}}` expressions for repeatable and data-driven tests @@ -184,6 +184,8 @@ configure({ | Variable | Required | Default | Description | |----------|----------|---------|-------------| +| `CACHE_PROVIDER` | No | - | Cache backend: `redis`, `file`, or `none`. Falls back to Redis if `REDIS_URL` is set. | +| `CACHE_DIR` | No | `.passmark-cache` | Directory for file-based cache (when `CACHE_PROVIDER=file`) | | `REDIS_URL` | No | - | Redis connection URL for step caching and global state | | `ANTHROPIC_API_KEY` | Yes | - | Anthropic API key for Claude models | | `GOOGLE_GENERATIVE_AI_API_KEY` | Yes | - | Google API key for Gemini models | @@ -212,7 +214,35 @@ All models are configurable via `configure({ ai: { models: { ... } } })`: ## Caching -Passmark caches successful step actions in Redis. 
On subsequent runs, cached steps execute directly without AI calls, dramatically reducing latency and cost. +Passmark caches successful step actions so that subsequent runs execute directly without AI calls, dramatically reducing latency and cost. The cache backend is pluggable — choose between Redis, file-based, or no caching at all. + +### Cache Providers + +Set the `CACHE_PROVIDER` environment variable to select a backend: + +| Provider | `CACHE_PROVIDER` | Additional Config | Description | +|----------|-------------------|-------------------|-------------| +| **Redis** | `redis` | `REDIS_URL` | Uses Redis via ioredis. Default when `REDIS_URL` is set. | +| **File** | `file` | `CACHE_DIR` (optional, defaults to `.passmark-cache`) | JSON files on disk. No external dependencies — great for local development and CI. | +| **None** | `none` | — | Disables caching entirely. Every step uses AI execution. | + +For backwards compatibility, if `CACHE_PROVIDER` is not set, Passmark will use Redis when `REDIS_URL` is present, otherwise caching is disabled. + +### Custom Cache Store + +You can implement a custom cache backend by conforming to the `CacheStore` interface: + +```typescript +import { CacheStore } from "passmark"; + +interface CacheStore { + hgetall(key: string): Promise<Record<string, string>>; + hset(key: string, values: Record<string, string>): Promise<void>; + expire(key: string, seconds: number): Promise<void>; +} +``` + +### Caching Behavior - Steps are cached by `userFlow` + `step.description` - Set `bypassCache: true` on individual steps or the entire run to force AI execution diff --git a/dist/assertion.d.ts b/dist/assertion.d.ts new file mode 100644 index 0000000..9fcf4ad --- /dev/null +++ b/dist/assertion.d.ts @@ -0,0 +1,29 @@ +import { AssertionOptions } from "./types"; +/** + * Multi-model consensus assertion engine. + * Runs Claude and Gemini in parallel; if they disagree, a third model (arbiter) makes the final call. + * An assertion passes only if both models agree (or the arbiter decides). 
+ * Automatically retries failed assertions once with a fresh page snapshot. + * + * @param options - Assertion configuration + * @param options.page - The Playwright page instance to take snapshots from + * @param options.assertion - Natural language assertion to validate (e.g. "The cart shows 3 items") + * @param options.expect - Playwright expect function, used to fail the test on assertion failure + * @param options.effort - "low" (default) or "high" — high enables thinking mode for deeper analysis + * @param options.images - Optional base64 screenshot images to provide to the models + * @param options.failSilently - When true, returns the result without failing the test + * @param options.test - Playwright test instance for attaching metadata + * @returns A string summary of the assertion result + * @throws Fails the Playwright test via expect when assertion fails (unless failSilently is true) + * + * @example + * ```typescript + * await assert({ + * page, + * assertion: "The dashboard shows 3 active projects", + * expect, + * effort: "high", + * }); + * ``` + */ +export declare const assert: ({ page, assertion, test, expect, effort, images, failSilently, maxRetries, onRetry, }: AssertionOptions) => Promise; diff --git a/dist/assertion.js b/dist/assertion.js new file mode 100644 index 0000000..8c7c1e9 --- /dev/null +++ b/dist/assertion.js @@ -0,0 +1,290 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.assert = void 0; +const ai_1 = require("ai"); +const zod_1 = require("zod"); +const config_1 = require("./config"); +const constants_1 = require("./constants"); +const logger_1 = require("./logger"); +const models_1 = require("./models"); +const utils_1 = require("./utils"); +const assertionSchema = zod_1.z.object({ + assertionPassed: zod_1.z.boolean().describe("Indicates whether the assertion passed or not."), + confidenceScore: zod_1.z + .number() + .describe("Confidence score of the assertion, between 0 and 100."), + 
reasoning: zod_1.z + .string() + .describe("Brief explanation of the reasoning behind your decision - explain why the assertion passed or failed."), +}); +/** + * Multi-model consensus assertion engine. + * Runs Claude and Gemini in parallel; if they disagree, a third model (arbiter) makes the final call. + * An assertion passes only if both models agree (or the arbiter decides). + * Automatically retries failed assertions once with a fresh page snapshot. + * + * @param options - Assertion configuration + * @param options.page - The Playwright page instance to take snapshots from + * @param options.assertion - Natural language assertion to validate (e.g. "The cart shows 3 items") + * @param options.expect - Playwright expect function, used to fail the test on assertion failure + * @param options.effort - "low" (default) or "high" — high enables thinking mode for deeper analysis + * @param options.images - Optional base64 screenshot images to provide to the models + * @param options.failSilently - When true, returns the result without failing the test + * @param options.test - Playwright test instance for attaching metadata + * @returns A string summary of the assertion result + * @throws Fails the Playwright test via expect when assertion fails (unless failSilently is true) + * + * @example + * ```typescript + * await assert({ + * page, + * assertion: "The dashboard shows 3 active projects", + * expect, + * effort: "high", + * }); + * ``` + */ +const assert = async ({ page, assertion, test, expect, effort = "low", images, failSilently, maxRetries = 1, onRetry = (retryCount, previousResult) => { }, }) => { + const thinkingEnabled = effort === "high"; + const runFullAssertion = async () => { + const snapshot = await (0, utils_1.safeSnapshot)(page); + const imageContent = images + ? 
images.map((image) => ({ type: "image", image })) + : [ + { + type: "image", + image: (await (0, utils_1.resolvePage)(page).screenshot({ fullPage: false })).toString("base64"), + }, + ]; + const basePrompt = ` +You are an AI-powered QA Agent designed to test web applications. + +You have access to the following information. Based on this information, you'll tell us whether the assertion provided below should pass or not. +${!images + ? ` +- An accessibility snapshot of the current page, which provides a detailed structure of the DOM +- A screenshot of the current page` + : "- Screenshots from various stages of the user flow"} + +${!images + ? ` + +${snapshot} + +` + : ""} + + +${assertion} + + + +- First use the attached screenshot(s) to visually inspect the page and try to verify the assertion. +- Only if the screenshot is not sufficient, use the accessibility snapshot (if supplied) to verify the assertion. +- Don't create additional assertion conditions on your own - only consider the exact assertion provided above. +- The assertion should pass if either the screenshot or the accessibility snapshot supports it. +- Don't be overly strict or pedantic about exact wording. Focus on the intent and objective of the assertion rather than literal text matching. +- Think like a practical QA tester - if the core functionality or state being asserted is present, the assertion should pass even if minor details differ. + + + + The output should contain the following information: + - \`assertionPassed\`: A boolean indicating whether the assertion passed or not. + - \`confidenceScore\`: A number between 0 and 100 indicating the confidence score of the assertion. + - \`reasoning\`: A brief string explaining the reasoning behind the assertion. + + +Never hallucinate. Be truthful and if you are not sure, use a low confidence score. 
+`; + const messages = [ + { + role: "user", + content: [ + { + type: "text", + text: basePrompt, + }, + ...imageContent, + ], + }, + ]; + // Claude assertion function + const getClaudeAssertion = async () => { + // First get Claude's text response with thinking if enabled + const { text } = await (0, ai_1.generateText)({ + model: (0, models_1.resolveModel)((0, config_1.getModelId)("assertionPrimary")), + temperature: 0, + providerOptions: thinkingEnabled + ? { + anthropic: { + thinking: { type: "enabled", budgetTokens: constants_1.THINKING_BUDGET_DEFAULT }, + }, + openrouter: { + reasoning: { max_tokens: constants_1.THINKING_BUDGET_DEFAULT }, + }, + } + : undefined, + messages, + }); + // Convert Claude's response to structured format using Haiku + const { output } = await (0, ai_1.generateText)({ + model: (0, models_1.resolveModel)((0, config_1.getModelId)("assertionPrimary")), + temperature: 0.1, + prompt: `Convert the following text output into a valid JSON object with the specified properties:\n\n${text}`, + output: ai_1.Output.object({ schema: assertionSchema }), + }); + return output; + }; + // Gemini assertion function + const getGeminiAssertion = async () => { + const { output } = await (0, ai_1.generateText)({ + model: (0, models_1.resolveModel)((0, config_1.getModelId)("assertionSecondary")), + temperature: 0, + providerOptions: thinkingEnabled + ? { + google: { + thinkingConfig: { + thinkingBudget: constants_1.THINKING_BUDGET_DEFAULT, + }, + }, + openrouter: { + reasoning: { max_tokens: constants_1.THINKING_BUDGET_DEFAULT }, + }, + } + : undefined, + messages, + output: ai_1.Output.object({ schema: assertionSchema }), + }); + return output; + }; + // Arbiter function using Gemini 2.5 Pro with thinking enabled + const getArbiterDecision = async (claudeResult, geminiResult) => { + const arbiterPrompt = ` +You are an AI arbiter tasked with resolving a disagreement between two AI models about an assertion. 
+ +Claude's Assessment: +- Assertion Passed: ${claudeResult.assertionPassed} +- Confidence: ${claudeResult.confidenceScore}% +- Reasoning: ${claudeResult.reasoning} + +Gemini's Assessment: +- Assertion Passed: ${geminiResult.assertionPassed} +- Confidence: ${geminiResult.confidenceScore}% +- Reasoning: ${geminiResult.reasoning} + +${!images + ? ` + +${snapshot} + +` + : ""} + + +${assertion} + + +Please carefully review the evidence (screenshot and accessibility snapshot (when provided)) and make the final determination. Consider both models' reasoning but make your own independent assessment. + + +- Make your own independent evaluation based on the evidence +- Don't simply pick one model's answer - analyze the situation yourself +- Provide clear reasoning for your decision +- Be decisive - this is the final answer +- First use the attached screenshot(s) to visually inspect the page and try to verify the assertion. +- Only if the screenshot is not sufficient, use the accessibility snapshot (if supplied) to verify the assertion. +- Don't create additional assertion conditions on your own - only consider the exact assertion provided above. +- The assertion should pass if either the screenshot or the accessibility snapshot supports it. +- Don't be overly strict or pedantic about exact wording. Focus on the intent and objective of the assertion rather than literal text matching. +- Think like a practical QA tester - if the core functionality or state being asserted is present, the assertion should pass even if minor details differ. 
+ +`; + const arbiterMessages = [ + { + role: "user", + content: [ + { + type: "text", + text: arbiterPrompt, + }, + ...imageContent, + ], + }, + ]; + const { output } = await (0, ai_1.generateText)({ + model: (0, models_1.resolveModel)((0, config_1.getModelId)("assertionArbiter")), + temperature: 0, + providerOptions: { + google: { + thinkingConfig: { + thinkingBudget: constants_1.THINKING_BUDGET_DEFAULT, + }, + }, + openrouter: { + reasoning: { max_tokens: constants_1.THINKING_BUDGET_DEFAULT }, + }, + }, + messages: arbiterMessages, + output: ai_1.Output.object({ schema: assertionSchema }), + }); + return output; + }; + const runAssertion = async (attempt = 0) => { + try { + // Run both models in parallel for speed optimization + const [claudeResult, geminiResult] = await Promise.all([ + (0, utils_1.withTimeout)(getClaudeAssertion(), constants_1.ASSERTION_MODEL_TIMEOUT), + (0, utils_1.withTimeout)(getGeminiAssertion(), constants_1.ASSERTION_MODEL_TIMEOUT), + ]); + // Check if models disagree on assertionPassed + if (claudeResult.assertionPassed !== geminiResult.assertionPassed) { + logger_1.logger.debug("Models disagree on assertion result, consulting arbiter..."); + const arbiterResult = await (0, utils_1.withTimeout)(getArbiterDecision(claudeResult, geminiResult), constants_1.ASSERTION_MODEL_TIMEOUT); + return { + assertionPassed: arbiterResult.assertionPassed, + confidenceScore: arbiterResult.confidenceScore, + reasoning: arbiterResult.reasoning, + }; + } + // Assertion passes only if both models agree it should pass + const assertionPassed = claudeResult.assertionPassed && geminiResult.assertionPassed; + // Calculate average confidence score + const confidenceScore = (claudeResult.confidenceScore + geminiResult.confidenceScore) / 2; + // For now take Gemini's reasoning for simplicity + const reasoning = geminiResult.reasoning; + return { + assertionPassed, + confidenceScore: Math.round(confidenceScore), + reasoning, + }; + } + catch (error) { + if (attempt < 
1) { + logger_1.logger.debug("Retrying assertion due to error..."); + return await runAssertion(attempt + 1); + } + logger_1.logger.error({ err: error }, "Error running assertions after multiple retries"); + throw error; + } + }; + return await runAssertion(); + }; + // Run assertion with retry on failure + let result = await runFullAssertion(); + for (let retry = 0; retry < maxRetries && !result.assertionPassed; retry++) { + logger_1.logger.debug("Assertion failed, retrying with fresh snapshot and screenshot..."); + onRetry(retry, result); + result = await runFullAssertion(); + } + const { assertionPassed, reasoning } = result; + test?.info().annotations.push({ + type: "AI Summary", + description: reasoning, + }); + const expectStatus = assertionPassed ? "✅ passed" : "❌ failed"; + if (!failSilently) { + expect(assertionPassed, reasoning).toBe(true); + } + return `${reasoning}\n\n[Assertion ${expectStatus}]`; +}; +exports.assert = assert; diff --git a/dist/cache.d.ts b/dist/cache.d.ts new file mode 100644 index 0000000..4c43ccc --- /dev/null +++ b/dist/cache.d.ts @@ -0,0 +1,10 @@ +/** + * Interface for a hash-based cache store. + * Implementations must support hash get/set and key expiration. + */ +export interface CacheStore { + hgetall(key: string): Promise>; + hset(key: string, values: Record): Promise; + expire(key: string, seconds: number): Promise; +} +export declare const cache: CacheStore | null; diff --git a/dist/cache.js b/dist/cache.js new file mode 100644 index 0000000..79767fa --- /dev/null +++ b/dist/cache.js @@ -0,0 +1,156 @@ +"use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? 
!m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || (function () { + var ownKeys = function(o) { + ownKeys = Object.getOwnPropertyNames || function (o) { + var ar = []; + for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; + return ar; + }; + return ownKeys(o); + }; + return function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); + __setModuleDefault(result, mod); + return result; + }; +})(); +Object.defineProperty(exports, "__esModule", { value: true }); +exports.cache = void 0; +const logger_1 = require("./logger"); +// ============================================================================= +// Redis Store +// ============================================================================= +class RedisStore { + client; + constructor(url) { + // eslint-disable-next-line @typescript-eslint/no-require-imports + const Redis = require("ioredis"); + this.client = new Redis(url); + } + async hgetall(key) { + return this.client.hgetall(key); + } + async hset(key, values) { + await this.client.hset(key, values); + } + async expire(key, seconds) { + await this.client.expire(key, seconds); + } +} +// ============================================================================= +// File Store +// ============================================================================= +const fs = __importStar(require("fs")); +const path = 
__importStar(require("path")); +class FileStore { + dir; + constructor(dir) { + this.dir = dir; + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + } + filePath(key) { + // Encode key to a safe filename + const safeKey = encodeURIComponent(key); + return path.join(this.dir, `${safeKey}.json`); + } + read(key) { + const fp = this.filePath(key); + if (!fs.existsSync(fp)) + return null; + try { + const raw = JSON.parse(fs.readFileSync(fp, "utf-8")); + // Check expiration + if (raw.expiresAt && Date.now() > raw.expiresAt) { + fs.unlinkSync(fp); + return null; + } + return raw; + } + catch { + return null; + } + } + write(key, entry) { + const fp = this.filePath(key); + fs.writeFileSync(fp, JSON.stringify(entry), "utf-8"); + } + async hgetall(key) { + const entry = this.read(key); + return entry?.data ?? {}; + } + async hset(key, values) { + const existing = this.read(key); + const merged = { ...(existing?.data ?? {}), ...values }; + this.write(key, { data: merged, expiresAt: existing?.expiresAt }); + } + async expire(key, seconds) { + const existing = this.read(key); + if (!existing) + return; + this.write(key, { ...existing, expiresAt: Date.now() + seconds * 1000 }); + } +} +// ============================================================================= +// Factory +// ============================================================================= +/** + * Creates the cache store based on environment variables. 
+ * + * CACHE_PROVIDER selects the backend: + * - "redis" (default when REDIS_URL is set): uses Redis via ioredis + * - "file": uses JSON files on disk at CACHE_DIR (defaults to .passmark-cache) + * - "none": disables caching entirely + * + * For backwards compatibility, if CACHE_PROVIDER is not set: + * - If REDIS_URL is set → uses Redis + * - Otherwise → caching is disabled (null) + */ +function createCacheStore() { + const provider = process.env.CACHE_PROVIDER?.toLowerCase(); + if (provider === "none") { + logger_1.logger.warn("Cache provider set to 'none'. Caching is disabled."); + return null; + } + if (provider === "file") { + const dir = process.env.CACHE_DIR || ".passmark-cache"; + logger_1.logger.info(`Using file-based cache at: ${dir}`); + return new FileStore(dir); + } + if (provider === "redis" || (!provider && process.env.REDIS_URL)) { + if (!process.env.REDIS_URL) { + logger_1.logger.warn("CACHE_PROVIDER is 'redis' but REDIS_URL is not set. Caching is disabled."); + return null; + } + logger_1.logger.info("Using Redis cache."); + return new RedisStore(process.env.REDIS_URL); + } + if (provider) { + logger_1.logger.warn(`Unknown CACHE_PROVIDER '${provider}'. Caching is disabled.`); + return null; + } + // No CACHE_PROVIDER and no REDIS_URL + logger_1.logger.warn("No cache provider configured. Set CACHE_PROVIDER=redis|file|none or REDIS_URL. " + + "Step caching, global placeholders, and project data are disabled."); + return null; +} +exports.cache = createCacheStore(); diff --git a/dist/config.d.ts b/dist/config.d.ts new file mode 100644 index 0000000..033c829 --- /dev/null +++ b/dist/config.d.ts @@ -0,0 +1,69 @@ +export type EmailProvider = { + /** Domain for generating test emails (e.g. "emailsink.dev") */ + domain: string; + /** + * Function to extract content from an email. + * Called with the email address and a prompt describing what to extract. + * Should return the extracted string value. 
+ */ + extractContent: (params: { + email: string; + prompt: string; + }) => Promise; +}; +export type AIGateway = "vercel" | "openrouter" | "cloudflare" | "none"; +export type ModelConfig = { + /** Model for executing individual steps. Default: google/gemini-3-flash */ + stepExecution?: string; + /** Model for running user flows (low effort). Default: google/gemini-3-flash */ + userFlowLow?: string; + /** Model for running user flows (high effort). Default: google/gemini-3.1-pro-preview */ + userFlowHigh?: string; + /** Model for assertions (primary). Default: anthropic/claude-haiku-4.5 */ + assertionPrimary?: string; + /** Model for assertions (secondary). Default: google/gemini-3-flash */ + assertionSecondary?: string; + /** Model for assertion arbiter. Default: google/gemini-3.1-pro-preview */ + assertionArbiter?: string; + /** Model for data extraction, wait conditions, and lightweight tasks. Default: google/gemini-2.5-flash */ + utility?: string; +}; +export declare const DEFAULT_MODELS: Required; +type Config = { + email?: EmailProvider; + ai?: { + gateway?: AIGateway; + models?: ModelConfig; + }; + /** Base path for file uploads. Default: "./uploads" */ + uploadBasePath?: string; +}; +/** + * Sets global configuration for Passmark. Call once before using any functions. + * Subsequent calls shallow-merge top-level fields into the existing config: omitted top-level fields are kept, but a nested object such as `ai` replaces the previous one as a whole. + * + * @param config - Configuration options for AI gateway, models, email, and uploads + * + * @example + * ```typescript + * configure({ + * ai: { gateway: "none", models: { stepExecution: "google/gemini-3-flash" } }, + * email: { domain: "test.com", extractContent: async ({ email, prompt }) => "..." }, + * }); + * ``` + */ +export declare function configure(config: Config): void; +/** + * Returns the current global configuration. + */ +export declare function getConfig(): Config; +/** + * Returns the configured model ID for a given use case, falling back to the default. 
+ * + * @param key - The model use case key (e.g. "stepExecution", "utility") + * @returns The model identifier string (e.g. "google/gemini-3-flash") + */ +export declare function getModelId(key: keyof ModelConfig): string; +/** @internal Reset config to empty state. Used for testing only. */ +export declare function resetConfig(): void; +export {}; diff --git a/dist/config.js b/dist/config.js new file mode 100644 index 0000000..3f6bac8 --- /dev/null +++ b/dist/config.js @@ -0,0 +1,53 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.DEFAULT_MODELS = void 0; +exports.configure = configure; +exports.getConfig = getConfig; +exports.getModelId = getModelId; +exports.resetConfig = resetConfig; +exports.DEFAULT_MODELS = { + stepExecution: "google/gemini-3-flash", + userFlowLow: "google/gemini-3-flash", + userFlowHigh: "google/gemini-3.1-pro-preview", + assertionPrimary: "anthropic/claude-haiku-4.5", + assertionSecondary: "google/gemini-3-flash", + assertionArbiter: "google/gemini-3.1-pro-preview", + utility: "google/gemini-2.5-flash", +}; +let globalConfig = {}; +/** + * Sets global configuration for Passmark. Call once before using any functions. + * Subsequent calls merge with existing config (does not reset unset fields). + * + * @param config - Configuration options for AI gateway, models, email, and uploads + * + * @example + * ```typescript + * configure({ + * ai: { gateway: "none", models: { stepExecution: "google/gemini-3-flash" } }, + * email: { domain: "test.com", extractContent: async ({ email, prompt }) => "..." }, + * }); + * ``` + */ +function configure(config) { + globalConfig = { ...globalConfig, ...config }; +} +/** + * Returns the current global configuration. + */ +function getConfig() { + return globalConfig; +} +/** + * Returns the configured model ID for a given use case, falling back to the default. + * + * @param key - The model use case key (e.g. 
"stepExecution", "utility") + * @returns The model identifier string (e.g. "google/gemini-3-flash") + */ +function getModelId(key) { + return getConfig().ai?.models?.[key] ?? exports.DEFAULT_MODELS[key]; +} +/** @internal Reset config to empty state. Used for testing only. */ +function resetConfig() { + globalConfig = {}; +} diff --git a/dist/constants.d.ts b/dist/constants.d.ts new file mode 100644 index 0000000..f6993d5 --- /dev/null +++ b/dist/constants.d.ts @@ -0,0 +1,19 @@ +export declare const LOCATOR_ACTION_TIMEOUT = 2000; +export declare const CACHED_ACTION_TIMEOUT = 5000; +export declare const STOP_DELAY = 3000; +export declare const SNAPSHOT_TIMEOUT = 5000; +export declare const DOM_STABILIZATION_IDLE = 500; +export declare const DOM_STABILIZATION_TIMEOUT = 5000; +export declare const INITIAL_DOM_STABILIZATION_IDLE = 3000; +export declare const ASSERTION_MODEL_TIMEOUT = 35000; +export declare const STEP_EXECUTION_TIMEOUT = 180000; +export declare const WAIT_CONDITION_TIMEOUT = 120000; +export declare const WAIT_CONDITION_INITIAL_INTERVAL = 1000; +export declare const WAIT_CONDITION_MAX_INTERVAL = 10000; +export declare const EMAIL_INITIAL_WAIT = 5000; +export declare const EMAIL_RETRY_DELAY = 60000; +export declare const STEP_EXECUTION_MAX_STEPS = 25; +export declare const USER_FLOW_MAX_STEPS = 50; +export declare const MAX_RETRIES = 3; +export declare const THINKING_BUDGET_DEFAULT = 1024; +export declare const GLOBAL_VALUES_TTL_SECONDS = 86400; diff --git a/dist/constants.js b/dist/constants.js new file mode 100644 index 0000000..54af458 --- /dev/null +++ b/dist/constants.js @@ -0,0 +1,26 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.GLOBAL_VALUES_TTL_SECONDS = exports.THINKING_BUDGET_DEFAULT = exports.MAX_RETRIES = exports.USER_FLOW_MAX_STEPS = exports.STEP_EXECUTION_MAX_STEPS = exports.EMAIL_RETRY_DELAY = exports.EMAIL_INITIAL_WAIT = exports.WAIT_CONDITION_MAX_INTERVAL = 
exports.WAIT_CONDITION_INITIAL_INTERVAL = exports.WAIT_CONDITION_TIMEOUT = exports.STEP_EXECUTION_TIMEOUT = exports.ASSERTION_MODEL_TIMEOUT = exports.INITIAL_DOM_STABILIZATION_IDLE = exports.DOM_STABILIZATION_TIMEOUT = exports.DOM_STABILIZATION_IDLE = exports.SNAPSHOT_TIMEOUT = exports.STOP_DELAY = exports.CACHED_ACTION_TIMEOUT = exports.LOCATOR_ACTION_TIMEOUT = void 0; +// Timeouts (milliseconds) +exports.LOCATOR_ACTION_TIMEOUT = 2000; +exports.CACHED_ACTION_TIMEOUT = 5000; +exports.STOP_DELAY = 3000; +exports.SNAPSHOT_TIMEOUT = 5000; +exports.DOM_STABILIZATION_IDLE = 500; +exports.DOM_STABILIZATION_TIMEOUT = 5000; +exports.INITIAL_DOM_STABILIZATION_IDLE = 3000; +exports.ASSERTION_MODEL_TIMEOUT = 35000; +exports.STEP_EXECUTION_TIMEOUT = 180000; +exports.WAIT_CONDITION_TIMEOUT = 120000; +exports.WAIT_CONDITION_INITIAL_INTERVAL = 1000; +exports.WAIT_CONDITION_MAX_INTERVAL = 10000; +exports.EMAIL_INITIAL_WAIT = 5000; +exports.EMAIL_RETRY_DELAY = 60000; +// Limits +exports.STEP_EXECUTION_MAX_STEPS = 25; +exports.USER_FLOW_MAX_STEPS = 50; +exports.MAX_RETRIES = 3; +// Thinking budgets (tokens) +exports.THINKING_BUDGET_DEFAULT = 1024; +// Cache +exports.GLOBAL_VALUES_TTL_SECONDS = 86400; diff --git a/dist/data-cache.d.ts b/dist/data-cache.d.ts new file mode 100644 index 0000000..a503bbe --- /dev/null +++ b/dist/data-cache.d.ts @@ -0,0 +1,134 @@ +import { Step } from "./types"; +/** + * Local placeholders that are fresh for each runSteps call. + * These values are NOT persisted and are regenerated every time. + */ +export type LocalPlaceholders = { + "{{run.shortid}}": string; + "{{run.fullName}}": string; + "{{run.email}}": string; + "{{run.dynamicEmail}}": string; + "{{run.phoneNumber}}": string; +}; +/** + * Global placeholders that are shared across all tests within an execution. + * These values are persisted to the cache and loaded for subsequent runSteps calls + * with the same executionId. 
+ */ +export type GlobalPlaceholders = { + "{{global.shortid}}": string; + "{{global.fullName}}": string; + "{{global.email}}": string; + "{{global.dynamicEmail}}": string; + "{{global.phoneNumber}}": string; +}; +/** + * Project data placeholders for {{data.key}} syntax. + * These are stored in the cache and managed via project settings. + */ +export type ProjectDataPlaceholders = Record; +export type AssertionItem = { + assertion: string; + effort?: "low" | "high"; + images?: string[]; +}; +export type ProcessPlaceholdersResult = { + processedSteps: Step[]; + processedAssertions?: AssertionItem[]; + localValues: LocalPlaceholders; + globalValues?: GlobalPlaceholders; + projectDataValues?: ProjectDataPlaceholders; +}; +/** + * Pattern to match email extraction placeholders. + * Format: {{email.:}} or {{email.::}} + * Examples: + * - {{email.otp:get the 6 digit verification code}} + * - {{email.otp:get the 6 digit verification code:sandeep@bug0.ai}} + * - {{email.link:get the magic link url}} + * - {{email.code:get the confirmation code}} + */ +export declare const EMAIL_EXTRACTION_PATTERN: RegExp; +export declare const LOCAL_PLACEHOLDER_KEYS: (keyof LocalPlaceholders)[]; +export declare const GLOBAL_PLACEHOLDER_KEYS: (keyof GlobalPlaceholders)[]; +/** + * Fetches global values from the cache for a given execution ID. + * Returns null if no values exist. + */ +export declare function getGlobalValues(executionId: string): Promise | null>; +/** + * Saves global values to the cache for a given execution ID. + * Sets a 24-hour TTL on the key. + */ +export declare function saveGlobalValues(executionId: string, values: GlobalPlaceholders): Promise; +/** + * Fetches project data from the cache for a given project ID. + * Returns an empty object if no data exists. + */ +export declare function getProjectData(projectId: string): Promise; +/** + * Generates local values for placeholders. + * These are fresh for each runSteps call. 
+ */ +export declare function generateLocalValues(): Promise; +/** + * Generates global values, reusing any existing values provided. + * Only generates values for keys that don't exist in existingValues. + */ +export declare function generateGlobalValues(existingValues: Partial | null): Promise; +/** + * Checks if any text contains global placeholders. + */ +export declare function containsGlobalPlaceholder(text: string): boolean; +/** + * Scans steps for any global placeholders. + * Returns true if any step description, data value, or script contains a global placeholder. + */ +export declare function stepsContainGlobalPlaceholders(steps: { + description: string; + data?: Record; + script?: string; + waitUntil?: string; +}[]): boolean; +/** + * Scans assertions for any global placeholders. + */ +export declare function assertionsContainGlobalPlaceholders(assertions?: { + assertion: string; +}[]): boolean; +/** + * Checks if any text contains project data placeholders. + */ +export declare function containsProjectDataPlaceholder(text: string): boolean; +/** + * Scans steps for any project data placeholders. + * Returns true if any step description, data value, or script contains a project data placeholder. + */ +export declare function stepsContainProjectDataPlaceholders(steps: { + description: string; + data?: Record; + script?: string; + waitUntil?: string; +}[]): boolean; +/** + * Replaces dynamic placeholders in a string with their corresponding values. + * Handles {{run.*}}, {{global.*}}, and {{data.*}} placeholders. + */ +export declare function replacePlaceholders(text: string, localValues: LocalPlaceholders, globalValues?: GlobalPlaceholders, projectDataValues?: ProjectDataPlaceholders): string; +/** + * Processes steps and assertions to replace dynamic placeholders with consistent values. 
+ * Handles {{run.*}} placeholders (fresh per call), {{global.*}} placeholders + * (shared across execution via cache), and {{data.*}} placeholders (project data from cache). + * Returns the processed steps and assertions along with the generated values. + */ +export declare function processPlaceholders(steps: Step[], assertions?: AssertionItem[], executionId?: string, projectId?: string): Promise; +/** + * Gets the dynamic email to use for email extraction. + * Prefers global email if available, otherwise falls back to local email. + */ +export declare function getDynamicEmail(localValues: LocalPlaceholders, globalValues?: GlobalPlaceholders): string; +/** + * Resolves email extraction placeholders in step data. + * This should be called just before step execution to ensure emails have arrived. + */ +export declare function resolveEmailPlaceholders(step: Step, dynamicEmail: string): Promise; diff --git a/dist/data-cache.js b/dist/data-cache.js new file mode 100644 index 0000000..e10290e --- /dev/null +++ b/dist/data-cache.js @@ -0,0 +1,388 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? 
mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.GLOBAL_PLACEHOLDER_KEYS = exports.LOCAL_PLACEHOLDER_KEYS = exports.EMAIL_EXTRACTION_PATTERN = void 0; +exports.getGlobalValues = getGlobalValues; +exports.saveGlobalValues = saveGlobalValues; +exports.getProjectData = getProjectData; +exports.generateLocalValues = generateLocalValues; +exports.generateGlobalValues = generateGlobalValues; +exports.containsGlobalPlaceholder = containsGlobalPlaceholder; +exports.stepsContainGlobalPlaceholders = stepsContainGlobalPlaceholders; +exports.assertionsContainGlobalPlaceholders = assertionsContainGlobalPlaceholders; +exports.containsProjectDataPlaceholder = containsProjectDataPlaceholder; +exports.stepsContainProjectDataPlaceholders = stepsContainProjectDataPlaceholders; +exports.replacePlaceholders = replacePlaceholders; +exports.processPlaceholders = processPlaceholders; +exports.getDynamicEmail = getDynamicEmail; +exports.resolveEmailPlaceholders = resolveEmailPlaceholders; +const errors_1 = require("./errors"); +const shortid_1 = __importDefault(require("shortid")); +const config_1 = require("./config"); +const email_1 = require("./email"); +const constants_1 = require("./constants"); +const logger_1 = require("./logger"); +const cache_1 = require("./cache"); +const utils_1 = require("./utils"); +// ============================================================================= +// Constants +// ============================================================================= +/** + * Pattern to match email extraction placeholders. 
+ * Format: {{email.:}} or {{email.::}} + * Examples: + * - {{email.otp:get the 6 digit verification code}} + * - {{email.otp:get the 6 digit verification code:sandeep@bug0.ai}} + * - {{email.link:get the magic link url}} + * - {{email.code:get the confirmation code}} + */ +exports.EMAIL_EXTRACTION_PATTERN = /\{\{email\.(\w+):([^:}]+)(?::([^}]+))?\}\}/; +exports.LOCAL_PLACEHOLDER_KEYS = [ + "{{run.shortid}}", + "{{run.fullName}}", + "{{run.email}}", + "{{run.dynamicEmail}}", + "{{run.phoneNumber}}", +]; +exports.GLOBAL_PLACEHOLDER_KEYS = [ + "{{global.shortid}}", + "{{global.fullName}}", + "{{global.email}}", + "{{global.dynamicEmail}}", + "{{global.phoneNumber}}", +]; +/** Pattern to detect any global placeholder in text */ +const GLOBAL_PLACEHOLDER_PATTERN = /\{\{global\.\w+\}\}/; +/** Pattern to detect any project data placeholder in text */ +const PROJECT_DATA_PLACEHOLDER_PATTERN = /\{\{data\.(\w+)\}\}/g; +// ============================================================================= +// Cache Operations (Global Values) +// ============================================================================= +/** + * Generates a cache key for storing global values for an execution. + */ +function getCacheKey(executionId) { + return `execution:${executionId}:globals`; +} +/** + * Fetches global values from the cache for a given execution ID. + * Returns null if no values exist. + */ +async function getGlobalValues(executionId) { + if (!cache_1.cache) + return null; + const key = getCacheKey(executionId); + const values = await cache_1.cache.hgetall(key); + if (!values || Object.keys(values).length === 0) { + return null; + } + return values; +} +/** + * Saves global values to the cache for a given execution ID. + * Sets a 24-hour TTL on the key. 
+ */ +async function saveGlobalValues(executionId, values) { + if (!cache_1.cache) + return; + const key = getCacheKey(executionId); + // Save all values as a hash + await cache_1.cache.hset(key, values); + // Set TTL + await cache_1.cache.expire(key, constants_1.GLOBAL_VALUES_TTL_SECONDS); + logger_1.logger.debug(`Saved global values to cache for execution: ${executionId}`); +} +// ============================================================================= +// Cache Operations (Project Data) +// ============================================================================= +/** + * Generates a cache key for storing project data. + */ +function getProjectDataCacheKey(projectId) { + return `project:${projectId}:data`; +} +/** + * Fetches project data from the cache for a given project ID. + * Returns an empty object if no data exists. + */ +async function getProjectData(projectId) { + if (!cache_1.cache) + return {}; + const key = getProjectDataCacheKey(projectId); + const values = await cache_1.cache.hgetall(key); + if (!values || Object.keys(values).length === 0) { + return {}; + } + return values; +} +// ============================================================================= +// Value Generation +// ============================================================================= +/** + * Generates local values for placeholders. + * These are fresh for each runSteps call. + */ +async function generateLocalValues() { + const { faker } = await import("@faker-js/faker"); + const { v4: uuidv4 } = await import("uuid"); + const emailDomain = (0, config_1.getConfig)().email?.domain; + return { + "{{run.shortid}}": shortid_1.default.generate(), + "{{run.fullName}}": faker.person.fullName(), + "{{run.email}}": faker.internet.email(), + "{{run.dynamicEmail}}": emailDomain ? `e2e-tester-${uuidv4()}@${emailDomain}` : "", + "{{run.phoneNumber}}": (0, utils_1.generatePhoneNumber)(), + }; +} +/** + * Generates global values, reusing any existing values provided. 
+ * Only generates values for keys that don't exist in existingValues. + */ +async function generateGlobalValues(existingValues) { + const { faker } = await import("@faker-js/faker"); + const { v4: uuidv4 } = await import("uuid"); + const emailDomain = (0, config_1.getConfig)().email?.domain; + return { + "{{global.shortid}}": existingValues?.["{{global.shortid}}"] ?? shortid_1.default.generate(), + "{{global.fullName}}": existingValues?.["{{global.fullName}}"] ?? faker.person.fullName(), + "{{global.email}}": existingValues?.["{{global.email}}"] ?? faker.internet.email(), + "{{global.dynamicEmail}}": existingValues?.["{{global.dynamicEmail}}"] ?? + (emailDomain ? `e2e-tester-${uuidv4()}@${emailDomain}` : ""), + "{{global.phoneNumber}}": existingValues?.["{{global.phoneNumber}}"] ?? (0, utils_1.generatePhoneNumber)(), + }; +} +// ============================================================================= +// Placeholder Detection +// ============================================================================= +/** + * Checks if any text contains global placeholders. + */ +function containsGlobalPlaceholder(text) { + return GLOBAL_PLACEHOLDER_PATTERN.test(text); +} +/** + * Scans steps for any global placeholders. + * Returns true if any step description, data value, or script contains a global placeholder. + */ +function stepsContainGlobalPlaceholders(steps) { + for (const step of steps) { + if (containsGlobalPlaceholder(step.description)) { + return true; + } + if (step.data) { + for (const value of Object.values(step.data)) { + if (containsGlobalPlaceholder(value)) { + return true; + } + } + } + if (step.script && containsGlobalPlaceholder(step.script)) { + return true; + } + if (step.waitUntil && containsGlobalPlaceholder(step.waitUntil)) { + return true; + } + } + return false; +} +/** + * Scans assertions for any global placeholders. 
+ */ +function assertionsContainGlobalPlaceholders(assertions) { + if (!assertions) + return false; + for (const item of assertions) { + if (containsGlobalPlaceholder(item.assertion)) { + return true; + } + } + return false; +} +/** + * Checks if any text contains project data placeholders. + */ +function containsProjectDataPlaceholder(text) { + // Reset lastIndex since we're using a global regex + PROJECT_DATA_PLACEHOLDER_PATTERN.lastIndex = 0; + return PROJECT_DATA_PLACEHOLDER_PATTERN.test(text); +} +/** + * Scans steps for any project data placeholders. + * Returns true if any step description, data value, or script contains a project data placeholder. + */ +function stepsContainProjectDataPlaceholders(steps) { + for (const step of steps) { + if (containsProjectDataPlaceholder(step.description)) { + return true; + } + if (step.data) { + for (const value of Object.values(step.data)) { + if (containsProjectDataPlaceholder(value)) { + return true; + } + } + } + if (step.script && containsProjectDataPlaceholder(step.script)) { + return true; + } + if (step.waitUntil && containsProjectDataPlaceholder(step.waitUntil)) { + return true; + } + } + return false; +} +// ============================================================================= +// Placeholder Replacement +// ============================================================================= +/** + * Replaces dynamic placeholders in a string with their corresponding values. + * Handles {{run.*}}, {{global.*}}, and {{data.*}} placeholders. 
+ */ +function replacePlaceholders(text, localValues, globalValues, projectDataValues) { + let result = text; + // Throw if dynamicEmail placeholders are used without an email provider configured + const dynamicEmailPlaceholders = ["{{run.dynamicEmail}}", "{{global.dynamicEmail}}"]; + for (const placeholder of dynamicEmailPlaceholders) { + if (result.includes(placeholder) && !(0, config_1.getConfig)().email) { + throw new errors_1.ConfigurationError(`Email provider not configured. Call configure({ email: ... }) before using ${placeholder}.`); + } + } + // Replace {{run.*}} placeholders + for (const [placeholder, value] of Object.entries(localValues)) { + result = result.split(placeholder).join(value); + } + // Replace {{global.*}} placeholders + if (globalValues) { + for (const [placeholder, value] of Object.entries(globalValues)) { + result = result.split(placeholder).join(value); + } + } + // Replace {{data.key}} placeholders + if (projectDataValues) { + result = result.replace(/\{\{data\.(\w+)\}\}/g, (match, key) => { + const value = projectDataValues[key]; + if (value === undefined) { + logger_1.logger.warn(`[ProjectData] Placeholder ${match} not found`); + return match; + } + return value; + }); + } + return result; +} +// ============================================================================= +// Main Processing Function +// ============================================================================= +/** + * Processes steps and assertions to replace dynamic placeholders with consistent values. + * Handles {{run.*}} placeholders (fresh per call), {{global.*}} placeholders + * (shared across execution via cache), and {{data.*}} placeholders (project data from cache). + * Returns the processed steps and assertions along with the generated values. 
+ */ +async function processPlaceholders(steps, assertions, executionId, projectId) { + // Check if global placeholders are used without executionId + const hasGlobalPlaceholders = stepsContainGlobalPlaceholders(steps) || assertionsContainGlobalPlaceholders(assertions); + if (hasGlobalPlaceholders && !executionId) { + throw new errors_1.ValidationError("{{global.*}} placeholders require an executionId. " + + "Please provide executionId in runSteps options to use global placeholders."); + } + // Check if project data placeholders are used without projectId + const hasProjectDataPlaceholders = stepsContainProjectDataPlaceholders(steps); + if (hasProjectDataPlaceholders && !projectId) { + throw new errors_1.ValidationError("{{data.*}} placeholders require a projectId. " + + "Please provide projectId in runSteps options to use project data placeholders."); + } + // Generate fresh run values (always new per runSteps call) + const localValues = await generateLocalValues(); + // Handle global values if executionId is provided + let globalValues; + if (executionId) { + // Try to load existing global values from Redis + const existingGlobalValues = await getGlobalValues(executionId); + // Generate global values, reusing existing ones + globalValues = await generateGlobalValues(existingGlobalValues); + // Save global values back to Redis (updates TTL and adds any new values) + await saveGlobalValues(executionId, globalValues); + logger_1.logger.debug({ globalValues }, `Using global values for execution ${executionId}`); + } + // Fetch project data if projectId is provided + let projectDataValues; + if (projectId) { + projectDataValues = await getProjectData(projectId); + logger_1.logger.debug({ projectDataValues }, `Using project data for project ${projectId}`); + } + // Deep clone and process steps + // Note: Email extraction placeholders ({{email.xxx:prompt}}) are NOT resolved here. + // They are resolved lazily in runSteps just before each step executes. 
+ const processedSteps = steps.map((step) => { + const processedStep = { ...step }; + if (processedStep.data) { + processedStep.data = { ...processedStep.data }; + for (const key in processedStep.data) { + processedStep.data[key] = replacePlaceholders(processedStep.data[key], localValues, globalValues, projectDataValues); + } + } + // Process script placeholders if present + if (processedStep.script) { + processedStep.script = replacePlaceholders(processedStep.script, localValues, globalValues, projectDataValues); + } + // Process waitUntil placeholders if present + if (processedStep.waitUntil) { + processedStep.waitUntil = replacePlaceholders(processedStep.waitUntil, localValues, globalValues, projectDataValues); + } + return processedStep; + }); + // Process assertions if provided + let processedAssertions; + if (assertions) { + processedAssertions = assertions.map((assertionItem) => ({ + ...assertionItem, + assertion: replacePlaceholders(assertionItem.assertion, localValues, globalValues, projectDataValues), + })); + } + return { + processedSteps, + processedAssertions, + localValues, + globalValues, + projectDataValues, + }; +} +/** + * Gets the dynamic email to use for email extraction. + * Prefers global email if available, otherwise falls back to local email. + */ +function getDynamicEmail(localValues, globalValues) { + return globalValues?.["{{global.dynamicEmail}}"] || localValues["{{run.dynamicEmail}}"]; +} +/** + * Resolves email extraction placeholders in step data. + * This should be called just before step execution to ensure emails have arrived. 
+ */ +async function resolveEmailPlaceholders(step, dynamicEmail) { + if (!step.data) + return step; + const resolvedData = { ...step.data }; + let hasEmailExtraction = false; + for (const key in resolvedData) { + const value = resolvedData[key]; + const match = value.match(exports.EMAIL_EXTRACTION_PATTERN); + if (match) { + hasEmailExtraction = true; + const [fullMatch, extractType, prompt, explicitEmail] = match; + const targetEmail = explicitEmail?.trim() || dynamicEmail; + logger_1.logger.debug(`Extracting ${extractType} from ${targetEmail} with prompt: "${prompt}"`); + const extractedValue = await (0, email_1.extractEmailContent)({ + email: targetEmail, + prompt: prompt.trim(), + }); + resolvedData[key] = value.replace(fullMatch, extractedValue); + } + } + if (hasEmailExtraction) { + return { ...step, data: resolvedData }; + } + return step; +} diff --git a/dist/email.d.ts b/dist/email.d.ts new file mode 100644 index 0000000..99e1011 --- /dev/null +++ b/dist/email.d.ts @@ -0,0 +1,46 @@ +/** + * Generates a unique test email address using the configured email provider's domain. + * + * @param options - Optional email generation parameters + * @param options.prefix - Email prefix before the timestamp. Default: "test.user" + * @param options.timestamp - Timestamp for uniqueness. Default: Date.now() + * @returns Email address in the format `prefix.timestamp@domain` + * @throws If no email provider is configured via `configure()` + * + * @example + * ```typescript + * const email = generateEmail(); // "test.user.1711234567890@emailsink.dev" + * const custom = generateEmail({ prefix: "signup" }); // "signup.1711234567890@emailsink.dev" + * ``` + */ +export declare const generateEmail: ({ prefix, timestamp, }?: { + prefix?: string; + timestamp?: number; +}) => string; +/** + * Extracts content from an email using the configured email provider. + * Waits for the email to arrive, then polls the provider with retries. 
+ * + * @param options - Extraction configuration + * @param options.email - The email address to extract content from + * @param options.prompt - Natural language prompt describing what to extract (e.g. "get the 6 digit verification code") + * @param options.maxRetries - Maximum number of extraction attempts. Default: 3 + * @param options.retryDelayMs - Delay between retries in milliseconds. Default: 60000 (1 minute) + * @returns The extracted content as a string + * @throws If no email provider is configured via `configure()` + * @throws If content cannot be extracted after all retry attempts + * + * @example + * ```typescript + * const otp = await extractEmailContent({ + * email: "test.user.123@emailsink.dev", + * prompt: "get the 6 digit verification code", + * }); + * ``` + */ +export declare function extractEmailContent({ email, prompt, maxRetries, retryDelayMs, }: { + email: string; + prompt: string; + maxRetries?: number; + retryDelayMs?: number; +}): Promise; diff --git a/dist/email.js b/dist/email.js new file mode 100644 index 0000000..dd116c7 --- /dev/null +++ b/dist/email.js @@ -0,0 +1,77 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.generateEmail = void 0; +exports.extractEmailContent = extractEmailContent; +const errors_1 = require("./errors"); +const config_1 = require("./config"); +const constants_1 = require("./constants"); +const logger_1 = require("./logger"); +function getEmailProvider() { + const provider = (0, config_1.getConfig)().email; + if (!provider) { + throw new errors_1.ConfigurationError("Email provider not configured. Call configure({ email: ... }) before using email features."); + } + return provider; +} +/** + * Generates a unique test email address using the configured email provider's domain. + * + * @param options - Optional email generation parameters + * @param options.prefix - Email prefix before the timestamp. 
Default: "test.user" + * @param options.timestamp - Timestamp for uniqueness. Default: Date.now() + * @returns Email address in the format `prefix.timestamp@domain` + * @throws If no email provider is configured via `configure()` + * + * @example + * ```typescript + * const email = generateEmail(); // "test.user.1711234567890@emailsink.dev" + * const custom = generateEmail({ prefix: "signup" }); // "signup.1711234567890@emailsink.dev" + * ``` + */ +const generateEmail = ({ prefix = "test.user", timestamp = Date.now(), } = {}) => { + const { domain } = getEmailProvider(); + return `${prefix}.${timestamp}@${domain}`; +}; +exports.generateEmail = generateEmail; +/** + * Extracts content from an email using the configured email provider. + * Waits for the email to arrive, then polls the provider with retries. + * + * @param options - Extraction configuration + * @param options.email - The email address to extract content from + * @param options.prompt - Natural language prompt describing what to extract (e.g. "get the 6 digit verification code") + * @param options.maxRetries - Maximum number of extraction attempts. Default: 3 + * @param options.retryDelayMs - Delay between retries in milliseconds. 
Default: 60000 (1 minute) + * @returns The extracted content as a string + * @throws If no email provider is configured via `configure()` + * @throws If content cannot be extracted after all retry attempts + * + * @example + * ```typescript + * const otp = await extractEmailContent({ + * email: "test.user.123@emailsink.dev", + * prompt: "get the 6 digit verification code", + * }); + * ``` + */ +async function extractEmailContent({ email, prompt, maxRetries = constants_1.MAX_RETRIES, retryDelayMs = constants_1.EMAIL_RETRY_DELAY, }) { + const provider = getEmailProvider(); + // Add an initial delay before the first attempt to allow email to arrive + logger_1.logger.info(`Initial wait before extracting email content for ${email}...`); + await new Promise((resolve) => setTimeout(resolve, constants_1.EMAIL_INITIAL_WAIT)); + for (let attempt = 1; attempt <= maxRetries; attempt++) { + logger_1.logger.debug(`Waiting for email content (attempt ${attempt}/${maxRetries})...`); + try { + const result = await provider.extractContent({ email, prompt }); + logger_1.logger.info(`Successfully extracted email content: ${result}`); + return result; + } + catch (error) { + logger_1.logger.warn(`Error fetching email content (attempt ${attempt}): ${error}`); + } + if (attempt < maxRetries) { + await new Promise((resolve) => setTimeout(resolve, retryDelayMs)); + } + } + throw new errors_1.AIModelError(`Failed to extract email content after ${maxRetries} attempts. Email: ${email}, Prompt: ${prompt}`); +} diff --git a/dist/errors.d.ts b/dist/errors.d.ts new file mode 100644 index 0000000..d2abcc2 --- /dev/null +++ b/dist/errors.d.ts @@ -0,0 +1,61 @@ +/** + * errors.ts + * + * Custom error hierarchy for Passmark. + * All Passmark errors extend PassmarkError so callers can distinguish + * framework errors from generic runtime errors with a simple instanceof check. + * + * Usage: + * import { StepExecutionError, AIModelError } from "./errors"; + * + * try { ... 
} + * catch (e) { + * if (e instanceof StepExecutionError) { ... } + * } + */ +export declare class PassmarkError extends Error { + /** Machine-readable error code, stable across versions. */ + readonly code: string; + constructor(message: string, code: string); +} +/** + * Thrown when a test step fails during AI or cached execution. + * + * Replaces: throw new Error(errorDescription) in index.ts + */ +export declare class StepExecutionError extends PassmarkError { + readonly stepDescription: string; + constructor(message: string, stepDescription: string); +} +/** + * Thrown when an AI model call fails or the provider is misconfigured. + * + * Replaces: throw new Error(`Unknown AI provider: ${provider}`) in models.ts + */ +export declare class AIModelError extends PassmarkError { + constructor(message: string); +} +/** + * Thrown when a Redis operation fails or cache is unavailable. + * + * For future use as Redis error handling gets more granular. + */ +export declare class CacheError extends PassmarkError { + constructor(message: string); +} +/** + * Thrown when required environment variables or configuration are missing. + * + * Replaces: throw new Error("GOOGLE_GENERATIVE_AI_API_KEY isn't set...") in models.ts + */ +export declare class ConfigurationError extends PassmarkError { + constructor(message: string); +} +/** + * Thrown when input fails validation (e.g. a script step has no script). + * + * Replaces: throw new Error(`Script step ${step.description} has no script content.`) + */ +export declare class ValidationError extends PassmarkError { + constructor(message: string); +} diff --git a/dist/errors.js b/dist/errors.js new file mode 100644 index 0000000..d433228 --- /dev/null +++ b/dist/errors.js @@ -0,0 +1,91 @@ +"use strict"; +/** + * errors.ts + * + * Custom error hierarchy for Passmark. + * All Passmark errors extend PassmarkError so callers can distinguish + * framework errors from generic runtime errors with a simple instanceof check. 
+ * + * Usage: + * import { StepExecutionError, AIModelError } from "./errors"; + * + * try { ... } + * catch (e) { + * if (e instanceof StepExecutionError) { ... } + * } + */ +Object.defineProperty(exports, "__esModule", { value: true }); +exports.ValidationError = exports.ConfigurationError = exports.CacheError = exports.AIModelError = exports.StepExecutionError = exports.PassmarkError = void 0; +// ─── Base ───────────────────────────────────────────────────────────────── +class PassmarkError extends Error { + /** Machine-readable error code, stable across versions. */ + code; + constructor(message, code) { + super(message); + this.name = this.constructor.name; + this.code = code; + // Maintains proper stack trace in V8 (Node.js / Chrome) + if (Error.captureStackTrace) { + Error.captureStackTrace(this, this.constructor); + } + } +} +exports.PassmarkError = PassmarkError; +// ─── Subclasses ─────────────────────────────────────────────────────────── +/** + * Thrown when a test step fails during AI or cached execution. + * + * Replaces: throw new Error(errorDescription) in index.ts + */ +class StepExecutionError extends PassmarkError { + stepDescription; + constructor(message, stepDescription) { + super(message, "STEP_EXECUTION_FAILED"); + this.stepDescription = stepDescription; + } +} +exports.StepExecutionError = StepExecutionError; +/** + * Thrown when an AI model call fails or the provider is misconfigured. + * + * Replaces: throw new Error(`Unknown AI provider: ${provider}`) in models.ts + */ +class AIModelError extends PassmarkError { + constructor(message) { + super(message, "AI_MODEL_ERROR"); + } +} +exports.AIModelError = AIModelError; +/** + * Thrown when a Redis operation fails or cache is unavailable. + * + * For future use as Redis error handling gets more granular. 
+ */ +class CacheError extends PassmarkError { + constructor(message) { + super(message, "CACHE_ERROR"); + } +} +exports.CacheError = CacheError; +/** + * Thrown when required environment variables or configuration are missing. + * + * Replaces: throw new Error("GOOGLE_GENERATIVE_AI_API_KEY isn't set...") in models.ts + */ +class ConfigurationError extends PassmarkError { + constructor(message) { + super(message, "CONFIGURATION_ERROR"); + } +} +exports.ConfigurationError = ConfigurationError; +/** + * Thrown when input fails validation (e.g. a script step has no script). + * + * Replaces: throw new Error(`Script step ${step.description} has no script content.`) + */ +class ValidationError extends PassmarkError { + constructor(message) { + super(message, "VALIDATION_ERROR"); + } +} +exports.ValidationError = ValidationError; diff --git a/dist/extract.d.ts b/dist/extract.d.ts new file mode 100644 index 0000000..44e0584 --- /dev/null +++ b/dist/extract.d.ts @@ -0,0 +1,24 @@ +/** + * Extracts data from a page snapshot and URL using AI. + * Uses Gemini 2.5 Flash for fast, accurate extraction. 
+ * + * @param snapshot - The accessibility snapshot of the page + * @param url - The current page URL + * @param prompt - The extraction prompt describing what to extract + * @returns The extracted value as a string + * + * @example + * ```typescript + * const token = await extractDataWithAI({ + * snapshot: await safeSnapshot(page), + * url: page.url(), + * prompt: 'Extract the token query parameter value from the URL' + * }); + * // Returns: "abc123" + * ``` + */ +export declare function extractDataWithAI({ snapshot, url, prompt, }: { + snapshot: string; + url: string; + prompt: string; +}): Promise; diff --git a/dist/extract.js b/dist/extract.js new file mode 100644 index 0000000..6685a04 --- /dev/null +++ b/dist/extract.js @@ -0,0 +1,62 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.extractDataWithAI = extractDataWithAI; +const ai_1 = require("ai"); +const zod_1 = require("zod"); +const config_1 = require("./config"); +const models_1 = require("./models"); +const extractionSchema = zod_1.z.object({ + extractedValue: zod_1.z.string().describe("The extracted value based on the prompt"), +}); +/** + * Extracts data from a page snapshot and URL using AI. + * Uses Gemini 2.5 Flash for fast, accurate extraction. 
+ * + * @param snapshot - The accessibility snapshot of the page + * @param url - The current page URL + * @param prompt - The extraction prompt describing what to extract + * @returns The extracted value as a string + * + * @example + * ```typescript + * const token = await extractDataWithAI({ + * snapshot: await safeSnapshot(page), + * url: page.url(), + * prompt: 'Extract the token query parameter value from the URL' + * }); + * // Returns: "abc123" + * ``` + */ +async function extractDataWithAI({ snapshot, url, prompt, }) { + const { output } = await (0, ai_1.generateText)({ + model: (0, models_1.resolveModel)((0, config_1.getModelId)("utility")), + temperature: 0, + output: ai_1.Output.object({ schema: extractionSchema }), + prompt: `You are an AI assistant that extracts specific data from web pages. + +Given the following page snapshot and URL, extract the value described in the extraction prompt. + + +${url} + + + +${snapshot} + + + +${prompt} + + + +- Extract exactly what is requested in the prompt +- If extracting from the URL, parse query parameters, path segments, or hash values as needed +- If extracting from the page content, find the relevant text in the snapshot +- Return only the extracted value, not the surrounding context +- If the value cannot be found, return an empty string + + +Return the extracted value.`, + }); + return output.extractedValue; +} diff --git a/dist/index.d.ts b/dist/index.d.ts new file mode 100644 index 0000000..d83f7fc --- /dev/null +++ b/dist/index.d.ts @@ -0,0 +1,97 @@ +import "./instrumentation"; +import { PlaywrightTestArgs, PlaywrightTestOptions, PlaywrightWorkerArgs, PlaywrightWorkerOptions, TestType } from "@playwright/test"; +import { RunStepsOptions, UserFlowOptions } from "./types"; +/** + * Executes a sequence of test steps using AI with intelligent caching. + * Each step is described in natural language and executed via browser automation. + * Successfully executed steps are cached for faster subsequent runs. 
+ * + * @param options - Configuration including page, steps, assertions, and callbacks + * @param options.page - The Playwright page instance + * @param options.userFlow - Name of the user flow (used as cache key prefix) + * @param options.steps - Array of steps to execute, each with a description and optional data + * @param options.bypassCache - When true, skips cache and forces AI execution for all steps + * @param options.assertions - Optional assertions to verify after step execution + * @param options.executionId - Links multiple runSteps calls to share {{global.*}} placeholders + * @param options.onStepStart - Callback fired when a step begins execution + * @param options.onStepEnd - Callback fired when a step completes + * @param options.onReasoning - Callback fired with AI reasoning for each tool call + * @throws Rethrows step execution timeout errors + * + * @example + * ```typescript + * await runSteps({ + * page, + * userFlow: "Checkout Flow", + * steps: [ + * { description: "Add item to cart" }, + * { description: "Fill in email", data: { value: "{{run.email}}" } }, + * ], + * assertions: [{ assertion: "Order confirmation is displayed" }], + * expect, + * }); + * ``` + */ +export declare const runSteps: ({ page, test, expect, userFlow, steps, auth, bypassCache, onStepStart, onStepEnd, onReasoning, assertions, projectId, executionId, failAssertionsSilently, }: RunStepsOptions) => Promise; +/** + * Runs a complete user flow as a single AI agent call. + * Best for exploratory testing where exact steps are flexible. + * The AI autonomously navigates, interacts, and verifies the flow. 
+ * + * @param options - User flow configuration + * @param options.page - The Playwright page instance + * @param options.userFlow - Description of the user flow to execute + * @param options.steps - Natural language description of steps to perform + * @param options.effort - "low" uses a faster model, "high" uses a more capable model with deeper thinking + * @param options.assertion - Optional assertion to verify after the flow completes + * @returns The assertion result if an assertion was provided, the raw AI text response otherwise, or undefined on error + * + * @example + * ```typescript + * const result = await runUserFlow({ + * page, + * userFlow: "Complete a purchase", + * steps: "Navigate to store, add an item, checkout", + * effort: "high", + * assertion: "Order confirmation is displayed", + * }); + * ``` + */ +export declare const runUserFlow: ({ page, userFlow, steps, assertion, effort, thinkingBudget, }: UserFlowOptions) => Promise; +/** + * Wraps a cached Playwright flow with AI fallback for auto-healing. + * Tries the cached flow first; if it fails (e.g., due to UI changes), falls back to AI execution. 
+ * + * @param config - Configuration for cached and AI flow execution + * @param config.cachedFlow - The cached Playwright flow to try first + * @param config.aiFlow - The AI-powered fallback flow to run if cached flow fails + * @param config.aiFlowTimeout - Optional timeout for the AI flow in milliseconds + * @param config.test - Playwright test instance for retry detection and timeout management + * + * @example + * ```typescript + * await executeWithAutoHealing({ + * cachedFlow: async () => { await page.getByRole("button").click(); }, + * aiFlow: async () => { await runSteps({ page, userFlow: "Click submit", steps }); }, + * test, + * }); + * ``` + */ +export declare const executeWithAutoHealing: (config: { + cachedFlow: () => Promise; + aiFlow: () => Promise; + aiFlowTimeout?: number; + test: TestType; +}) => Promise; +export { configure } from "./config"; +export type { EmailProvider } from "./config"; +export { emailsinkProvider } from "./providers/emailsink"; +export { extractEmailContent, generateEmail } from "./email"; +export { assert } from "./assertion"; +export type { AssertionResult } from "./types"; +export type { CacheStore } from "./cache"; +export { PassmarkError, StepExecutionError, ValidationError, AIModelError, CacheError, ConfigurationError } from "./errors"; diff --git a/dist/index.js b/dist/index.js new file mode 100644 index 0000000..a4e3505 --- /dev/null +++ b/dist/index.js @@ -0,0 +1,572 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? 
mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.ConfigurationError = exports.CacheError = exports.AIModelError = exports.ValidationError = exports.StepExecutionError = exports.PassmarkError = exports.assert = exports.generateEmail = exports.extractEmailContent = exports.emailsinkProvider = exports.configure = exports.executeWithAutoHealing = exports.runUserFlow = exports.runSteps = void 0; +const errors_1 = require("./errors"); +require("./instrumentation"); // For Axiom AI instrumentation +const ai_1 = require("ai"); +const ai_2 = require("axiom/ai"); +const shortid_1 = __importDefault(require("shortid")); +const instrumentation_1 = require("./instrumentation"); +// Only use withSpan when Axiom is configured, otherwise just execute the function directly +async function maybeWithSpan(meta, fn) { + return instrumentation_1.axiomEnabled ? (0, ai_2.withSpan)(meta, async () => fn()) : fn(); +} +const zod_1 = require("zod"); +const prompts_1 = require("./prompts"); +const cache_1 = require("./cache"); +const tools_1 = require("./tools"); +const utils_1 = require("./utils"); +const assertion_1 = require("./assertion"); +const data_cache_1 = require("./data-cache"); +const config_1 = require("./config"); +const extract_1 = require("./extract"); +const logger_1 = require("./logger"); +const models_1 = require("./models"); +const secure_script_runner_1 = require("./utils/secure-script-runner"); +const tab_manager_1 = require("./utils/tab-manager"); +const constants_1 = require("./constants"); +/** + * Executes a sequence of test steps using AI with intelligent caching. + * Each step is described in natural language and executed via browser automation. + * Successfully executed steps are cached for faster subsequent runs. 
+ * + * @param options - Configuration including page, steps, assertions, and callbacks + * @param options.page - The Playwright page instance + * @param options.userFlow - Name of the user flow (used as cache key prefix) + * @param options.steps - Array of steps to execute, each with a description and optional data + * @param options.bypassCache - When true, skips cache and forces AI execution for all steps + * @param options.assertions - Optional assertions to verify after step execution + * @param options.executionId - Links multiple runSteps calls to share {{global.*}} placeholders + * @param options.onStepStart - Callback fired when a step begins execution + * @param options.onStepEnd - Callback fired when a step completes + * @param options.onReasoning - Callback fired with AI reasoning for each tool call + * @throws Rethrows step execution timeout errors + * + * @example + * ```typescript + * await runSteps({ + * page, + * userFlow: "Checkout Flow", + * steps: [ + * { description: "Add item to cart" }, + * { description: "Fill in email", data: { value: "{{run.email}}" } }, + * ], + * assertions: [{ assertion: "Order confirmation is displayed" }], + * expect, + * }); + * ``` + */ +const runSteps = async ({ page, test, expect, userFlow, steps, auth, bypassCache = false, onStepStart, onStepEnd, onReasoning, assertions, projectId, executionId, failAssertionsSilently, }) => { + executionId = executionId || process.env.executionId; + // Track all open tabs for this run. The active page is updated automatically + // when a new tab opens, or explicitly via the `switchToTab` step field. + const tabManager = (0, tab_manager_1.createTabManager)(page); + if (!cache_1.cache) { + logger_1.logger.warn("Cache not configured. 
Step caching is disabled — all steps will use AI execution."); + if (executionId) { + logger_1.logger.warn("{{global.*}} placeholders will not persist across runSteps calls without a cache provider."); + } + } + // Check if this is a Playwright retry - if so, bypass cache and use AI only + const isPlaywrightRetry = test ? test.info().retry > 0 : false; + if (isPlaywrightRetry) { + logger_1.logger.debug(`Playwright retry detected (retry #${test.info().retry}). Bypassing cache and using AI only.`); + } + // Process dynamic placeholders before running steps + const { processedSteps, processedAssertions, localValues, globalValues, projectDataValues } = await (0, data_cache_1.processPlaceholders)(steps, assertions, executionId, projectId); + logger_1.logger.info(`Starting step-by-step execution of ${processedSteps.length} steps.`); + let errorInStepExecution, stepThatFailed = ""; + for (let i = 0; i < processedSteps.length; i++) { + // Resolve email placeholders lazily just before step execution + // This ensures the email has arrived before we try to extract content + // Use global email if available, otherwise fall back to run email, and then use the supplied email from regex + // ~~~ This logic needs to be fixed as global email will always be present if executionId is provided ~~~ + const dynamicEmail = (0, data_cache_1.getDynamicEmail)(localValues, globalValues); + // Re-process step data and waitUntil with current localValues to pick up extracted values from previous steps + let currentStep = processedSteps[i]; + if (currentStep.data) { + currentStep = { + ...currentStep, + data: Object.fromEntries(Object.entries(currentStep.data).map(([k, v]) => [ + k, + (0, data_cache_1.replacePlaceholders)(v, localValues, globalValues, projectDataValues), + ])), + }; + } + if (currentStep.waitUntil) { + currentStep = { + ...currentStep, + waitUntil: (0, data_cache_1.replacePlaceholders)(currentStep.waitUntil, localValues, globalValues, projectDataValues), + }; + } + const step = 
await (0, data_cache_1.resolveEmailPlaceholders)(currentStep, dynamicEmail); + const id = shortid_1.default.generate(); + if (onStepStart) { + onStepStart({ id, description: step.description }); + } + // Switch tab before executing the step if requested. + if (step.switchToTab !== undefined) { + await tabManager.switchTo(step.switchToTab); + } + // Script mode: execute script directly, skip AI and cache + if (step.isScript) { + if (!step.script) { + throw new errors_1.ValidationError(`Script step ${step.description} has no script content.`); + } + logger_1.logger.debug(`Executing Script Step: ${step.description}`); + if (step.moduleId) { + // moduleId is optional metadata used only for logging/debugging to identify the source module of this script step. + logger_1.logger.debug(`Module ID: ${step.moduleId}`); + } + try { + let pageScreenshotBeforeApplyingAction = ""; + if (step.waitUntil) { + pageScreenshotBeforeApplyingAction = (await tabManager.active().screenshot({ fullPage: false })).toString("base64"); + } + if (onReasoning) { + onReasoning({ + id, + reasoning: `Executing script for step: ${step.description}`, + }); + } + // Execute script securely using AST-based validation + // This prevents arbitrary code execution by only allowing safe Playwright method chains + await (0, secure_script_runner_1.runSecureScript)({ + page: tabManager, + script: step.script, + localValues: localValues, + globalValues: globalValues, + expect, // Pass expect for assertions like expect(locator).toContainText() + }); + // Handle waitUntil if specified + if (step.waitUntil) { + await (0, utils_1.waitForCondition)({ + page: tabManager, + condition: step.waitUntil, + pageScreenshotBeforeApplyingAction, + previousSteps: processedSteps.slice(0, i), + currentStep: step, + nextStep: processedSteps[i + 1], + }); + } + // Handle data extraction if specified + // This is done post script execution + if (step.extract) { + const snapshot = await (0, utils_1.safeSnapshot)(tabManager); + const 
url = tabManager.active().url(); + const extracted = await (0, extract_1.extractDataWithAI)({ + snapshot, + url, + prompt: step.extract.prompt, + }); + const placeholderKey = `{{run.${step.extract.as}}}`; + localValues[placeholderKey] = extracted; + logger_1.logger.info(`Extracted {{run.${step.extract.as}}}: "${extracted}"`); + } + if (onStepEnd) { + onStepEnd({ id, description: step.description }); + } + continue; // Skip to next step + } + catch (error) { + const message = error instanceof Error ? error.message : String(error); + logger_1.logger.error(`Script execution failed: ${message}`); + errorInStepExecution = message; + stepThatFailed = step.description; + break; // Stop execution on script failure + } + } + // First check if the step is cached on redis + const cachedStep = cache_1.cache ? await cache_1.cache.hgetall(`step:${userFlow}:${step.description}`) : {}; + if (!bypassCache && + !isPlaywrightRetry && + !step.bypassCache && + cachedStep && + Object.keys(cachedStep).length > 0) { + // Running cached step + logger_1.logger.debug(`Executing Cached Step: ${step.description}`); + const locator = cachedStep["locator"]; + const action = cachedStep["action"]; + const description = cachedStep["description"].replace(/'/g, "\\'"); + const value = cachedStep["value"]; + const input = step.data?.value || value; + let code = ""; + switch (action) { + case "click": + case "dblclick": + code = `await page.${locator}.describe('${description}').${action}({ timeout: ${constants_1.CACHED_ACTION_TIMEOUT} });`; + break; + case "fill": + code = `await page.${locator}.describe('${description}').fill("${input}", { timeout: ${constants_1.CACHED_ACTION_TIMEOUT} })`; + break; + case "hover": + code = `await page.${locator}.describe('${description}').hover({ timeout: ${constants_1.CACHED_ACTION_TIMEOUT} })`; + break; + case "select-option": + code = `await page.${locator}.describe('${description}').selectOption("${input}", { timeout: ${constants_1.CACHED_ACTION_TIMEOUT} })`; + 
break; + case "waitForText": + code = `await page.getByText("${value}", { exact: true }).first().waitFor({ state: "visible" })`; + break; + } + logger_1.logger.debug(`Executing cached action:\n${code}`); + try { + let pageScreenshotBeforeApplyingAction = ""; + if (step.waitUntil) { + pageScreenshotBeforeApplyingAction = (await tabManager.active().screenshot({ fullPage: false })).toString("base64"); + } + /** + * Before executing the first cached step, ensure the DOM is stable to avoid + * taking snapshot of a loading or transitioning state. Give it higher idle time because the page might + * take a bit longer to stabilize right after navigation. + */ + const INITIAL_DOM_STABILIZATION_IDLE_TIME = constants_1.INITIAL_DOM_STABILIZATION_IDLE; + if (i === 0) { + await (0, utils_1.waitForDOMStabilization)(tabManager, test, INITIAL_DOM_STABILIZATION_IDLE_TIME); + } + const pageSnapshotBeforeApplyingAction = await (0, utils_1.safeSnapshot)(tabManager); + await (0, utils_1.runLocatorCode)(tabManager, code); + /** + * Verify that the action had the intended effect on the page. This is because sometimes cached pw action may silently fail. + * + * Before verifying, this function will wait for the DOM to stabilize. + * stabilization idle time is set to 500ms by default. + * + * This means workflow is this: action performed -> wait for DOM stabilization -> check if action had effect -> next step + * + * Auto healing will be triggered if the action did not have any effect on the page. 
+ */ + await (0, utils_1.verifyActionEffect)(tabManager, action, pageSnapshotBeforeApplyingAction); + if (step.waitUntil) { + await (0, utils_1.waitForCondition)({ + page: tabManager, + condition: step.waitUntil, + pageScreenshotBeforeApplyingAction, + previousSteps: processedSteps.slice(0, i), + currentStep: step, + nextStep: processedSteps[i + 1], + }); + } + // Handle data extraction if specified + // This is done post cached step execution + if (step.extract) { + const snapshot = await (0, utils_1.safeSnapshot)(tabManager); + const url = tabManager.active().url(); + const extracted = await (0, extract_1.extractDataWithAI)({ + snapshot, + url, + prompt: step.extract.prompt, + }); + const placeholderKey = `{{run.${step.extract.as}}}`; + localValues[placeholderKey] = extracted; + logger_1.logger.info(`Extracted {{run.${step.extract.as}}}: "${extracted}"`); + } + continue; + } + catch (error) { + logger_1.logger.debug(`Error executing cached step, falling back to AI execution: ${error}`); + } + } + const abortController = new AbortController(); + const { tools, getPendingCacheData, clearPendingCacheData } = (0, tools_1.getAItools)(tabManager.active(), { + currentStep: step, + abortController, + test, + tabManager, + }); + logger_1.logger.debug(`Executing Step: ${step.description}`); + let pageScreenshotBeforeApplyingAction = ""; + if (step.waitUntil) { + pageScreenshotBeforeApplyingAction = (await tabManager.active().screenshot({ fullPage: false })).toString("base64"); + } + const model = (0, models_1.resolveModel)((0, config_1.getModelId)("stepExecution")); + logger_1.logger.debug(`Using model: ${(0, config_1.getModelId)("stepExecution")} for step execution / gateway: ${(0, config_1.getConfig)().ai?.gateway ?? 
"none"}`); + try { + const result = await maybeWithSpan({ capability: "step_execution", step: "agentic_tool_calling" }, async () => (0, ai_1.generateText)({ + model, + maxRetries: constants_1.MAX_RETRIES, + temperature: 0, + tools: tools, + providerOptions: { + google: { + thinkingConfig: { + includeThoughts: false, + thinkingLevel: "medium", + }, + }, + openrouter: { + reasoning: { + effort: "medium", + exclude: true + }, + }, + }, + onStepFinish: async ({ toolCalls }) => { + if (!onReasoning) + return; + // Append tool call reasoning to the response + toolCalls.forEach((toolCall) => { + const reasoning = `${(toolCall?.input).reasoning}\n\n`; + onReasoning({ + id, + reasoning, + }); + }); + }, + stopWhen: (0, ai_1.stepCountIs)(constants_1.STEP_EXECUTION_MAX_STEPS), + abortSignal: AbortSignal.timeout(constants_1.STEP_EXECUTION_TIMEOUT), + toolChoice: "auto", + prompt: (0, prompts_1.buildRunStepsPrompt)({ + auth, + steps: processedSteps, + step, + userFlow, + stepIndex: i, + }), + })); + // Cache the step action only if it was a single tool call (simple, deterministic action). + // Multi-step actions are not cached as they may be non-deterministic. + const allToolCalls = result.steps + .flatMap((s) => s.toolCalls) + .filter((tool) => ["browser_snapshot", "browser_stop"].indexOf(tool.toolName) === -1); + if (allToolCalls.length === 1 && cache_1.cache) { + const cacheData = getPendingCacheData(); + if (cacheData) { + await cache_1.cache.hset(`step:${userFlow}:${step.description}`, cacheData); + logger_1.logger.debug(`Cached step action: ${step.description}`); + } + } + clearPendingCacheData(); + } + catch (error) { + logger_1.logger.error({ err: error }, `Step execution failed: ${step.description}`); + errorInStepExecution = error instanceof Error ? 
error.message : String(error); + stepThatFailed = step.description; + break; + } + if (step.waitUntil) { + await (0, utils_1.waitForCondition)({ + page: tabManager, + condition: step.waitUntil, + pageScreenshotBeforeApplyingAction, + previousSteps: processedSteps.slice(0, i), + currentStep: step, + nextStep: processedSteps[i + 1], + }); + } + // Handle data extraction if specified + // This is done post AI step execution + if (step.extract) { + const snapshot = await (0, utils_1.safeSnapshot)(tabManager); + const url = tabManager.active().url(); + const extracted = await (0, extract_1.extractDataWithAI)({ + snapshot, + url, + prompt: step.extract.prompt, + }); + const placeholderKey = `{{run.${step.extract.as}}}`; + localValues[placeholderKey] = extracted; + logger_1.logger.info(`Extracted {{run.${step.extract.as}}}: "${extracted}"`); + } + if (onStepEnd) { + onStepEnd({ id, description: step.description }); + } + } + if (errorInStepExecution) { + logger_1.logger.warn(`Step execution encountered an error. 
Skipping assertions execution.`); + const errorDescription = `\n${errorInStepExecution}\nStep: ${stepThatFailed}`; + if (test) { + test.info().annotations.push({ + type: "Error", + description: errorDescription, + }); + } + throw new errors_1.StepExecutionError(errorDescription, stepThatFailed); + } + if (processedAssertions && processedAssertions.length > 0 && expect) { + for (const { assertion, effort, images } of processedAssertions) { + logger_1.logger.info(`Running assertion: ${assertion}`); + const id = shortid_1.default.generate(); + if (onStepStart) { + onStepStart({ + id, + description: "Starting assertion verification", + }); + } + if (onReasoning) { + onReasoning({ + id, + reasoning: `Verifying assertion: ${assertion}`, + }); + } + const reasoning = await (0, assertion_1.assert)({ + page: tabManager, + assertion, + test, + expect, + effort, + images, + failSilently: failAssertionsSilently, + maxRetries: 1, + onRetry: (retryCount, previousResult) => { }, + }); + if (onReasoning) { + onReasoning({ + id, + reasoning: `\n\n${reasoning}`, + }); + } + if (onStepEnd) { + onStepEnd({ id, description: "Successfully verified assertion" }); + } + } + } +}; +exports.runSteps = runSteps; +/** + * Runs a complete user flow as a single AI agent call. + * Best for exploratory testing where exact steps are flexible. + * The AI autonomously navigates, interacts, and verifies the flow. 
+ * + * @param options - User flow configuration + * @param options.page - The Playwright page instance + * @param options.userFlow - Description of the user flow to execute + * @param options.steps - Natural language description of steps to perform + * @param options.effort - "low" uses a faster model, "high" uses a more capable model with deeper thinking + * @param options.assertion - Optional assertion to verify after the flow completes + * @returns The assertion result if an assertion was provided, the raw AI text response otherwise, or undefined on error + * + * @example + * ```typescript + * const result = await runUserFlow({ + * page, + * userFlow: "Complete a purchase", + * steps: "Navigate to store, add an item, checkout", + * effort: "high", + * assertion: "Order confirmation is displayed", + * }); + * ``` + */ +const runUserFlow = async ({ page, userFlow, steps, assertion, effort = "low", thinkingBudget = constants_1.THINKING_BUDGET_DEFAULT, }) => { + const abortController = new AbortController(); + const model = effort === "low" + ? 
(0, models_1.resolveModel)((0, config_1.getModelId)("userFlowLow")) + : (0, models_1.resolveModel)((0, config_1.getModelId)("userFlowHigh")); + const { tools } = (0, tools_1.getAItools)(page, { + abortController, + }); + try { + const { text } = await maybeWithSpan({ capability: "user_flow_execution", step: "agentic_tool_calling" }, async () => { + return (0, ai_1.generateText)({ + model, + maxRetries: constants_1.MAX_RETRIES, + temperature: 0, + tools: tools, + providerOptions: { + google: { + thinkingConfig: { + thinkingBudget, + }, + }, + openrouter: { + reasoning: { + max_tokens: thinkingBudget, + }, + }, + }, + stopWhen: (0, ai_1.stepCountIs)(constants_1.USER_FLOW_MAX_STEPS), + abortSignal: abortController.signal, + prepareStep: async ({ messages }) => { + // Remove older messages to keep the context window small + if (messages.length > 11) { + const modifiedMessages = [messages[0], ...messages.slice(-10)]; + return { + messages: modifiedMessages, + }; + } + return {}; + }, + toolChoice: "auto", + prompt: (0, prompts_1.buildRunUserFlowPrompt)({ + steps, + userFlow, + assertion, + }), + }); + }); + if (assertion) { + const { output } = await (0, ai_1.generateText)({ + model: (0, models_1.resolveModel)((0, config_1.getModelId)("utility")), + prompt: `Convert the following text output into a valid JSON object with the specified properties:\n\n${text}`, + output: ai_1.Output.object({ + schema: zod_1.z.object({ + assertionPassed: zod_1.z.boolean().describe("Indicates whether the assertion passed or not."), + confidenceScore: zod_1.z + .number() + .describe("Confidence score of the assertion, between 0 and 100."), + reasoning: zod_1.z + .string() + .describe("Brief explanation of the reasoning behind the assertion."), + }), + }), + }); + return output; + } + return text; + } + catch (error) { + logger_1.logger.error({ err: error }, "Error during user flow execution"); + } +}; +exports.runUserFlow = runUserFlow; +/** + * Wraps a cached Playwright flow with AI 
fallback for auto-healing. + * Tries the cached flow first; if it fails (e.g., due to UI changes), falls back to AI execution. + * + * @param config - Configuration for cached and AI flow execution + * @param config.cachedFlow - The cached Playwright flow to try first + * @param config.aiFlow - The AI-powered fallback flow to run if cached flow fails + * @param config.aiFlowTimeout - Optional timeout for the AI flow in milliseconds + * @param config.test - Playwright test instance for retry detection and timeout management + * + * @example + * ```typescript + * await executeWithAutoHealing({ + * cachedFlow: async () => { await page.getByRole("button").click(); }, + * aiFlow: async () => { await runSteps({ page, userFlow: "Click submit", steps }); }, + * test, + * }); + * ``` + */ +const executeWithAutoHealing = async (config) => { + const { cachedFlow, aiFlow, test, aiFlowTimeout } = config; + if (process.env.AI || test.info().retry > 0) { + if (aiFlowTimeout) { + test.setTimeout(aiFlowTimeout); + } + await aiFlow(); + } + else { + await cachedFlow(); + } +}; +exports.executeWithAutoHealing = executeWithAutoHealing; +var config_2 = require("./config"); +Object.defineProperty(exports, "configure", { enumerable: true, get: function () { return config_2.configure; } }); +var emailsink_1 = require("./providers/emailsink"); +Object.defineProperty(exports, "emailsinkProvider", { enumerable: true, get: function () { return emailsink_1.emailsinkProvider; } }); +var email_1 = require("./email"); +Object.defineProperty(exports, "extractEmailContent", { enumerable: true, get: function () { return email_1.extractEmailContent; } }); +Object.defineProperty(exports, "generateEmail", { enumerable: true, get: function () { return email_1.generateEmail; } }); +var assertion_2 = require("./assertion"); +Object.defineProperty(exports, "assert", { enumerable: true, get: function () { return assertion_2.assert; } }); +var errors_2 = require("./errors"); +Object.defineProperty(exports, 
"PassmarkError", { enumerable: true, get: function () { return errors_2.PassmarkError; } }); +Object.defineProperty(exports, "StepExecutionError", { enumerable: true, get: function () { return errors_2.StepExecutionError; } }); +Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return errors_2.ValidationError; } }); +Object.defineProperty(exports, "AIModelError", { enumerable: true, get: function () { return errors_2.AIModelError; } }); +Object.defineProperty(exports, "CacheError", { enumerable: true, get: function () { return errors_2.CacheError; } }); +Object.defineProperty(exports, "ConfigurationError", { enumerable: true, get: function () { return errors_2.ConfigurationError; } }); diff --git a/dist/instrumentation.d.ts b/dist/instrumentation.d.ts new file mode 100644 index 0000000..5aeb2e3 --- /dev/null +++ b/dist/instrumentation.d.ts @@ -0,0 +1 @@ +export declare const axiomEnabled: boolean; diff --git a/dist/instrumentation.js b/dist/instrumentation.js new file mode 100644 index 0000000..a2051e6 --- /dev/null +++ b/dist/instrumentation.js @@ -0,0 +1,36 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.axiomEnabled = void 0; +const exporter_trace_otlp_http_1 = require("@opentelemetry/exporter-trace-otlp-http"); +const resources_1 = require("@opentelemetry/resources"); +const sdk_trace_node_1 = require("@opentelemetry/sdk-trace-node"); +const sdk_trace_node_2 = require("@opentelemetry/sdk-trace-node"); +const semantic_conventions_1 = require("@opentelemetry/semantic-conventions"); +const api_1 = require("@opentelemetry/api"); +const ai_1 = require("axiom/ai"); +const logger_1 = require("./logger"); +const axiomToken = process.env.AXIOM_TOKEN; +const axiomDataset = process.env.AXIOM_DATASET; +exports.axiomEnabled = !!(axiomToken && axiomDataset); +if (axiomToken && axiomDataset) { + logger_1.logger.info("Axiom AI instrumentation enabled"); + const tracer = 
api_1.trace.getTracer("ai-logs-tracer"); + const provider = new sdk_trace_node_1.NodeTracerProvider({ + resource: (0, resources_1.resourceFromAttributes)({ + [semantic_conventions_1.ATTR_SERVICE_NAME]: "passmark", + }, { + schemaUrl: "https://opentelemetry.io/schemas/1.37.0", + }), + spanProcessors: [ + new sdk_trace_node_2.SimpleSpanProcessor(new exporter_trace_otlp_http_1.OTLPTraceExporter({ + url: `https://api.axiom.co/v1/traces`, + headers: { + Authorization: `Bearer ${axiomToken}`, + "X-Axiom-Dataset": axiomDataset, + }, + })), + ], + }); + provider.register(); + (0, ai_1.initAxiomAI)({ tracer, redactionPolicy: ai_1.RedactionPolicy.AxiomDefault }); +} diff --git a/dist/logger.d.ts b/dist/logger.d.ts new file mode 100644 index 0000000..d5c3d5a --- /dev/null +++ b/dist/logger.d.ts @@ -0,0 +1,2 @@ +import pino from "pino"; +export declare const logger: pino.Logger; diff --git a/dist/logger.js b/dist/logger.js new file mode 100644 index 0000000..27402dd --- /dev/null +++ b/dist/logger.js @@ -0,0 +1,14 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.logger = void 0; +const pino_1 = __importDefault(require("pino")); +exports.logger = (0, pino_1.default)({ + name: "passmark-ai", + level: process.env.PASSMARK_LOG_LEVEL || "info", + transport: process.env.NODE_ENV !== "production" + ? { target: "pino-pretty", options: { colorize: true } } + : undefined, +}); diff --git a/dist/models.d.ts b/dist/models.d.ts new file mode 100644 index 0000000..ff7da8f --- /dev/null +++ b/dist/models.d.ts @@ -0,0 +1,18 @@ +import { type LanguageModel } from "ai"; +/** + * Resolves a canonical model ID to a LanguageModel instance wrapped with Axiom instrumentation. + * Input format: "provider/model-name" (e.g. "google/gemini-3-flash") + * + * Users always use canonical IDs (gateway-style). 
When using direct providers, + * model names are automatically mapped to the correct provider-specific names + * (e.g. "gemini-3-flash" → "gemini-3-flash-preview" for Google's direct API). + * + * When gateway is "vercel", routes through the Vercel AI Gateway as-is. + * When gateway is "openrouter", routes through OpenRouter. + * When gateway is "cloudflare", routes through Cloudflare AI Gateway using the + * provider-native paths (google-ai-studio, anthropic) so provider-specific fields + * like Gemini's thought_signature pass through unchanged. + * When gateway is "none" (default), creates a direct provider instance with alias resolution. + * All paths wrap the model with wrapAISDKModel for tracing when Axiom is enabled. + */ +export declare function resolveModel(modelId: string): LanguageModel; diff --git a/dist/models.js b/dist/models.js new file mode 100644 index 0000000..074e698 --- /dev/null +++ b/dist/models.js @@ -0,0 +1,173 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.resolveModel = resolveModel; +const errors_1 = require("./errors"); +const anthropic_1 = require("@ai-sdk/anthropic"); +const google_1 = require("@ai-sdk/google"); +const ai_sdk_provider_1 = require("@openrouter/ai-sdk-provider"); +const ai_1 = require("ai"); +const ai_2 = require("axiom/ai"); +const config_1 = require("./config"); +const instrumentation_1 = require("./instrumentation"); +function wrapModel(model) { + return instrumentation_1.axiomEnabled ? (0, ai_2.wrapAISDKModel)(model) : model; +} +let _google = null; +let _anthropic = null; +let _openrouter = null; +let _cloudflareGoogle = null; +let _cloudflareAnthropic = null; +function getGoogleProvider() { + if (!_google) { + if (!process.env.GOOGLE_GENERATIVE_AI_API_KEY) { + throw new errors_1.ConfigurationError("GOOGLE_GENERATIVE_AI_API_KEY isn't set. 
Add it to your environment (for example: export GOOGLE_GENERATIVE_AI_API_KEY=your_key), or use a gateway: configure({ ai: { gateway: 'vercel' } }) with AI_GATEWAY_API_KEY, configure({ ai: { gateway: 'openrouter' } }) with OPENROUTER_API_KEY, or configure({ ai: { gateway: 'cloudflare' } }) with CLOUDFLARE_ACCOUNT_ID, CLOUDFLARE_AI_GATEWAY, GOOGLE_GENERATIVE_AI_API_KEY, and CLOUDFLARE_AI_GATEWAY_API_KEY. See .env.example for reference."); + } + _google = (0, google_1.createGoogleGenerativeAI)({ + apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY, + }); + } + return _google; +} +function getAnthropicProvider() { + if (!_anthropic) { + if (!process.env.ANTHROPIC_API_KEY) { + throw new errors_1.ConfigurationError("ANTHROPIC_API_KEY isn't set. Add it to your environment (for example: export ANTHROPIC_API_KEY=your_key), or use a gateway: configure({ ai: { gateway: 'vercel' } }) with AI_GATEWAY_API_KEY, configure({ ai: { gateway: 'openrouter' } }) with OPENROUTER_API_KEY, or configure({ ai: { gateway: 'cloudflare' } }) with CLOUDFLARE_ACCOUNT_ID, CLOUDFLARE_AI_GATEWAY, ANTHROPIC_API_KEY, and CLOUDFLARE_AI_GATEWAY_API_KEY. See .env.example for reference."); + } + _anthropic = (0, anthropic_1.createAnthropic)({ + apiKey: process.env.ANTHROPIC_API_KEY, + }); + } + return _anthropic; +} +function getOpenRouterProvider() { + if (!_openrouter) { + if (!process.env.OPENROUTER_API_KEY) { + throw new errors_1.ConfigurationError("OPENROUTER_API_KEY isn't set. Add it to your environment (for example: export OPENROUTER_API_KEY=your_key). See .env.example for reference."); + } + _openrouter = (0, ai_sdk_provider_1.createOpenRouter)({ + apiKey: process.env.OPENROUTER_API_KEY, + }); + } + return _openrouter; +} +/** + * Builds the per-provider Cloudflare AI Gateway base URL and (optional) + * `cf-aig-authorization` header. 
We route through Cloudflare's native + * provider paths (not the Unified/OpenAI-compat endpoint) so that + * provider-specific fields — notably Gemini's `thought_signature` on + * thinking models — pass through unmodified. + * + * @see https://developers.cloudflare.com/ai-gateway/usage/providers/google-ai-studio/ + * @see https://developers.cloudflare.com/ai-gateway/usage/providers/anthropic/ + */ +function getCloudflareGatewayConfig(providerPath) { + const accountId = process.env.CLOUDFLARE_ACCOUNT_ID; + const gatewayName = process.env.CLOUDFLARE_AI_GATEWAY; + if (!accountId || !gatewayName) { + throw new errors_1.ConfigurationError("Cloudflare AI Gateway requires CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_AI_GATEWAY (gateway name). You must also set the upstream provider key (GOOGLE_GENERATIVE_AI_API_KEY and/or ANTHROPIC_API_KEY). If the gateway is authenticated, also set CLOUDFLARE_AI_GATEWAY_API_KEY. See .env.example for reference."); + } + const cfAigToken = process.env.CLOUDFLARE_AI_GATEWAY_API_KEY; + return { + baseURL: `https://gateway.ai.cloudflare.com/v1/${accountId}/${gatewayName}/${providerPath}`, + headers: cfAigToken ? { "cf-aig-authorization": `Bearer ${cfAigToken}` } : undefined, + }; +} +function getCloudflareGoogleProvider() { + if (!_cloudflareGoogle) { + if (!process.env.GOOGLE_GENERATIVE_AI_API_KEY) { + throw new errors_1.ConfigurationError("GOOGLE_GENERATIVE_AI_API_KEY isn't set. Cloudflare AI Gateway proxies requests to Google AI Studio and requires your Google API key. 
Add GOOGLE_GENERATIVE_AI_API_KEY to your environment."); + } + const { baseURL, headers } = getCloudflareGatewayConfig("google-ai-studio/v1beta"); + _cloudflareGoogle = (0, google_1.createGoogleGenerativeAI)({ + apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY, + baseURL, + headers, + }); + } + return _cloudflareGoogle; +} +function getCloudflareAnthropicProvider() { + if (!_cloudflareAnthropic) { + if (!process.env.ANTHROPIC_API_KEY) { + throw new errors_1.ConfigurationError("ANTHROPIC_API_KEY isn't set. Cloudflare AI Gateway proxies requests to Anthropic and requires your Anthropic API key. Add ANTHROPIC_API_KEY to your environment."); + } + const { baseURL, headers } = getCloudflareGatewayConfig("anthropic/v1"); + _cloudflareAnthropic = (0, anthropic_1.createAnthropic)({ + apiKey: process.env.ANTHROPIC_API_KEY, + baseURL, + headers, + }); + } + return _cloudflareAnthropic; +} +/** + * Maps canonical model names to direct Google/Anthropic API names. + * Only needed where the gateway name differs from the direct provider name. + * Add new entries here when providers rename or graduate models. + */ +const MODEL_DIRECT_ALIASES = { + "gemini-3-flash": "gemini-3-flash-preview", + "claude-sonnet-4.6": "claude-sonnet-4-6", + "claude-haiku-4.5": "claude-haiku-4-5", +}; +function resolveDirectModelName(modelName) { + return MODEL_DIRECT_ALIASES[modelName] ?? modelName; +} +/** + * Maps canonical model IDs (provider/model) to OpenRouter model IDs. + * OpenRouter uses its own naming — add entries here when they differ from canonical IDs. + */ +const OPENROUTER_MODEL_ALIASES = { + "google/gemini-3-flash": "google/gemini-3-flash-preview", +}; +function resolveOpenRouterModelId(modelId) { + return OPENROUTER_MODEL_ALIASES[modelId] ?? modelId; +} +/** + * Resolves a canonical model ID to a LanguageModel instance wrapped with Axiom instrumentation. + * Input format: "provider/model-name" (e.g. "google/gemini-3-flash") + * + * Users always use canonical IDs (gateway-style). 
When using direct providers, + * model names are automatically mapped to the correct provider-specific names + * (e.g. "gemini-3-flash" → "gemini-3-flash-preview" for Google's direct API). + * + * When gateway is "vercel", routes through the Vercel AI Gateway as-is. + * When gateway is "openrouter", routes through OpenRouter. + * When gateway is "cloudflare", routes through Cloudflare AI Gateway using the + * provider-native paths (google-ai-studio, anthropic) so provider-specific fields + * like Gemini's thought_signature pass through unchanged. + * When gateway is "none" (default), creates a direct provider instance with alias resolution. + * All paths wrap the model with wrapAISDKModel for tracing when Axiom is enabled. + */ +function resolveModel(modelId) { + const gatewayConfig = (0, config_1.getConfig)().ai?.gateway ?? "none"; + if (gatewayConfig === "vercel") { + if (!process.env.AI_GATEWAY_API_KEY) { + throw new errors_1.ConfigurationError("AI_GATEWAY_API_KEY isn't set. To use the Vercel AI Gateway, add AI_GATEWAY_API_KEY to your environment. 
If you'd rather use direct provider keys, call configure({ ai: { gateway: 'none' } }) and set GOOGLE_GENERATIVE_AI_API_KEY and/or ANTHROPIC_API_KEY."); + } + return wrapModel((0, ai_1.gateway)(modelId)); + } + if (gatewayConfig === "openrouter") { + return wrapModel(getOpenRouterProvider()(resolveOpenRouterModelId(modelId))); + } + const [provider, ...rest] = modelId.split("/"); + const modelName = rest.join("/"); + if (gatewayConfig === "cloudflare") { + switch (provider) { + case "google": + return wrapModel(getCloudflareGoogleProvider()(resolveDirectModelName(modelName))); + case "anthropic": + return wrapModel(getCloudflareAnthropicProvider()(resolveDirectModelName(modelName))); + default: + throw new errors_1.AIModelError(`Cloudflare AI Gateway routing is not configured for provider: ${provider}`); + } + } + switch (provider) { + case "google": + return wrapModel(getGoogleProvider()(resolveDirectModelName(modelName))); + case "anthropic": + return wrapModel(getAnthropicProvider()(resolveDirectModelName(modelName))); + default: + throw new errors_1.AIModelError(`Unknown AI provider: ${provider}`); + } +} diff --git a/dist/prompts/index.d.ts b/dist/prompts/index.d.ts new file mode 100644 index 0000000..0609957 --- /dev/null +++ b/dist/prompts/index.d.ts @@ -0,0 +1,6 @@ +import { RunStepsOptions, Step, UserFlowOptions } from "../types"; +export declare const buildRunStepsPrompt: ({ auth, userFlow, step, steps, stepIndex, }: Pick & { + step: Step; + stepIndex: number; +}) => string; +export declare const buildRunUserFlowPrompt: ({ userFlow, steps, assertion, }: Pick) => string; diff --git a/dist/prompts/index.js b/dist/prompts/index.js new file mode 100644 index 0000000..e4d3278 --- /dev/null +++ b/dist/prompts/index.js @@ -0,0 +1,112 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.buildRunUserFlowPrompt = exports.buildRunStepsPrompt = void 0; +const buildRunStepsPrompt = ({ auth, userFlow, step, steps, stepIndex, }) => { 
+ return ` + **System Prompt:** + You are an AI-powered expert QA Agent that follows instructions precisely and is designed to test web applications. If you do not follow instructions exactly as specified below, very bad things will happen as bugs will go undetected. + + + ${userFlow} + + + The above user flow contains multiple steps that need to be executed one by one. However, right now we are only interested in executing one specific step. + + Execute **ONLY** the following step: + + + ${step.description} + + + + Current Step Index: ${stepIndex + 1} out of ${steps.length} steps. + + + ${stepIndex + 1 < steps.length + ? ` + + The next step (DO NOT EXECUTE THIS) is: "${steps[stepIndex + 1].description}" + This is provided for context only. Stop immediately after completing the current step given above. + ` + : ""} + + Remember we're only interested in executing the current step right now. We'll have a separate run for the next step. So, do not execute any steps other than the current step mentioned above. Stop right after executing the current step. + + ${step.data + ? ` +Use the following data for the current step: + +"${JSON.stringify(step.data)}". +`.trim() + : ""} + + ${auth + ? ` + If presented with login screen, log in to the website using the following credentials: + + - Email: ${auth.email} + - Password: ${auth.password} + ` + : ""} + + + - Wait for the page to be fully loaded and settled before executing the step. + - Start by taking a fresh snapshot of the page. If snapshot is not available or empty, wait and retry until you get a valid snapshot. + - [CRITICAL] After you execute the step, analyze the returned snapshot. The step execution is considered successful only if the latest snapshot reflects the expected state after performing the step. If it doesn't, you must take a fresh snapshot (and if needed a screenshot) and retry executing the step until the expected state is achieved. 
+ - [CRITICAL] If you are unable to locate the element based on the snapshot or if there is any ambiguity, you must take a screenshot of the page to visually inspect the current state and then retry locating the element and executing the step. + - You should stop right after the step is successfully executed and reflected in the snapshot. + - At any point if you get an error or make any mistake, you will request a fresh snapshot (if needed a screenshot) and try to re execute the step correctly by using the available tools. + - If you see any data validation issue or UI or input errors, correct the input and retry the step, unless data is supplied already via data field or step description. + - [CRITICAL] Do not use fake \`ref\` locators in tool calls, use the actual locators from the snapshot. + - If you have to wait for some time at any step, wait for max 5s and then take a fresh snapshot to decide the next step. + - In case you are confused, you can also take a screenshot of the page to visually inspect the current state. + - [CRITICAL] Do not perform multiple steps. Your objective is to perform only the current step specified above and stop right after that. + - For file uploads, use \`browser_upload_file\` tool with ref of the file upload button from the snapshot. + - [CRITICAL] Do not use browser_navigate tool unless there is an explicit instruction to navigate in the **step description**. + + `; +}; +exports.buildRunStepsPrompt = buildRunStepsPrompt; +const buildRunUserFlowPrompt = ({ userFlow, steps, assertion, }) => { + return ` + **System Prompt:** + You are an AI-powered expert QA Agent that follows instructions precisely and is designed to test web applications to find regressions in user flows. If you do not follow instructions exactly as specified below, very bad things will happen as bugs will go undetected. But in some cases user flows might have changed and in those cases you can use your best judgement to fill in the gaps. 
+ + Here's some context & instructions for the Agent: + + + ${userFlow} + + + ${steps + ? `Follow these steps **exactly** to test the user flow:\n\n\n${steps}\n- STOP user flow by calling \`browser_stop\` tool exactly once.\n` + : ""} + ${assertion + ? `\n\n${assertion}\n\n\n\n Double check your assertion analysis to ensure it's accurate.` + : ""} + + ${assertion + ? ` + The output should contain the following information: + - \`assertionPassed\`: A boolean indicating whether the assertion passed or not. + - \`confidenceScore\`: A number between 0 and 100 indicating the confidence score of the assertion. + - \`reasoning\`: A brief string explaining the reasoning behind the assertion. + ` + : ""} + + Follow these instructions carefully while testing the website: + + - You are given the above user flow and corresponding steps to test that user flow. You need to manually test it and assert that it works as expected. + - Run the steps one by one using the tools provided. + - At the end of each step, you will get a fresh snapshot of the page. Based on the snapshot, you will decide the optimal next step. Use your thinking to plan and iterate. + - At any point if you get an error or take any wrong step, you will request a fresh snapshot and try to correct the mistake by using the available tools. + - If you see any data validation issue or UI or input errors, correct the input and retry the step. + - Start by taking a fresh snapshot of the page. If snapshot is not available or empty, wait and retry until you get a valid snapshot. + - DO not use fake \`ref\` locators in tool calls, use the actual locators from the snapshot. + - If you have to wait for some time at any step, wait for max 5s and then take a fresh snapshot to decide the next step. + - In case you are confused, you can also take a screenshot of the page to visually inspect the current state. + - Never get stuck in a \`waitForTimeout\` loop forever. 
Analyze the current state and decide the next step based on the snapshot and previous tool calls. + + `; +}; +exports.buildRunUserFlowPrompt = buildRunUserFlowPrompt; diff --git a/dist/providers/emailsink.d.ts b/dist/providers/emailsink.d.ts new file mode 100644 index 0000000..610c6a9 --- /dev/null +++ b/dist/providers/emailsink.d.ts @@ -0,0 +1,11 @@ +import type { EmailProvider } from "../config"; +/** + * Emailsink is a simple email service by Bug0 that allows you to receive emails at a unique address and retrieve their content via an API + * The free plan doesn't require an API key, but you can consider getting one by upgrading to a paid plan for higher rate limits and reliability. + * + * @param options - Configuration options for the Emailsink provider + * @returns An EmailProvider instance + */ +export declare function emailsinkProvider(options: { + apiKey?: string; +}): EmailProvider; diff --git a/dist/providers/emailsink.js b/dist/providers/emailsink.js new file mode 100644 index 0000000..0f04d1b --- /dev/null +++ b/dist/providers/emailsink.js @@ -0,0 +1,38 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.emailsinkProvider = emailsinkProvider; +/** + * Emailsink is a simple email service by Bug0 that allows you to receive emails at a unique address and retrieve their content via an API + * The free plan doesn't require an API key, but you can consider getting one by upgrading to a paid plan for higher rate limits and reliability. 
+ * + * @param options - Configuration options for the Emailsink provider + * @returns An EmailProvider instance + */ +function emailsinkProvider(options) { + return { + domain: "emailsink.dev", + extractContent: async ({ email, prompt }) => { + let url = `https://get.emailsink.dev/?email=${encodeURIComponent(email)}&prompt=${encodeURIComponent(prompt)}`; + if (options.apiKey) { + url += `&secret=${encodeURIComponent(options.apiKey)}`; + } + const response = await fetch(url); + const data = (await response.json()); + let result = data.result; + // Handle case where result is a string containing a JSON object + if (typeof result === "string" && result.startsWith("{")) { + try { + const parsedResult = JSON.parse(result); + result = parsedResult.result; + } + catch { + // Keep the original result if parsing fails + } + } + if (result !== undefined && result !== null && result !== "") { + return result; + } + throw new Error("No email content found"); + }, + }; +} diff --git a/dist/redis.d.ts b/dist/redis.d.ts new file mode 100644 index 0000000..75a979f --- /dev/null +++ b/dist/redis.d.ts @@ -0,0 +1,3 @@ +import Redis from "ioredis"; +declare let redis: Redis | null; +export { redis }; diff --git a/dist/redis.js b/dist/redis.js new file mode 100644 index 0000000..080c96d --- /dev/null +++ b/dist/redis.js @@ -0,0 +1,16 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.redis = void 0; +const ioredis_1 = __importDefault(require("ioredis")); +const logger_1 = require("./logger"); +let redis = null; +exports.redis = redis; +if (process.env.REDIS_URL) { + exports.redis = redis = new ioredis_1.default(process.env.REDIS_URL); +} +else { + logger_1.logger.warn("REDIS_URL not set. 
Step caching, global placeholders, and project data are disabled."); +} diff --git a/dist/tools.d.ts b/dist/tools.d.ts new file mode 100644 index 0000000..9d659d2 --- /dev/null +++ b/dist/tools.d.ts @@ -0,0 +1,188 @@ +import { type Page } from "@playwright/test"; +import { PlaywrightTestArgs, PlaywrightTestOptions, PlaywrightWorkerArgs, PlaywrightWorkerOptions, TestType } from "@playwright/test"; +import type { TabManager } from "./utils/tab-manager"; +type ToolSettings = { + abortController?: AbortController; + currentStep?: { + description: string; + data?: Record; + }; + test?: TestType; + /** + * Optional tab manager. When provided, tools resolve the active page + * dynamically and auto-switch to a newly opened tab after action tools. + */ + tabManager?: TabManager; +}; +export declare function getAItools(page: Page, settings?: ToolSettings): { + tools: { + browser_navigate: import("ai").Tool<{ + url: string; + reasoning: string; + doesActionAdvanceUsTowardsGoal: boolean; + }, string | ({ + success: boolean; + url: string; + } & { + snapshot: string; + })>; + browser_click: import("ai").Tool<{ + ref: string; + elementDescription: string; + reasoning: string; + doesActionAdvanceUsTowardsGoal: boolean; + button?: "left" | "right" | "middle" | undefined; + doubleClick?: boolean | undefined; + }, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + browser_type: import("ai").Tool<{ + ref: string; + elementDescription: string; + text: string; + reasoning: string; + doesActionAdvanceUsTowardsGoal: boolean; + }, string | ({ + success: boolean; + text: string; + } & { + snapshot: string; + })>; + browser_take_screenshot: import("ai").Tool<{ + fullPage: boolean; + reasoning: string; + }, unknown>; + browser_press_key: import("ai").Tool<{ + key: string; + }, string | ({ + success: boolean; + key: string; + } & { + snapshot: string; + })>; + browser_navigate_back: import("ai").Tool, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + 
browser_navigate_forward: import("ai").Tool, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + browser_reload: import("ai").Tool<{ + reasoning: string; + }, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + browser_snapshot: import("ai").Tool<{ + reasoning: string; + }, string>; + browser_wait: import("ai").Tool<{ + timeout: number; + reasoning: string; + }, string | ({ + success: boolean; + timeout: number; + message: string; + } & { + snapshot: string; + })>; + browser_mouse_move: import("ai").Tool<{ + x: number; + y: number; + reasoning: string; + }, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + browser_mouse_down: import("ai").Tool<{ + reasoning: string; + }, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + browser_mouse_up: import("ai").Tool<{ + reasoning: string; + }, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + browser_select_dropdown_option: import("ai").Tool<{ + ref: string; + elementDescription: string; + value: string; + reasoning: string; + doesActionAdvanceUsTowardsGoal: boolean; + }, string | ({ + success: boolean; + value: string; + } & { + snapshot: string; + })>; + browser_stop: import("ai").Tool<{ + reasoning: string; + }, { + success: boolean; + message: string; + }>; + browser_drag_and_drop: import("ai").Tool<{ + sourceRef: string; + sourceElementDescription: string; + targetRef: string; + targetElementDescription: string; + reasoning: string; + doesActionAdvanceUsTowardsGoal: boolean; + }, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + browser_hover: import("ai").Tool<{ + ref: string; + elementDescription: string; + reasoning: string; + doesActionAdvanceUsTowardsGoal: boolean; + }, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + browser_upload_file: import("ai").Tool<{ + ref: string; + elementDescription: string; + filePaths: string[]; + reasoning: string; + doesActionAdvanceUsTowardsGoal: boolean; 
+ }, string | ({ + success: boolean; + prefixedFilePaths: string[]; + } & { + snapshot: string; + })>; + browser_trigger_blur: import("ai").Tool<{ + reasoning: string; + doesActionAdvanceUsTowardsGoal: boolean; + }, string | ({ + success: boolean; + } & { + snapshot: string; + })>; + get_unique_value: import("ai").Tool<{ + prefix: string; + }, { + success: boolean; + value: string; + }>; + }; + getPendingCacheData: () => Record | null; + clearPendingCacheData: () => void; +}; +export {}; diff --git a/dist/tools.js b/dist/tools.js new file mode 100644 index 0000000..54312c2 --- /dev/null +++ b/dist/tools.js @@ -0,0 +1,500 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.getAItools = getAItools; +const ai_1 = require("ai"); +const zod_1 = require("zod"); +const ai_2 = require("axiom/ai"); +const shortid_1 = __importDefault(require("shortid")); +const config_1 = require("./config"); +const instrumentation_1 = require("./instrumentation"); +const logger_1 = require("./logger"); +const constants_1 = require("./constants"); +// Only wrap tools with Axiom instrumentation when Axiom is configured +const maybeWrapTool = instrumentation_1.axiomEnabled ? ai_2.wrapTool : (_name, t) => t; +function getAItools(page, settings) { + const playwrightTools = new PlaywrightTools(page, settings); + const withSnapshot = async (fn, args) => { + try { + const result = await fn(args); + // tab-manager's persistent 'page' listener auto-switches active focus + // when a new tab opens, so getSnapshot() below targets it automatically. + const snapshot = await playwrightTools.getSnapshot(); + return { ...result, snapshot }; + } + catch (_error) { + return `Error executing this action. 
Retry the action or try a different one.\n\nLatest Snapshot:\n\n${await playwrightTools.getSnapshot()}`; + } + }; + const tools = { + browser_navigate: maybeWrapTool("browser_navigate", (0, ai_1.tool)({ + description: "Navigate to a URL. This tool should be used only when an explicit instruction to navigate is given in a particular step", + inputSchema: playwrightTools.navigateSchema, + execute: async (args) => withSnapshot(playwrightTools.navigate.bind(playwrightTools), args), + })), + browser_click: maybeWrapTool("browser_click", (0, ai_1.tool)({ + description: "Click on an element", + inputSchema: playwrightTools.clickSchema, + execute: async (args) => withSnapshot(playwrightTools.click.bind(playwrightTools), args), + })), + browser_type: maybeWrapTool("browser_type", (0, ai_1.tool)({ + description: "Type text into an element", + inputSchema: playwrightTools.typeSchema, + execute: async (args) => withSnapshot(playwrightTools.type.bind(playwrightTools), args), + })), + browser_take_screenshot: maybeWrapTool("browser_take_screenshot", (0, ai_1.tool)({ + description: "Take a screenshot", + inputSchema: playwrightTools.screenshotSchema, + execute: async (args) => { + const fn = playwrightTools.takeScreenshot.bind(playwrightTools); + const screenshot = await fn(args); + return screenshot; + }, + toModelOutput: (result) => { + const base64 = (typeof result === "string" ? 
result : result.output); + return { + type: "content", + value: [ + { type: "media", data: base64, mediaType: "image/png" }, + ], + }; + }, + })), + browser_press_key: maybeWrapTool("browser_press_key", (0, ai_1.tool)({ + description: "Press a key", + inputSchema: playwrightTools.pressKeySchema, + execute: async (args) => withSnapshot(playwrightTools.pressKey.bind(playwrightTools), args), + })), + browser_navigate_back: maybeWrapTool("browser_navigate_back", (0, ai_1.tool)({ + description: "Go back to previous page", + inputSchema: zod_1.z.object({}), + execute: async () => withSnapshot(playwrightTools.goBack.bind(playwrightTools), {}), + })), + browser_navigate_forward: maybeWrapTool("browser_navigate_forward", (0, ai_1.tool)({ + description: "Go forward to next page", + inputSchema: zod_1.z.object({}), + execute: async () => withSnapshot(playwrightTools.goForward.bind(playwrightTools), {}), + })), + browser_reload: maybeWrapTool("browser_reload", (0, ai_1.tool)({ + description: "Reload the current page", + inputSchema: zod_1.z.object({ + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + }), + execute: async (args) => withSnapshot(playwrightTools.reload.bind(playwrightTools), args), + })), + browser_snapshot: maybeWrapTool("browser_snapshot", (0, ai_1.tool)({ + description: "Take fresh snapshot of the current page", + inputSchema: zod_1.z.object({ + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + }), + execute: async (_args) => { + return await playwrightTools.getSnapshot(); + }, + })), + browser_wait: maybeWrapTool("browser_wait", (0, ai_1.tool)({ + description: "Wait for a specified amount of time", + inputSchema: playwrightTools.waitSchema, + execute: async (args) => withSnapshot(playwrightTools.wait.bind(playwrightTools), args), + })), + browser_mouse_move: maybeWrapTool("browser_mouse_move", (0, ai_1.tool)({ + description: "Move the mouse to a specific coordinate", + inputSchema: 
playwrightTools.mouseMoveSchema, + execute: async (args) => withSnapshot(playwrightTools.mouseMove.bind(playwrightTools), args), + })), + browser_mouse_down: maybeWrapTool("browser_mouse_down", (0, ai_1.tool)({ + description: "Press the left mouse button.", + inputSchema: playwrightTools.mouseDownSchema, + execute: async (args) => withSnapshot(playwrightTools.mouseDown.bind(playwrightTools), args), + })), + browser_mouse_up: maybeWrapTool("browser_mouse_up", (0, ai_1.tool)({ + description: "Release the left mouse button", + inputSchema: playwrightTools.mouseUpSchema, + execute: async (args) => withSnapshot(playwrightTools.mouseUp.bind(playwrightTools), args), + })), + browser_select_dropdown_option: maybeWrapTool("browser_select_dropdown_option", (0, ai_1.tool)({ + description: "Select an option from a dropdown", + inputSchema: playwrightTools.selectDropdownOptionSchema, + execute: async (args) => withSnapshot(playwrightTools.selectDropdownOption.bind(playwrightTools), args), + })), + browser_stop: maybeWrapTool("browser_stop", (0, ai_1.tool)({ + description: "Stop the user flow test", + inputSchema: playwrightTools.stopSchema, + execute: async (args) => playwrightTools.stop(args), + })), + browser_drag_and_drop: maybeWrapTool("browser_drag_and_drop", (0, ai_1.tool)({ + description: "Drag an element and drop it onto another element", + inputSchema: playwrightTools.dragAndDropSchema, + execute: async (args) => withSnapshot(playwrightTools.dragAndDrop.bind(playwrightTools), args), + })), + browser_hover: maybeWrapTool("browser_hover", (0, ai_1.tool)({ + description: "Hover over an element", + inputSchema: playwrightTools.hoverSchema, + execute: async (args) => withSnapshot(playwrightTools.hover.bind(playwrightTools), args), + })), + browser_upload_file: maybeWrapTool("browser_upload_file", (0, ai_1.tool)({ + description: "Upload a file", + inputSchema: playwrightTools.uploadFileSchema, + execute: async (args) => 
withSnapshot(playwrightTools.uploadFile.bind(playwrightTools), args), + })), + browser_trigger_blur: maybeWrapTool("browser_trigger_blur", (0, ai_1.tool)({ + description: "Trigger a blur event by clicking on the body. Useful for when an element needs to lose focus.", + inputSchema: playwrightTools.triggerBlurSchema, + execute: async (args) => withSnapshot(playwrightTools.triggerBlur.bind(playwrightTools), args), + })), + get_unique_value: maybeWrapTool("get_unique_value", (0, ai_1.tool)({ + description: "Generate a unique value by appending a shortid to a prefix", + inputSchema: playwrightTools.getUniqueValueSchema, + execute: async (args) => playwrightTools.getUniqueValue(args), + })), + }; + return { + tools, + getPendingCacheData: () => playwrightTools.pendingCacheData, + clearPendingCacheData: () => { + playwrightTools.pendingCacheData = null; + }, + }; +} +class PlaywrightTools { + initialPage; + tabManager; + currentStep; + abortController; + pendingCacheData = null; + get page() { + return this.tabManager ? this.tabManager.active() : this.initialPage; + } + constructor(page, settings = {}) { + const { currentStep, abortController, tabManager } = settings; + this.initialPage = page; + this.tabManager = tabManager; + this.currentStep = currentStep; + this.abortController = abortController; + } + async getSnapshot() { + const snapshot = await this.page.ariaSnapshot({ mode: "ai", timeout: constants_1.SNAPSHOT_TIMEOUT }); + return `url: ${this.page.url()}\n\n${snapshot}`; + } + navigateSchema = zod_1.z.object({ + url: zod_1.z.string().describe("The URL to navigate to"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + doesActionAdvanceUsTowardsGoal: zod_1.z + .boolean() + .describe('"true" indicates high confidence that this action will advance us towards the goal. 
"false" indicates low confidence and could be an AI hallucination.'), + }); + async navigate({ url }) { + await this.page.goto(url, { waitUntil: "load" }); + return { success: true, url }; + } + clickSchema = zod_1.z.object({ + ref: zod_1.z.string().describe("The ref of the element to click"), + elementDescription: zod_1.z + .string() + .describe("A description of the element to click, used for debugging"), + button: zod_1.z + .enum(["left", "right", "middle"]) + .optional() + .describe("Button to click, defaults to left"), + doubleClick: zod_1.z + .boolean() + .optional() + .describe("Whether to perform a double click instead of a single click"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + doesActionAdvanceUsTowardsGoal: zod_1.z + .boolean() + .describe('"true" indicates high confidence that this action will advance us towards the goal. "false" indicates low confidence and could be an AI hallucination.'), + }); + async click({ ref, elementDescription, button, doubleClick, }) { + const locator = this.page.locator(`aria-ref=${ref}`).describe(elementDescription); + let cachedLocator = ""; + if (this.currentStep) { + cachedLocator = await this.resolveLocator(locator); + } + if (doubleClick) { + await locator.dblclick({ button, timeout: constants_1.LOCATOR_ACTION_TIMEOUT }); + } + else { + await locator.click({ button, timeout: constants_1.LOCATOR_ACTION_TIMEOUT }); + } + this.prepareCacheData(cachedLocator, doubleClick ? 
"dblclick" : "click", elementDescription); + return { + success: true, + }; + } + typeSchema = zod_1.z.object({ + ref: zod_1.z.string().describe("The ref of the element to type into"), + elementDescription: zod_1.z.string().describe("A description of the element, used for debugging"), + text: zod_1.z.string().describe("The text to type"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + doesActionAdvanceUsTowardsGoal: zod_1.z + .boolean() + .describe('"true" indicates high confidence that this action will advance us towards the goal. "false" indicates low confidence and could be an AI hallucination.'), + }); + async type({ ref, elementDescription, text }) { + const locator = this.page.locator(`aria-ref=${ref}`).describe(elementDescription); + let cachedLocator = ""; + if (this.currentStep) { + cachedLocator = await this.resolveLocator(locator); + } + await locator.fill(text, { timeout: constants_1.LOCATOR_ACTION_TIMEOUT }); + this.prepareCacheData(cachedLocator, "fill", elementDescription, text); + return { + success: true, + text, + }; + } + screenshotSchema = zod_1.z.object({ + fullPage: zod_1.z.boolean().describe("Whether to take a screenshot of the full scrollable page"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + }); + async takeScreenshot({ fullPage: _fullPage }) { + // temporarily disabling fullPage as it sometimes causes issues with vision based models if dimension is too large. 
+ // we can re-enable this in the future with some dimension checks and optimizations if needed + const screenshot = (await this.page.screenshot({ fullPage: false })).toString("base64"); + return screenshot; + } + pressKeySchema = zod_1.z.object({ + key: zod_1.z + .string() + .describe("Name of the key to press or a character to generate, such as `ArrowLeft` or `a`"), + }); + async pressKey({ key }) { + await this.page.keyboard.press(key); + return { success: true, key }; + } + async goBack() { + await this.page.goBack(); + return { success: true }; + } + async goForward() { + await this.page.goForward(); + return { success: true }; + } + async reload() { + await this.page.reload({ waitUntil: "load" }); + return { success: true }; + } + waitSchema = zod_1.z.object({ + timeout: zod_1.z.number().describe("Time to wait in milliseconds"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + }); + async wait({ timeout }) { + await this.page.waitForTimeout(timeout); + return { + success: true, + timeout, + message: `Waited for ${timeout}ms. You can either 1. wait more or 2. retry a previous action or 3. 
try a new action.`, + }; + } + mouseMoveSchema = zod_1.z.object({ + x: zod_1.z.number().describe("x-coordinate to move to"), + y: zod_1.z.number().describe("y-coordinate to move to"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + }); + async mouseMove({ x, y }) { + await this.page.mouse.move(x, y); + return { success: true }; + } + mouseDownSchema = zod_1.z.object({ + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + }); + async mouseDown(_) { + await this.page.mouse.down(); + return { success: true }; + } + mouseUpSchema = zod_1.z.object({ + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + }); + async mouseUp(_) { + await this.page.mouse.up(); + return { success: true }; + } + selectDropdownOptionSchema = zod_1.z.object({ + ref: zod_1.z.string().describe("The ref of the dropdown element to select from"), + elementDescription: zod_1.z + .string() + .describe("A description of the dropdown element, used for debugging"), + value: zod_1.z.string().describe("The value of the option to select"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + doesActionAdvanceUsTowardsGoal: zod_1.z + .boolean() + .describe('"true" indicates high confidence that this action will advance us towards the goal. 
"false" indicates low confidence and could be an AI hallucination.'), + }); + async selectDropdownOption({ ref, elementDescription, value, }) { + const locator = this.page.locator(`aria-ref=${ref}`).describe(elementDescription); + let cachedLocator = ""; + if (this.currentStep) { + cachedLocator = await this.resolveLocator(locator); + } + await locator.selectOption(value, { timeout: constants_1.LOCATOR_ACTION_TIMEOUT }); + this.prepareCacheData(cachedLocator, "selectOption", elementDescription, value); + return { success: true, value }; + } + stopSchema = zod_1.z.object({ + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + }); + async stop(_) { + const DELAY = constants_1.STOP_DELAY; // 3 seconds + // brief sleep to ensure any ongoing navigation or actions are complete + // In future we could add graceful stop logic here + await new Promise((resolve) => setTimeout(resolve, DELAY)); + if (this.abortController) { + this.abortController.abort(); + } + return { success: true, message: "Execution stopped" }; + } + dragAndDropSchema = zod_1.z.object({ + sourceRef: zod_1.z.string().describe("The ref of the element to drag"), + sourceElementDescription: zod_1.z + .string() + .describe("A description of the source element being dragged, used for debugging"), + targetRef: zod_1.z.string().describe("The ref of the element to drop onto"), + targetElementDescription: zod_1.z + .string() + .describe("A description of the target element to drop onto, used for debugging"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + doesActionAdvanceUsTowardsGoal: zod_1.z + .boolean() + .describe('"true" indicates high confidence that this action will advance us towards the goal. 
"false" indicates low confidence and could be an AI hallucination.'), + }); + async dragAndDrop({ sourceRef, sourceElementDescription, targetRef, targetElementDescription, }) { + const sourceLocator = this.page + .locator(`aria-ref=${sourceRef}`) + .describe(sourceElementDescription); + const targetLocator = this.page + .locator(`aria-ref=${targetRef}`) + .describe(targetElementDescription); + // Use two hover steps to ensure dragover events fire correctly across browsers + await sourceLocator.hover({ timeout: constants_1.LOCATOR_ACTION_TIMEOUT }); + await this.page.mouse.down(); + await targetLocator.hover({ timeout: constants_1.LOCATOR_ACTION_TIMEOUT }); + await targetLocator.hover({ timeout: constants_1.LOCATOR_ACTION_TIMEOUT }); + await this.page.mouse.up(); + return { + success: true, + }; + } + hoverSchema = zod_1.z.object({ + ref: zod_1.z.string().describe("The ref of the element to hover over"), + elementDescription: zod_1.z + .string() + .describe("A description of the element to hover, used for debugging"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + doesActionAdvanceUsTowardsGoal: zod_1.z + .boolean() + .describe('"true" indicates high confidence that this action will advance us towards the goal. 
"false" indicates low confidence and could be an AI hallucination.'), + }); + async hover({ ref, elementDescription }) { + const locator = this.page.locator(`aria-ref=${ref}`).describe(elementDescription); + let cachedLocator = ""; + if (this.currentStep) { + cachedLocator = await this.resolveLocator(locator); + } + await locator.hover({ timeout: constants_1.LOCATOR_ACTION_TIMEOUT }); + this.prepareCacheData(cachedLocator, "hover", elementDescription); + return { + success: true, + }; + } + uploadFileSchema = zod_1.z.object({ + ref: zod_1.z.string().describe('The ref of the "button" that triggers a FileChooser to upload files'), + elementDescription: zod_1.z.string().describe("A description of the element, used for debugging"), + filePaths: zod_1.z.array(zod_1.z.string()).describe("Array of absolute file paths to upload"), + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + doesActionAdvanceUsTowardsGoal: zod_1.z + .boolean() + .describe('"true" indicates high confidence that this action will advance us towards the goal. "false" indicates low confidence and could be an AI hallucination.'), + }); + async uploadFile({ ref, elementDescription, filePaths, // This is not a full path. 
It accepts a string filename which should be available in `uploads` directory + }) { + const locator = this.page.locator(`aria-ref=${ref}`).describe(elementDescription); + // We expect to find these files in the `./uploads` directory if no base path is configured + const uploadBasePath = (0, config_1.getConfig)().uploadBasePath || "./uploads"; + const prefixedFilePaths = filePaths.map((filePath) => `${uploadBasePath}/${filePath}`); + // File uploads are not cached for now as it needs a two step process + // We can solve this later by introducing multi-action caching if needed + const fileChooserPromise = this.page.waitForEvent("filechooser"); + await locator.click({ timeout: constants_1.LOCATOR_ACTION_TIMEOUT }); + const fileChooser = await fileChooserPromise; + await fileChooser.setFiles(prefixedFilePaths, { + timeout: constants_1.LOCATOR_ACTION_TIMEOUT, + }); + return { + success: true, + prefixedFilePaths, + }; + } + triggerBlurSchema = zod_1.z.object({ + reasoning: zod_1.z.string().describe("A quick one-line reasoning behind this action"), + doesActionAdvanceUsTowardsGoal: zod_1.z + .boolean() + .describe('"true" indicates high confidence that this action will advance us towards the goal. "false" indicates low confidence and could be an AI hallucination.'), + }); + async triggerBlur(_args) { + await this.page.locator("body").click({ position: { x: 0, y: 0 } }); + return { + success: true, + }; + } + getUniqueValueSchema = zod_1.z.object({ + prefix: zod_1.z.string().describe('The prefix to prepend to the unique id, e.g. 
"Topic", "Username"'), + }); + async getUniqueValue({ prefix }) { + const uniqueValue = `${prefix} ${shortid_1.default.generate()}`; + return { success: true, value: uniqueValue }; + } + async resolveLocator(locator) { + let generatedLocator = ""; + try { + generatedLocator = (await locator.normalize()).toString(); + } + catch (e) { + logger_1.logger.error({ err: e }, "Error generating locator"); + } + return generatedLocator; + } + /** + * Prepares cache data for a step action. Stores it on the instance + * instead of writing to Redis directly. The caller (runSteps in index.ts) + * decides whether to persist based on a logic (for now the number of tool calls). + */ + prepareCacheData(cachedLocator, action, elementDescription, value) { + if (!this.currentStep) { + return; + } + const ACTIONS_THAT_REQUIRE_NO_LOCATOR = ["waitForText"]; + // Skip caching if no locator is provided, unless it's an action that doesn't require a locator + if (!cachedLocator && ACTIONS_THAT_REQUIRE_NO_LOCATOR.indexOf(action) === -1) { + return; + } + /** + * If the current step's data contains values that are also present in the generated locator, it's likely that the locator is overfitted to those specific values and may not be reusable in future runs. + * In such cases, we should avoid caching to prevent storing non-reusable locators. 
+ */ + let isCacheable = true; + if (this.currentStep.data && cachedLocator) { + for (const key in this.currentStep.data) { + const dataValue = this.currentStep.data[key]; + if (cachedLocator.includes(dataValue)) { + isCacheable = false; + break; + } + } + } + if (isCacheable) { + const cacheData = { + action, + description: elementDescription, + }; + if (cachedLocator) { + cacheData.locator = cachedLocator; + } + if (value) { + cacheData.value = value; + } + this.pendingCacheData = cacheData; + } + } +} diff --git a/dist/types.d.ts b/dist/types.d.ts new file mode 100644 index 0000000..c7c7a73 --- /dev/null +++ b/dist/types.d.ts @@ -0,0 +1,109 @@ +import { LanguageModel } from "ai"; +import { Expect, type Page, PlaywrightTestArgs, PlaywrightTestOptions, PlaywrightWorkerArgs, PlaywrightWorkerOptions, TestType } from "@playwright/test"; +import type { TabManager } from "./utils/tab-manager"; +export type PageInput = Page | TabManager; +export type AssertionResult = { + assertionPassed: boolean; + confidenceScore: number; + reasoning: string; +}; +export type UserFlowOptions = { + page: Page; + userFlow: string; + steps: string; + assertion?: string; + effort?: "low" | "high"; + thinkingBudget?: number; + auth?: { + email: string; + password: string; + }; + model?: LanguageModel; +}; +/** + * Configuration for extracting data from a page using AI. + * The extracted value will be stored as {{run.keyName}} and can be used in subsequent steps. 
+ */ +export type ExtractionConfig = { + /** Key name - the extracted value will be accessible as {{run.keyName}} in subsequent steps' data.value */ + as: string; + /** Prompt describing what to extract from the page/URL */ + prompt: string; +}; +export type Step = { + bypassCache?: boolean; + description: string; + data?: Record; + waitUntil?: string; + isScript?: boolean; + script?: string; + moduleId?: string; + /** Extract data from page/URL using AI and store as {{run.as}} for later use */ + extract?: ExtractionConfig; + /** Switch the active page before this step runs. 'main' = original tab, 'latest' = most recently opened, or numeric index. */ + switchToTab?: "main" | "latest" | number; +}; +export type AssertionOptions = { + page: PageInput; + assertion: string; + failSilently?: boolean; + test?: TestType; + expect: Expect<{}>; + effort?: "low" | "high"; + images?: string[]; + maxRetries?: number; + onRetry?: (retryCount: number, previousResult: AssertionResult) => void; +}; +export type WaitConditionResult = { + conditionMet: boolean; + reasoning: string; +}; +export type WaitForConditionOptions = { + page: PageInput; + condition: string; + pageScreenshotBeforeApplyingAction: string; + previousSteps?: Step[]; + currentStep: Step; + nextStep?: Step; + initialInterval?: number; + timeout?: number; + maxInterval?: number; +}; +export type RunStepsOptions = { + projectId?: string; + page: Page; + test?: TestType; + userFlow: string; + steps: Step[]; + bypassCache?: boolean; + failAssertionsSilently?: boolean; + auth?: { + email: string; + password: string; + }; + onStepStart?: (step: { + id: string; + description: string; + }) => void; + onStepEnd?: (step: { + id: string; + description: string; + }) => void; + onReasoning?: (step: { + id: string; + reasoning: string; + }) => void; + /** + * Execution ID to link multiple runSteps calls together. 
+ * When provided, {{global.*}} placeholders are persisted to the cache + * and shared across all runSteps calls with the same executionId. + * Required when using {{global.*}} placeholders. + */ + executionId?: string; +} & ({ + assertions: Omit[]; + expect: Expect<{}>; +} | { + assertions?: never; + expect?: never; +}); diff --git a/dist/types.js b/dist/types.js new file mode 100644 index 0000000..c8ad2e5 --- /dev/null +++ b/dist/types.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/dist/utils/index.d.ts b/dist/utils/index.d.ts new file mode 100644 index 0000000..ffe9cc6 --- /dev/null +++ b/dist/utils/index.d.ts @@ -0,0 +1,67 @@ +import { type Page, PlaywrightTestArgs, PlaywrightTestOptions, PlaywrightWorkerArgs, PlaywrightWorkerOptions, TestType } from "@playwright/test"; +import { PageInput, WaitConditionResult, WaitForConditionOptions } from "../types"; +/** + * Resolves a `Page | TabManager` to the currently-active Playwright Page. + * Call this every time you need the page, so tab-switches mid-operation + * (e.g. during a polling wait) are reflected on the very next access. + */ +export declare const resolvePage: (input: PageInput) => Page; +export declare const withTimeout: (promise: Promise, ms: number, enabled?: boolean) => Promise; +export declare const safeSnapshot: (input: PageInput, timeout?: number) => Promise; +/** Deterministic short hash for Redis keys */ +export declare function flowKey(flow: string, { prefix, length, // 16 base64url chars ≈ 96 bits +secret, }?: { + prefix?: string; + length?: number; + secret?: string; +}): string; +export declare function runLocatorCode(input: PageInput, code: string): Promise; +/** + * Waits for the DOM to stabilize by observing mutations. + * Resolves when no mutations have occurred for the specified idle time. 
+ * @param page The Playwright page instance + * @param idleTime Time in ms to wait after last mutation before considering DOM stable (default: 500ms) + * @param timeout Maximum time to wait for stabilization (default: 5000ms) + */ +export declare function waitForDOMStabilization(input: PageInput, test?: TestType, idleTime?: number, timeout?: number): Promise; +/** + * Waits for a condition to be met by polling AI with screenshots. + * Uses gemini-2.5-flash to evaluate the condition. + * Uses exponential backoff to reduce checks during long UI processes. + * + * @param options - Configuration options for waiting + * @param options.page - The Playwright page instance + * @param options.condition - The condition string to wait for + * @param options.previousSteps - Array of previous step descriptions for context + * @param options.currentStep - The current step being executed + * @param options.nextStep - The next step to be executed (for context) + * @param options.initialInterval - Initial interval between polls in ms (default: 1000) + * @param options.maxInterval - Maximum interval between polls in ms (default: 10000) + * @param options.timeout - Maximum time to wait in ms (default: 30000) + * @returns Promise with the final condition result + * + * @example + * ```typescript + * const result = await waitForCondition({ + * page, + * condition: 'The loading spinner should disappear', + * previousSteps: ['Navigate to dashboard', 'Click refresh button'], + * currentStep: 'Wait for data to load', + * nextStep: 'Verify data is displayed', + * initialInterval: 1000, + * maxInterval: 8000, + * }); + * ``` + */ +export declare function waitForCondition({ page, condition, pageScreenshotBeforeApplyingAction, previousSteps, currentStep, nextStep, initialInterval, maxInterval, timeout, }: WaitForConditionOptions): Promise; +/** + * Verifies if an action had an observable effect by comparing accessibility snapshots. 
+ * Returns true if the action likely succeeded, false if it appears to have silently failed. + */ +export declare function verifyActionEffect(input: PageInput, action: string, snapshotBefore: string): Promise<{ + success: boolean; +}>; +/** + * Generates a random unique 10-digit phone number. + */ +export declare function generatePhoneNumber(): string; diff --git a/dist/utils/index.js b/dist/utils/index.js new file mode 100644 index 0000000..2dd8a61 --- /dev/null +++ b/dist/utils/index.js @@ -0,0 +1,298 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.safeSnapshot = exports.withTimeout = exports.resolvePage = void 0; +exports.flowKey = flowKey; +exports.runLocatorCode = runLocatorCode; +exports.waitForDOMStabilization = waitForDOMStabilization; +exports.waitForCondition = waitForCondition; +exports.verifyActionEffect = verifyActionEffect; +exports.generatePhoneNumber = generatePhoneNumber; +const ai_1 = require("ai"); +const crypto_1 = require("crypto"); +const zod_1 = require("zod"); +const config_1 = require("../config"); +const logger_1 = require("../logger"); +const models_1 = require("../models"); +/** + * Resolves a `Page | TabManager` to the currently-active Playwright Page. + * Call this every time you need the page, so tab-switches mid-operation + * (e.g. during a polling wait) are reflected on the very next access. + */ +const resolvePage = (input) => typeof input.active === "function" + ? 
input.active() + : input; +exports.resolvePage = resolvePage; +const constants_1 = require("../constants"); +const withTimeout = (promise, ms, enabled = true) => { + if (!enabled) { + return promise; + } + return new Promise((resolve, reject) => { + const timeoutId = setTimeout(() => { + reject(new Error(`Promise timed out after ${ms} ms`)); + }, ms); + promise.then((res) => { + clearTimeout(timeoutId); + resolve(res); + }, (err) => { + clearTimeout(timeoutId); + reject(err); + }); + }); +}; +exports.withTimeout = withTimeout; +const safeSnapshot = async (input, timeout = constants_1.SNAPSHOT_TIMEOUT) => { + const attempt = async () => { + return await (0, exports.resolvePage)(input).ariaSnapshot({ mode: "ai", timeout }); + }; + try { + const snapshot = await attempt(); + return snapshot; + } + catch (err) { + if (err instanceof Error && err.message === "timeout") { + logger_1.logger.debug("Snapshot timed out, retrying once..."); + // retry once + return await attempt(); + } + throw err; + } +}; +exports.safeSnapshot = safeSnapshot; +/** Deterministic short hash for Redis keys */ +function flowKey(flow, { prefix = "flow", length = 16, // 16 base64url chars ≈ 96 bits +secret, // optional HMAC secret to avoid leaking the flow + } = {}) { + const h = secret + ? (0, crypto_1.createHash)("sha256").update(secret).update("\x00").update(flow).digest() + : (0, crypto_1.createHash)("sha256").update(flow).digest(); + // base64url without padding + const b64url = h.toString("base64").replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, ""); + const short = b64url.slice(0, length); + return `${prefix}:${short}`; +} +async function runLocatorCode(input, code) { + const fn = new Function("page", ` + return (async () => { + ${code} + })(); + `); + return fn((0, exports.resolvePage)(input)); +} +/** + * Waits for the DOM to stabilize by observing mutations. + * Resolves when no mutations have occurred for the specified idle time. 
+ * @param page The Playwright page instance + * @param idleTime Time in ms to wait after last mutation before considering DOM stable (default: 500ms) + * @param timeout Maximum time to wait for stabilization (default: 5000ms) + */ +async function waitForDOMStabilization(input, test, idleTime = constants_1.DOM_STABILIZATION_IDLE, timeout = constants_1.DOM_STABILIZATION_TIMEOUT) { + const _waitForStabilization = async () => { + try { + await (0, exports.resolvePage)(input).evaluate(({ idleTime, timeout }) => { + return new Promise((resolve) => { + let timeoutId; + // eslint-disable-next-line prefer-const + let overallTimeoutId; + // @ts-expect-error MutationObserver exists in browser context via page.evaluate + const observer = new MutationObserver(() => { + clearTimeout(timeoutId); + timeoutId = setTimeout(() => { + observer.disconnect(); + clearTimeout(overallTimeoutId); + resolve(); + }, idleTime); + }); + // @ts-expect-error document.body exists in browser context via page.evaluate + observer.observe(document.body, { + childList: true, + subtree: true, + attributes: true, + characterData: true, + }); + // Start the idle timer immediately in case no mutations occur + timeoutId = setTimeout(() => { + observer.disconnect(); + clearTimeout(overallTimeoutId); + resolve(); + }, idleTime); + // Overall timeout to prevent hanging indefinitely + overallTimeoutId = setTimeout(() => { + observer.disconnect(); + clearTimeout(timeoutId); + resolve(); + }, timeout); + }); + }, { idleTime, timeout }); + } + catch (error) { + // If execution context was destroyed due to navigation, wait for load state + if ((error instanceof Error && error.message?.includes("Execution context was destroyed")) || + (error instanceof Error && error.message?.includes("navigation"))) { + // Navigation occurred - wait for the page to be ready + await (0, exports.resolvePage)(input).waitForLoadState("domcontentloaded").catch(() => { }); + return; + } + // Re-throw other errors + throw error; + } + }; 
+ if (test) { + await test.step("Waiting for DOM stabilization", async () => { + await _waitForStabilization(); + }); + } + else { + await _waitForStabilization(); + } +} +const waitConditionSchema = zod_1.z.object({ + conditionMet: zod_1.z.boolean().describe("Indicates whether the wait condition has been met."), + reasoning: zod_1.z + .string() + .describe("Brief explanation of why the condition is met or not met based on the current page state."), +}); +/** + * Waits for a condition to be met by polling AI with screenshots. + * Uses gemini-2.5-flash to evaluate the condition. + * Uses exponential backoff to reduce checks during long UI processes. + * + * @param options - Configuration options for waiting + * @param options.page - The Playwright page instance + * @param options.condition - The condition string to wait for + * @param options.previousSteps - Array of previous step descriptions for context + * @param options.currentStep - The current step being executed + * @param options.nextStep - The next step to be executed (for context) + * @param options.initialInterval - Initial interval between polls in ms (default: 1000) + * @param options.maxInterval - Maximum interval between polls in ms (default: 10000) + * @param options.timeout - Maximum time to wait in ms (default: 30000) + * @returns Promise with the final condition result + * + * @example + * ```typescript + * const result = await waitForCondition({ + * page, + * condition: 'The loading spinner should disappear', + * previousSteps: ['Navigate to dashboard', 'Click refresh button'], + * currentStep: 'Wait for data to load', + * nextStep: 'Verify data is displayed', + * initialInterval: 1000, + * maxInterval: 8000, + * }); + * ``` + */ +async function waitForCondition({ page, condition, pageScreenshotBeforeApplyingAction, previousSteps = [], currentStep, nextStep, initialInterval = constants_1.WAIT_CONDITION_INITIAL_INTERVAL, maxInterval = constants_1.WAIT_CONDITION_MAX_INTERVAL, timeout = 
constants_1.WAIT_CONDITION_TIMEOUT, }) { + await waitForDOMStabilization(page); // Ensure DOM is stable before starting + const startTime = Date.now(); + let currentInterval = initialInterval; + const checkCondition = async () => { + const pageScreenshotAfterApplyingAction = (await (0, exports.resolvePage)(page).screenshot({ fullPage: false })).toString("base64"); + const prompt = ` +You are an AI-powered QA Agent designed to test web applications. + +You are helping to determine if a wait condition has been met during a test flow. + + +${previousSteps.length > 0 + ? `Previous steps completed:\n${previousSteps + .map((s, i) => `${i + 1}. ${s.description}\n${s.data ? ` Data: ${JSON.stringify(s.data)}` : ""}`) + .join("\n")}` + : "No previous steps."} + +Last executed step: ${currentStep.description} +${nextStep ? `Next step: ${nextStep.description}` : ""} + +Attached are before and after screenshots of the page surrounding the last executed step. Image 1 is before executing the step, and Image 2 is after executing the step. + + + +${condition} + + + +- Assume last executed step has been performed on the page. +- Examine the screenshot carefully to determine if the wait condition has been met. +- Consider the context of the previous steps and last executed step when evaluating. +- The condition should be evaluated based on what is visually present on the page. +- Be practical - if the core condition appears to be satisfied, mark it as met. +- Don't be overly strict about exact text matching; focus on the intent of the condition. + + + +- \`conditionMet\`: A boolean indicating whether the wait condition has been met. +- \`reasoning\`: A brief string explaining why the condition is or is not met. + + +Analyze the attached before and after screenshots and determine if the wait condition has been met. 
+`; + const { output } = await (0, ai_1.generateText)({ + model: (0, models_1.resolveModel)((0, config_1.getModelId)("utility")), + temperature: 0, + messages: [ + { + role: "user", + content: [ + { type: "text", text: prompt }, + { type: "image", image: pageScreenshotBeforeApplyingAction }, + { type: "image", image: pageScreenshotAfterApplyingAction }, + ], + }, + ], + output: ai_1.Output.object({ schema: waitConditionSchema }), + }); + return output; + }; + while (Date.now() - startTime < timeout) { + try { + const result = await checkCondition(); + if (result.conditionMet) { + logger_1.logger.info(`Condition met: ${result.reasoning}`); + return result; + } + logger_1.logger.debug(`Condition not met yet: ${result.reasoning}. Retrying in ${currentInterval}ms...`); + // Wait before next poll + await new Promise((resolve) => setTimeout(resolve, currentInterval)); + // Exponential backoff: double the interval, capped at maxInterval + currentInterval = Math.min(currentInterval * 2, maxInterval); + } + catch (error) { + logger_1.logger.error({ err: error }, "Error checking condition"); + // Wait before retry on error + await new Promise((resolve) => setTimeout(resolve, currentInterval)); + currentInterval = Math.min(currentInterval * 2, maxInterval); + } + } + // Timeout reached, do one final check + const finalResult = await checkCondition(); + if (!finalResult.conditionMet) { + logger_1.logger.warn(`Wait condition timed out after ${timeout}ms: ${finalResult.reasoning}`); + } + return finalResult; +} +/** + * Verifies if an action had an observable effect by comparing accessibility snapshots. + * Returns true if the action likely succeeded, false if it appears to have silently failed. 
+ */ +async function verifyActionEffect(input, action, snapshotBefore) { + await waitForDOMStabilization(input); // Ensure DOM is stable before taking snapshot + // Actions that don't necessarily cause visible changes + if (action === "hover" || action === "waitForText") { + return { success: true }; + } + const snapshotAfter = await (0, exports.safeSnapshot)(input); + // If snapshots are identical, the action likely had no effect + if (snapshotBefore.trim() === snapshotAfter.trim()) { + throw new Error(`Action "${action}" appears to have had no effect on the page.`); + } + return { success: true }; +} +/** + * Generates a random unique 10-digit phone number. + */ +function generatePhoneNumber() { + // First digit should be 1-9 to avoid leading zero + const firstDigit = Math.floor(Math.random() * 9) + 1; + // Remaining 9 digits can be 0-9 + const remainingDigits = Array.from({ length: 9 }, () => Math.floor(Math.random() * 10)).join(""); + return `${firstDigit}${remainingDigits}`; +} diff --git a/dist/utils/playwright-best-practices.d.ts b/dist/utils/playwright-best-practices.d.ts new file mode 100644 index 0000000..957a6b5 --- /dev/null +++ b/dist/utils/playwright-best-practices.d.ts @@ -0,0 +1,2 @@ +declare const PLAYWRIGHT_BEST_PRACTICES = "\n# Playwright Guidelines\n\n## 1. 
**Start waiting for API responses before triggering actions or use Promise.all()**\n\nStart listening for the expected network response **before** performing actions like button clicks to avoid race conditions.\n\n**\u2705 Do:**\n\n```jsx\nconst [response] = await Promise.all([\n page.waitForResponse(\"api/submit\"),\n page.getByRole('button', { name: 'Submit' }).click(),\n]);\n\nexpect(response.status()).toBe(200);\n// next steps\n```\n\n**\u2705 Do:**\n\n```jsx\nconst response = page.waitForResponse(\"api/submit\"); // notice no await here, start listening first\nawait page.getByRole('button', { name: 'Submit' }).click(); // then perform the action\nawait response; // wait for the response to complete\n```\n\nBoth of the above approaches are valid and good practice. But the following is a bad practice:\n\n**\uD83D\uDEAB Don\u2019t:**\n\n```jsx\nawait page.getByRole('button', { name: 'Submit' }).click();\nawait page.waitForResponse(\"api/submit\"); // Too late \u2014 response might have already returned\n```\n\n---\n\n## 2. **Use `test.slow()` or `test.setTimeout()` for longer tests**\n\nFor longer or more complex flows, use `test.slow()` or `test.setTimeout()` to increase timeout without causing unnecessary failures in CI.\n\n```jsx\ntest('generates invoice after third-party sync', async ({ page }) => {\n test.slow(); // Extends (3x) timeout for this test\n\n await page.click('button:has-text(\"Sync with Xero\")');\n await expect(page.getByText('Invoice generated')).toBeVisible();\n});\n\n```\n\n---\n\n## 3. Include cleanup logic using `afterAll`\n\nUse `afterAll` to clean up any test data created during the test suite. This keeps the environment clean and prevents leftover test artifacts. 
It also serves as a test for delete functionality.\n\n**\u2705 Example:**\n\n```jsx\nlet userId: string;\n\ntest('creates a user', async ({ page }) => {});\n\nafterAll(async ({ request }) => {\n if (userId) {\n const res = await request.delete(`/api/users/${userId}`);\n expect(res.ok()).toBeTruthy(); // Optional assertion\n }\n});\n```\n\n**\uD83D\uDD0E Why this matters:**\n\n- Ensures test-created entities are removed after execution\n- Keeps test environments clean for future runs\n- Helps catch issues in delete endpoints as well\n\n---\n\n## 4. **Use `pressSequentially` for more human-like typing**\n\nInstead of using `fill()`, consider using `pressSequentially()` when simulating text input, especially if you're recording or showcasing tests, it mimics a real user typing, making playback more natural.\n\n**\u2705 Do:**\n\n```jsx\nawait page.locator('#email').pressSequentially('test@example.com');\n```\n\n**\uD83D\uDEAB Don\u2019t:**\n\n```jsx\nawait page.fill('#email', 'test@example.com'); // Instant fill, less realistic in playback\n```\n\n---\n\n## 6. Never use magic timeouts\n\nTimeouts like `await page.waitForTimeout()` are a big reason behind flaky tests. Do not use them unless there is a very good reason behind it. Try to use other hooks like `waitFor()` or `waitForResponse()` to smartly wait for elements to appear / disappear or API calls to complete. In general, avoid `page.waitForTimeout()`.\n\n## 7. Never use page.waitForLoadState('networkidle')\n\nUsing `page.waitForLoadState('networkidle')` is an anti-pattern that can lead to flaky tests. 
Instead, use more specific waits like `waitForResponse()` or `waitForSelector()` to ensure the necessary conditions are met before proceeding.\n"; +export default PLAYWRIGHT_BEST_PRACTICES; diff --git a/dist/utils/playwright-best-practices.js b/dist/utils/playwright-best-practices.js new file mode 100644 index 0000000..ec94a22 --- /dev/null +++ b/dist/utils/playwright-best-practices.js @@ -0,0 +1,110 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const PLAYWRIGHT_BEST_PRACTICES = ` +# Playwright Guidelines + +## 1. **Start waiting for API responses before triggering actions or use Promise.all()** + +Start listening for the expected network response **before** performing actions like button clicks to avoid race conditions. + +**✅ Do:** + +\`\`\`jsx +const [response] = await Promise.all([ + page.waitForResponse("api/submit"), + page.getByRole('button', { name: 'Submit' }).click(), +]); + +expect(response.status()).toBe(200); +// next steps +\`\`\` + +**✅ Do:** + +\`\`\`jsx +const response = page.waitForResponse("api/submit"); // notice no await here, start listening first +await page.getByRole('button', { name: 'Submit' }).click(); // then perform the action +await response; // wait for the response to complete +\`\`\` + +Both of the above approaches are valid and good practice. But the following is a bad practice: + +**🚫 Don’t:** + +\`\`\`jsx +await page.getByRole('button', { name: 'Submit' }).click(); +await page.waitForResponse("api/submit"); // Too late — response might have already returned +\`\`\` + +--- + +## 2. **Use \`test.slow()\` or \`test.setTimeout()\` for longer tests** + +For longer or more complex flows, use \`test.slow()\` or \`test.setTimeout()\` to increase timeout without causing unnecessary failures in CI. 
+ +\`\`\`jsx +test('generates invoice after third-party sync', async ({ page }) => { + test.slow(); // Extends (3x) timeout for this test + + await page.click('button:has-text("Sync with Xero")'); + await expect(page.getByText('Invoice generated')).toBeVisible(); +}); + +\`\`\` + +--- + +## 3. Include cleanup logic using \`afterAll\` + +Use \`afterAll\` to clean up any test data created during the test suite. This keeps the environment clean and prevents leftover test artifacts. It also serves as a test for delete functionality. + +**✅ Example:** + +\`\`\`jsx +let userId: string; + +test('creates a user', async ({ page }) => {}); + +afterAll(async ({ request }) => { + if (userId) { + const res = await request.delete(\`/api/users/\${userId}\`); + expect(res.ok()).toBeTruthy(); // Optional assertion + } +}); +\`\`\` + +**🔎 Why this matters:** + +- Ensures test-created entities are removed after execution +- Keeps test environments clean for future runs +- Helps catch issues in delete endpoints as well + +--- + +## 4. **Use \`pressSequentially\` for more human-like typing** + +Instead of using \`fill()\`, consider using \`pressSequentially()\` when simulating text input, especially if you're recording or showcasing tests, it mimics a real user typing, making playback more natural. + +**✅ Do:** + +\`\`\`jsx +await page.locator('#email').pressSequentially('test@example.com'); +\`\`\` + +**🚫 Don’t:** + +\`\`\`jsx +await page.fill('#email', 'test@example.com'); // Instant fill, less realistic in playback +\`\`\` + +--- + +## 6. Never use magic timeouts + +Timeouts like \`await page.waitForTimeout()\` are a big reason behind flaky tests. Do not use them unless there is a very good reason behind it. Try to use other hooks like \`waitFor()\` or \`waitForResponse()\` to smartly wait for elements to appear / disappear or API calls to complete. In general, avoid \`page.waitForTimeout()\`. + +## 7. 
Never use page.waitForLoadState('networkidle') + +Using \`page.waitForLoadState('networkidle')\` is an anti-pattern that can lead to flaky tests. Instead, use more specific waits like \`waitForResponse()\` or \`waitForSelector()\` to ensure the necessary conditions are met before proceeding. +`; +exports.default = PLAYWRIGHT_BEST_PRACTICES; diff --git a/dist/utils/secure-script-runner.d.ts b/dist/utils/secure-script-runner.d.ts new file mode 100644 index 0000000..fa98379 --- /dev/null +++ b/dist/utils/secure-script-runner.d.ts @@ -0,0 +1,40 @@ +import type { Expect } from "@playwright/test"; +import type { PageInput } from "../types"; +export interface RunSecureScriptOptions { + page: PageInput; + script: string; + localValues?: Record; + globalValues?: Record; + expect?: Expect<{}>; +} +/** + * Safely execute a user-supplied Playwright script. + * + * The script is parsed as an AST and validated to only contain allowed + * Playwright method chains. User code is NEVER evaluated directly. + * + * @example + * await runSecureScript({ + * page, + * script: 'page.getByRole("button", { name: "Save" }).click()', + * }); + * + * @example + * // Multi-line scripts (each line is executed in order) + * await runSecureScript({ + * page, + * script: ` + * page.getByLabel("Email").fill("test@example.com") + * page.getByLabel("Password").fill("password123") + * page.getByRole("button", { name: "Submit" }).click() + * `, + * }); + */ +export declare function runSecureScript({ page: pageInput, script, localValues, globalValues, expect: expectFn, }: RunSecureScriptOptions): Promise; +/** + * Validate a script without executing it. + * Useful for pre-validation before saving scripts. 
+ * + * @returns true if valid, throws Error if invalid + */ +export declare function validateScript(script: string): boolean; diff --git a/dist/utils/secure-script-runner.js b/dist/utils/secure-script-runner.js new file mode 100644 index 0000000..9fbe3d5 --- /dev/null +++ b/dist/utils/secure-script-runner.js @@ -0,0 +1,1830 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.runSecureScript = runSecureScript; +exports.validateScript = validateScript; +const acorn_1 = require("acorn"); +const promises_1 = require("node:dns/promises"); +const logger_1 = require("../logger"); +const index_1 = require("./index"); +// ============================================================================= +// ALLOWED METHODS CONFIGURATION +// ============================================================================= +/** + * Methods that can be called directly on `page` to start a locator chain. + */ +const ALLOWED_START_METHODS = new Set([ + // Locator methods + "locator", + "getByRole", + "getByText", + "getByLabel", + "getByPlaceholder", + "getByTestId", + "getByAltText", + "getByTitle", + // Frame methods + "frameLocator", +]); +/** + * Assertion methods that can be called on expect(locator). 
+ */ +const ALLOWED_EXPECT_ASSERTION_METHODS = new Set([ + // Text assertions + "toContainText", + "toHaveText", + // Visibility assertions + "toBeVisible", + "toBeHidden", + // State assertions + "toBeEnabled", + "toBeDisabled", + "toBeChecked", + "toBeEditable", + "toBeEmpty", + "toBeFocused", + // Attribute assertions + "toHaveAttribute", + "toHaveClass", + "toHaveCSS", + "toHaveId", + // Value assertions + "toHaveValue", + "toHaveValues", + // Count assertions + "toHaveCount", + // Screenshot assertions + "toHaveScreenshot", + // Attached assertions + "toBeAttached", + // Role assertions + "toHaveRole", + // Accessible name/description + "toHaveAccessibleName", + "toHaveAccessibleDescription", + // Generic assertions + "toBe", + "toEqual", + "toBeTruthy", + "toBeFalsy", + "toBeNull", + "toBeUndefined", + "toBeDefined", + "toBeNaN", + "toContain", + "toMatch", + "toHaveLength", +]); +/** + * Methods that can be chained on a locator to refine selection. + * Note: `and` and `or` are excluded because they require locator arguments, + * which cannot be created as literals in the current implementation. + */ +const ALLOWED_LOCATOR_CHAIN_METHODS = new Set([ + "first", + "last", + "nth", + "filter", // Note: `has`/`hasNot` options won't work (require locators), but `hasText`/`hasNotText` work + "locator", + "getByRole", + "getByText", + "getByLabel", + "getByPlaceholder", + "getByTestId", + "getByAltText", + "getByTitle", +]); +/** + * Action methods that perform interactions (must be last in chain). + * Note: `dragTo` is excluded because it requires a locator argument, + * which cannot be created as a literal in the current implementation. 
+ */ +const ALLOWED_ACTION_METHODS = new Set([ + "click", + "dblclick", + "fill", + "type", + "press", + "check", + "uncheck", + "hover", + "focus", + "blur", + "selectOption", + "clear", + "scrollIntoViewIfNeeded", + "waitFor", + "isVisible", + "isEnabled", + "isChecked", + "textContent", + "innerText", + "innerHTML", + "getAttribute", + "inputValue", + "count", +]); +/** + * Methods that can be called directly on `page` (not locator chains). + * These are page-level operations like navigation, waits, etc. + * Note: `waitForFunction` is excluded because it requires a function argument, + * which cannot be created as a literal in the current implementation. + */ +const ALLOWED_PAGE_METHODS = new Set([ + // Navigation + "goto", + "reload", + "goBack", + "goForward", + // Waits + "waitForLoadState", + "waitForURL", + "waitForTimeout", + "waitForSelector", + // Page state + "title", + "url", + "content", + // Screenshots + "screenshot", + // Other + "close", + "bringToFront", + "setViewportSize", +]); +/** + * Methods that can be called on page.keyboard + */ +const ALLOWED_KEYBOARD_METHODS = new Set(["press", "type", "down", "up", "insertText"]); +/** + * Methods that can be called on page.mouse + */ +const ALLOWED_MOUSE_METHODS = new Set(["click", "dblclick", "down", "up", "move", "wheel"]); +/** + * Methods that can be called on context (BrowserContext). + * Accessed via page.context() internally. + */ +const ALLOWED_CONTEXT_METHODS = new Set([ + // Cookies + "cookies", + "addCookies", + "clearCookies", + // Storage + "storageState", + // Permissions + "clearPermissions", + // Geolocation + "setGeolocation", + // Other + "setOffline", + "waitForEvent", +]); +/** + * Methods that can be called on browser. + * Accessed via page.context().browser() internally. + */ +const ALLOWED_BROWSER_METHODS = new Set(["isConnected", "version"]); +/** + * Methods that can be called on console for logging. 
+ */ +const ALLOWED_CONSOLE_METHODS = new Set(["log"]); +/** + * Methods that can be called on Response objects from fetch. + */ +const ALLOWED_RESPONSE_METHODS = new Set(["json", "text", "arrayBuffer", "blob"]); +/** + * HTTP methods allowed in fetch requests. + */ +const ALLOWED_FETCH_METHODS = new Set(["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"]); +/** + * Hosts that are blocked for fetch requests (localhost/loopback). + */ +const BLOCKED_HOSTS = new Set(["localhost", "127.0.0.1", "0.0.0.0", "[::1]", "::1"]); +/** + * Reserved variable names that cannot be used in user scripts. + */ +const RESERVED_VARIABLE_NAMES = new Set([ + "page", + "context", + "browser", + "console", + "expect", + "process", + "require", + "import", + "fetch", + "eval", + "Function", +]); +/** + * Constructors allowed in computed expressions. + * Only safe, side-effect-free constructors are permitted. + */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const ALLOWED_CONSTRUCTORS = new Map([["URL", URL]]); +/** + * Properties/methods allowed per computed type. + * Maps constructor name → Set of allowed property/method names. + */ +const ALLOWED_COMPUTED_PROPERTIES = new Map([ + ["URL", new Set(["searchParams"])], + ["URLSearchParams", new Set(["toString"])], +]); +/** + * Binary operators allowed in computed expressions. + */ +const ALLOWED_BINARY_OPERATORS = new Set(["+"]); +/** + * Getter methods that should auto-log their return values. + * These are read-only methods that return data without side effects. 
/**
 * Check if a hostname is blocked for fetch: localhost names, loopback,
 * private, link-local and other non-public address ranges.
 *
 * Accepts both `URL.hostname` values (IPv6 literals wrapped in brackets) and
 * bare addresses such as those returned by a DNS lookup. IPv6 input is
 * matched textually, so it is expected in the compressed lowercase form that
 * `URL` and `dns.lookup` produce.
 *
 * Blocked in addition to the names listed in BLOCKED_HOSTS:
 *  - `localhost` with a trailing dot and any `*.localhost` name
 *  - IPv4 0.0.0.0/8, 10/8, 100.64/10, 127/8, 169.254/16, 172.16/12, 192.168/16
 *  - IPv6 `::`, `::1`, fc00::/7 (unique local), fe80::/10 (link-local)
 *  - IPv4-mapped IPv6 (`::ffff:a.b.c.d` or `::ffff:hhhh:hhhh`) whose embedded
 *    IPv4 address falls into one of the blocked IPv4 ranges
 *
 * @param hostname Hostname or IP literal to classify.
 * @returns true when requests to the host must be refused, false otherwise.
 */
function isBlockedHost(hostname) {
    const lower = hostname.toLowerCase();
    if (BLOCKED_HOSTS.has(lower))
        return true;
    // Strip IPv6 brackets and a single trailing root dot so "[fe80::1]" and
    // "localhost." are judged by the same rules as their bare forms.
    const host = lower.replace(/^\[|\]$/g, "").replace(/\.$/, "");
    if (BLOCKED_HOSTS.has(host))
        return true;
    // RFC 6761: "localhost" and every name beneath it is loopback.
    if (host === "localhost" || host.endsWith(".localhost"))
        return true;
    // Range test on the first two octets of an IPv4 address.
    const isBlockedIPv4 = (a, b) => a === 0 || // "this network"
        a === 10 || // private
        a === 127 || // loopback
        (a === 100 && b >= 64 && b <= 127) || // carrier-grade NAT
        (a === 169 && b === 254) || // link-local, incl. cloud metadata endpoints
        (a === 172 && b >= 16 && b <= 31) || // private
        (a === 192 && b === 168); // private
    const dotted = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
    if (dotted)
        return isBlockedIPv4(Number(dotted[1]), Number(dotted[2]));
    // Anything without a colon at this point is an ordinary DNS name.
    if (!host.includes(":"))
        return false;
    // Unspecified and loopback addresses.
    if (host === "::" || host === "::1")
        return true;
    // fc00::/7 — unique local addresses.
    if (/^f[cd][0-9a-f]{2}:/.test(host))
        return true;
    // fe80::/10 — link-local addresses (a "%zone" suffix does not affect the prefix test).
    if (/^fe[89ab][0-9a-f]:/.test(host))
        return true;
    // IPv4-mapped IPv6, dotted tail (typical dns.lookup output).
    const mappedDotted = /^::ffff:(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
    if (mappedDotted)
        return isBlockedIPv4(Number(mappedDotted[1]), Number(mappedDotted[2]));
    // IPv4-mapped IPv6, hex tail (what URL.hostname serializes to).
    const mappedHex = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(host);
    if (mappedHex) {
        const high = parseInt(mappedHex[1], 16);
        return isBlockedIPv4(high >> 8, high & 0xff);
    }
    return false;
}
/**
 * Validate that a URL's hostname does not resolve to a blocked IP address.
 * Guards against a public-looking domain whose DNS records point at a
 * private/loopback address.
 *
 * Every address returned by the resolver is checked, not only the first one,
 * so a host that publishes a public record alongside a private one is
 * rejected as well.
 *
 * NOTE(review): this is a pre-flight check; the later fetch resolves the name
 * again, so it narrows the rebinding window rather than closing it.
 *
 * @param url Absolute URL that is about to be fetched.
 * @returns Resolves with no value when the host is acceptable (or cannot be
 *          resolved — the subsequent fetch is left to report that failure).
 * @throws Error when any resolved address is rejected by isBlockedHost.
 */
async function validateFetchUrlResolution(url) {
    const parsed = new URL(url);
    const hostname = parsed.hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets
    // IP literals never go through DNS; they are presumably vetted by the
    // hostname check (isBlockedHost) before this function is reached.
    if (/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(hostname) || hostname.includes(":")) {
        return;
    }
    let records;
    try {
        // all: true → array of { address, family } covering every record.
        records = await (0, promises_1.lookup)(hostname, { all: true });
    }
    catch {
        // DNS resolution failure — let fetch handle it naturally
        return;
    }
    const offending = records.find((record) => isBlockedHost(record.address));
    if (offending) {
        throw new Error(`[SecureScriptRunner] DNS rebinding blocked: ${hostname} resolves to ${offending.address}`);
    }
}
/**
 * Validate the options object passed to a script's fetch() call.
 *
 * `undefined`/`null` options are accepted as "no options". Otherwise:
 *  - options must be a non-array object
 *  - `method`, when present, must be a string naming one of ALLOWED_FETCH_METHODS
 *    (compared case-insensitively)
 *  - `headers`, when present, must be a non-null, non-array object whose
 *    values are all strings
 *  - `body`, when present, must be a string or a non-null object
 *
 * @param options Raw options value supplied by the script.
 * @throws Error (via assert) describing the first rule that is violated.
 */
function validateFetchOptions(options) {
    if (options === undefined || options === null)
        return;
    assert(typeof options === "object" && !Array.isArray(options), "fetch options must be an object");
    const opts = options;
    // Validate method
    if (opts.method !== undefined) {
        assert(typeof opts.method === "string", "method must be a string");
        const method = opts.method.toUpperCase();
        assert(ALLOWED_FETCH_METHODS.has(method), `Invalid method: ${opts.method}. Allowed: ${[...ALLOWED_FETCH_METHODS].join(", ")}`);
    }
    // Validate headers
    if (opts.headers !== undefined) {
        // typeof null === "object", so null must be excluded explicitly;
        // otherwise Object.entries(null) below fails with an unrelated TypeError.
        assert(opts.headers !== null && typeof opts.headers === "object" && !Array.isArray(opts.headers), "headers must be an object");
        for (const [key, value] of Object.entries(opts.headers)) {
            assert(typeof value === "string", `header "${key}" value must be a string`);
        }
    }
    // Validate body (string for JSON, or will auto-serialize objects)
    if (opts.body !== undefined) {
        assert(typeof opts.body === "string" || (typeof opts.body === "object" && opts.body !== null), "body must be a string or object");
    }
}
`${obj}[${prop}]` : `${obj}.${prop}`; + } + case "CallExpression": { + const call = node; + const callee = nodeToString(call.callee); + return `${callee}(...)`; + } + case "NewExpression": { + const newExpr = node; + const callee = nodeToString(newExpr.callee); + return `new ${callee}(...)`; + } + case "Literal": { + const literal = node; + if (literal.regex) { + return `/${literal.regex.pattern}/${literal.regex.flags}`; + } + return JSON.stringify(literal.value); + } + case "ArrayExpression": + return "[...]"; + case "ObjectExpression": + return "{...}"; + case "ArrowFunctionExpression": + case "FunctionExpression": + return "() => {...}"; + case "TemplateLiteral": + return "`...`"; + case "UnaryExpression": { + const unary = node; + return `${unary.operator}${nodeToString(unary.argument)}`; + } + case "BinaryExpression": { + const binary = node; + return `${nodeToString(binary.left)} ${binary.operator} ${nodeToString(binary.right)}`; + } + default: + return `[${node.type}]`; + } +} +// ============================================================================= +// SAFE LITERAL EVALUATION +// ============================================================================= +/** + * Only allow JSON-like literal values (string/number/boolean/null/regex/arrays/objects). + * No identifiers, function calls, template literals with expressions, etc. 
/**
 * Evaluate an AST node that must be a JSON-like literal and return its value.
 *
 * Supported node types:
 *  - Literal: string/number/boolean/null, or a RegExp rebuilt from `node.regex`
 *  - ArrayExpression: every element evaluated recursively (no holes)
 *  - ObjectExpression: plain `key: value` properties with identifier or string
 *    keys; values evaluated recursively
 *  - UnaryExpression: only `-<number literal>`
 *  - TemplateLiteral: only when it contains no `${...}` expressions
 *
 * No identifiers, function calls or other expressions are evaluated.
 *
 * @param node acorn AST node taken from the user script.
 * @returns The plain JavaScript value the literal denotes.
 * @throws Error for any unsupported node shape, including a `__proto__` key.
 */
function evalSafeLiteral(node) {
    switch (node.type) {
        case "Literal": {
            const literal = node;
            // acorn uses Literal for string/number/bool/null and also RegExp in node.regex
            if (literal.regex) {
                return new RegExp(literal.regex.pattern, literal.regex.flags);
            }
            return literal.value;
        }
        case "ArrayExpression": {
            const arr = node;
            return arr.elements.map((el) => {
                assert(el !== null, "Sparse arrays are not allowed");
                return evalSafeLiteral(el);
            });
        }
        case "ObjectExpression": {
            const obj = node;
            const out = {};
            for (const prop of obj.properties) {
                assert(prop.type === "Property", "Only plain object properties allowed");
                assert(prop.kind === "init", "Only init properties allowed");
                assert(prop.computed === false, "Computed keys not allowed");
                let key = null;
                if (prop.key.type === "Identifier") {
                    key = prop.key.name;
                }
                else if (prop.key.type === "Literal") {
                    const keyValue = prop.key.value;
                    if (typeof keyValue === "string") {
                        key = keyValue;
                    }
                }
                assert(typeof key === "string", "Object keys must be string or identifier");
                // Assigning out["__proto__"] would invoke the inherited prototype
                // setter (replacing out's prototype, or silently dropping a
                // primitive) instead of creating a property, so refuse the key.
                assert(key !== "__proto__", 'Object key "__proto__" is not allowed');
                out[key] = evalSafeLiteral(prop.value);
            }
            return out;
        }
        case "UnaryExpression": {
            // Handle negative numbers like -1
            const unary = node;
            if (unary.operator === "-" && unary.argument.type === "Literal") {
                const literal = unary.argument;
                if (typeof literal.value === "number") {
                    return -literal.value;
                }
            }
            throw new Error(`Unsupported unary expression: "${nodeToString(node)}". Only negative numbers like -1 are allowed.`);
        }
        case "TemplateLiteral": {
            // Handle simple template literals without expressions (e.g., `hello`)
            const template = node;
            if (template.expressions.length > 0) {
                throw new Error(`Template literals with expressions are not allowed: "${nodeToString(node)}". Use regular string literals instead.`);
            }
            // Concatenate all quasi values (for simple templates, there's just one)
            return template.quasis.map((q) => q.value.cooked ?? q.value.raw).join("");
        }
        default:
            throw new Error(`Unsupported argument: "${nodeToString(node)}" (${node.type}). Only literals, arrays, and objects are allowed.`);
    }
}
/**
 * Evaluate a method-argument node to a value, accepting variable references
 * and computed expressions in addition to plain literals.
 *
 * Resolution order when a variables map is supplied:
 *  1. a variable/property chain recognised by tryParseValueChain — the
 *     variable is read and each property in the path is followed in turn
 *  2. a computed expression recognised by parseSafeExpression (anything whose
 *     kind is not "literal") — delegated to evalSafeExpression
 *  3. otherwise, or when no variables map is given, a plain literal via
 *     evalSafeLiteral
 *
 * @param node AST node of the argument.
 * @param variables Optional map of script variables (name → value).
 * @returns The evaluated value.
 * @throws Error when a property is read from null/undefined along a chain, or
 *         whatever the delegated evaluators throw.
 */
function evalSafeValue(node, variables) {
    if (!variables) {
        return evalSafeLiteral(node);
    }
    const ref = tryParseValueChain(node, variables);
    if (ref) {
        const root = variables.get(ref.variableName);
        // Walk the property path, refusing to dereference null/undefined.
        return ref.propertyPath.reduce((holder, prop) => {
            if (holder === null || holder === undefined) {
                throw new Error(`Cannot read property "${prop}" of ${holder} on variable "${ref.variableName}"`);
            }
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            return holder[prop];
        }, root);
    }
    // Try computed expressions (e.g., "prefix" + variable, new URL(...))
    const expression = parseSafeExpression(node, variables);
    const isComputed = Boolean(expression) && expression.kind !== "literal";
    return isComputed ? evalSafeExpression(expression, variables) : evalSafeLiteral(node);
}
/**
 * Turn an AST node into a safe computed-expression tree.
 *
 * Recognised shapes, tried in this order:
 *  - literal values (per isLiteralNode)            → { kind: "literal" }
 *  - variable/property chains on known variables   → { kind: "variableRef" }
 *  - `new Ctor(args)` for ALLOWED_CONSTRUCTORS      → { kind: "newExpression" }
 *  - `expr.method(args)`                           → { kind: "methodCall" }
 *  - `expr.prop`                                   → { kind: "propertyAccess" }
 *  - `left <op> right` for ALLOWED_BINARY_OPERATORS → { kind: "binaryExpression" }
 *
 * Computed member access, non-identifier member names and names listed in
 * BLOCKED_PROPERTIES are never accepted.
 *
 * @param node AST node to parse.
 * @param variables Optional map of script variables (name → value).
 * @returns The expression tree, or null when the node matches no supported
 *          pattern so the caller can fall back to another parser.
 */
function parseSafeExpression(node, variables) {
    // Literals
    if (isLiteralNode(node)) {
        return { kind: "literal", value: evalSafeLiteral(node) };
    }
    // Variable references (identifiers and member expressions on known variables)
    const ref = variables ? tryParseValueChain(node, variables) : null;
    if (ref) {
        return {
            kind: "variableRef",
            variableName: ref.variableName,
            propertyPath: ref.propertyPath,
        };
    }
    // Parse a list of argument nodes; null as soon as one of them is unsupported.
    const parseArguments = (argNodes) => {
        const parsedArgs = [];
        for (const argNode of argNodes) {
            const parsedArg = parseSafeExpression(argNode, variables);
            if (!parsedArg)
                return null;
            parsedArgs.push(parsedArg);
        }
        return parsedArgs;
    };
    // Name of a plain `obj.name` member, or null when the access is computed,
    // not an identifier, or blocked.
    const safeMemberName = (member) => {
        if (member.computed || member.property.type !== "Identifier")
            return null;
        const memberName = member.property.name;
        return BLOCKED_PROPERTIES.has(memberName) ? null : memberName;
    };
    switch (node.type) {
        // new Constructor(args) — e.g., new URL(expr)
        case "NewExpression": {
            const ctorNode = node.callee;
            if (ctorNode.type !== "Identifier" || !ALLOWED_CONSTRUCTORS.has(ctorNode.name))
                return null;
            const args = parseArguments(node.arguments);
            return args ? { kind: "newExpression", constructorName: ctorNode.name, args } : null;
        }
        // Method call: expr.method(args) — e.g., searchParams.toString()
        case "CallExpression": {
            const calleeNode = node.callee;
            if (calleeNode.type !== "MemberExpression")
                return null;
            const method = safeMemberName(calleeNode);
            if (method === null)
                return null;
            const receiver = calleeNode.object;
            // A variable holding a Response is left to parseAllowedChain →
            // executeChain, which properly awaits async methods like
            // .json() / .text().
            const receiverIsResponse = receiver.type === "Identifier" &&
                variables?.has(receiver.name) &&
                variables.get(receiver.name) instanceof Response;
            if (receiverIsResponse)
                return null;
            const object = parseSafeExpression(receiver, variables);
            if (!object)
                return null;
            const args = parseArguments(node.arguments);
            return args ? { kind: "methodCall", object, method, args } : null;
        }
        // Property access: expr.prop — e.g., url.searchParams
        case "MemberExpression": {
            const property = safeMemberName(node);
            if (property === null)
                return null;
            const object = parseSafeExpression(node.object, variables);
            return object ? { kind: "propertyAccess", object, property } : null;
        }
        // Binary expression: left + right
        case "BinaryExpression": {
            if (!ALLOWED_BINARY_OPERATORS.has(node.operator))
                return null;
            const left = parseSafeExpression(node.left, variables);
            if (!left)
                return null;
            const right = parseSafeExpression(node.right, variables);
            return right ? { kind: "binaryExpression", operator: node.operator, left, right } : null;
        }
        default:
            return null;
    }
}
/**
 * Runtime gate for reading a property or calling a method on a computed value.
 *
 * Rules, checked in order:
 *  - names in BLOCKED_PROPERTIES are always refused
 *  - URL and URLSearchParams instances only expose the names listed for them
 *    in ALLOWED_COMPUTED_PROPERTIES
 *  - plain objects (prototype is exactly Object.prototype, e.g. parsed JSON)
 *    allow any remaining name
 *  - every other value is refused
 *
 * @param obj Value being accessed.
 * @param propOrMethod Property or method name requested.
 * @returns Nothing; returning normally means the access is permitted.
 * @throws Error when the access is not permitted.
 */
function validateComputedAccess(obj, propOrMethod) {
    if (BLOCKED_PROPERTIES.has(propOrMethod)) {
        throw new Error(`[SecureScriptRunner] Access to "${propOrMethod}" is blocked on computed values`);
    }
    // Identify the allowlisted built-in type, if the value is one.
    let typeName = null;
    if (obj instanceof URL) {
        typeName = "URL";
    }
    else if (obj instanceof URLSearchParams) {
        typeName = "URLSearchParams";
    }
    if (typeName !== null) {
        const allowedNames = ALLOWED_COMPUTED_PROPERTIES.get(typeName);
        if (allowedNames?.has(propOrMethod)) {
            return;
        }
        throw new Error(`[SecureScriptRunner] Property/method "${propOrMethod}" is not allowed on ${typeName} objects. Allowed: ${[...(allowedNames ?? [])].join(", ")}`);
    }
    // Allow property access on plain objects (e.g. JSON response data)
    const isPlainObject = typeof obj === "object" &&
        obj !== null &&
        Object.getPrototypeOf(obj) === Object.prototype;
    if (!isPlainObject) {
        throw new Error(`[SecureScriptRunner] Computed property/method access is not allowed on objects of this type`);
    }
}
/**
 * Evaluate an expression tree produced by parseSafeExpression.
 *
 * Handles the kinds "literal", "variableRef", "newExpression",
 * "propertyAccess", "methodCall" and "binaryExpression". Property and method
 * access on computed values is checked with validateComputedAccess before it
 * happens, constructors are looked up in ALLOWED_CONSTRUCTORS, and the only
 * binary operator evaluated is "+" with at least one string operand.
 *
 * @param expr Expression tree node.
 * @param variables Map of script variables (name → value).
 * @returns The computed value.
 * @throws Error for undefined variables, null/undefined dereferences,
 *         disallowed constructors/operators, non-function method targets and
 *         unknown expression kinds.
 */
function evalSafeExpression(expr, variables) {
    const evaluate = (child) => evalSafeExpression(child, variables);
    const kind = expr.kind;
    if (kind === "literal") {
        return expr.value;
    }
    if (kind === "variableRef") {
        const name = expr.variableName;
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        let current = variables.get(name);
        // A stored `undefined` is legal; only a missing entry is an error.
        if (current === undefined && !variables.has(name)) {
            throw new Error(`[SecureScriptRunner] Variable "${name}" is not defined`);
        }
        for (const step of expr.propertyPath) {
            if (current === null || current === undefined) {
                throw new Error(`[SecureScriptRunner] Cannot read property "${step}" of ${current} on variable "${name}"`);
            }
            current = current[step];
        }
        return current;
    }
    if (kind === "newExpression") {
        const Ctor = ALLOWED_CONSTRUCTORS.get(expr.constructorName);
        if (!Ctor) {
            throw new Error(`[SecureScriptRunner] Constructor "${expr.constructorName}" is not allowed`);
        }
        const ctorArgs = expr.args.map((argExpr) => evaluate(argExpr));
        return new Ctor(...ctorArgs);
    }
    if (kind === "propertyAccess") {
        const target = evaluate(expr.object);
        validateComputedAccess(target, expr.property);
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        return target[expr.property];
    }
    if (kind === "methodCall") {
        const receiver = evaluate(expr.object);
        validateComputedAccess(receiver, expr.method);
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const method = receiver[expr.method];
        if (typeof method !== "function") {
            throw new Error(`[SecureScriptRunner] "${expr.method}" is not a function`);
        }
        // Arguments are evaluated only once the target is known to be callable.
        const callArgs = expr.args.map((argExpr) => evaluate(argExpr));
        return method.apply(receiver, callArgs);
    }
    if (kind === "binaryExpression") {
        const lhs = evaluate(expr.left);
        const rhs = evaluate(expr.right);
        if (expr.operator !== "+") {
            throw new Error(`[SecureScriptRunner] Operator "${expr.operator}" is not allowed`);
        }
        const hasStringOperand = typeof lhs === "string" || typeof rhs === "string";
        if (!hasStringOperand) {
            throw new Error(`[SecureScriptRunner] The "+" operator requires at least one string operand`);
        }
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        return lhs + rhs;
    }
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    throw new Error(`[SecureScriptRunner] Unknown expression kind: ${expr.kind}`);
}
like `page.keyboard` or `page.mouse` + */ +function isPageSubObject(node, subObject) { + if (node.type !== "MemberExpression") + return false; + const member = node; + return isIdentifier(member.object, "page") && isIdentifier(member.property, subObject); +} +/** + * Parse "page.getByRole(...).click()", "page.goto(...)", "page.keyboard.press(...)", + * "fetch(...)", "variable.json()", or "expect(page.getByRole(...)).toContainText(...)" + * Returns the appropriate ParsedChain type. + */ +function parseAllowedChain(exprNode, variables) { + // Expression must be a call at the top-level (so you can actually do something) + assert(exprNode.type === "CallExpression", "Top-level must be a function call"); + const topCall = exprNode; + // Check for expect() patterns (including negated and value-based) + const expectResult = tryParseExpectChain(topCall, variables); + if (expectResult) + return expectResult; + // Check for page.keyboard.xxx() pattern + const keyboardResult = tryParseKeyboardChain(topCall); + if (keyboardResult) + return keyboardResult; + // Check for page.mouse.xxx() pattern + const mouseResult = tryParseMouseChain(topCall); + if (mouseResult) + return mouseResult; + // Check for page.method() pattern (page-level methods like goto, reload) + const pageMethodResult = tryParsePageMethodChain(topCall, variables); + if (pageMethodResult) + return pageMethodResult; + // Check for context.xxx() pattern + const contextResult = tryParseContextChain(topCall); + if (contextResult) + return contextResult; + // Check for browser.xxx() pattern + const browserResult = tryParseBrowserChain(topCall); + if (browserResult) + return browserResult; + // Check for console.xxx() pattern + const consoleResult = tryParseConsoleChain(topCall, variables); + if (consoleResult) + return consoleResult; + // Check for fetch() pattern + const fetchResult = tryParseFetchChain(topCall); + if (fetchResult) + return fetchResult; + // Check for response method pattern (variable.json(), 
variable.text()) + if (variables) { + const responseMethodResult = tryParseResponseMethodChain(topCall, variables); + if (responseMethodResult) + return responseMethodResult; + } + // Default: parse as locator chain (page.getByRole().click()) + const steps = parseLocatorChain(exprNode); + validateLocatorChainSteps(steps); + return { + type: "locator", + steps, + }; +} +/** + * Try to parse an expect() chain, including negated assertions. + * Patterns: + * - expect(locator).toBeVisible() + * - expect(locator).not.toBeVisible() + * - expect(variable).toBe(value) + * - expect(variable.property).toBe(value) + */ +function tryParseExpectChain(topCall, variables) { + if (topCall.callee.type !== "MemberExpression") + return null; + const topMember = topCall.callee; + let expectCall = null; + let assertionMethod = ""; + let negated = false; + // Check for negated pattern: expect(locator).not.toBeVisible() + // Structure: CallExpr { callee: MemberExpr { object: MemberExpr { object: CallExpr(expect), property: "not" }, property: "toBeVisible" } } + if (topMember.object.type === "MemberExpression" && + isIdentifier(topMember.object.property, "not")) { + const notMember = topMember.object; + if (notMember.object.type === "CallExpression" && + notMember.object.callee.type === "Identifier" && + isIdentifier(notMember.object.callee, "expect")) { + expectCall = notMember.object; + negated = true; + assert(topMember.property.type === "Identifier", "Assertion method must be an identifier"); + assertionMethod = topMember.property.name; + } + } + // Check for regular pattern: expect(locator).toBeVisible() + if (!expectCall && + topMember.object.type === "CallExpression" && + topMember.object.callee.type === "Identifier" && + isIdentifier(topMember.object.callee, "expect")) { + expectCall = topMember.object; + assert(topMember.property.type === "Identifier", "Assertion method must be an identifier"); + assertionMethod = topMember.property.name; + } + if (!expectCall) + return null; + 
// Validate assertion method + assert(ALLOWED_EXPECT_ASSERTION_METHODS.has(assertionMethod), `Disallowed assertion method: ${assertionMethod}. Allowed: ${[...ALLOWED_EXPECT_ASSERTION_METHODS].join(", ")}`); + // Get assertion arguments + const assertionArgs = topCall.arguments.map((a) => evalSafeLiteral(a)); + // Parse the argument inside expect() + assert(expectCall.arguments.length === 1, "expect() must have exactly one argument"); + const expectArg = expectCall.arguments[0]; + // Check if the argument is a variable reference (possibly with property access) + // e.g., expect(data) or expect(data.url) or expect(response.status) + const valueChain = tryParseValueChain(expectArg, variables); + if (valueChain) { + return { + type: "expectValue", + variableName: valueChain.variableName, + propertyPath: valueChain.propertyPath, + assertionMethod: assertionMethod, + assertionArgs, + negated, + }; + } + // Check if the argument is a literal value (string, number, boolean, etc.) + // e.g., expect("some string").toBe("expected") or expect(42).toBe(42) + if (isLiteralNode(expectArg)) { + const literalValue = evalSafeLiteral(expectArg); + return { + type: "expectLiteral", + literalValue, + assertionMethod: assertionMethod, + assertionArgs, + negated, + }; + } + // Otherwise, try to parse as a locator chain (page.getByRole(), etc.) + const locatorSteps = parseLocatorChain(expectArg); + validateLocatorSteps(locatorSteps); + return { + type: "expect", + locatorSteps, + assertionMethod: assertionMethod, + assertionArgs, + negated, + }; +} +/** + * Try to parse a value chain like `data` or `data.url` or `data.nested.property` + * Returns the variable name and property path if it's a valid variable reference. + */ +function tryParseValueChain(node, variables) { + // Simple identifier: expect(data) + if (node.type === "Identifier") { + const name = node.name; + // Only match if it's a known variable (not page, context, etc.) 
+ if (variables && variables.has(name)) { + return { variableName: name, propertyPath: [] }; + } + return null; + } + // Member expression: expect(data.url) or expect(data.nested.property) + if (node.type === "MemberExpression") { + const propertyPath = []; + // Walk up the member chain to get all properties + let current = node; + while (current.type === "MemberExpression") { + const mem = current; + assert(mem.property.type === "Identifier", "Property access must be an identifier"); + assert(!mem.computed, "Computed property access not allowed"); + propertyPath.unshift(mem.property.name); + current = mem.object; + } + // The base should be an identifier (the variable name) + if (current.type === "Identifier") { + const name = current.name; + // Only match if it's a known variable + if (variables && variables.has(name)) { + return { variableName: name, propertyPath }; + } + } + } + return null; +} +/** + * Try to parse page.keyboard.xxx() pattern + */ +function tryParseKeyboardChain(topCall) { + if (topCall.callee.type !== "MemberExpression") + return null; + const member = topCall.callee; + if (!isPageSubObject(member.object, "keyboard")) + return null; + assert(member.property.type === "Identifier", "Keyboard method must be an identifier"); + const method = member.property.name; + assert(ALLOWED_KEYBOARD_METHODS.has(method), `Disallowed keyboard method: ${method}. 
Allowed: ${[...ALLOWED_KEYBOARD_METHODS].join(", ")}`); + const args = topCall.arguments.map((a) => evalSafeLiteral(a)); + return { + type: "keyboard", + method, + args, + }; +} +/** + * Try to parse page.mouse.xxx() pattern + */ +function tryParseMouseChain(topCall) { + if (topCall.callee.type !== "MemberExpression") + return null; + const member = topCall.callee; + if (!isPageSubObject(member.object, "mouse")) + return null; + assert(member.property.type === "Identifier", "Mouse method must be an identifier"); + const method = member.property.name; + assert(ALLOWED_MOUSE_METHODS.has(method), `Disallowed mouse method: ${method}. Allowed: ${[...ALLOWED_MOUSE_METHODS].join(", ")}`); + const args = topCall.arguments.map((a) => evalSafeLiteral(a)); + return { + type: "mouse", + method, + args, + }; +} +/** + * Try to parse page.method() pattern (page-level methods like goto, reload) + */ +function tryParsePageMethodChain(topCall, variables) { + if (topCall.callee.type !== "MemberExpression") + return null; + const member = topCall.callee; + // Check if it's page.methodName() where methodName is in ALLOWED_PAGE_METHODS + if (!isIdentifier(member.object, "page")) + return null; + assert(member.property.type === "Identifier", "Page method must be an identifier"); + const method = member.property.name; + // Only match if it's a page-level method, not a locator start method + if (!ALLOWED_PAGE_METHODS.has(method)) + return null; + const args = topCall.arguments.map((a) => evalSafeValue(a, variables)); + return { + type: "pageMethod", + method, + args, + }; +} +/** + * Try to parse context.xxx() pattern + * Executed as page.context().xxx() internally + */ +function tryParseContextChain(topCall) { + if (topCall.callee.type !== "MemberExpression") + return null; + const member = topCall.callee; + // Check if it's context.methodName() + if (!isIdentifier(member.object, "context")) + return null; + assert(member.property.type === "Identifier", "Context method must be an 
identifier"); + const method = member.property.name; + assert(ALLOWED_CONTEXT_METHODS.has(method), `Disallowed context method: ${method}. Allowed: ${[...ALLOWED_CONTEXT_METHODS].join(", ")}`); + const args = topCall.arguments.map((a) => evalSafeLiteral(a)); + return { + type: "context", + method, + args, + }; +} +/** + * Try to parse browser.xxx() pattern + * Executed as page.context().browser()?.xxx() internally + */ +function tryParseBrowserChain(topCall) { + if (topCall.callee.type !== "MemberExpression") + return null; + const member = topCall.callee; + // Check if it's browser.methodName() + if (!isIdentifier(member.object, "browser")) + return null; + assert(member.property.type === "Identifier", "Browser method must be an identifier"); + const method = member.property.name; + assert(ALLOWED_BROWSER_METHODS.has(method), `Disallowed browser method: ${method}. Allowed: ${[...ALLOWED_BROWSER_METHODS].join(", ")}`); + const args = topCall.arguments.map((a) => evalSafeLiteral(a)); + return { + type: "browser", + method, + args, + }; +} +/** + * Try to parse console.xxx() pattern + * Supports console.log, console.warn, console.error, console.info, console.debug + */ +function tryParseConsoleChain(topCall, variables) { + if (topCall.callee.type !== "MemberExpression") + return null; + const member = topCall.callee; + // Check if it's console.methodName() + if (!isIdentifier(member.object, "console")) + return null; + assert(member.property.type === "Identifier", "Console method must be an identifier"); + const method = member.property.name; + assert(ALLOWED_CONSOLE_METHODS.has(method), `Disallowed console method: ${method}. Allowed: ${[...ALLOWED_CONSOLE_METHODS].join(", ")}`); + // Arguments can be safe literals or variable references + const args = topCall.arguments.map((a) => evalSafeValue(a, variables)); + return { + type: "console", + method, + args, + }; +} +/** + * Try to parse fetch(url, options?) pattern. 
+ */ +function tryParseFetchChain(topCall) { + // Check if it's fetch(url, options?) + if (topCall.callee.type !== "Identifier") + return null; + if (topCall.callee.name !== "fetch") + return null; + assert(topCall.arguments.length >= 1 && topCall.arguments.length <= 2, "fetch() requires 1-2 arguments: fetch(url, options?)"); + // Extract URL + const urlArg = evalSafeLiteral(topCall.arguments[0]); + assert(typeof urlArg === "string", "fetch URL must be a string"); + // Validate URL security + validateFetchUrl(urlArg); + // Extract options if present + let options; + if (topCall.arguments.length === 2) { + const optArg = evalSafeLiteral(topCall.arguments[1]); + validateFetchOptions(optArg); + options = optArg; + } + return { + type: "fetch", + url: urlArg, + options, + }; +} +/** + * Try to parse response method chain: variable.json(), variable.text(), etc. + */ +function tryParseResponseMethodChain(topCall, variables) { + if (topCall.callee.type !== "MemberExpression") + return null; + const member = topCall.callee; + // Check if object is a variable identifier + if (member.object.type !== "Identifier") + return null; + const varName = member.object.name; + // Variable must exist (will be checked at runtime for Response type) + if (!variables.has(varName)) + return null; + assert(member.property.type === "Identifier", "Response method must be an identifier"); + const method = member.property.name; + assert(ALLOWED_RESPONSE_METHODS.has(method), `Disallowed response method: ${method}. Allowed: ${[...ALLOWED_RESPONSE_METHODS].join(", ")}`); + const args = topCall.arguments.map((a) => evalSafeLiteral(a)); + return { + type: "responseMethod", + variableName: varName, + method, + args, + }; +} +/** + * Validate locator steps (used inside expect - no actions allowed). 
+ */ +function validateLocatorSteps(steps) { + assert(steps.length >= 1, "Empty locator chain"); + assert(ALLOWED_START_METHODS.has(steps[0].method), `First call must be one of: ${[...ALLOWED_START_METHODS].join(", ")}. Got: ${steps[0].method}`); + // All steps must be start methods or chain methods (no actions in expect locators) + for (let i = 0; i < steps.length; i++) { + const m = steps[i].method; + if (i === 0) + continue; // start method already validated + const isChain = ALLOWED_LOCATOR_CHAIN_METHODS.has(m); + const isStart = ALLOWED_START_METHODS.has(m); // For nested locators + assert(isChain || isStart, `Disallowed method in locator chain: ${m}. Allowed: ${[...ALLOWED_LOCATOR_CHAIN_METHODS].join(", ")}`); + } +} +/** + * Validate locator chain steps (actions allowed at the end). + */ +function validateLocatorChainSteps(steps) { + assert(steps.length >= 1, "Empty chain"); + assert(ALLOWED_START_METHODS.has(steps[0].method), `First call must be one of: ${[...ALLOWED_START_METHODS].join(", ")}. Got: ${steps[0].method}`); + // Validate each step is in allowlists + for (let i = 0; i < steps.length; i++) { + const m = steps[i].method; + if (i === 0) + continue; // start method already validated + const isChain = ALLOWED_LOCATOR_CHAIN_METHODS.has(m); + const isAction = ALLOWED_ACTION_METHODS.has(m); + assert(isChain || isAction, `Disallowed method: ${m}. Allowed chain methods: ${[...ALLOWED_LOCATOR_CHAIN_METHODS].join(", ")}. Allowed action methods: ${[...ALLOWED_ACTION_METHODS].join(", ")}`); + // If it's an action, it must be the last step + if (isAction && i !== steps.length - 1) { + assert(false, `Action method '${m}' must be the last in the chain, but found more methods after it`); + } + } +} +// ============================================================================= +// ARGUMENT VALIDATION +// ============================================================================= +/** + * Per-method argument validation for extra safety. 
+ */ +function validateMethodArgs(method, args) { + switch (method) { + case "locator": + assert(typeof args[0] === "string", "locator(selector) requires a string selector"); + assert(args.length <= 2, "locator() accepts at most 2 arguments"); + break; + case "getByRole": + assert(typeof args[0] === "string", "getByRole(role) requires a string role"); + if (args[1] != null) { + assert(typeof args[1] === "object" && !Array.isArray(args[1]), "getByRole() options must be an object"); + } + break; + case "getByText": + case "getByLabel": + case "getByPlaceholder": + case "getByAltText": + case "getByTitle": + assert(typeof args[0] === "string" || args[0] instanceof RegExp, `${method}() requires a string or RegExp`); + break; + case "getByTestId": + assert(typeof args[0] === "string" || args[0] instanceof RegExp, "getByTestId() requires a string or RegExp"); + break; + case "nth": + assert(typeof args[0] === "number" && Number.isInteger(args[0]), "nth(index) requires an integer index"); + break; + case "fill": + case "type": + assert(typeof args[0] === "string", `${method}(value) requires a string value`); + break; + case "press": + assert(typeof args[0] === "string", "press(key) requires a string key"); + break; + case "selectOption": + // Can be string, array of strings, or object + break; + case "frameLocator": + assert(typeof args[0] === "string", "frameLocator(selector) requires a string selector"); + break; + // Actions that take no required arguments + case "click": + case "dblclick": + case "check": + case "uncheck": + case "hover": + case "focus": + case "blur": + case "clear": + case "scrollIntoViewIfNeeded": + case "first": + case "last": + case "count": + case "isVisible": + case "isEnabled": + case "isChecked": + case "textContent": + case "innerText": + case "innerHTML": + case "inputValue": + // These are fine with no args or optional args + break; + case "filter": + case "and": + case "or": + // These take locator options + break; + case "waitFor": + 
case "getAttribute": + // These have their own validation in Playwright + break; + // Page-level methods + case "goto": + assert(typeof args[0] === "string", "goto(url) requires a string URL"); + validateFetchUrl(args[0]); // Block file://, localhost, and private IPs + break; + case "waitForURL": + assert(typeof args[0] === "string" || args[0] instanceof RegExp, "waitForURL() requires a string or RegExp"); + break; + case "waitForTimeout": + assert(typeof args[0] === "number", "waitForTimeout(ms) requires a number"); + break; + case "waitForSelector": + assert(typeof args[0] === "string", "waitForSelector(selector) requires a string"); + break; + case "waitForLoadState": + // Optional state argument + if (args[0] != null) { + assert(typeof args[0] === "string", "waitForLoadState(state) requires a string state"); + } + break; + case "setViewportSize": + assert(typeof args[0] === "object" && args[0] !== null, "setViewportSize({ width, height }) requires an object"); + break; + // Keyboard methods + case "insertText": + assert(typeof args[0] === "string", "keyboard.insertText(text) requires a string"); + break; + case "down": + case "up": + // Keyboard down/up take a key string + if (args.length > 0) { + assert(typeof args[0] === "string", `keyboard.${method}(key) requires a string key`); + } + break; + // Mouse methods + case "move": + assert(typeof args[0] === "number" && typeof args[1] === "number", "mouse.move(x, y) requires two numbers"); + break; + case "wheel": + assert(typeof args[0] === "number" && typeof args[1] === "number", "mouse.wheel(deltaX, deltaY) requires two numbers"); + break; + // Drag and drop + case "dragTo": + // dragTo takes a locator, which we can't easily validate here + // Playwright will validate it at runtime + break; + // Page methods that take no required arguments + case "reload": + case "goBack": + case "goForward": + case "title": + case "url": + case "content": + case "screenshot": + case "close": + case "bringToFront": + case 
"waitForFunction": + // These are fine with no args or optional args + break; + default: + // Unknown method - this shouldn't happen if allowlists are correct + break; + } +} +// ============================================================================= +// DATA PLACEHOLDER INTERPOLATION +// ============================================================================= +/** + * Escape a string value for safe insertion into a JavaScript string literal. + * Handles both single and double quotes since scripts may use either. + */ +function escapeForStringLiteral(value) { + return value + .replace(/\\/g, "\\\\") // Escape backslashes first + .replace(/'/g, "\\'") // Escape single quotes + .replace(/"/g, '\\"') // Escape double quotes + .replace(/\n/g, "\\n") // Escape newlines + .replace(/\r/g, "\\r") // Escape carriage returns + .replace(/\t/g, "\\t"); // Escape tabs +} +/** + * Strip inline comments from a line of code. + * Handles // comments while preserving // inside string literals. 
+ * + * Examples: + * - 'page.goto("https://example.com") // comment' → 'page.goto("https://example.com")' + * - 'page.goto("https://example.com")' → 'page.goto("https://example.com")' (URL preserved) + * - "page.fill('input', 'test') // fill" → "page.fill('input', 'test')" + */ +function stripInlineComments(line) { + let inString = null; + let escaped = false; + for (let i = 0; i < line.length; i++) { + const char = line[i]; + if (escaped) { + escaped = false; + continue; + } + if (char === "\\") { + escaped = true; + continue; + } + // Track string state + if (char === '"' || char === "'") { + if (inString === null) { + inString = char; + } + else if (inString === char) { + inString = null; + } + continue; + } + // Check for // comment start (only outside strings) + if (inString === null && char === "/" && line[i + 1] === "/") { + // Found comment start, return everything before it (trimmed) + return line.slice(0, i).trim(); + } + } + return line; +} +/** + * Replace {{run.xxx}} and {{global.xxx}} placeholders in a string with values from the data objects. + * This is done BEFORE parsing to allow dynamic values in scripts. 
+ * + * @param line - The line to interpolate + * @param localValues - Values for {{run.xxx}} placeholders (e.g., {{run.email}}, {{run.extractedOtp}}) + * @param globalValues - Values for {{global.xxx}} placeholders (e.g., {{global.email}}) + */ +function interpolatePlaceholders(line, localValues, globalValues) { + let result = line; + // Replace {{run.xxx}} placeholders + if (localValues) { + result = result.replace(/\{\{run\.(\w+)\}\}/g, (match) => { + if (match in localValues) { + return escapeForStringLiteral(localValues[match]); + } + return match; // Keep original if key not found + }); + } + // Replace {{global.xxx}} placeholders + if (globalValues) { + result = result.replace(/\{\{global\.(\w+)\}\}/g, (match) => { + if (match in globalValues) { + return escapeForStringLiteral(globalValues[match]); + } + return match; // Keep original if key not found + }); + } + return result; +} +// ============================================================================= +// GETTER RESULT LOGGING +// ============================================================================= +/** + * Format a value for logging (handles objects, arrays, strings, etc.) + */ +function formatResultForLog(value) { + if (value === null) + return "null"; + if (value === undefined) + return "undefined"; + if (typeof value === "string") + return `"${value}"`; + if (typeof value === "number" || typeof value === "boolean") + return String(value); + try { + return JSON.stringify(value, null, 2); + } + catch { + return String(value); + } +} +/** + * Log the result of a getter method call. + */ +function logGetterResult(methodPath, result) { + logger_1.logger.debug(`[SecureScriptRunner] ${methodPath} → ${formatResultForLog(result)}`); +} +/** + * Safely execute a user-supplied Playwright script. + * + * The script is parsed as an AST and validated to only contain allowed + * Playwright method chains. User code is NEVER evaluated directly. 
+ * + * @example + * await runSecureScript({ + * page, + * script: 'page.getByRole("button", { name: "Save" }).click()', + * }); + * + * @example + * // Multi-line scripts (each line is executed in order) + * await runSecureScript({ + * page, + * script: ` + * page.getByLabel("Email").fill("test@example.com") + * page.getByLabel("Password").fill("password123") + * page.getByRole("button", { name: "Submit" }).click() + * `, + * }); + */ +async function runSecureScript({ page: pageInput, script, localValues, globalValues, expect: expectFn, }) { + // Resolve to the currently-active page at script start. Scripts are + // short-lived, so we don't re-resolve per-line; if the script itself opens + // a tab, the auto-switch happens for subsequent steps, not within the script. + const page = (0, index_1.resolvePage)(pageInput); + // Variable storage for the script context + const variables = new Map(); + // Track the last result for return value + let lastResult = undefined; + // Split script into lines, strip comments, and filter out empty lines + const lines = script + .split("\n") + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith("//") && !line.startsWith("#")) + .map((line) => stripInlineComments(line)) // Strip inline comments + .filter((line) => line); // Filter out lines that became empty after stripping + if (lines.length === 0) { + throw new Error("[SecureScriptRunner] Script is empty"); + } + for (const rawLine of lines) { + // Interpolate placeholders ({{run.xxx}} and {{global.xxx}}) + const line = interpolatePlaceholders(rawLine, localValues, globalValues); + // Remove trailing semicolons (optional in our DSL) + const cleanLine = line.replace(/;$/, "").trim(); + // Parse as a single JS statement (supporting variable declarations and await) + let ast; + try { + // Use sourceType: 'module' to allow top-level await + ast = (0, acorn_1.parse)(cleanLine, { + ecmaVersion: "latest", + sourceType: "module", + }); + } + catch (parseError) { + 
throw new Error(`[SecureScriptRunner] Failed to parse line: "${cleanLine}"\nParse error: ${parseError.message}`); + } + assert(ast.type === "Program", "Invalid program"); + assert(ast.body.length === 1, "Only one statement per line is allowed"); + const stmt = ast.body[0]; + // Handle variable declarations: const x = await fetch(...) or const y = await res.json() + if (stmt.type === "VariableDeclaration") { + const varDecl = stmt; + assert(varDecl.declarations.length === 1, "Only one variable per declaration"); + assert(varDecl.kind === "const" || varDecl.kind === "let", "Only const/let declarations allowed"); + const declarator = varDecl.declarations[0]; + assert(declarator.id.type === "Identifier", "Variable name must be identifier"); + const varName = declarator.id.name; + // Validate variable name (no reserved names) + assert(!RESERVED_VARIABLE_NAMES.has(varName), `Cannot use reserved name: ${varName}`); + assert(declarator.init !== null, "Variable must have initializer"); + // Handle await expression in the initializer + let initExpr = declarator.init; + if (initExpr.type === "AwaitExpression") { + initExpr = initExpr.argument; + } + // Check if the initializer is a literal value (string, number, etc.) + // This allows: const url = "{{run.url}}" or const count = 5 + if (isLiteralNode(initExpr)) { + const literalValue = evalSafeLiteral(initExpr); + variables.set(varName, literalValue); + lastResult = literalValue; + continue; + } + // Try computed expression (new URL(...), string concat, etc.) 
+ const computedExpr = parseSafeExpression(initExpr, variables); + if (computedExpr) { + const computedValue = evalSafeExpression(computedExpr, variables); + variables.set(varName, computedValue); + lastResult = computedValue; + continue; + } + // Parse the initializer expression (must be a function call) + const parsedInit = parseAllowedChain(initExpr, variables); + // Execute the initializer and store result + const result = await executeChain(parsedInit, page, variables, expectFn); + variables.set(varName, result); + lastResult = result; + continue; + } + assert(stmt.type === "ExpressionStatement", "Only expression statements or variable declarations are allowed"); + // Handle await expression at the statement level + let exprNode = stmt.expression; + if (exprNode.type === "AwaitExpression") { + exprNode = exprNode.argument; + } + const parsed = parseAllowedChain(exprNode, variables); + lastResult = await executeChain(parsed, page, variables, expectFn); + } + return lastResult; +} +/** + * Execute a parsed chain and return the result. 
+ */ +/* eslint-disable @typescript-eslint/no-explicit-any */ +async function executeChain(parsed, page, variables, expectFn) { + switch (parsed.type) { + case "expect": { + // Handle expect() assertion + assert(expectFn !== undefined, "expect() assertions require passing the 'expect' function to runSecureScript"); + // Build the locator from the steps + let locator = page; + for (const { method, args } of parsed.locatorSteps) { + validateMethodArgs(method, args); + locator = locator[method](...args); + } + // Call expect(locator).assertionMethod(args) or expect(locator).not.assertionMethod(args) + let expectation = expectFn(locator); + if (parsed.negated) { + expectation = expectation.not; + } + const assertion = expectation[parsed.assertionMethod](...parsed.assertionArgs); + // Await if it's a promise (most assertions are async) + if (assertion instanceof Promise) { + await assertion; + } + return undefined; + } + case "expectValue": { + // Handle expect() with a variable value: expect(data.url).toBe(...) 
+ assert(expectFn !== undefined, "expect() assertions require passing the 'expect' function to runSecureScript"); + // Get the variable value + const varValue = variables.get(parsed.variableName); + assert(varValue !== undefined, `Variable "${parsed.variableName}" is not defined`); + // Navigate the property path + let value = varValue; + for (const prop of parsed.propertyPath) { + assert(value !== null && value !== undefined, `Cannot read property "${prop}" of ${value}`); + value = value[prop]; + } + // Call expect(value).assertionMethod(args) or expect(value).not.assertionMethod(args) + let expectation = expectFn(value); + if (parsed.negated) { + expectation = expectation.not; + } + const assertion = expectation[parsed.assertionMethod](...parsed.assertionArgs); + // Await if it's a promise + if (assertion instanceof Promise) { + await assertion; + } + return undefined; + } + case "expectLiteral": { + // Handle expect() with a literal value: expect("string").toBe(...) + assert(expectFn !== undefined, "expect() assertions require passing the 'expect' function to runSecureScript"); + // Call expect(literalValue).assertionMethod(args) or expect(literalValue).not.assertionMethod(args) + let expectation = expectFn(parsed.literalValue); + if (parsed.negated) { + expectation = expectation.not; + } + const assertion = expectation[parsed.assertionMethod](...parsed.assertionArgs); + // Await if it's a promise + if (assertion instanceof Promise) { + await assertion; + } + return undefined; + } + case "pageMethod": { + // Handle page-level methods like goto, reload, etc. + validateMethodArgs(parsed.method, parsed.args); + const result = page[parsed.method](...parsed.args); + const resolvedResult = result instanceof Promise ? 
await result : result; + // Auto-log getter method results + if (GETTER_METHODS.has(parsed.method)) { + logGetterResult(`page.${parsed.method}()`, resolvedResult); + } + return resolvedResult; + } + case "keyboard": { + // Handle page.keyboard.xxx() methods + validateMethodArgs(parsed.method, parsed.args); + const result = page.keyboard[parsed.method](...parsed.args); + if (result instanceof Promise) { + return await result; + } + return result; + } + case "mouse": { + // Handle page.mouse.xxx() methods + validateMethodArgs(parsed.method, parsed.args); + const result = page.mouse[parsed.method](...parsed.args); + if (result instanceof Promise) { + return await result; + } + return result; + } + case "context": { + // Handle context.xxx() methods (executed as page.context().xxx()) + validateMethodArgs(parsed.method, parsed.args); + const context = page.context(); + const result = context[parsed.method](...parsed.args); + const resolvedResult = result instanceof Promise ? await result : result; + // Auto-log getter method results + if (GETTER_METHODS.has(parsed.method)) { + logGetterResult(`context.${parsed.method}()`, resolvedResult); + } + return resolvedResult; + } + case "browser": { + // Handle browser.xxx() methods (executed as page.context().browser()?.xxx()) + validateMethodArgs(parsed.method, parsed.args); + const browser = page.context().browser(); + assert(browser !== null, "Browser is not available"); + const result = browser[parsed.method](...parsed.args); + const resolvedResult = result instanceof Promise ? 
await result : result; + // Auto-log getter method results + if (GETTER_METHODS.has(parsed.method)) { + logGetterResult(`browser.${parsed.method}()`, resolvedResult); + } + return resolvedResult; + } + case "console": { + // Handle console.xxx() methods (log, warn, error, info, debug) + const consoleMethod = console[parsed.method]; + consoleMethod(...parsed.args); + return undefined; + } + case "fetch": { + // Handle fetch() calls + const fetchOptions = {}; + if (parsed.options?.method) { + fetchOptions.method = parsed.options.method; + } + if (parsed.options?.headers) { + fetchOptions.headers = parsed.options.headers; + } + if (parsed.options?.body) { + fetchOptions.body = + typeof parsed.options.body === "string" + ? parsed.options.body + : JSON.stringify(parsed.options.body); + } + // DNS rebinding protection: verify resolved IP is not blocked + await validateFetchUrlResolution(parsed.url); + const response = await fetch(parsed.url, fetchOptions); + logger_1.logger.debug(`[SecureScriptRunner] fetch(${parsed.url}) → ${response.status}`); + return response; + } + case "responseMethod": { + // Handle response method calls (res.json(), res.text()) + const response = variables.get(parsed.variableName); + assert(response instanceof Response, `Variable "${parsed.variableName}" is not a Response object`); + const result = await response[parsed.method](...parsed.args); + logger_1.logger.debug(`[SecureScriptRunner] ${parsed.variableName}.${parsed.method}() completed`); + return result; + } + case "variableDeclaration": { + // This case shouldn't be reached since variable declarations are handled earlier + // But include for completeness + const result = await executeChain(parsed.value, page, variables, expectFn); + return result; + } + case "locator": { + // Handle locator chain (page.getByRole().click()) + let cur = page; + const lastStep = parsed.steps[parsed.steps.length - 1]; + for (const { method, args } of parsed.steps) { + validateMethodArgs(method, args); + const 
result = cur[method](...args); + if (result instanceof Promise) { + cur = await result; + } + else { + cur = result; + } + } + // Auto-log getter method results (last method in chain) + if (lastStep && GETTER_METHODS.has(lastStep.method)) { + const chainPath = parsed.steps.map((s) => `${s.method}()`).join("."); + logGetterResult(`page.${chainPath}`, cur); + } + return cur; + } + default: { + // Exhaustive check - should never reach here + const _exhaustive = parsed; + throw new Error(`Unknown chain type: ${_exhaustive.type}`); + } + } +} +/* eslint-enable @typescript-eslint/no-explicit-any */ +/** + * Validate a script without executing it. + * Useful for pre-validation before saving scripts. + * + * @returns true if valid, throws Error if invalid + */ +function validateScript(script) { + // Track declared variables for validation + const declaredVariables = new Map(); + const lines = script + .split("\n") + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith("//") && !line.startsWith("#")) + .map((line) => stripInlineComments(line)) // Strip inline comments + .filter((line) => line); // Filter out lines that became empty after stripping + if (lines.length === 0) { + throw new Error("[SecureScriptRunner] Script is empty"); + } + for (const line of lines) { + const cleanLine = line.replace(/;$/, "").trim(); + // Note: We can't interpolate placeholders during validation + // because we don't have the data. We'll validate the structure only. + // Replace placeholders with a dummy value for parsing. + // The placeholder is already inside quotes, so we just replace the {{...}} part. 
+ const lineForParsing = cleanLine + .replace(/\{\{run\.\w+\}\}/g, "__PLACEHOLDER__") + .replace(/\{\{global\.\w+\}\}/g, "__PLACEHOLDER__"); + let ast; + try { + // Use sourceType: 'module' to allow top-level await + ast = (0, acorn_1.parse)(lineForParsing, { + ecmaVersion: "latest", + sourceType: "module", + }); + } + catch (parseError) { + throw new Error(`[SecureScriptRunner] Failed to parse line: "${cleanLine}"\nParse error: ${parseError.message}`); + } + assert(ast.type === "Program", "Invalid program"); + assert(ast.body.length === 1, "Only one statement per line is allowed"); + const stmt = ast.body[0]; + // Handle variable declarations + if (stmt.type === "VariableDeclaration") { + const varDecl = stmt; + assert(varDecl.declarations.length === 1, "Only one variable per declaration"); + assert(varDecl.kind === "const" || varDecl.kind === "let", "Only const/let declarations allowed"); + const declarator = varDecl.declarations[0]; + assert(declarator.id.type === "Identifier", "Variable name must be identifier"); + const varName = declarator.id.name; + // Validate variable name (no reserved names) + assert(!RESERVED_VARIABLE_NAMES.has(varName), `Cannot use reserved name: ${varName}`); + assert(declarator.init !== null, "Variable must have initializer"); + // Handle await expression in the initializer + let initExpr = declarator.init; + if (initExpr.type === "AwaitExpression") { + initExpr = initExpr.argument; + } + // Check if the initializer is a literal value + if (isLiteralNode(initExpr)) { + // Validate that it's a safe literal (will throw if not) + evalSafeLiteral(initExpr); + declaredVariables.set(varName, "__PLACEHOLDER__"); + continue; + } + // Try computed expression (validates structure without executing) + const computedExpr = parseSafeExpression(initExpr, declaredVariables); + if (computedExpr) { + declaredVariables.set(varName, "__COMPUTED_PLACEHOLDER__"); + continue; + } + // Parse the initializer expression (this will throw if invalid) + 
parseAllowedChain(initExpr, declaredVariables); + // Mark variable as declared (with placeholder value for validation) + declaredVariables.set(varName, "__PLACEHOLDER__"); + continue; + } + assert(stmt.type === "ExpressionStatement", "Only expression statements or variable declarations are allowed"); + // Handle await expression at the statement level + let exprNode = stmt.expression; + if (exprNode.type === "AwaitExpression") { + exprNode = exprNode.argument; + } + // This will throw if the chain is invalid + parseAllowedChain(exprNode, declaredVariables); + } + return true; +} diff --git a/dist/utils/tab-manager.d.ts b/dist/utils/tab-manager.d.ts new file mode 100644 index 0000000..54680da --- /dev/null +++ b/dist/utils/tab-manager.d.ts @@ -0,0 +1,8 @@ +import type { Page } from "@playwright/test"; +export type TabTarget = "main" | "latest" | number; +export type TabManager = { + active: () => Page; + pages: () => Page[]; + switchTo: (target: TabTarget) => Promise; +}; +export declare const createTabManager: (initialPage: Page) => TabManager; diff --git a/dist/utils/tab-manager.js b/dist/utils/tab-manager.js new file mode 100644 index 0000000..ac13000 --- /dev/null +++ b/dist/utils/tab-manager.js @@ -0,0 +1,47 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.createTabManager = void 0; +const createTabManager = (initialPage) => { + const pages = [initialPage]; + let activeIndex = 0; + const context = initialPage.context(); + context.on("page", (newPage) => { + if (!pages.includes(newPage)) { + pages.push(newPage); + } + // Auto-switch active focus to any newly opened tab so subsequent + // snapshots/actions target it without explicit switching. 
+ activeIndex = pages.indexOf(newPage); + newPage.on("close", () => { + const idx = pages.indexOf(newPage); + if (idx === -1) + return; + pages.splice(idx, 1); + if (activeIndex === idx) { + activeIndex = Math.max(0, pages.length - 1); + } + else if (activeIndex > idx) { + activeIndex -= 1; + } + }); + }); + return { + active: () => pages[activeIndex], + pages: () => [...pages], + switchTo: async (target) => { + let idx; + if (target === "main") + idx = 0; + else if (target === "latest") + idx = pages.length - 1; + else + idx = target; + if (idx < 0 || idx >= pages.length) { + throw new Error(`switchToTab: invalid target ${target}; ${pages.length} tab(s) open.`); + } + activeIndex = idx; + return pages[idx]; + }, + }; +}; +exports.createTabManager = createTabManager; diff --git a/src/__tests__/data-cache.test.ts b/src/__tests__/data-cache.test.ts index 69be5ec..04e29c5 100644 --- a/src/__tests__/data-cache.test.ts +++ b/src/__tests__/data-cache.test.ts @@ -1,7 +1,7 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; -vi.mock("../redis", () => ({ - redis: { hgetall: vi.fn(), hset: vi.fn(), expire: vi.fn() }, +vi.mock("../cache", () => ({ + cache: { hgetall: vi.fn(), hset: vi.fn(), expire: vi.fn() }, })); vi.mock("../email", () => ({ diff --git a/src/__tests__/integration/run-steps.test.ts b/src/__tests__/integration/run-steps.test.ts index c3edc40..aa7c6d8 100644 --- a/src/__tests__/integration/run-steps.test.ts +++ b/src/__tests__/integration/run-steps.test.ts @@ -3,12 +3,12 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; // Mock instrumentation (imported as side effect) vi.mock("../../instrumentation", () => ({ axiomEnabled: false })); -// Mock Redis -vi.mock("../../redis", () => ({ - redis: { +// Mock Cache +vi.mock("../../cache", () => ({ + cache: { hgetall: vi.fn().mockResolvedValue({}), - hset: vi.fn().mockResolvedValue("OK"), - expire: vi.fn().mockResolvedValue(1), + hset: vi.fn().mockResolvedValue(undefined), + expire: 
vi.fn().mockResolvedValue(undefined), }, })); @@ -89,7 +89,7 @@ vi.mock("../../utils/secure-script-runner", () => ({ import { runSteps } from "../../index"; import { resetConfig } from "../../config"; -import { redis } from "../../redis"; +import { cache } from "../../cache"; import { generateText } from "ai"; import type { Page } from "@playwright/test"; import type { Step } from "../../types"; @@ -121,8 +121,8 @@ describe("runSteps", () => { beforeEach(() => { vi.clearAllMocks(); resetConfig(); - // Reset redis mock to default empty - vi.mocked(redis!.hgetall).mockResolvedValue({}); + // Reset cache mock to default empty + vi.mocked(cache!.hgetall).mockResolvedValue({}); }); it("executes a simple step", async () => { @@ -198,8 +198,8 @@ describe("runSteps", () => { const page = createMockPage(); const steps: Step[] = [{ description: "Click submit" }]; - // Mock redis to return cached step data - vi.mocked(redis!.hgetall).mockResolvedValue({ + // Mock cache to return cached step data + vi.mocked(cache!.hgetall).mockResolvedValue({ locator: 'getByRole("button", { name: "Submit" })', action: "click", description: "Submit button", @@ -220,8 +220,8 @@ describe("runSteps", () => { const page = createMockPage(); const steps: Step[] = [{ description: "Click submit" }]; - // Mock redis to return cached step data - vi.mocked(redis!.hgetall).mockResolvedValue({ + // Mock cache to return cached step data + vi.mocked(cache!.hgetall).mockResolvedValue({ locator: 'getByRole("button", { name: "Submit" })', action: "click", description: "Submit button", @@ -291,8 +291,8 @@ describe("runSteps", () => { it("bypasses cache for individual step when step.bypassCache is true", async () => { const page = createMockPage(); - // Mock redis to return cached data - vi.mocked(redis!.hgetall).mockResolvedValue({ + // Mock cache to return cached data + vi.mocked(cache!.hgetall).mockResolvedValue({ locator: 'getByRole("button", { name: "Go" })', action: "click", description: "Go button", diff 
--git a/src/cache.ts b/src/cache.ts new file mode 100644 index 0000000..8075921 --- /dev/null +++ b/src/cache.ts @@ -0,0 +1,160 @@ +import { logger } from "./logger"; + +// ============================================================================= +// Cache Store Interface +// ============================================================================= + +/** + * Interface for a hash-based cache store. + * Implementations must support hash get/set and key expiration. + */ +export interface CacheStore { + hgetall(key: string): Promise>; + hset(key: string, values: Record): Promise; + expire(key: string, seconds: number): Promise; +} + +// ============================================================================= +// Redis Store +// ============================================================================= + +class RedisStore implements CacheStore { + private client: import("ioredis").default; + + constructor(url: string) { + // eslint-disable-next-line @typescript-eslint/no-require-imports + const Redis = require("ioredis") as typeof import("ioredis").default; + this.client = new Redis(url); + } + + async hgetall(key: string): Promise> { + return this.client.hgetall(key); + } + + async hset(key: string, values: Record): Promise { + await this.client.hset(key, values); + } + + async expire(key: string, seconds: number): Promise { + await this.client.expire(key, seconds); + } +} + +// ============================================================================= +// File Store +// ============================================================================= + +import * as fs from "fs"; +import * as path from "path"; + +class FileStore implements CacheStore { + private dir: string; + + constructor(dir: string) { + this.dir = dir; + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + } + + private filePath(key: string): string { + // Encode key to a safe filename + const safeKey = encodeURIComponent(key); + return path.join(this.dir, 
`${safeKey}.json`); + } + + private read(key: string): { data: Record; expiresAt?: number } | null { + const fp = this.filePath(key); + if (!fs.existsSync(fp)) return null; + + try { + const raw = JSON.parse(fs.readFileSync(fp, "utf-8")); + + // Check expiration + if (raw.expiresAt && Date.now() > raw.expiresAt) { + fs.unlinkSync(fp); + return null; + } + + return raw; + } catch { + return null; + } + } + + private write(key: string, entry: { data: Record; expiresAt?: number }): void { + const fp = this.filePath(key); + fs.writeFileSync(fp, JSON.stringify(entry), "utf-8"); + } + + async hgetall(key: string): Promise> { + const entry = this.read(key); + return entry?.data ?? {}; + } + + async hset(key: string, values: Record): Promise { + const existing = this.read(key); + const merged = { ...(existing?.data ?? {}), ...values }; + this.write(key, { data: merged, expiresAt: existing?.expiresAt }); + } + + async expire(key: string, seconds: number): Promise { + const existing = this.read(key); + if (!existing) return; + this.write(key, { ...existing, expiresAt: Date.now() + seconds * 1000 }); + } +} + +// ============================================================================= +// Factory +// ============================================================================= + +/** + * Creates the cache store based on environment variables. + * + * CACHE_PROVIDER selects the backend: + * - "redis" (default when REDIS_URL is set): uses Redis via ioredis + * - "file": uses JSON files on disk at CACHE_DIR (defaults to .passmark-cache) + * - "none": disables caching entirely + * + * For backwards compatibility, if CACHE_PROVIDER is not set: + * - If REDIS_URL is set → uses Redis + * - Otherwise → caching is disabled (null) + */ +function createCacheStore(): CacheStore | null { + const provider = process.env.CACHE_PROVIDER?.toLowerCase(); + + if (provider === "none") { + logger.warn("Cache provider set to 'none'. 
Caching is disabled."); + return null; + } + + if (provider === "file") { + const dir = process.env.CACHE_DIR || ".passmark-cache"; + logger.info(`Using file-based cache at: ${dir}`); + return new FileStore(dir); + } + + if (provider === "redis" || (!provider && process.env.REDIS_URL)) { + if (!process.env.REDIS_URL) { + logger.warn("CACHE_PROVIDER is 'redis' but REDIS_URL is not set. Caching is disabled."); + return null; + } + logger.info("Using Redis cache."); + return new RedisStore(process.env.REDIS_URL); + } + + if (provider) { + logger.warn(`Unknown CACHE_PROVIDER '${provider}'. Caching is disabled.`); + return null; + } + + // No CACHE_PROVIDER and no REDIS_URL + logger.warn( + "No cache provider configured. Set CACHE_PROVIDER=redis|file|none or REDIS_URL. " + + "Step caching, global placeholders, and project data are disabled.", + ); + return null; +} + +export const cache: CacheStore | null = createCacheStore(); diff --git a/src/constants.ts b/src/constants.ts index 195521f..384ccd9 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -22,5 +22,5 @@ export const MAX_RETRIES = 3; // Thinking budgets (tokens) export const THINKING_BUDGET_DEFAULT = 1024; -// Redis +// Cache export const GLOBAL_VALUES_TTL_SECONDS = 86400; diff --git a/src/data-cache.ts b/src/data-cache.ts index 01ecec2..f7b415b 100644 --- a/src/data-cache.ts +++ b/src/data-cache.ts @@ -4,7 +4,7 @@ import { getConfig } from "./config"; import { extractEmailContent } from "./email"; import { GLOBAL_VALUES_TTL_SECONDS } from "./constants"; import { logger } from "./logger"; -import { redis } from "./redis"; +import { cache } from "./cache"; import { Step } from "./types"; import { generatePhoneNumber } from "./utils"; @@ -26,7 +26,7 @@ export type LocalPlaceholders = { /** * Global placeholders that are shared across all tests within an execution. 
- * These values are persisted to Redis and loaded for subsequent runSteps calls + * These values are persisted to the cache and loaded for subsequent runSteps calls * with the same executionId. */ export type GlobalPlaceholders = { @@ -39,7 +39,7 @@ export type GlobalPlaceholders = { /** * Project data placeholders for {{data.key}} syntax. - * These are stored in Redis and managed via project settings. + * These are stored in the cache and managed via project settings. */ export type ProjectDataPlaceholders = Record; @@ -95,26 +95,26 @@ const GLOBAL_PLACEHOLDER_PATTERN = /\{\{global\.\w+\}\}/; const PROJECT_DATA_PLACEHOLDER_PATTERN = /\{\{data\.(\w+)\}\}/g; // ============================================================================= -// Redis Operations (Global Values) +// Cache Operations (Global Values) // ============================================================================= /** - * Generates a Redis key for storing global values for an execution. + * Generates a cache key for storing global values for an execution. */ -function getRedisKey(executionId: string): string { +function getCacheKey(executionId: string): string { return `execution:${executionId}:globals`; } /** - * Fetches global values from Redis for a given execution ID. + * Fetches global values from the cache for a given execution ID. * Returns null if no values exist. */ export async function getGlobalValues( executionId: string, ): Promise | null> { - if (!redis) return null; - const key = getRedisKey(executionId); - const values = await redis.hgetall(key); + if (!cache) return null; + const key = getCacheKey(executionId); + const values = await cache.hgetall(key); if (!values || Object.keys(values).length === 0) { return null; @@ -124,45 +124,45 @@ export async function getGlobalValues( } /** - * Saves global values to Redis for a given execution ID. + * Saves global values to the cache for a given execution ID. * Sets a 24-hour TTL on the key. 
*/ export async function saveGlobalValues( executionId: string, values: GlobalPlaceholders, ): Promise { - if (!redis) return; + if (!cache) return; - const key = getRedisKey(executionId); + const key = getCacheKey(executionId); // Save all values as a hash - await redis.hset(key, values); + await cache.hset(key, values); // Set TTL - await redis.expire(key, GLOBAL_VALUES_TTL_SECONDS); + await cache.expire(key, GLOBAL_VALUES_TTL_SECONDS); - logger.debug(`Saved global values to Redis for execution: ${executionId}`); + logger.debug(`Saved global values to cache for execution: ${executionId}`); } // ============================================================================= -// Redis Operations (Project Data) +// Cache Operations (Project Data) // ============================================================================= /** - * Generates a Redis key for storing project data. + * Generates a cache key for storing project data. */ -function getProjectDataRedisKey(projectId: string): string { +function getProjectDataCacheKey(projectId: string): string { return `project:${projectId}:data`; } /** - * Fetches project data from Redis for a given project ID. + * Fetches project data from the cache for a given project ID. * Returns an empty object if no data exists. */ export async function getProjectData(projectId: string): Promise { - if (!redis) return {}; - const key = getProjectDataRedisKey(projectId); - const values = await redis.hgetall(key); + if (!cache) return {}; + const key = getProjectDataCacheKey(projectId); + const values = await cache.hgetall(key); if (!values || Object.keys(values).length === 0) { return {}; @@ -384,7 +384,7 @@ export function replacePlaceholders( /** * Processes steps and assertions to replace dynamic placeholders with consistent values. * Handles {{run.*}} placeholders (fresh per call), {{global.*}} placeholders - * (shared across execution via Redis), and {{data.*}} placeholders (project data from Redis). 
+ * (shared across execution via cache), and {{data.*}} placeholders (project data from cache). * Returns the processed steps and assertions along with the generated values. */ export async function processPlaceholders( diff --git a/src/index.ts b/src/index.ts index 820ff31..572b8eb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -22,7 +22,7 @@ async function maybeWithSpan( } import { z } from "zod"; import { buildRunStepsPrompt, buildRunUserFlowPrompt } from "./prompts"; -import { redis } from "./redis"; +import { cache } from "./cache"; import { getAItools } from "./tools"; import { RunStepsOptions, UserFlowOptions } from "./types"; import { @@ -59,7 +59,7 @@ import { /** * Executes a sequence of test steps using AI with intelligent caching. * Each step is described in natural language and executed via browser automation. - * Successfully executed steps are cached in Redis for faster subsequent runs. + * Successfully executed steps are cached for faster subsequent runs. * * @param options - Configuration including page, steps, assertions, and callbacks * @param options.page - The Playwright page instance @@ -109,13 +109,13 @@ export const runSteps = async ({ // when a new tab opens, or explicitly via the `switchToTab` step field. const tabManager = createTabManager(page); - if (!redis) { + if (!cache) { logger.warn( - "Redis not configured. Step caching is disabled — all steps will use AI execution.", + "Cache not configured. Step caching is disabled — all steps will use AI execution.", ); if (executionId) { logger.warn( - "{{global.*}} placeholders will not persist across runSteps calls without Redis.", + "{{global.*}} placeholders will not persist across runSteps calls without a cache provider.", ); } } @@ -262,7 +262,7 @@ export const runSteps = async ({ } // First check if the step is cached on redis - const cachedStep = redis ? await redis.hgetall(`step:${userFlow}:${step.description}`) : {}; + const cachedStep = cache ? 
await cache.hgetall(`step:${userFlow}:${step.description}`) : {}; if ( !bypassCache && @@ -443,10 +443,10 @@ export const runSteps = async ({ .flatMap((s) => s.toolCalls) .filter((tool) => ["browser_snapshot", "browser_stop"].indexOf(tool.toolName) === -1); - if (allToolCalls.length === 1 && redis) { + if (allToolCalls.length === 1 && cache) { const cacheData = getPendingCacheData(); if (cacheData) { - await redis.hset(`step:${userFlow}:${step.description}`, cacheData); + await cache.hset(`step:${userFlow}:${step.description}`, cacheData); logger.debug(`Cached step action: ${step.description}`); } } @@ -712,4 +712,5 @@ export { extractEmailContent, generateEmail } from "./email"; export { assert } from "./assertion"; export type { AssertionResult } from "./types"; +export type { CacheStore } from "./cache"; export { PassmarkError, StepExecutionError, ValidationError, AIModelError, CacheError, ConfigurationError } from "./errors"; diff --git a/src/redis.ts b/src/redis.ts deleted file mode 100644 index bf3f34d..0000000 --- a/src/redis.ts +++ /dev/null @@ -1,14 +0,0 @@ -import Redis from "ioredis"; -import { logger } from "./logger"; - -let redis: Redis | null = null; - -if (process.env.REDIS_URL) { - redis = new Redis(process.env.REDIS_URL); -} else { - logger.warn( - "REDIS_URL not set. Step caching, global placeholders, and project data are disabled.", - ); -} - -export { redis }; diff --git a/src/types.ts b/src/types.ts index abd4597..27d7d9e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -111,7 +111,7 @@ export type RunStepsOptions = { /** * Execution ID to link multiple runSteps calls together. - * When provided, {{global.*}} placeholders are persisted to Redis + * When provided, {{global.*}} placeholders are persisted to the cache * and shared across all runSteps calls with the same executionId. * Required when using {{global.*}} placeholders. */