diff --git a/packages/cli/src/telemetry/system.ts b/packages/cli/src/telemetry/system.ts index 3fc37ffc7..04a80cd25 100644 --- a/packages/cli/src/telemetry/system.ts +++ b/packages/cli/src/telemetry/system.ts @@ -1,6 +1,7 @@ -import { cpus, totalmem, freemem, platform, release } from "node:os"; +import { cpus, freemem, platform, release } from "node:os"; import { existsSync, readFileSync, statfsSync } from "node:fs"; import { execSync } from "node:child_process"; +import { getSystemTotalMb } from "@hyperframes/engine"; import { detectAgentRuntime, detectSandboxRuntime, @@ -67,7 +68,7 @@ export function getSystemMeta(): SystemMeta { cpu_count: cpuInfo.length, cpu_model: firstCpu?.model?.trim() ?? null, cpu_speed: firstCpu?.speed ?? null, - memory_total_mb: bytesToMb(totalmem()), + memory_total_mb: getSystemTotalMb(), is_docker: detectDocker(), is_ci: detectCI(), ci_name: getCIName(), diff --git a/packages/cli/tsconfig.json b/packages/cli/tsconfig.json index 4908e2c64..28a84e7d4 100644 --- a/packages/cli/tsconfig.json +++ b/packages/cli/tsconfig.json @@ -7,7 +7,8 @@ "paths": { "@hyperframes/producer": ["../producer/src/index.ts"], "@hyperframes/producer/distributed": ["../producer/src/distributed.ts"], - "@hyperframes/aws-lambda/sdk": ["../aws-lambda/src/sdk/index.ts"] + "@hyperframes/aws-lambda/sdk": ["../aws-lambda/src/sdk/index.ts"], + "@hyperframes/gcp-cloud-run/sdk": ["../gcp-cloud-run/src/sdk/index.ts"] }, "strict": true, "noUncheckedIndexedAccess": true, diff --git a/packages/engine/src/services/parallelCoordinator.ts b/packages/engine/src/services/parallelCoordinator.ts index 45a3ffbbc..2e844c725 100644 --- a/packages/engine/src/services/parallelCoordinator.ts +++ b/packages/engine/src/services/parallelCoordinator.ts @@ -5,7 +5,7 @@ * Auto-detects optimal worker count based on CPU/memory. 
*/ -import { cpus, freemem, totalmem } from "os"; +import { cpus, freemem } from "os"; import { existsSync, mkdirSync, readdirSync } from "fs"; import { copyFile, rename } from "fs/promises"; import { join } from "path"; @@ -26,6 +26,7 @@ import { DEFAULT_CONFIG, type EngineConfig } from "../config.js"; import { assertSwiftShader } from "../utils/assertSwiftShader.js"; import { readWebGlVendorInfoFromCanvas } from "../utils/readWebGlVendorInfoFromCanvas.js"; import { resolveHeadlessShellPath } from "./browserManager.js"; +import { getSystemTotalMb } from "./systemMemory.js"; export interface WorkerTask { workerId: number; @@ -153,7 +154,7 @@ export function calculateOptimalWorkers( // Use total memory instead of free memory — macOS reports misleadingly low // freemem() because it aggressively caches files in "inactive" memory that // is immediately reclaimable. - const totalMemoryMB = Math.round(totalmem() / (1024 * 1024)); + const totalMemoryMB = getSystemTotalMb(); const memoryBasedWorkers = Math.max(1, Math.floor((totalMemoryMB * 0.5) / MEMORY_PER_WORKER_MB)); const frameBasedWorkers = Math.floor(totalFrames / MIN_FRAMES_PER_WORKER); @@ -429,7 +430,7 @@ export function getSystemResources(): { } { return { cpuCores: cpus().length, - totalMemoryMB: Math.round(totalmem() / (1024 * 1024)), + totalMemoryMB: getSystemTotalMb(), freeMemoryMB: Math.round(freemem() / (1024 * 1024)), recommendedWorkers: calculateOptimalWorkers(1000), }; diff --git a/packages/engine/src/services/systemMemory.test.ts b/packages/engine/src/services/systemMemory.test.ts index f4489ba24..0d6911f97 100644 --- a/packages/engine/src/services/systemMemory.test.ts +++ b/packages/engine/src/services/systemMemory.test.ts @@ -1,5 +1,95 @@ -import { describe, it, expect } from "vitest"; -import { isLowMemorySystem, LOW_MEMORY_TOTAL_MB_THRESHOLD } from "./systemMemory.js"; +import { afterEach, beforeEach, describe, it, expect, vi } from "vitest"; +import { + _resetCgroupLimitCacheForTests, + 
isLowMemorySystem, + LOW_MEMORY_TOTAL_MB_THRESHOLD, + parseCgroupLimitMb, +} from "./systemMemory.js"; + +const BYTES_PER_MIB = 1024 * 1024; +const CGROUP_V2_MEMORY_MAX_PATH = "/sys/fs/cgroup/memory.max"; +const CGROUP_V1_MEMORY_LIMIT_PATH = "/sys/fs/cgroup/memory/memory.limit_in_bytes"; + +type SystemMemoryModule = typeof import("./systemMemory.js"); + +type MockSystemMemoryOptions = { + files?: Record; + hostTotalMb?: number; + platform?: NodeJS.Platform; + readErrors?: Record; + onRead?: (path: string) => void; + throwOnFileRead?: boolean; +}; + +function stubPlatform(platform: NodeJS.Platform): () => void { + const descriptor = Object.getOwnPropertyDescriptor(process, "platform"); + Object.defineProperty(process, "platform", { value: platform }); + + return () => { + if (descriptor) { + Object.defineProperty(process, "platform", descriptor); + } + }; +} + +async function withSystemMemoryMocks( + options: MockSystemMemoryOptions, + run: (systemMemory: SystemMemoryModule) => void | Promise, +): Promise { + const { + files = {}, + hostTotalMb = 32768, + platform = "linux", + readErrors = {}, + onRead, + throwOnFileRead = false, + } = options; + const restorePlatform = stubPlatform(platform); + + vi.resetModules(); + vi.doMock("os", () => ({ + totalmem: () => hostTotalMb * BYTES_PER_MIB, + })); + vi.doMock("fs", () => ({ + readFileSync: (path: string) => { + onRead?.(path); + + if (throwOnFileRead) { + throw new Error(`/sys read should not happen: ${path}`); + } + + if (path in readErrors) { + throw readErrors[path]; + } + + if (path in files) { + return files[path]; + } + + throw Object.assign(new Error("missing cgroup file"), { code: "ENOENT" }); + }, + })); + + try { + const systemMemory = await import("./systemMemory.js"); + systemMemory._resetCgroupLimitCacheForTests(); + await run(systemMemory); + } finally { + vi.doUnmock("fs"); + vi.doUnmock("os"); + vi.resetModules(); + restorePlatform(); + } +} + +beforeEach(() => { + _resetCgroupLimitCacheForTests(); 
+}); + +afterEach(() => { + _resetCgroupLimitCacheForTests(); + vi.restoreAllMocks(); +}); describe("isLowMemorySystem", () => { it("treats sub-threshold RAM as low-memory", () => { @@ -20,4 +110,215 @@ describe("isLowMemorySystem", () => { expect(isLowMemorySystem(16384)).toBe(false); expect(isLowMemorySystem(65536)).toBe(false); }); + + it("treats a 4 GiB cgroup v2 limit on a 32 GiB host as low-memory", async () => { + await withSystemMemoryMocks( + { + files: { + [CGROUP_V2_MEMORY_MAX_PATH]: `${4096 * BYTES_PER_MIB}`, + }, + }, + ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(4096); + expect(isLowMemorySystem()).toBe(true); + }, + ); + }); +}); + +describe("parseCgroupLimitMb", () => { + it("parses cgroup v2 numeric limits", () => { + expect(parseCgroupLimitMb(`${4096 * BYTES_PER_MIB}`, null)).toBe(4096); + }); + + it('ignores cgroup v2 "max" limits', () => { + expect(parseCgroupLimitMb("max", null)).toBeNull(); + }); + + it("parses cgroup v1 numeric limits and ignores no-limit sentinels", () => { + expect(parseCgroupLimitMb(null, `${6144 * BYTES_PER_MIB}`)).toBe(6144); + expect(parseCgroupLimitMb(null, "9223372036854771712")).toBeNull(); + }); + + it("ignores absent and malformed limits", () => { + expect(parseCgroupLimitMb(null, null)).toBeNull(); + + for (const content of ["", "garbage", "-1", "0"]) { + expect(parseCgroupLimitMb(content, null)).toBeNull(); + expect(parseCgroupLimitMb(null, content)).toBeNull(); + } + }); + + it("uses cgroup v2 when both v2 and v1 contents are present", () => { + expect(parseCgroupLimitMb(`${4096 * BYTES_PER_MIB}`, `${2048 * BYTES_PER_MIB}`)).toBe(4096); + }); +}); + +describe("getSystemTotalMb", () => { + it("caches cgroup probes until the test reset hook clears the cache", async () => { + const readCalls: string[] = []; + const files = { + [CGROUP_V2_MEMORY_MAX_PATH]: `${4096 * BYTES_PER_MIB}`, + }; + + await withSystemMemoryMocks( + { + files, + onRead: (path) => readCalls.push(path), + }, 
+ ({ _resetCgroupLimitCacheForTests, getSystemTotalMb }) => { + expect(getSystemTotalMb()).toBe(4096); + expect(getSystemTotalMb()).toBe(4096); + expect(readCalls).toEqual([CGROUP_V2_MEMORY_MAX_PATH]); + + files[CGROUP_V2_MEMORY_MAX_PATH] = `${2048 * BYTES_PER_MIB}`; + _resetCgroupLimitCacheForTests(); + + expect(getSystemTotalMb()).toBe(2048); + expect(readCalls).toEqual([CGROUP_V2_MEMORY_MAX_PATH, CGROUP_V2_MEMORY_MAX_PATH]); + }, + ); + }); + + it('uses the host total when cgroup v2 reports "max"', async () => { + await withSystemMemoryMocks( + { + files: { + [CGROUP_V2_MEMORY_MAX_PATH]: "max", + }, + }, + ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(32768); + expect(isLowMemorySystem()).toBe(false); + }, + ); + }); + + it("honors cgroup v1 numeric limits when cgroup v2 is absent", async () => { + await withSystemMemoryMocks( + { + files: { + [CGROUP_V1_MEMORY_LIMIT_PATH]: `${6144 * BYTES_PER_MIB}`, + }, + }, + ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(6144); + expect(isLowMemorySystem()).toBe(true); + }, + ); + }); + + it("uses the host total when cgroup v1 reports a no-limit sentinel", async () => { + await withSystemMemoryMocks( + { + files: { + [CGROUP_V1_MEMORY_LIMIT_PATH]: "9223372036854771712", + }, + }, + ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(32768); + expect(isLowMemorySystem()).toBe(false); + }, + ); + }); + + it("uses the host total when cgroup files are absent", async () => { + await withSystemMemoryMocks({}, ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(32768); + expect(isLowMemorySystem()).toBe(false); + }); + }); + + it("warns once and uses the host total when a cgroup file is unreadable", async () => { + const warn = vi.spyOn(console, "warn").mockImplementation(() => {}); + await withSystemMemoryMocks( + { + readErrors: { + [CGROUP_V2_MEMORY_MAX_PATH]: Object.assign(new Error("permission 
denied"), { + code: "EACCES", + }), + }, + }, + ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(32768); + expect(getSystemTotalMb()).toBe(32768); + expect(isLowMemorySystem()).toBe(false); + expect(warn).toHaveBeenCalledTimes(1); + expect(warn.mock.calls[0]?.[0]).toContain( + "[SystemMemory] Unable to read cgroup memory limit", + ); + expect(warn.mock.calls[0]?.[0]).toContain("EACCES"); + }, + ); + }); + + it("stays silent when cgroup files are absent", async () => { + const warn = vi.spyOn(console, "warn").mockImplementation(() => {}); + await withSystemMemoryMocks({}, ({ getSystemTotalMb }) => { + expect(getSystemTotalMb()).toBe(32768); + expect(getSystemTotalMb()).toBe(32768); + expect(warn).not.toHaveBeenCalled(); + }); + }); + + it.each(["", "garbage", "-1", "0"])( + "uses the host total for malformed cgroup v2 content %j", + async (content) => { + await withSystemMemoryMocks( + { + files: { + [CGROUP_V2_MEMORY_MAX_PATH]: content, + }, + }, + ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(32768); + expect(isLowMemorySystem()).toBe(false); + }, + ); + }, + ); + + it.each(["", "garbage", "-1", "0"])( + "uses the host total for malformed cgroup v1 content %j", + async (content) => { + await withSystemMemoryMocks( + { + files: { + [CGROUP_V1_MEMORY_LIMIT_PATH]: content, + }, + }, + ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(32768); + expect(isLowMemorySystem()).toBe(false); + }, + ); + }, + ); + + it("uses the host total when a cgroup limit is larger than host RAM", async () => { + await withSystemMemoryMocks( + { + files: { + [CGROUP_V2_MEMORY_MAX_PATH]: `${65536 * BYTES_PER_MIB}`, + }, + }, + ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(32768); + expect(isLowMemorySystem()).toBe(false); + }, + ); + }); + + it("does not read cgroup files on non-Linux platforms", async () => { + await withSystemMemoryMocks( + { + platform: 
"darwin", + throwOnFileRead: true, + }, + ({ getSystemTotalMb, isLowMemorySystem }) => { + expect(getSystemTotalMb()).toBe(32768); + expect(isLowMemorySystem()).toBe(false); + }, + ); + }); }); diff --git a/packages/engine/src/services/systemMemory.ts b/packages/engine/src/services/systemMemory.ts index 0757dc1a5..8ac4d2212 100644 --- a/packages/engine/src/services/systemMemory.ts +++ b/packages/engine/src/services/systemMemory.ts @@ -8,11 +8,140 @@ * it lives here once instead of being re-derived inline. */ +import { readFileSync } from "fs"; import { totalmem } from "os"; +const BYTES_PER_MIB = 1024 * 1024; +const BYTES_PER_MIB_BIGINT = BigInt(BYTES_PER_MIB); +// These are the paths as seen from INSIDE a container, where the runtime +// mounts the container's own cgroup at the namespace root — the case this +// probe exists for. They are deliberately not resolved via /proc/self/cgroup: +// on a bare host under systemd the process's real limit may live in a nested +// slice (e.g. /sys/fs/cgroup/user.slice/.../memory.max) that these root paths +// don't see, and that's acceptable — bare hosts are covered by total-RAM +// detection, and chasing nested slices adds fragility for no container gain. +const CGROUP_V2_MEMORY_MAX_PATH = "/sys/fs/cgroup/memory.max"; +const CGROUP_V1_MEMORY_LIMIT_PATH = "/sys/fs/cgroup/memory/memory.limit_in_bytes"; +// Kernel no-limit sentinel is page-rounded 2^63-1 (~9223372036854771712); >= 2^60 is implausible as a real limit. +const CGROUP_V1_NO_LIMIT_CUTOFF_BYTES = 2n ** 60n; + +let _cachedCgroupLimitMb: number | null | undefined; +let _warnedCgroupReadFailure = false; + +/** Parse cgroup v2/v1 memory limits from sysfs file contents into MiB. 
/**
 * Parse cgroup v2/v1 memory limits from sysfs file contents into MiB.
 *
 * v2 content takes precedence whenever it is non-null: if it is `"max"` or
 * malformed the result is `null` and `v1Content` is NOT consulted.
 *
 * @param v2Content - Raw text of the cgroup v2 `memory.max` file, or `null` if it was not read.
 * @param v1Content - Raw text of the cgroup v1 `memory.limit_in_bytes` file, or `null` if it was not read.
 * @returns The limit in whole MiB, or `null` when no usable limit is present.
 */
export function parseCgroupLimitMb(
  v2Content: string | null,
  v1Content: string | null,
): number | null {
  if (v2Content !== null) {
    return parseCgroupV2LimitMb(v2Content);
  }

  return parseCgroupV1LimitMb(v1Content);
}

/** Parse a cgroup v2 `memory.max` value; the literal `"max"` means "no limit". */
function parseCgroupV2LimitMb(content: string): number | null {
  const trimmed = content.trim();
  if (trimmed === "max") {
    return null;
  }

  return parsePositiveByteLimitMb(trimmed);
}

/**
 * Parse a cgroup v1 `memory.limit_in_bytes` value. Values at or above
 * `CGROUP_V1_NO_LIMIT_CUTOFF_BYTES` are treated as "no limit".
 */
function parseCgroupV1LimitMb(content: string | null): number | null {
  if (content === null) {
    return null;
  }

  return parsePositiveByteLimitMb(content.trim(), CGROUP_V1_NO_LIMIT_CUTOFF_BYTES);
}

/**
 * Convert a decimal byte count into whole MiB.
 *
 * @param content - Already-trimmed text; anything other than ASCII digits is rejected.
 * @param noLimitCutoffBytes - Optional ceiling; byte counts at or above it yield `null`.
 * @returns Whole MiB (truncated, so a limit below 1 MiB yields `0`), or `null`
 *   for non-numeric, zero, or above-cutoff input.
 */
function parsePositiveByteLimitMb(content: string, noLimitCutoffBytes?: bigint): number | null {
  if (!/^\d+$/.test(content)) {
    return null;
  }

  // BigInt keeps values such as the 2^63-range v1 sentinel exact; a plain
  // number would lose precision above 2^53.
  const bytes = BigInt(content);
  if (bytes <= 0n) {
    return null;
  }

  if (noLimitCutoffBytes !== undefined && bytes >= noLimitCutoffBytes) {
    return null;
  }

  return Number(bytes / BYTES_PER_MIB_BIGINT);
}

/** Test-only: reset the cached cgroup memory probe and the warn-once latch. */
export function _resetCgroupLimitCacheForTests(): void {
  _cachedCgroupLimitMb = undefined;
  _warnedCgroupReadFailure = false;
}

/**
 * Return the cgroup memory limit in MiB, probing sysfs at most once per
 * process (until the test reset hook clears the cache).
 *
 * @param hostTotalMb - Host RAM in MiB; used only to decide whether the
 *   detected limit is worth announcing.
 * @returns The detected limit, or `null` when none applies.
 */
function getCgroupLimitMb(hostTotalMb: number): number | null {
  if (_cachedCgroupLimitMb !== undefined) return _cachedCgroupLimitMb;

  const limitMb = probeCgroupLimitMb();
  _cachedCgroupLimitMb = limitMb;

  // Only announce the limit when it actually wins: getSystemTotalMb() takes
  // min(host, limit), so a limit at or above host RAM does not "govern"
  // anything and the message below would be false.
  // NOTE(review): console.info writes to stdout in Node, and this probe is
  // reachable from CLI telemetry — confirm no command that emits
  // machine-readable stdout can trigger it.
  if (limitMb !== null && limitMb < hostTotalMb) {
    console.info(
      `[SystemMemory] cgroup memory limit detected: ${limitMb} MiB — ` +
        `it governs memory-adaptive render behaviour instead of host RAM.`,
    );
  }
  return limitMb;
}

/** Read and parse the cgroup limit files; never touches sysfs off Linux. */
function probeCgroupLimitMb(): number | null {
  if (process.platform !== "linux") {
    return null;
  }

  const v2Content = readCgroupFile(CGROUP_V2_MEMORY_MAX_PATH);
  // v1 is only consulted when the v2 file could not be read at all.
  const v1Content = v2Content === null ? readCgroupFile(CGROUP_V1_MEMORY_LIMIT_PATH) : null;

  return parseCgroupLimitMb(v2Content, v1Content);
}

/**
 * Read a cgroup sysfs file as UTF-8 text.
 *
 * @returns The file contents, or `null` on any read error. Missing files
 *   (`ENOENT` / `ENOTDIR`) are silent; other errors trigger a one-time warning.
 */
function readCgroupFile(path: string): string | null {
  try {
    return readFileSync(path, "utf8");
  } catch (error) {
    const code = getErrorCode(error);
    if (code !== "ENOENT" && code !== "ENOTDIR") {
      warnCgroupReadFailure(path, error);
    }
    return null;
  }
}

/** Extract a string `code` property from an unknown thrown value, if it has one. */
function getErrorCode(error: unknown): string | undefined {
  if (typeof error !== "object" || error === null || !("code" in error)) {
    return undefined;
  }

  return typeof error.code === "string" ? error.code : undefined;
}

/** Render a thrown value as `CODE: message` (or just the message when it has no code). */
function formatCgroupReadError(error: unknown): string {
  const message = error instanceof Error ? error.message : String(error);
  const code = getErrorCode(error);
  return code ? `${code}: ${message}` : message;
}

/** Emit the "unable to read cgroup limit" warning at most once per cache lifetime. */
function warnCgroupReadFailure(path: string, error: unknown): void {
  if (_warnedCgroupReadFailure) return;
  _warnedCgroupReadFailure = true;
  console.warn(
    `[SystemMemory] Unable to read cgroup memory limit at ${path} ` +
      `(${formatCgroupReadError(error)}); falling back to host RAM.`,
  );
}

/**
 * Effective total memory in MiB: host RAM as reported by `os.totalmem()`,
 * capped by the cgroup memory limit when one was detected. A cgroup limit
 * larger than host RAM never raises the result.
 */
export function getSystemTotalMb(): number {
  const hostTotalMb = Math.floor(totalmem() / BYTES_PER_MIB);
  const cgroupLimitMb = getCgroupLimitMb(hostTotalMb);

  return cgroupLimitMb === null ? hostTotalMb : Math.min(hostTotalMb, cgroupLimitMb);
}
* - * Caveat: `os.totalmem()` reports the *host's* physical RAM, not a - * cgroup/container memory limit. A 4 GB container on a 32 GB host will not - * auto-flag as low-memory, and an 8 GB container on a 64 GB host won't - * either. Containerised and serverless callers (Docker `--docker` renders, - * Lambda) that want a specific profile should set `PRODUCER_LOW_MEMORY_MODE` - * explicitly rather than relying on auto-detection. Hosts whose *total* RAM - * is genuinely <= the threshold (laptops, small VMs, small Lambda tiers) are - * detected correctly regardless of container nesting. + * Caveat: Linux cgroup v1/v2 memory limits are consulted when readable, so + * Docker and serverless runtimes, including Lambda tiers with readable cgroup + * ceilings, inherit the tighter container limit instead of the host's physical + * RAM. Environments that hide cgroup files should set + * `PRODUCER_LOW_MEMORY_MODE` explicitly rather than relying on auto-detection. + * Hosts whose *effective* total RAM is genuinely <= the threshold (laptops, + * small VMs, small Lambda tiers, small containers) are detected correctly. */ export function isLowMemorySystem(totalMb: number = getSystemTotalMb()): boolean { return totalMb <= LOW_MEMORY_TOTAL_MB_THRESHOLD;