diff --git a/.changeset/easy-bananas-carry.md b/.changeset/easy-bananas-carry.md new file mode 100644 index 00000000..ea2dd84c --- /dev/null +++ b/.changeset/easy-bananas-carry.md @@ -0,0 +1,6 @@ +--- +"@nodesecure/tarball": major +"@nodesecure/scanner": minor +--- + +Implement new major JS-X-Ray API and completely refactor tarball package diff --git a/package-lock.json b/package-lock.json index 94df14e4..c3834f47 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11648,7 +11648,7 @@ "dependencies": { "@nodesecure/conformance": "^1.0.0", "@nodesecure/fs-walk": "^2.0.0", - "@nodesecure/js-x-ray": "^8.2.0", + "@nodesecure/js-x-ray": "^9.0.0", "@nodesecure/mama": "^1.5.0", "@nodesecure/npm-types": "^1.2.0", "@nodesecure/utils": "^2.3.0", @@ -11658,6 +11658,31 @@ "get-folder-size": "^5.0.0" } }, + "workspaces/tarball/node_modules/@nodesecure/js-x-ray": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@nodesecure/js-x-ray/-/js-x-ray-9.0.0.tgz", + "integrity": "sha512-yAE/b8BqcR5Lqw7ooIRVAgIckntbO9cwWuZkdb4nhqHu0c+EbMTHN03++TKtjhKW+12vIpcsDaC5EPN3jI+FGQ==", + "license": "MIT", + "workspaces": [ + "workspaces/estree-ast-utils", + "workspaces/sec-literal", + "workspaces/ts-source-parser" + ], + "dependencies": { + "@nodesecure/estree-ast-utils": "^1.5.0", + "@nodesecure/sec-literal": "^1.2.0", + "digraph-js": "^2.2.3", + "estree-walker": "^3.0.1", + "frequency-set": "^1.0.2", + "is-minified-code": "^2.0.0", + "meriyah": "^6.0.0", + "safe-regex": "^2.1.1", + "ts-pattern": "^5.0.6" + }, + "engines": { + "node": ">=20.0.0" + } + }, "workspaces/tree-walker": { "name": "@nodesecure/tree-walker", "version": "1.3.0", diff --git a/workspaces/scanner/src/class/TempDirectory.class.ts b/workspaces/scanner/src/class/TempDirectory.class.ts new file mode 100644 index 00000000..f54d8a3d --- /dev/null +++ b/workspaces/scanner/src/class/TempDirectory.class.ts @@ -0,0 +1,37 @@ +// Import Node.js Dependencies +import fs from "node:fs/promises"; +import path from 
"node:path"; +import os from "node:os"; + +export class TempDirectory { + location: string; + id: string; + + constructor( + location: string, + id: string + ) { + this.location = location; + this.id = id; + } + + static async create() { + const location = await fs.mkdtemp( + path.join(os.tmpdir(), "/") + ); + + return new TempDirectory( + location, + location.slice(-6) + ); + } + + async clear() { + await fs.rm( + this.location, + { recursive: true, force: true } + ); + + return this; + } +} diff --git a/workspaces/scanner/src/depWalker.ts b/workspaces/scanner/src/depWalker.ts index 6b27980b..eb3dabaa 100644 --- a/workspaces/scanner/src/depWalker.ts +++ b/workspaces/scanner/src/depWalker.ts @@ -1,15 +1,18 @@ // Import Node.js Dependencies import path from "node:path"; -import { readFileSync, promises as fs } from "node:fs"; +import { readFileSync } from "node:fs"; import timers from "node:timers/promises"; -import os from "node:os"; // Import Third-party Dependencies import { Mutex, MutexRelease } from "@openally/mutex"; -import { scanDirOrArchive, type ScanDirOrArchiveOptions } from "@nodesecure/tarball"; +import { + extractAndResolve, + scanDirOrArchive +} from "@nodesecure/tarball"; import * as Vulnera from "@nodesecure/vulnera"; import { npm } from "@nodesecure/tree-walker"; import { parseAuthor } from "@nodesecure/utils"; +import { ManifestManager } from "@nodesecure/mama"; import type { ManifestVersion, PackageJSON } from "@nodesecure/npm-types"; // Import Internal Dependencies @@ -20,6 +23,7 @@ import { getManifestLinks } from "./utils/index.js"; import { packageMetadata, manifestMetadata } from "./npmRegistry.js"; +import { TempDirectory } from "./class/TempDirectory.class.js"; import { Logger, ScannerLoggerEvents } from "./class/logger.class.js"; import type { Dependency, @@ -90,11 +94,10 @@ export async function depWalker( registry } = options; - // Create TMP directory - const tmpLocation = await fs.mkdtemp(path.join(os.tmpdir(), "/")); + const tempDir 
= await TempDirectory.create(); const payload: Partial = { - id: tmpLocation.slice(-6), + id: tempDir.id, rootDependencyName: manifest.name, scannerVersion: packageVersion, vulnerabilityStrategy, @@ -179,10 +182,12 @@ export async function depWalker( const scanDirOptions = { ref: dependency.versions[version] as any, location, - tmpLocation: scanRootNode && name === manifest.name ? null : tmpLocation, + isRootNode: scanRootNode && name === manifest.name, registry }; - operationsQueue.push(scanDirOrArchiveEx(name, version, locker, scanDirOptions)); + operationsQueue.push( + scanDirOrArchiveEx(name, version, locker, tempDir, scanDirOptions) + ); } logger.end(ScannerLoggerEvents.analysis.tree); @@ -279,7 +284,7 @@ export async function depWalker( } finally { await timers.setImmediate(); - await fs.rm(tmpLocation, { recursive: true, force: true }); + await tempDir.clear(); logger.emit(ScannerLoggerEvents.done); } @@ -290,12 +295,33 @@ async function scanDirOrArchiveEx( name: string, version: string, locker: Mutex, - options: ScanDirOrArchiveOptions + tempDir: TempDirectory, + options: { + registry?: string; + isRootNode: boolean; + location: string | undefined; + ref: any; + } ) { const free = await locker.acquire(); try { - await scanDirOrArchive(name, version, options); + const { + registry, + location = process.cwd(), + isRootNode, + ref + } = options; + + const mama = await (isRootNode ? 
+ ManifestManager.fromPackageJSON(location) : + extractAndResolve(tempDir.location, { + spec: `${name}@${version}`, + registry + }) + ); + + await scanDirOrArchive(mama, ref); } catch { // ignore diff --git a/workspaces/scanner/src/index.ts b/workspaces/scanner/src/index.ts index f10acac1..f25b0514 100644 --- a/workspaces/scanner/src/index.ts +++ b/workspaces/scanner/src/index.ts @@ -14,6 +14,7 @@ import type { PackageJSON } from "@nodesecure/npm-types"; import { depWalker } from "./depWalker.js"; import { NPM_TOKEN, urlToString } from "./utils/index.js"; import { Logger, ScannerLoggerEvents } from "./class/logger.class.js"; +import { TempDirectory } from "./class/TempDirectory.class.js"; import { comparePayloads } from "./comparePayloads.js"; import type { Options } from "./types.js"; @@ -87,23 +88,20 @@ export async function verify( return tarball.scanPackage(process.cwd()); } - const tmpLocation = await fs.mkdtemp( - path.join(os.tmpdir(), "nsecure-") - ); - const dest = path.join(tmpLocation, packageName); + const tempDir = await TempDirectory.create(); try { - await pacote.extract(packageName, dest, { - ...NPM_TOKEN, registry: getLocalRegistryURL(), cache: `${os.homedir()}/.npm` + const mama = await tarball.extractAndResolve(tempDir.location, { + spec: packageName, + registry: getLocalRegistryURL() }); - - const scanResult = await tarball.scanPackage(dest, packageName); + const scanResult = await tarball.scanPackage(mama); return scanResult; } finally { await timers.setImmediate(); - await fs.rm(tmpLocation, { recursive: true, force: true }); + await tempDir.clear(); } } diff --git a/workspaces/scanner/test/depWalker.spec.ts b/workspaces/scanner/test/depWalker.spec.ts index d95a8236..a99cbe18 100644 --- a/workspaces/scanner/test/depWalker.spec.ts +++ b/workspaces/scanner/test/depWalker.spec.ts @@ -229,7 +229,10 @@ describe("scanner.cwd()", () => { ); const pkg = dependencies["random-package"]; - assert.strictEqual(pkg.metadata.author, null); + 
assert.deepEqual(pkg.metadata.author, { + email: "john.doe@gmail.com", + name: "John Doe" + }); }); }); diff --git a/workspaces/tarball/README.md b/workspaces/tarball/README.md index b01bd718..bbf8f9a5 100644 --- a/workspaces/tarball/README.md +++ b/workspaces/tarball/README.md @@ -35,23 +35,21 @@ console.log(scanResult); ## API -### scanDirOrArchive +- [SourceCode](./docs/SourceCode.md) +- [NpmTarball](./docs/NpmTarball.md) -Method created for Scanner (to be refactored soon) +--- -```ts -export interface ScanDirOrArchiveOptions { - ref: DependencyRef; - location?: string; - tmpLocation?: null | string; - locker: Locker; - registry: string; -} -``` +> [!CAUTION] +> The following APIs are considered legacy and are pending deprecation in a future release. + +### scanDirOrArchive(locationOrManifest: string | ManifestManager, ref: DependencyRef): Promise< void > -### scanPackage(dest: string, packageName?: string): Promise< ScannedPackageResult > +Scan a given local project or tarball (by providing either its path or a ManifestManager instance). -Scan a given tarball archive or a local project. +### scanPackage(manifestOrLocation: string | ManifestManager): Promise< ScannedPackageResult > + +Scan a given local project containing a Manifest (package.json). ```ts interface ScannedPackageResult { @@ -68,7 +66,7 @@ interface ScannedPackageResult { /** Unique license contained in the tarball (MIT, ISC ..) */ uniqueLicenseIds: string[]; /** All licenses with their SPDX */ - licenses: ntlp.SpdxLicenseConformance[]; + licenses: conformance.SpdxFileLicenseConformance[]; ast: { dependencies: Record<string, Record<string, Dependency>>; warnings: Warning[]; @@ -76,5 +74,9 @@ interface ScannedPackageResult { } ``` +### extractAndResolve(location: string, options: TarballResolutionOptions): Promise< ManifestManager > + +Extract a given remote package (`options.spec`) into a sub-directory of `location` and return the ManifestManager resolved from the extracted package.json. 
+ ## License MIT diff --git a/workspaces/tarball/docs/NpmTarball.md b/workspaces/tarball/docs/NpmTarball.md new file mode 100644 index 00000000..4bcbd592 --- /dev/null +++ b/workspaces/tarball/docs/NpmTarball.md @@ -0,0 +1,40 @@ +# NpmTarball + +## Usage example + +```ts +import { ManifestManager } from "@nodesecure/mama"; +import { NpmTarball } from "@nodesecure/tarball"; + +const mama = await ManifestManager.fromPackageJSON( + location +); +const extractor = new NpmTarball(mama); + +const { + composition, + conformance, + code +} = await extractor.scanFiles(); +``` + +## API + +### constructor(manifest: ManifestManager) + +Create a new NpmTarball instance. + +> [!CAUTION] +> ManifestManager instance must have a location defined + +### scanFiles(): Promise< ScannedFilesResult > + +Scan all the files contained in the tarball and obtain a complete report, including detection of JavaScript threats. + +```ts +interface ScannedFilesResult { + composition: TarballComposition; + conformance: SpdxExtractedResult; + code: SourceCodeReport; +} +``` diff --git a/workspaces/tarball/docs/SourceCode.md b/workspaces/tarball/docs/SourceCode.md new file mode 100644 index 00000000..fed2ceb8 --- /dev/null +++ b/workspaces/tarball/docs/SourceCode.md @@ -0,0 +1,123 @@ +# SourceCode APIs + +The **SourceCode** APIs are designed to extract, collect, and analyze information from JavaScript source files—either from a **package.json** manifest or a tarball archive. These utilities are built on top of `@nodesecure/js-x-ray` and `@nodesecure/mama` to provide a unified interface for static analysis. + +## SourceCodeScanner + +The **SourceCodeScanner** is responsible for orchestrating the analysis of JavaScript files and aggregating results into a report. 
+ +```ts +const mama = await ManifestManager.fromPackageJSON( + location +); + +const scanner = new SourceCodeScanner(mama); + +const report = await scanner.iterate({ + manifest: ["./index.js"], + javascript: ["./index.js", "./test/foobar.js"] +}); +console.log(report); +``` + +### Method: `iterate(entries: SourceCodeEntries): Promise<T>` + +The `iterate` method accepts an object of type `SourceCodeEntries`, which defines the source files to analyze: + +```ts +export interface SourceCodeEntries { + /** + * Source files declared in package.json (e.g. "main", "exports", etc.) + */ + manifest: string[]; + + /** + * All JavaScript source files extracted from the package tarball + */ + javascript: string[]; +} +``` + +* If `manifest` is non-empty, the scanner will prioritize those files. +* If `manifest` is empty, it will fall back to analyzing all JavaScript files. + +### Constructor + +```ts +new SourceCodeScanner(manifest: LocatedManifestManager, options?: SourceCodeScannerOptions<T>) +``` + +Optional `options.reportInitiator` allows you to customize the report type (default is `SourceCodeReport`). + +--- + +## SourceCodeAggregator + +This interface defines the minimal structure required for an object to act as a source code report. + +```ts +export interface SourceCodeAggregator { + readonly consumed: boolean; + + push(report: ReportOnFile & { file: string; }): void; +} +``` + +- `consumed`: A flag indicating whether at least one report was successfully pushed. +- `push(report)`: Method used by the scanner to add a parsed file report to the aggregator. 
+ +You can provide your own implementation of `SourceCodeAggregator` by passing a custom `reportInitiator` to the scanner: + +```ts +class MyCustomAggregator implements SourceCodeAggregator { + // Code here +} + +new SourceCodeScanner(mama, { + reportInitiator() { + return new MyCustomAggregator(); + } +}); +``` + +## SourceCodeReport class + +Default implementation of `SourceCodeAggregator`, returned when **no custom report is provided**. + +### Properties + +* `warnings: Warning[]` + List of warnings from all analyzed files, each enriched with the file name. + +* `dependencies: Record<string, Record<string, Dependency>>` + Map of dependencies found in each file, organized by file name. + +* `minified: string[]` + List of files detected as minified. + +* `flags: { hasExternalCapacity: boolean }` + Indicates whether any file used external capabilities (like `fetch`). + +* `consumed: boolean` + Whether any file was successfully analyzed and added to the report. + +### Method: `groupAndAnalyseDependencies(mama: ManifestManager): {...}` + +Groups and analyzes the collected dependencies to identify various dependency types: + +```ts +{ + files: Set<string>, + dependenciesInTryBlock: string[], + dependencies: { + nodejs: string[], + subpathImports: string[], + thirdparty: string[], + missing: string[], + unused: string[] + }, + flags: { + // Additional flags such as presence of deep imports, optional deps, etc. 
+ } +} +``` diff --git a/workspaces/tarball/package.json b/workspaces/tarball/package.json index 9a8dcb69..d5ec25f9 100644 --- a/workspaces/tarball/package.json +++ b/workspaces/tarball/package.json @@ -6,7 +6,7 @@ "exports": "./dist/index.js", "types": "./dist/index.d.ts", "scripts": { - "build": "tsc -b", + "build": "tsc", "prepublishOnly": "npm run build", "test-only": "tsx --test ./test/**/*.spec.ts", "test": "c8 -r html npm run test-only" @@ -31,7 +31,7 @@ "dependencies": { "@nodesecure/conformance": "^1.0.0", "@nodesecure/fs-walk": "^2.0.0", - "@nodesecure/js-x-ray": "^8.2.0", + "@nodesecure/js-x-ray": "^9.0.0", "@nodesecure/mama": "^1.5.0", "@nodesecure/npm-types": "^1.2.0", "@nodesecure/utils": "^2.3.0", diff --git a/workspaces/tarball/src/class/NpmTarball.class.ts b/workspaces/tarball/src/class/NpmTarball.class.ts new file mode 100644 index 00000000..7039cba5 --- /dev/null +++ b/workspaces/tarball/src/class/NpmTarball.class.ts @@ -0,0 +1,75 @@ +// Import Node.js Dependencies +import path from "node:path"; + +// Import Third-party Dependencies +import * as conformance from "@nodesecure/conformance"; +import { + ManifestManager, + type LocatedManifestManager +} from "@nodesecure/mama"; + +// Import Internal Dependencies +import { + SourceCodeReport, + SourceCodeScanner +} from "./SourceCodeScanner.class.js"; +import { + getTarballComposition, + type TarballComposition +} from "../utils/index.js"; + +export interface ScannedFilesResult { + composition: TarballComposition; + conformance: conformance.SpdxExtractedResult; + code: SourceCodeReport; +} + +export class NpmTarball { + static JS_EXTENSIONS = new Set([".js", ".mjs", ".cjs"]); + + manifest: LocatedManifestManager; + + constructor( + mama: ManifestManager + ) { + if (!ManifestManager.isLocated(mama)) { + throw new Error("ManifestManager must have a location"); + } + + this.manifest = mama; + } + + async scanFiles(): Promise { + const location = this.manifest.location; + const [ + composition, + spdx + ] 
= await Promise.all([ + getTarballComposition(location), + conformance.extractLicenses(location) + ]); + + const code = await new SourceCodeScanner(this.manifest).iterate({ + manifest: [...this.manifest.getEntryFiles()] + .flatMap(filterJavaScriptFiles()), + javascript: composition.files + .flatMap(filterJavaScriptFiles()) + }); + + return { + conformance: spdx, + composition, + code + }; + } +} + +function filterJavaScriptFiles() { + return (file: string) => { + if (NpmTarball.JS_EXTENSIONS.has(path.extname(file))) { + return file; + } + + return []; + }; +} diff --git a/workspaces/tarball/src/class/SourceCodeScanner.class.ts b/workspaces/tarball/src/class/SourceCodeScanner.class.ts new file mode 100644 index 00000000..85a8df52 --- /dev/null +++ b/workspaces/tarball/src/class/SourceCodeScanner.class.ts @@ -0,0 +1,220 @@ +// Import Node.js Dependencies +import path from "node:path"; + +// Import Third-party Dependencies +import { + EntryFilesAnalyser, + AstAnalyser, + type Warning, + type Dependency, + type ReportOnFile +} from "@nodesecure/js-x-ray"; +import { + ManifestManager, + type LocatedManifestManager +} from "@nodesecure/mama"; + +// Import Internal Dependencies +import { + filterDependencyKind, + analyzeDependencies +} from "../utils/index.js"; + +export interface SourceCodeAggregator { + readonly consumed: boolean; + + push(report: ReportOnFile & { file: string; }): void; +} + +export interface SourceCodeEntries { + /** + * Source files from package.json + */ + manifest: string[]; + /** + * All JavaScript source files from tarball + */ + javascript: string[]; +} + +export class SourceCodeReport implements SourceCodeAggregator { + #isConsumed = false; + + warnings: Warning[] = []; + dependencies: Record< + string, + Record + > = Object.create(null); + minified: string[] = []; + flags = { + hasExternalCapacity: false + }; + + get consumed() { + return this.#isConsumed; + } + + push( + report: ReportOnFile & { file: string; } + ) { + this.#isConsumed = 
true; + this.warnings.push( + ...report.warnings.map((warning) => { + return { ...warning, file: report.file }; + }) + ); + + if (report.ok) { + if (report.flags.has("fetch")) { + this.flags.hasExternalCapacity = true; + } + this.dependencies[report.file] = Object.fromEntries( + report.dependencies + ); + report.flags.has("is-minified") && this.minified.push(report.file); + } + } + + groupAndAnalyseDependencies( + mama: ManifestManager + ) { + const files = new Set(); + const dependencies = new Set(); + const dependenciesInTryBlock = new Set(); + + for (const [file, fileDeps] of Object.entries(this.dependencies)) { + const filtered = filterDependencyKind( + [...Object.keys(fileDeps)], + path.dirname(file) + ); + + [...Object.entries(fileDeps)] + .flatMap(([name, dependency]) => (dependency.inTry ? [name] : [])) + .forEach((name) => dependenciesInTryBlock.add(name)); + + filtered.packages.forEach((name) => dependencies.add(name)); + filtered.files.forEach((file) => files.add(file)); + } + + const { + nodeDependencies, + thirdPartyDependencies, + subpathImportsDependencies, + missingDependencies, + unusedDependencies, + flags + } = analyzeDependencies( + [...dependencies], + { mama, tryDependencies: dependenciesInTryBlock } + ); + + return { + files, + dependenciesInTryBlock: [...dependenciesInTryBlock], + dependencies: { + nodejs: nodeDependencies, + subpathImports: subpathImportsDependencies, + thirdparty: thirdPartyDependencies, + missing: missingDependencies, + unused: unusedDependencies + }, + flags + }; + } +} + +export interface SourceCodeScannerOptions { + reportInitiator?: () => T; +} + +export class SourceCodeScanner< + T extends SourceCodeAggregator = SourceCodeReport +> { + #astAnalyser = new AstAnalyser(); + #initNewReport: () => T; + + manifest: LocatedManifestManager; + + constructor( + manifest: LocatedManifestManager, + options: SourceCodeScannerOptions = {} + ) { + const { + reportInitiator = () => new SourceCodeReport() + } = options; + + 
this.manifest = manifest; + this.#initNewReport = reportInitiator as () => T; + } + + async iterate( + entries: SourceCodeEntries + ): Promise { + if ( + entries.manifest.length === 0 && + entries.javascript.length === 0 + ) { + throw new Error("You must provide at least one file either in manifest or javascript"); + } + + return entries.manifest.length > 0 ? + this.#iterateWithEntries(entries) : + this.#iterateAll(entries.javascript); + } + + async #iterateWithEntries( + entries: SourceCodeEntries + ): Promise { + const report = this.#initNewReport(); + const { location } = this.manifest; + + const efa = new EntryFilesAnalyser({ + astAnalyzer: this.#astAnalyser, + rootPath: location, + ignoreENOENT: true + }); + + const absoluteEntryFiles = entries.manifest.map( + (filePath) => path.join(location, filePath) + ); + + for await (const fileReport of efa.analyse(absoluteEntryFiles)) { + report.push(fileReport); + } + + return report.consumed ? + report : + this.#iterateAll(entries.javascript); + } + + async #iterateAll( + sourceFiles: string[] + ): Promise { + if (sourceFiles.length === 0) { + throw new Error("You must provide at least one javascript source file"); + } + + const { + location, + document: { name: packageName, type } + } = this.manifest; + const report = this.#initNewReport(); + + await Promise.allSettled( + sourceFiles.map(async(relativeFile) => { + const filePath = path.join(location, relativeFile); + const fileReport = await this.#astAnalyser.analyseFile( + filePath, + { + packageName, + module: type === "module" + } + ); + + report.push({ ...fileReport, file: relativeFile }); + }) + ); + + return report; + } +} diff --git a/workspaces/tarball/src/index.ts b/workspaces/tarball/src/index.ts index fc40d2a4..1b0477e5 100644 --- a/workspaces/tarball/src/index.ts +++ b/workspaces/tarball/src/index.ts @@ -1 +1,2 @@ export * from "./tarball.js"; +export * from "./class/NpmTarball.class.js"; diff --git a/workspaces/tarball/src/sast/file.ts 
b/workspaces/tarball/src/sast/file.ts deleted file mode 100644 index 0ddefd21..00000000 --- a/workspaces/tarball/src/sast/file.ts +++ /dev/null @@ -1,93 +0,0 @@ -// Import Node.js Dependencies -import path from "node:path"; - -// Import Third-party Dependencies -import { - AstAnalyser, - type WarningName, - type WarningDefault -} from "@nodesecure/js-x-ray"; - -// Import Internal Dependencies -import { - filterDependencyKind -} from "../utils/index.js"; - -// CONSTANTS -const kJsExtname = new Set([".js", ".mjs", ".cjs"]); - -export interface ScanFileReport { - file: string; - warnings: (Omit, "value"> & { file: string; })[]; - isMinified: boolean; - tryDependencies: string[]; - dependencies: string[]; - filesDependencies: string[]; - filesFlags: { - hasExternalCapacity: boolean; - }; -} - -export async function scanFile( - destination: string, - file: string, - packageName: string -): Promise { - const result = await new AstAnalyser().analyseFile( - path.join(destination, file), - { - packageName - } - ); - - const warnings = result.warnings.map((curr) => Object.assign({}, curr, { file })); - if (result.ok) { - const { packages, files } = filterDependencyKind( - [...result.dependencies.keys()], - path.dirname(file) - ); - - const tryDependencies = [...result.dependencies.entries()] - .flatMap(([name, dependency]) => (dependency.inTry ? 
[name] : [])); - - return { - file, - warnings, - isMinified: result.isMinified, - tryDependencies, - dependencies: packages, - filesDependencies: files, - filesFlags: { - hasExternalCapacity: result.flags.has("fetch") - } - }; - } - - return { - file, - warnings, - isMinified: false, - tryDependencies: [], - dependencies: [], - filesDependencies: [], - filesFlags: { - hasExternalCapacity: false - } - }; -} - -export async function scanManyFiles( - files: string[], - destination: string, - packageName: string -): Promise { - const scannedFiles = await Promise.allSettled( - files - .filter((fileName) => kJsExtname.has(path.extname(fileName))) - .map((file) => scanFile(destination, file, packageName)) - ); - - return scannedFiles - .filter((result) => result.status === "fulfilled") - .map((result) => result.value); -} diff --git a/workspaces/tarball/src/sast/index.ts b/workspaces/tarball/src/sast/index.ts deleted file mode 100644 index 4864586c..00000000 --- a/workspaces/tarball/src/sast/index.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "./file.js"; diff --git a/workspaces/tarball/src/tarball.ts b/workspaces/tarball/src/tarball.ts index c7901ded..b0620c13 100644 --- a/workspaces/tarball/src/tarball.ts +++ b/workspaces/tarball/src/tarball.ts @@ -4,26 +4,23 @@ import os from "node:os"; // Import Third-party Dependencies import { - AstAnalyser, type Warning, type Dependency } from "@nodesecure/js-x-ray"; -import pacote from "pacote"; import * as conformance from "@nodesecure/conformance"; import { ManifestManager, type PackageModuleType } from "@nodesecure/mama"; +import pacote from "pacote"; // Import Internal Dependencies import { - getTarballComposition, isSensitiveFile, - analyzeDependencies, booleanToFlags } from "./utils/index.js"; +import { NpmTarball } from "./class/NpmTarball.class.js"; import * as warnings from "./warnings.js"; -import * as sast from "./sast/index.js"; export interface DependencyRef { id: number; @@ -57,53 +54,25 @@ export interface 
DependencyRef { } // CONSTANTS -const NPM_TOKEN = typeof process.env.NODE_SECURE_TOKEN === "string" ? +const kNativeCodeExtensions = new Set([".gyp", ".c", ".cpp", ".node", ".so", ".h"]); +const kNpmToken = typeof process.env.NODE_SECURE_TOKEN === "string" ? { token: process.env.NODE_SECURE_TOKEN } : {}; -const kNativeCodeExtensions = new Set([".gyp", ".c", ".cpp", ".node", ".so", ".h"]); -const kJsExtname = new Set([".js", ".mjs", ".cjs"]); - -export interface ScanDirOrArchiveOptions { - ref: DependencyRef; - location?: string; - tmpLocation?: null | string; - registry: string; -} - export async function scanDirOrArchive( - name: string, - version: string, - options: ScanDirOrArchiveOptions -) { - const { ref, location = process.cwd(), tmpLocation = null, registry } = options; - - const isNpmTarball = !(tmpLocation === null); - const dest = isNpmTarball ? path.join(tmpLocation, `${name}@${version}`) : location; - - // If this is an NPM tarball then we extract it on the disk with pacote. - if (isNpmTarball) { - await pacote.extract( - ref.flags.includes("isGit") ? ref.gitUrl! : `${name}@${version}`, - dest, - { - ...NPM_TOKEN, - registry, - cache: `${os.homedir()}/.npm` - } - ); - } + locationOrManifest: string | ManifestManager, + ref: DependencyRef +): Promise { + const mama = await ManifestManager.fromPackageJSON( + locationOrManifest + ); + const tarex = new NpmTarball(mama); - // Read the package.json at the root of the directory or archive. - const [ - mama, + const { composition, - spdx - ] = await Promise.all([ - ManifestManager.fromPackageJSON(dest), - getTarballComposition(dest), - conformance.extractLicenses(dest) - ]); + conformance, + code + } = await tarex.scanFiles(); { const { description, engines, repository, scripts } = mama.document; @@ -113,59 +82,45 @@ export async function scanDirOrArchive( integrity: mama.isWorkspace ? 
null : mama.integrity }); } - ref.licenses = spdx.licenses; - ref.uniqueLicenseIds = spdx.uniqueLicenseIds; - // Get the composition of the (extracted) directory - if (composition.files.length === 1 && composition.files.includes("package.json")) { + if ( + composition.files.length === 1 && + composition.files.includes("package.json") + ) { ref.warnings.push(warnings.getEmptyPackageWarning()); } - // Search for minified and runtime dependencies - // Run a JS-X-Ray analysis on each JavaScript files of the project! - const scannedFiles = await sast.scanManyFiles(composition.files, dest, name); - - ref.warnings.push(...scannedFiles.flatMap((row) => row.warnings)); - if (/^0(\.\d+)*$/.test(version)) { - ref.warnings.push(warnings.getSemVerWarning(version)); + if (mama.hasZeroSemver) { + ref.warnings.push(warnings.getSemVerWarning(mama.document.version!)); } - - const dependencies = [...new Set(scannedFiles.flatMap((row) => row.dependencies))]; - const filesDependencies = [...new Set(scannedFiles.flatMap((row) => row.filesDependencies))]; - const tryDependencies = new Set(scannedFiles.flatMap((row) => row.tryDependencies)); - const minifiedFiles = scannedFiles.filter((row) => row.isMinified).flatMap((row) => row.file); - const hasExternalCapacity = scannedFiles.some((row) => row.filesFlags.hasExternalCapacity); + ref.warnings.push(...code.warnings); const { - nodeDependencies, - thirdPartyDependencies, - subpathImportsDependencies, - missingDependencies, - unusedDependencies, - flags - } = analyzeDependencies( + files, dependencies, - { mama, tryDependencies } - ); + flags + } = code.groupAndAnalyseDependencies(mama); + ref.licenses = conformance.licenses; + ref.uniqueLicenseIds = conformance.uniqueLicenseIds; ref.type = mama.moduleType; ref.size = composition.size; ref.composition.extensions.push(...composition.ext); ref.composition.files.push(...composition.files); - ref.composition.required_thirdparty = thirdPartyDependencies; - ref.composition.required_subpath = 
subpathImportsDependencies; - ref.composition.unused.push(...unusedDependencies); - ref.composition.missing.push(...missingDependencies); - ref.composition.required_files = filesDependencies; - ref.composition.required_nodejs = nodeDependencies; - ref.composition.minified = minifiedFiles; + ref.composition.required_thirdparty = dependencies.thirdparty; + ref.composition.required_subpath = dependencies.subpathImports; + ref.composition.unused.push(...dependencies.unused); + ref.composition.missing.push(...dependencies.missing); + ref.composition.required_files = [...files]; + ref.composition.required_nodejs = dependencies.nodejs; + ref.composition.minified = code.minified; ref.flags.push(...booleanToFlags({ ...flags, - hasExternalCapacity: hasExternalCapacity || flags.hasExternalCapacity, - hasNoLicense: spdx.uniqueLicenseIds.length === 0, - hasMultipleLicenses: spdx.uniqueLicenseIds.length > 1, - hasMinifiedCode: minifiedFiles.length > 0, + hasExternalCapacity: code.flags.hasExternalCapacity || flags.hasExternalCapacity, + hasNoLicense: conformance.uniqueLicenseIds.length === 0, + hasMultipleLicenses: conformance.uniqueLicenseIds.length > 1, + hasMinifiedCode: code.minified.length > 0, hasWarnings: ref.warnings.length > 0 && !ref.flags.includes("hasWarnings"), hasBannedFile: composition.files.some((path) => isSensitiveFile(path)), hasNativeCode: mama.flags.isNative || @@ -196,56 +151,58 @@ export interface ScannedPackageResult { } export async function scanPackage( - dest: string, - packageName?: string + manifestOrLocation: string | ManifestManager ): Promise { - const [ - mama, + const mama = await ManifestManager.fromPackageJSON( + manifestOrLocation + ); + const extractor = new NpmTarball(mama); + + const { composition, - spdx - ] = await Promise.all([ - ManifestManager.fromPackageJSON(dest), - getTarballComposition(dest), - conformance.extractLicenses(dest) - ]); - const { type = "script" } = mama.document; - - // Search for runtime dependencies - const 
dependencies: Record> = Object.create(null); - const minified: string[] = []; - const warnings: Warning[] = []; - - const JSFiles = composition.files - .filter((name) => kJsExtname.has(path.extname(name))); - for (const file of JSFiles) { - const result = await new AstAnalyser().analyseFile( - path.join(dest, file), - { - packageName: packageName ?? mama.document.name, - module: type === "module" - } - ); - - warnings.push( - ...result.warnings.map((curr) => Object.assign({}, curr, { file })) - ); - if (result.ok) { - dependencies[file] = Object.fromEntries(result.dependencies); - if (result.isMinified) { - minified.push(file); - } - } - } + conformance, + code + } = await extractor.scanFiles(); return { files: { list: composition.files, extensions: [...composition.ext], - minified + minified: code.minified }, directorySize: composition.size, - uniqueLicenseIds: spdx.uniqueLicenseIds, - licenses: spdx.licenses, - ast: { dependencies, warnings } + uniqueLicenseIds: conformance.uniqueLicenseIds, + licenses: conformance.licenses, + ast: { + dependencies: code.dependencies, + warnings: code.warnings + } }; } + +export interface TarballResolutionOptions { + spec: string; + registry?: string; +} + +export async function extractAndResolve( + location: string, + options: TarballResolutionOptions +): Promise { + const { spec, registry } = options; + + const tarballLocation = path.join(location, spec.replaceAll("/", "_")); + await pacote.extract( + spec, + tarballLocation, + { + ...kNpmToken, + registry, + cache: `${os.homedir()}/.npm` + } + ); + + return ManifestManager.fromPackageJSON( + tarballLocation + ); +} diff --git a/workspaces/tarball/test/SourceCodeReport.spec.ts b/workspaces/tarball/test/SourceCodeReport.spec.ts new file mode 100644 index 00000000..ab5c6543 --- /dev/null +++ b/workspaces/tarball/test/SourceCodeReport.spec.ts @@ -0,0 +1,155 @@ +// Import Node.js Dependencies +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { test } 
from "node:test"; +import assert from "node:assert"; + +// Import Internal Dependencies +import { SourceCodeScanner } from "../src/class/SourceCodeScanner.class.js"; + +// CONSTANTS +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const kFixturePath = path.join(__dirname, "fixtures", "scanJavascriptFile"); + +test("should detect all required dependencies (node, files, third-party)", async() => { + const thirdPartyDependencies = ["mocha", "yolo"]; + const mama = createFakeManifestManager(thirdPartyDependencies); + const scanner = new SourceCodeScanner(mama); + + const report = await scanner.iterate({ + manifest: [], + javascript: ["one.js"] + }); + assert.strictEqual(report.warnings.length, 0); + assert.strictEqual(report.minified.length, 0); + + const { files, dependencies, flags } = report.groupAndAnalyseDependencies(mama); + + assert.deepEqual( + normalize(files), + normalize([ + "src\\foo.js", + "home\\marco.js" + ]) + ); + assert.deepEqual(dependencies, { + nodejs: ["http"], + subpathImports: {}, + thirdparty: thirdPartyDependencies, + missing: [], + unused: [] + }); + assert.deepEqual(flags, { + hasExternalCapacity: true, + hasMissingOrUnusedDependency: false + }); +}); + +test("should detect and report Node.js dependencies and tag file as minified", async() => { + const mama = createFakeManifestManager(); + const scanner = new SourceCodeScanner(mama); + + const report = await scanner.iterate({ + manifest: [], + javascript: ["two.min.js"] + }); + assert.strictEqual(report.warnings.length, 0); + assert.strictEqual(report.minified.length, 1); + + const { + dependencies, + dependenciesInTryBlock, + flags + } = report.groupAndAnalyseDependencies(mama); + + assert.deepEqual(dependencies.nodejs, ["http", "fs"]); + assert.deepEqual(dependenciesInTryBlock, ["http"]); + assert.deepEqual(report.minified, ["two.min.js"]); + assert.ok(flags.hasExternalCapacity); +}); + +test("should report one required file and no minified file (because one-line 
requirement stmt)", async() => { + const mama = createFakeManifestManager(); + const scanner = new SourceCodeScanner(mama); + + const report = await scanner.iterate({ + manifest: [], + javascript: ["onelineStmt.min.js"] + }); + assert.strictEqual(report.warnings.length, 0); + assert.strictEqual(report.minified.length, 0); + + const { + files, + dependencies, + flags + } = report.groupAndAnalyseDependencies(mama); + + assert.deepEqual([...files], ["foobar.js"]); + assert.deepEqual(dependencies, { + nodejs: [], + subpathImports: {}, + thirdparty: [], + missing: [], + unused: [] + }); + assert.deepEqual(flags, { + hasExternalCapacity: false, + hasMissingOrUnusedDependency: false + }); +}); + +test("should catch the invalid syntax and report a ParsingError warning", async() => { + const mama = createFakeManifestManager(); + const scanner = new SourceCodeScanner(mama); + + const report = await scanner.iterate({ + manifest: [], + javascript: ["parsingError.js"] + }); + assert.strictEqual(report.warnings.length, 1); + assert.strictEqual(report.minified.length, 0); + + assert.deepEqual(report.warnings, [ + { + kind: "parsing-error", + value: "[1:4-1:5]: Unexpected token: ';'", + location: [[0, 0], [0, 0]], + file: "parsingError.js" + } + ]); +}); + +test("should detect the usage of global fetch and update hasExternalCapacity flag to true", async() => { + const mama = createFakeManifestManager(); + const scanner = new SourceCodeScanner(mama); + + const report = await scanner.iterate({ + manifest: [], + javascript: ["fetch.js"] + }); + assert.strictEqual(report.warnings.length, 0); + assert.strictEqual(report.minified.length, 0); + assert.ok(report.flags.hasExternalCapacity); +}); + +function normalize(values: Iterable): string[] { + return Array.from(values) + .map((value) => path.normalize(value)) + .sort(); +} + +function createFakeManifestManager( + dependencies: string[] = [], + devDependencies: string[] = [] +): any { + return { + location: kFixturePath, + 
dependencies, + devDependencies, + document: { + name: "fake-package", + type: "module" + } + }; +} diff --git a/workspaces/tarball/test/SourceCodeScanner.spec.ts b/workspaces/tarball/test/SourceCodeScanner.spec.ts new file mode 100644 index 00000000..09ceeb80 --- /dev/null +++ b/workspaces/tarball/test/SourceCodeScanner.spec.ts @@ -0,0 +1,137 @@ +// Import Node.js Dependencies +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import assert from "node:assert"; +import { describe, test } from "node:test"; + +// Import Third-party Dependencies +import { + ManifestManager +} from "@nodesecure/mama"; +import type { ReportOnFile } from "@nodesecure/js-x-ray"; + +// Import Internal Dependencies +import { + SourceCodeScanner, + type SourceCodeAggregator +} from "../src/class/SourceCodeScanner.class.js"; + +// CONSTANTS +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const kFixturePath = path.join(__dirname, "fixtures", "scanPackage"); + +describe("SourceCodeScanner", () => { + test("iterate() should throw if we provide no files", async() => { + const mama = loadFixtureManifest("entryfiles"); + const scanner = new SourceCodeScanner(mama); + + await assert.rejects( + () => scanner.iterate({ manifest: [], javascript: [] }), + { message: "You must provide at least one file either in manifest or javascript" } + ); + }); + + test("iterate() should throw if we provide a manifest that doesn't exist and zero JavaScript files", async() => { + const mama = loadFixtureManifest("entryfiles"); + const scanner = new SourceCodeScanner(mama); + + await assert.rejects( + () => scanner.iterate({ + manifest: [ + "src/bar.js" + ], + javascript: [] + }), + { message: "You must provide at least one javascript source file" } + ); + }); + + test("iterate() should properly trace and report required files using one manifest entry file", async() => { + const mama = loadFixtureManifest("entryfiles"); + const aggregator = createAggregator(); + + const scanner 
= new SourceCodeScanner(mama, { + reportInitiator: () => aggregator + }); + await scanner.iterate({ + manifest: [ + "src/index.js" + ], + javascript: [] + }); + + const { reports } = aggregator; + + const files = reports + .map((report) => path.normalize(report.file)) + .sort(); + + assert.deepEqual( + files, + [ + "src\\index.js", + "src\\foo.js" + ].sort() + ); + }); + + test("iterate() should trace and report only provided JavaScript files", async() => { + const mama = loadFixtureManifest("caseone"); + const aggregator = createAggregator(); + + const scanner = new SourceCodeScanner(mama, { + reportInitiator: () => aggregator + }); + await scanner.iterate({ + manifest: [], + javascript: [ + "index.js", + "src/deps.js" + ] + }); + + const { reports } = aggregator; + + const files = reports + .map((report) => path.normalize(report.file)) + .sort(); + + assert.deepEqual( + files, + [ + "index.js", + "src\\deps.js" + ].sort() + ); + }); +}); + +function loadFixtureManifest( + location: string +) { + const mama = ManifestManager.fromPackageJSONSync( + path.join(kFixturePath, location) + ); + if (!ManifestManager.isLocated(mama)) { + throw new Error("manifest must be located"); + } + + return mama; +} + +type CustomAggregator = SourceCodeAggregator & { + reports: (ReportOnFile & { file: string; })[]; +}; + +function createAggregator( + consumed = true +): CustomAggregator { + return { + reports: [], + + consumed, + push(report) { + this.reports.push(report); + } + }; +} diff --git a/workspaces/tarball/test/fixtures/scanJavascriptFile/fetch.js b/workspaces/tarball/test/fixtures/scanJavascriptFile/fetch.js index d7372cb8..b1729455 100644 --- a/workspaces/tarball/test/fixtures/scanJavascriptFile/fetch.js +++ b/workspaces/tarball/test/fixtures/scanJavascriptFile/fetch.js @@ -1,3 +1,3 @@ -const apiService = async (url) => fetch(url); +const apiService = async (url) => fetch(url); -export default apiService; \ No newline at end of file +export default apiService; diff --git 
a/workspaces/tarball/test/fixtures/scanPackage/entryfiles/package.json b/workspaces/tarball/test/fixtures/scanPackage/entryfiles/package.json new file mode 100644 index 00000000..8d9b5fa9 --- /dev/null +++ b/workspaces/tarball/test/fixtures/scanPackage/entryfiles/package.json @@ -0,0 +1,4 @@ +{ + "name": "foobar", + "main": "./src/index.js" +} diff --git a/workspaces/tarball/test/fixtures/scanPackage/entryfiles/src/foo.js b/workspaces/tarball/test/fixtures/scanPackage/entryfiles/src/foo.js new file mode 100644 index 00000000..4f7907b3 --- /dev/null +++ b/workspaces/tarball/test/fixtures/scanPackage/entryfiles/src/foo.js @@ -0,0 +1 @@ +export const bar = "hello world"; diff --git a/workspaces/tarball/test/fixtures/scanPackage/entryfiles/src/index.js b/workspaces/tarball/test/fixtures/scanPackage/entryfiles/src/index.js new file mode 100644 index 00000000..1a25a2fd --- /dev/null +++ b/workspaces/tarball/test/fixtures/scanPackage/entryfiles/src/index.js @@ -0,0 +1,3 @@ +const { bar } = require("./foo.js"); + +console.log(bar); diff --git a/workspaces/tarball/test/sast/scanFile.spec.ts b/workspaces/tarball/test/sast/scanFile.spec.ts deleted file mode 100644 index a3885160..00000000 --- a/workspaces/tarball/test/sast/scanFile.spec.ts +++ /dev/null @@ -1,95 +0,0 @@ -// Import Node.js Dependencies -import path from "node:path"; -import { fileURLToPath } from "node:url"; -import { test } from "node:test"; -import assert from "node:assert"; - -// Import Internal Dependencies -import { scanFile } from "../../src/sast/index.js"; - -// CONSTANTS -const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const kFixturePath = path.join(__dirname, "..", "fixtures", "scanJavascriptFile"); - -test("scanFile (fixture one.js)", async() => { - const result = await scanFile(kFixturePath, "one.js", "yolo"); - assert.deepEqual(result, { - file: "one.js", - warnings: [], - isMinified: false, - tryDependencies: [], - dependencies: ["http", "mocha"], - filesDependencies: 
["src\\foo.js", "home\\marco.js"].map((location) => location.replaceAll("\\", path.sep)), - filesFlags: { - hasExternalCapacity: false - } - }); -}); - -test("scanFile (fixture two.min.js)", async() => { - const result = await scanFile(kFixturePath, "two.min.js", "yolo"); - assert.deepEqual(result, { - file: "two.min.js", - warnings: [], - isMinified: true, - tryDependencies: ["http"], - dependencies: ["http", "fs"], - filesDependencies: [], - filesFlags: { - hasExternalCapacity: false - } - }); -}); - -test("scanFile (fixture onelineStmt.min.js)", async() => { - const result = await scanFile(kFixturePath, "onelineStmt.min.js", "yolo"); - assert.deepEqual(result, { - file: "onelineStmt.min.js", - warnings: [], - isMinified: false, - tryDependencies: [], - dependencies: [], - filesDependencies: ["foobar.js"], - filesFlags: { - hasExternalCapacity: false - } - }); -}); - -test("scanFile (fixture parsingError.js)", async() => { - const result = await scanFile(kFixturePath, "parsingError.js", "yolo"); - - assert.deepEqual(result, { - file: "parsingError.js", - warnings: [ - { - kind: "parsing-error", - value: "[1:4-1:5]: Unexpected token: ';'", - location: [[0, 0], [0, 0]], - file: "parsingError.js" - } - ], - isMinified: false, - tryDependencies: [], - dependencies: [], - filesDependencies: [], - filesFlags: { - hasExternalCapacity: false - } - }); -}); - -test("scanFile (fixture fetch.js)", async() => { - const result = await scanFile(kFixturePath, "fetch.js", "yolo"); - assert.deepEqual(result, { - file: "fetch.js", - warnings: [], - isMinified: false, - tryDependencies: [], - dependencies: [], - filesDependencies: [], - filesFlags: { - hasExternalCapacity: true - } - }); -}); diff --git a/workspaces/tarball/test/tarball/scanPackage.spec.ts b/workspaces/tarball/test/tarball/scanPackage.spec.ts index 94164d1a..baae1684 100644 --- a/workspaces/tarball/test/tarball/scanPackage.spec.ts +++ b/workspaces/tarball/test/tarball/scanPackage.spec.ts @@ -57,11 +57,16 @@ 
test("scanPackage (caseone)", async() => { ]); assert.ok(result.ast.warnings.length === 0); - assert.deepEqual(Object.keys(result.ast.dependencies), [ - "index.js", - "src\\deps.js", - "src\\other.min.js" - ].map((location) => location.replace(/\\/g, path.sep))); + assert.deepEqual( + Object.keys(result.ast.dependencies).sort(), + [ + "index.js", + "src\\deps.js", + "src\\other.min.js" + ] + .map((location) => location.replace(/\\/g, path.sep)) + .sort() + ); assert.deepEqual(Object.keys(result.ast.dependencies["index.js"]), [ "./src/deps.js", "fs",