From 41ff750f0fa87a10f0f2b105132fdb485fcae3d9 Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 13 Nov 2025 10:50:27 +0100 Subject: [PATCH 1/8] JS: Skip minified file if avg line length > 200 --- .../semmle/js/extractor/FileExtractor.java | 13 ++++++---- .../semmle/js/extractor/ParseResultInfo.java | 21 ++++++++++++++++ .../semmle/js/extractor/ScriptExtractor.java | 24 +++++++++++++++++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index 5ebd7374a771..42b4aa83c342 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -549,10 +549,15 @@ private ParseResultInfo extractContents( new TextualExtractor( trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile); ParseResultInfo loc = extractor.extract(textualExtractor); - int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines(); - int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments(); - trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments); - trapwriter.addTuple("filetype", fileLabel, fileType.toString()); + if (loc.getSkipReason() != null) { + System.err.println("Skipping file " + extractedFile + ": " + loc.getSkipReason()); + System.err.flush(); + } else{ + int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines(); + int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments(); + trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments); + trapwriter.addTuple("filetype", fileLabel, fileType.toString()); + } metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents); metrics.writeTimingsToTrap(trapwriter); successful = true; diff --git a/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java b/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java index 6a1b14447ce3..28b412207d42 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java @@ -10,6 +10,7 @@ public class ParseResultInfo { private int linesOfCode, linesOfComments; private List parseErrors; + private String skipReason; public ParseResultInfo(int linesOfCode, int linesOfComments, List parseErrors) { this.linesOfCode = linesOfCode; @@ -17,6 +18,19 @@ public ParseResultInfo(int linesOfCode, int linesOfComments, List pa this.parseErrors = new ArrayList<>(parseErrors); } + private ParseResultInfo() { + this.linesOfCode = 0; + this.linesOfComments = 0; + this.parseErrors = new ArrayList<>(); + this.skipReason = null; + } + + public static final ParseResultInfo skipped(String reason) { + ParseResultInfo info = new ParseResultInfo(); + info.skipReason = reason; + return info; + } + public void add(ParseResultInfo that) { this.linesOfCode += that.linesOfCode; this.linesOfComments += that.linesOfComments; @@ -41,4 +55,11 @@ public int getLinesOfComments() { public List getParseErrors() { return parseErrors; } + + /** + * If extraction of this file was skipped, gets the reason for skipping it. + */ + public String getSkipReason() { + return skipReason; + } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java index 7c539d70e636..6c9bfd2725c0 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java @@ -38,10 +38,34 @@ private boolean isAlwaysCommonJSModule(String extension, String packageType) { return extension.equals(".cjs") || (extension.equals(".js") && "commonjs".equals(packageType)); } + private boolean isMinified(String source) { + // If the average line length is over 200 characters, consider the file minified. + int numberOfLineBreaks = 0; + for (int i = 0; i < source.length(); i++) { + char c = source.charAt(i); + if (c == '\n') { + numberOfLineBreaks++; + } else if (c == '\r') { + numberOfLineBreaks++; + if (i + 1 < source.length() && source.charAt(i + 1) == '\n') { + i++; // skip the next \n in case of \r\n + } + } + } + int averageLineLength = + numberOfLineBreaks == 0 ? source.length() : source.length() / numberOfLineBreaks; + return averageLineLength > 200; + } + @Override public ParseResultInfo extract(TextualExtractor textualExtractor) { LocationManager locationManager = textualExtractor.getLocationManager(); String source = textualExtractor.getSource(); + + if (isMinified(source)) { + return ParseResultInfo.skipped("File appears to be minified."); + } + String shebangLine = null, shebangLineTerm = null; if (source.startsWith("#!")) { From 17351145b997fb09d7c826ded04fdfe798f3476e Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 2 Dec 2025 11:38:51 +0100 Subject: [PATCH 2/8] JS: Accept test change due to file no longer being extracted --- .../query-tests/filters/ClassifyFiles/ClassifyFiles.expected | 1 - 1 file changed, 1 deletion(-) diff --git a/javascript/ql/test/query-tests/filters/ClassifyFiles/ClassifyFiles.expected b/javascript/ql/test/query-tests/filters/ClassifyFiles/ClassifyFiles.expected index 5b896e958c54..377a0e1ad29a 100644 --- a/javascript/ql/test/query-tests/filters/ClassifyFiles/ClassifyFiles.expected +++ b/javascript/ql/test/query-tests/filters/ClassifyFiles/ClassifyFiles.expected @@ -17,7 +17,6 @@ | jquery-datatables.js:0:0:0:0 | jquery-datatables.js | library | | jquery-jstree.js:0:0:0:0 | jquery-jstree.js | library | | jquery-snippet.js:0:0:0:0 | jquery-snippet.js | library | -| json-like.js:0:0:0:0 | json-like.js | generated | | jsx-old.js:0:0:0:0 | jsx-old.js | generated | | jsx.js:0:0:0:0 | jsx.js | generated | | multi-part-bundle.html:0:0:0:0 | multi-part-bundle.html | generated | From 38eef8abf5c0fc45c48ac03e2232ac3e3cd0be2b Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 2 Dec 2025 11:42:14 +0100 Subject: [PATCH 3/8] JS: Make sure a file is not seen as minified --- .../Security/CWE-400/ReDoS/regexplib/dates.js | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/regexplib/dates.js b/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/regexplib/dates.js index 14468a515657..dc172e06b184 100644 --- a/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/regexplib/dates.js +++ b/javascript/ql/test/query-tests/Security/CWE-400/ReDoS/regexplib/dates.js @@ -132,3 +132,103 @@ /^(([0-9])|([0-1][0-9])|([2][0-3])):?([0-5][0-9])$/g; /^[\w-\.]+@([\w-]+\.)+[\w-]{2,3}$/g; /(((0[1-9]|[12][0-9]|3[01])([/])(0[13578]|10|12)([/])(\d{4}))|(([0][1-9]|[12][0-9]|30)([/])(0[469]|11)([/])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([/])(02)([/])(\d{4}))|((29)(\.|-|\/)(02)([/])([02468][048]00))|((29)([/])(02)([/])([13579][26]00))|((29)([/])(02)([/])([0-9][0-9][0][48]))|((29)([/])(02)([/])([0-9][0-9][2468][048]))|((29)([/])(02)([/])([0-9][0-9][13579][26])))/g; +// +// Add some empty lines to lower the average line length so the file is not classified as minified. +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// From e9de73b54dc78fb1614591515baa77d8d165efa6 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 5 Dec 2025 10:22:31 +0100 Subject: [PATCH 4/8] JS: Add environment variable to opt out of the behaviour if needed --- .../src/com/semmle/js/extractor/AutoBuild.java | 7 +++++-- .../js/extractor/EnvironmentVariables.java | 8 ++++++++ .../semmle/js/extractor/ExtractorConfig.java | 17 +++++++++++++++++ .../semmle/js/extractor/ScriptExtractor.java | 2 +- 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 416fa237e97e..d88897819211 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -408,8 +408,10 @@ private void setupFilters() { for (String extension : fileTypes.keySet()) patterns.add("**/*" + extension); // exclude files whose name strongly suggests they are minified - patterns.add("-**/*.min.js"); - patterns.add("-**/*-min.js"); + if (!EnvironmentVariables.allowMinifiedFiles()) { + patterns.add("-**/*.min.js"); + patterns.add("-**/*-min.js"); + } // exclude `node_modules` and `bower_components` patterns.add("-**/node_modules"); @@ -1074,6 +1076,7 @@ private ExtractorConfig mkExtractorConfig() { config = config.withSourceType(getSourceType()); config = config.withVirtualSourceRoot(virtualSourceRoot); if (defaultEncoding != null) config = config.withDefaultEncoding(defaultEncoding); + config = config.withAllowMinified(EnvironmentVariables.allowMinifiedFiles()); return config; } diff --git a/javascript/extractor/src/com/semmle/js/extractor/EnvironmentVariables.java b/javascript/extractor/src/com/semmle/js/extractor/EnvironmentVariables.java index f2ac4227589f..9d883960256c 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/EnvironmentVariables.java +++ b/javascript/extractor/src/com/semmle/js/extractor/EnvironmentVariables.java @@ -101,4 +101,12 @@ public static String getWipDatabase() { public static boolean isActionsExtractor() { return Env.systemEnv().getNonEmpty(CODEQL_EXTRACTOR_ACTIONS_WIP_DATABASE_ENV_VAR) != null; } + + public static boolean allowMinifiedFiles() { + String env = Env.systemEnv().getNonEmpty("CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES"); + if (env == null) { + return false; // default is to not allow minified files + } + return Boolean.parseBoolean(env); + } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java index 884d07446941..538ac1a43679 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java @@ -205,6 +205,9 @@ public Set getPredefinedGlobals() { /** Should parse errors be reported as violations instead of aborting extraction? */ private boolean tolerateParseErrors; + /** Should minified files be allowed? */ + private boolean allowMinified; + /** How should HTML files be extracted? */ private HtmlPopulator.Config htmlHandling; @@ -236,6 +239,7 @@ public ExtractorConfig(boolean experimental) { this.sourceType = SourceType.AUTO; this.htmlHandling = HtmlPopulator.Config.ELEMENTS; this.tolerateParseErrors = true; + this.allowMinified = false; if (experimental) { this.mozExtensions = true; this.jscript = true; @@ -258,6 +262,7 @@ public ExtractorConfig(ExtractorConfig that) { this.v8Extensions = that.v8Extensions; this.e4x = that.e4x; this.tolerateParseErrors = that.tolerateParseErrors; + this.allowMinified = that.allowMinified; this.fileType = that.fileType; this.sourceType = that.sourceType; this.htmlHandling = that.htmlHandling; @@ -357,6 +362,16 @@ public ExtractorConfig withTolerateParseErrors(boolean tolerateParseErrors) { return res; } + public boolean isAllowMinified() { + return allowMinified; + } + + public ExtractorConfig withAllowMinified(boolean allowMinified) { + ExtractorConfig res = new ExtractorConfig(this); + res.allowMinified = allowMinified; + return res; + } + public boolean hasFileType() { return fileType != null; } @@ -467,6 +482,8 @@ public String toString() { + e4x + ", tolerateParseErrors=" + tolerateParseErrors + + ", allowMinified=" + + allowMinified + ", htmlHandling=" + htmlHandling + ", fileType=" diff --git a/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java index 6c9bfd2725c0..bff9ccddad67 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java @@ -62,7 +62,7 @@ public ParseResultInfo extract(TextualExtractor textualExtractor) { LocationManager locationManager = textualExtractor.getLocationManager(); String source = textualExtractor.getSource(); - if (isMinified(source)) { + if (!config.isAllowMinified() && isMinified(source)) { return ParseResultInfo.skipped("File appears to be minified."); } From 9ad80a36a1025b24edd0540106bc974c206b1177 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 5 Dec 2025 10:24:14 +0100 Subject: [PATCH 5/8] JS: Change note --- .../ql/src/change-notes/2025-12-05-skip-minified-files.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md diff --git a/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md b/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md new file mode 100644 index 000000000000..ce202b358669 --- /dev/null +++ b/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md @@ -0,0 +1,6 @@ +--- +category: majorAnalysis +--- +* JavaScript files with an average line length greater than 200 are now considered minified and will no longer be analyzed. + For use-cases where minified files should be analyzed, the orginal behaviour can be restored by setting the environment variable + `CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES=true`. From 94edeeb763adacbeede5d3a2619f4b980b305429 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 5 Dec 2025 11:14:59 +0100 Subject: [PATCH 6/8] Update javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../extractor/src/com/semmle/js/extractor/FileExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index 42b4aa83c342..9cf5c3b295c5 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -552,7 +552,7 @@ private ParseResultInfo extractContents( if (loc.getSkipReason() != null) { System.err.println("Skipping file " + extractedFile + ": " + loc.getSkipReason()); System.err.flush(); - } else{ + } else { int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines(); int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments(); trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments); From c53b3474bcb9a08d3819589de2f78857b7cbab81 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 5 Dec 2025 11:15:05 +0100 Subject: [PATCH 7/8] Update javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../ql/src/change-notes/2025-12-05-skip-minified-files.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md b/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md index ce202b358669..140f6eb08136 100644 --- a/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md +++ b/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md @@ -2,5 +2,5 @@ category: majorAnalysis --- * JavaScript files with an average line length greater than 200 are now considered minified and will no longer be analyzed. - For use-cases where minified files should be analyzed, the orginal behaviour can be restored by setting the environment variable + For use-cases where minified files should be analyzed, the original behaviour can be restored by setting the environment variable `CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES=true`. From 1a35148b7b9161e971570c51bf3f7133d8b6f25d Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 5 Dec 2025 15:59:11 +0100 Subject: [PATCH 8/8] Update javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md Co-authored-by: Taus --- .../ql/src/change-notes/2025-12-05-skip-minified-files.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md b/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md index 140f6eb08136..6d5309fa5c7a 100644 --- a/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md +++ b/javascript/ql/src/change-notes/2025-12-05-skip-minified-files.md @@ -2,5 +2,5 @@ category: majorAnalysis --- * JavaScript files with an average line length greater than 200 are now considered minified and will no longer be analyzed. - For use-cases where minified files should be analyzed, the original behaviour can be restored by setting the environment variable + For use-cases where minified files should be analyzed, the original behavior can be restored by setting the environment variable `CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES=true`.