fix(linter/unicorn): fix ASI hazard in prefer-spread rule fixer (#16440)

camc314 · camc314 · commit 678e43b5c365 · 2025-12-09T11:12:36.000Z
🤖 generated with help from Claude Opus 4.5
diff --git a/crates/oxc_linter/src/ast_util.rs b/crates/oxc_linter/src/ast_util.rs
@@ -10,7 +10,10 @@ use oxc_ast::{
 use oxc_ecmascript::{ToBoolean, WithoutGlobalReferenceInformation};
 use oxc_semantic::{AstNode, AstNodes, IsGlobalReference, NodeId, ReferenceId, Semantic, SymbolId};
 use oxc_span::{GetSpan, Span};
-use oxc_syntax::operator::{AssignmentOperator, BinaryOperator, LogicalOperator, UnaryOperator};
+use oxc_syntax::{
+    identifier::is_irregular_whitespace,
+    operator::{AssignmentOperator, BinaryOperator, LogicalOperator, UnaryOperator},
+};
 
 use crate::{LintContext, utils::get_function_nearest_jsdoc_node};
 
@@ -975,3 +978,159 @@ pub fn is_node_within_call_argument<'a>(
     let arg_span = target_arg.span();
     node_span.start >= arg_span.start && node_span.end <= arg_span.end
 }
+
+/// Determines if a semicolon is needed before inserting code that starts with
+/// certain characters (`[`, `(`, `/`, `+`, `-`, `` ` ``) that could be misinterpreted
+/// due to Automatic Semicolon Insertion (ASI) rules.
+///
+/// Returns `true` if the node is at the start of an `ExpressionStatement` and the
+/// character before it could cause the replacement to be parsed as a continuation
+/// of the previous expression.
+pub fn could_be_asi_hazard(node: &AstNode, ctx: &LintContext) -> bool {
+    let node_span = node.span();
+
+    // Find the enclosing ExpressionStatement, bailing early for nodes that can't
+    // be at statement start position
+    let mut expr_stmt_span = None;
+    for ancestor in ctx.nodes().ancestors(node.id()) {
+        match ancestor.kind() {
+            AstKind::ExpressionStatement(expr_stmt) => {
+                expr_stmt_span = Some(expr_stmt.span);
+                break;
+            }
+            // Expression types that can have our node at their start position
+            AstKind::CallExpression(_)
+            | AstKind::ComputedMemberExpression(_)
+            | AstKind::StaticMemberExpression(_)
+            | AstKind::PrivateFieldExpression(_)
+            | AstKind::ChainExpression(_)
+            | AstKind::TaggedTemplateExpression(_)
+            | AstKind::SequenceExpression(_)
+            | AstKind::AssignmentExpression(_)
+            | AstKind::LogicalExpression(_)
+            | AstKind::BinaryExpression(_)
+            | AstKind::ConditionalExpression(_)
+            | AstKind::AwaitExpression(_)
+            | AstKind::ParenthesizedExpression(_)
+            | AstKind::TSAsExpression(_)
+            | AstKind::TSSatisfiesExpression(_)
+            | AstKind::TSNonNullExpression(_)
+            | AstKind::TSTypeAssertion(_)
+            | AstKind::TSInstantiationExpression(_) => {}
+            _ => return false,
+        }
+    }
+
+    let Some(expr_stmt_span) = expr_stmt_span else {
+        return false;
+    };
+
+    // Node must be at the start of the statement for ASI hazard to apply
+    if node_span.start != expr_stmt_span.start {
+        return false;
+    }
+
+    if expr_stmt_span.start == 0 {
+        return false;
+    }
+
+    let source_text = ctx.source_text();
+    let last_char = find_last_meaningful_char(source_text, expr_stmt_span.start, ctx);
+
+    let Some(last_char) = last_char else {
+        return false;
+    };
+
+    // Characters that could cause ASI issues when followed by `[`, `(`, `/`, etc.
+    matches!(last_char, ')' | ']' | '}' | '"' | '\'' | '`' | '+' | '-' | '/' | '.')
+        || last_char.is_alphanumeric()
+        || last_char == '_'
+        || last_char == '$'
+}
+
+#[inline]
+#[expect(clippy::cast_possible_wrap)]
+fn is_utf8_char_boundary(b: u8) -> bool {
+    (b as i8) >= -0x40
+}
+
+/// Find the last meaningful (non-whitespace, non-comment) character before `end_pos`.
+fn find_last_meaningful_char(source_text: &str, end_pos: u32, ctx: &LintContext) -> Option<char> {
+    let bytes = source_text.as_bytes();
+    let comments = ctx.semantic().comments();
+
+    let mut comment_idx = comments.partition_point(|c| c.span.start < end_pos);
+    let mut current_comment_end: u32 = 0;
+    let mut i = end_pos;
+
+    // Handle case where end_pos is inside a comment
+    if comment_idx > 0 {
+        let prev_comment = &comments[comment_idx - 1];
+        if end_pos <= prev_comment.span.end {
+            i = prev_comment.span.start;
+            comment_idx -= 1;
+            if comment_idx > 0 {
+                current_comment_end = comments[comment_idx - 1].span.end;
+            }
+        }
+    }
+
+    while i > 0 {
+        if i <= current_comment_end && comment_idx > 0 {
+            comment_idx -= 1;
+            current_comment_end = comments[comment_idx].span.start;
+            i = current_comment_end;
+            continue;
+        }
+
+        i -= 1;
+
+        let byte = bytes[i as usize];
+
+        if byte.is_ascii_whitespace() {
+            continue;
+        }
+
+        // Check if we're entering a comment from the end
+        if comment_idx > 0 {
+            let comment = &comments[comment_idx - 1];
+            if i >= comment.span.start && i < comment.span.end {
+                i = comment.span.start;
+                comment_idx -= 1;
+                if comment_idx > 0 {
+                    current_comment_end = comments[comment_idx - 1].span.end;
+                }
+                continue;
+            }
+        }
+
+        if byte.is_ascii() {
+            return Some(byte as char);
+        }
+
+        // Multi-byte UTF-8: find the start byte (max 4 bytes per char)
+        let i_usize = i as usize;
+        let char_start = if is_utf8_char_boundary(bytes[i_usize.saturating_sub(1)]) {
+            i_usize - 1
+        } else if is_utf8_char_boundary(bytes[i_usize.saturating_sub(2)]) {
+            i_usize - 2
+        } else {
+            i_usize - 3
+        };
+
+        let c = source_text[char_start..].chars().next().unwrap();
+
+        // Skip irregular whitespace (NBSP, ZWNBSP, etc.)
+        if is_irregular_whitespace(c) {
+            #[expect(clippy::cast_possible_truncation)]
+            {
+                i = char_start as u32;
+            }
+            continue;
+        }
+
+        return Some(c);
+    }
+
+    None
+}
diff --git a/crates/oxc_linter/src/rules/unicorn/prefer_spread.rs b/crates/oxc_linter/src/rules/unicorn/prefer_spread.rs
@@ -52,11 +52,15 @@ impl Rule for PreferSpread {
             return;
         };
 
-        check_unicorn_prefer_spread(call_expr, ctx);
+        check_unicorn_prefer_spread(node, call_expr, ctx);
     }
 }
 
-fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) {
+fn check_unicorn_prefer_spread<'a>(
+    node: &AstNode<'a>,
+    call_expr: &CallExpression<'a>,
+    ctx: &LintContext<'a>,
+) {
     let Some(member_expr) = call_expr.callee.without_parentheses().as_member_expression() else {
         return;
     };
@@ -87,7 +91,7 @@ fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) {
                 return;
             }
 
-            report_with_spread_fixer(ctx, call_expr.span, "Array.from()", expr);
+            report_with_spread_fixer(node, ctx, call_expr.span, "Array.from()", expr);
         }
         // `array.concat()`
         "concat" => {
@@ -131,7 +135,7 @@ fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) {
                 }
             }
 
-            report_with_spread_fixer(ctx, call_expr.span, "array.slice()", member_expr_obj);
+            report_with_spread_fixer(node, ctx, call_expr.span, "array.slice()", member_expr_obj);
         }
         // `array.toSpliced()`
         "toSpliced" => {
@@ -145,6 +149,7 @@ fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) {
             }
 
             report_with_spread_fixer(
+                node,
                 ctx,
                 call_expr.span,
                 "array.toSpliced()",
@@ -171,10 +176,15 @@ fn check_unicorn_prefer_spread(call_expr: &CallExpression, ctx: &LintContext) {
             ctx.diagnostic_with_fix(
                 unicorn_prefer_spread_diagnostic(call_expr.span, "string.split()"),
                 |fixer| {
+                    let needs_semi = ast_util::could_be_asi_hazard(node, ctx);
                     let callee_obj = member_expr.object().without_parentheses();
+                    let prefix = if needs_semi { ";" } else { "" };
                     fixer.replace(
                         call_expr.span,
-                        format!("[...{}]", callee_obj.span().source_text(ctx.source_text())),
+                        format!(
+                            "{prefix}[...{}]",
+                            callee_obj.span().source_text(ctx.source_text())
+                        ),
                     )
                 },
             );
@@ -241,13 +251,18 @@ fn is_not_array(expr: &Expression, ctx: &LintContext) -> bool {
 }
 
 fn report_with_spread_fixer(
+    node: &AstNode,
     ctx: &LintContext,
     span: Span,
     bad_method: &str,
     expr_to_spread: &Expression,
 ) {
     ctx.diagnostic_with_fix(unicorn_prefer_spread_diagnostic(span, bad_method), |fixer| {
+        let needs_semi = ast_util::could_be_asi_hazard(node, ctx);
         let mut codegen = fixer.codegen();
+        if needs_semi {
+            codegen.print_str(";");
+        }
         codegen.print_str("[...");
         codegen.print_expression(expr_to_spread);
         codegen.print_str("]");
@@ -542,22 +557,118 @@ fn test() {
         // `Array.from()`
         ("const x = Array.from(set);", "const x = [...set];", None),
         ("Array.from(new Set([1, 2])).map(() => {});", "[...new Set([1, 2])].map(() => {});", None),
+        // `Array.from()` - ASI hazard cases (need semicolon prefix)
+        (
+            "const foo = bar\nArray.from(set).map(() => {})",
+            "const foo = bar\n;[...set].map(() => {})",
+            None,
+        ),
+        (
+            "foo()\nArray.from(set).forEach(doSomething)",
+            "foo()\n;[...set].forEach(doSomething)",
+            None,
+        ),
+        // `Array.from()` - No ASI hazard (semicolon already present)
+        (
+            "const foo = bar;\nArray.from(set).map(() => {})",
+            "const foo = bar;\n[...set].map(() => {})",
+            None,
+        ),
+        // `Array.from()` - ASI hazard with comments before
+        (
+            "foo() /* comment */\nArray.from(set).map(() => {})",
+            "foo() /* comment */\n;[...set].map(() => {})",
+            None,
+        ),
+        (
+            "foo() // comment\nArray.from(set).map(() => {})",
+            "foo() // comment\n;[...set].map(() => {})",
+            None,
+        ),
         // `array.slice()`
         ("array.slice()", "[...array]", None),
         ("array.slice(1).slice()", "[...array.slice(1)]", None),
+        // `array.slice()` - ASI hazard cases
+        ("foo()\narray.slice()", "foo()\n;[...array]", None),
         // `array.toSpliced()`
         ("array.toSpliced()", "[...array]", None),
         ("const copy = array.toSpliced()", "const copy = [...array]", None),
-        // ("", "", None),
-        // ("", "", None),
+        // `array.toSpliced()` - ASI hazard cases
+        ("foo()\narray.toSpliced()", "foo()\n;[...array]", None),
         // `string.split()`
         (r#""🦄".split("")"#, r#"[..."🦄"]"#, None),
         (r#""foo bar baz".split("")"#, r#"[..."foo bar baz"]"#, None),
+        // `string.split()` - ASI hazard cases
+        ("foo()\nstr.split(\"\")", "foo()\n;[...str]", None),
         (
             r"Array.from(path.matchAll(/\{([^{}?]+\??)\}/g))",
             "[...path.matchAll(/\\{([^{}?]+\\??)\\}/g)]",
             None,
         ),
+        // Cases where NO semicolon should be added (not an ExpressionStatement)
+        ("return Array.from(set)", "return [...set]", None),
+        ("const x = Array.from(set)", "const x = [...set]", None),
+        ("foo(Array.from(set))", "foo([...set])", None),
+        ("if (Array.from(set).length) {}", "if ([...set].length) {}", None),
+        // `Array.from()` - ASI hazard with multi-byte Unicode identifiers
+        ("日本語\nArray.from(set).map(() => {})", "日本語\n;[...set].map(() => {})", None),
+        (
+            "const foo = 日本語\nArray.from(set).map(() => {})",
+            "const foo = 日本語\n;[...set].map(() => {})",
+            None,
+        ),
+        ("/**/Array.from(set).map(() => {})", "/**/[...set].map(() => {})", None),
+        ("/regex/\nArray.from(set).map(() => {})", "/regex/\n;[...set].map(() => {})", None),
+        ("/regex/g\nArray.from(set).map(() => {})", "/regex/g\n;[...set].map(() => {})", None),
+        ("0.\nArray.from(set).map(() => {})", "0.\n;[...set].map(() => {})", None),
+        (
+            "foo()\u{00A0}\nArray.from(set).map(() => {})",
+            "foo()\u{00A0}\n;[...set].map(() => {})",
+            None,
+        ),
+        (
+            "foo()\u{FEFF}\nArray.from(set).map(() => {})",
+            "foo()\u{FEFF}\n;[...set].map(() => {})",
+            None,
+        ),
+        ("foo() /* a */ /* b */\nArray.from(set)", "foo() /* a */ /* b */\n;[...set]", None),
+        ("x++\narray.slice()", "x++\n;[...array]", None),
+        ("x--\narray.slice()", "x--\n;[...array]", None),
+        ("arr[0]\narray.slice()", "arr[0]\n;[...array]", None),
+        ("obj.prop\narray.slice()", "obj.prop\n;[...array]", None),
+        ("while (array.slice().length) {}", "while ([...array].length) {}", None),
+        ("do {} while (array.slice().length)", "do {} while ([...array].length)", None),
+        ("for (array.slice();;) {}", "for ([...array];;) {}", None),
+        ("switch (array.slice()[0]) {}", "switch ([...array][0]) {}", None),
+        ("`template`\narray.toSpliced()", "`template`\n;[...array]", None),
+        (
+            r#"'string'
+str.split("")"#,
+            "'string'\n;[...str]",
+            None,
+        ),
+        (
+            r#""string"
+str.split("")"#,
+            r#""string"
+;[...str]"#,
+            None,
+        ),
+        (
+            "foo()\nArray.from(set).map(x => x).filter(Boolean).length",
+            "foo()\n;[...set].map(x => x).filter(Boolean).length",
+            None,
+        ),
+        ("const fn = () => Array.from(set)", "const fn = () => [...set]", None),
+        ("foo ? Array.from(a) : b", "foo ? [...a] : b", None),
+        ("foo || Array.from(set)", "foo || [...set]", None),
+        ("foo && Array.from(set)", "foo && [...set]", None),
+        ("foo + Array.from(set).length", "foo + [...set].length", None),
+        ("x = Array.from(set)", "x = [...set]", None),
+        ("const obj = { arr: Array.from(set) }", "const obj = { arr: [...set] }", None),
+        ("(foo, Array.from(set))", "(foo, [...set])", None),
+        ("[Array.from(set)]", "[[...set]]", None),
+        ("async () => await Array.from(set)", "async () => await [...set]", None),
     ];
 
     Tester::new(PreferSpread::NAME, PreferSpread::PLUGIN, pass, fail)