From 7cff90920124a01a1e90690ce2651844a835aef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 13 Jan 2026 22:29:22 +0100 Subject: [PATCH 1/4] . --- Rules/Languages/sv/ClearSpeak_Rules.yaml | 15 ++++ Rules/Languages/sv/SharedRules/calculus.yaml | 28 ++++-- Rules/Languages/sv/SharedRules/general.yaml | 95 ++++++++++++++++++++ Rules/Languages/sv/SharedRules/geometry.yaml | 16 ++++ Rules/Languages/sv/SimpleSpeak_Rules.yaml | 26 ++++++ 5 files changed, 174 insertions(+), 6 deletions(-) diff --git a/Rules/Languages/sv/ClearSpeak_Rules.yaml b/Rules/Languages/sv/ClearSpeak_Rules.yaml index 12c90c02..bbec4476 100644 --- a/Rules/Languages/sv/ClearSpeak_Rules.yaml +++ b/Rules/Languages/sv/ClearSpeak_Rules.yaml @@ -1,4 +1,19 @@ --- +- name: pause + tag: "!*" + match: "not(self::m:math) and not($MatchingPause) and @data-intent-property[contains(., ':pause')]" + replace: + - with: + variables: [MatchingPause: "true()"] + replace: + - test: + - if: "contains(@data-intent-property, ':pause-long')" + then: [pause: long] + - else_if: "contains(@data-intent-property, ':pause-short')" + then: [pause: short] + else: [pause: medium] + - x: "." + - name: intent-literal-silent tag: [mi, mo, mn] match: "contains(@data-intent-property, ':silent:')" diff --git a/Rules/Languages/sv/SharedRules/calculus.yaml b/Rules/Languages/sv/SharedRules/calculus.yaml index 20fcd91c..4e6a9e9c 100644 --- a/Rules/Languages/sv/SharedRules/calculus.yaml +++ b/Rules/Languages/sv/SharedRules/calculus.yaml @@ -1,14 +1,30 @@ --- +- name: laplacian + tag: laplacian + match: "count(*) <= 1" # can be on ∇^2 or on enclosing mrow + replace: + - T: "Laplaceoperatorn" # phrase('laplacian' of x) + - test: + if: "count(*) = 1" + then: + - test: + if: "$Verbosity!='Terse'" + then: [T: "av"] # phrase(function 'of' one variable) + - test: + if: "not(IsNode(*[1], 'leaf'))" + then: [pause: short] + - x: "*[1]" + - name: divergence tag: divergence match: "." replace: - test: if: "$Verbosity='Verbose'" - then: [t: "divergence"] # phrase('divergence' from the mean) - else: [t: "div"] # phrase('divergence' from the mean) - - t: "of" # phrase(systems 'of' linear equations) + then: [T: "divergens"] # phrase('divergence' from the mean) + else: [T: "div"] # phrase('divergence' from the mean) + - T: "av" # phrase(systems 'of' linear equations) - test: if: "not(IsNode(*[1], 'leaf'))" then: [pause: short] @@ -18,7 +34,7 @@ tag: curl match: "." replace: - - t: "curl of" # phrase(the 'curl of' a field) + - T: "rotationen av" # phrase(the 'curl of' a field) - test: if: "not(IsNode(*[1], 'leaf'))" then: [pause: short] @@ -30,8 +46,8 @@ replace: - test: if: "$Verbosity!='Terse'" - then: [t: "gradient of"] # phrase('divergence' from the mean) - else: [t: "del"] # phrase('divergence' from the mean) + then: [T: "gradienten av"] # phrase('divergence' from the mean) + else: [T: "grad"] # phrase('divergence' from the mean) - test: if: "not(IsNode(*[1], 'leaf'))" then: [pause: short] diff --git a/Rules/Languages/sv/SharedRules/general.yaml b/Rules/Languages/sv/SharedRules/general.yaml index 6f23a162..d6b4d420 100644 --- a/Rules/Languages/sv/SharedRules/general.yaml +++ b/Rules/Languages/sv/SharedRules/general.yaml @@ -213,6 +213,14 @@ - T: "av" # phrase(the square root 'of' 25 equals 5) - x: "*[2]" +- name: repeating-decimal + tag: repeating-decimal + match: "." + replace: + - x: "*[1]" + - T: "med repeterande siffror" # phrase('with repeating digits') + - spell: "*[2]" + - name: msubsup-skip-super # handles single, double, etc., prime tag: [skip-super, say-super] @@ -275,6 +283,14 @@ replace: - x: "IfThenElse($NeuterUnitFollows, 'ett', 1)" +# in terse mode, we just say "m" or "s", etc., not meters or seconds +- name: unit-terse + tag: unit + match: "$Verbosity = 'Terse' and string-length(.)=1" + replace: + - bookmark: "@id" + - spell: "text()" + # the order of matching is # 1. does it match the base of an SI unit # 2. does it match an English unit (if in an English language) @@ -498,6 +514,26 @@ then: [T: "cotan hype"] # phrase('cotanch' is shorthand for hyperbolic cotangent) else: [T: "cotangens hyperbolicus"] # phrase(the 'hyperbolic cotangent' is a mathematical function) +- name: exponential + tag: mi + match: ".='exp'" + replace: + - bookmark: "@id" + - test: + if: "$Verbosity='Terse'" + then: [T: "exp"] # phrase('exp' means exponential function) + else: [T: "exponentialfunktionen"] # phrase('exponential' function) + +- name: covariance + tag: mi + match: ".='Cov'" + replace: + - bookmark: "@id" + - test: + if: "$Verbosity='Terse'" + then: [T: "Cov"] # phrase('Cov' is shorthand for the covariance function) + else: [T: "kovarians"] # phrase('covariance' function) + - # handle both log and ln name: log tag: mi @@ -631,6 +667,57 @@ - T: "med element" # phrase(the 2 by 2 matrix 'with entry' x) - x: "*[1]/*" +# enklare uppläsning för specialfall av matriser +- name: zero-matrix + tag: matrix + # select all the non-zero entries -- if there are none of them, then it is a zero matrix + match: "not( */*/*[not(self::m:mn and .= 0)] )" + replace: + - x: count(*) + - T: "gånger" # phrase(the 1 'by' 2 matrix) + - x: count(*[self::m:mtr][1]/*) + - T: "nollmatris" # phrase(the 2 by 2 'zero matrix') + - pause: long + +- name: identity-matrix + tag: matrix + # select all the non-zero entries...if they are not on the diagonal, or are != 1 + # if there are any of them, then this isn't an identity matrix + match: + - "count(*) = count(*[1]/*) and " + - "not( */*/*[not(self::m:mn and .= 0)]" + - " [count(../preceding-sibling::*)!=count(../../preceding-sibling::*) or .!= 1]" + - " )" + replace: + - x: count(*) + - T: "gånger" # phrase(the 1 'by' 2 matrix) + - x: count(*[self::m:mtr][1]/*) + - T: "identitetsmatris" # phrase(the 2 by 2 'identity matrix') + - pause: long + +- name: diagonal-matrix + tag: matrix + # select all the non-zero entries...if they are not on the diagonal + # if there are any of them, then this isn't an identity matrix + match: + - "count(*) = count(*[1]/*) and " + - "not( */*/*[not(self::m:mn and .= 0)]" + - " [count(../preceding-sibling::*)!=count(../../preceding-sibling::*)]" + - " )" + replace: + - x: count(*) + - T: "gånger" # phrase(the 1 'by' 2 matrix) + - x: count(*[self::m:mtr][1]/*) + - T: "diagonalmatris" # phrase(the 2 by 2 'diagonal matrix') + - pause: long + - insert: + # this lists the diagonal 'mtd's to be read, and they say "column nnn" before reading the contents + # there seems to be an xpath bug -- without the parens, the match fails for the + # test Languages::en::mtable::diagonal_matrix due to match failure (the third matching element seems to be missing) + nodes: "(*/*/*[not(self::m:mn and .= 0)]/..)" + replace: [pause: auto] + - pause: long + # simpler reading methods for smaller matrices if the entries are simple - name: 2-or-3x1-matrix tag: matrix @@ -791,6 +878,14 @@ then: [T: "nedsänkt"] # phrase(H 'sub' 2) - x: "*[3]" +- name: dimension-by + tag: mrow + match: dimension-product + replace: + - insert: + nodes: "*" + replace: [T: "gånger", pause: auto] # phrase(3 'by' 5 matrix) + - name: chemistry-msup tag: [chemical-formula] match: "count(*)=3 and *[1][.='msup']" diff --git a/Rules/Languages/sv/SharedRules/geometry.yaml b/Rules/Languages/sv/SharedRules/geometry.yaml index b6393d37..7de43acb 100644 --- a/Rules/Languages/sv/SharedRules/geometry.yaml +++ b/Rules/Languages/sv/SharedRules/geometry.yaml @@ -54,3 +54,19 @@ - x: "*[2]" - x: "*[3]" +- name: coordinate + tag: coordinate + match: "." + replace: + - T: "punkten" # phrase(the 'point' at 1, 2) + - test: + if: "$Verbosity='Verbose'" + then: [T: "i"] # phrase(the point 'at' 1, 2) + - pause: short + - insert: + nodes: "*" + replace: [T: "komma", pause: auto] # phrase(f of x 'comma' y) + - pause: short + - test: + if: "($SpeechStyle='ClearSpeak' and $Verbosity='Verbose') or not(IsNode(*[last()],'leaf'))" + then: [T: "slutpunkt"] # phrase(start point, 'end point') diff --git a/Rules/Languages/sv/SimpleSpeak_Rules.yaml b/Rules/Languages/sv/SimpleSpeak_Rules.yaml index 36caefc2..eee7d080 100644 --- a/Rules/Languages/sv/SimpleSpeak_Rules.yaml +++ b/Rules/Languages/sv/SimpleSpeak_Rules.yaml @@ -1,5 +1,20 @@ --- +- name: pause + tag: "!*" + match: "not(self::m:math) and not($MatchingPause) and @data-intent-property[contains(., ':pause')]" + replace: + - with: + variables: [MatchingPause: "true()"] + replace: + - test: + - if: "contains(@data-intent-property, ':pause-long')" + then: [pause: long] + - else_if: "contains(@data-intent-property, ':pause-short')" + then: [pause: short] + else: [pause: medium] + - x: "." + - name: intent-literal-silent tag: [mi, mo, mn] match: "contains(@data-intent-property, ':silent:')" @@ -56,6 +71,17 @@ # Fraction rules # Mixed numbers mostly "just work" because the invisible char reads as "and" and other parts read properly on their own # Note: if the bounds change for what is a common fraction, change them in general.yaml for units +- name: common-fraction-mixed-number + tag: fraction + match: + - "preceding-sibling::*[1][self::m:mo][.='⁤'] and" # preceding element is invisible plus + - "*[1][self::m:mn][not(contains(., $DecimalSeparators))] and" + - "*[2][self::m:mn][not(contains(., $DecimalSeparators))]" + variables: [IsPlural: "*[1]!=1"] + replace: + - x: "*[1]" + - x: "ToOrdinal(*[2], true(), $IsPlural)" # extra args specify fractional ordinal and whether it is plural + - name: common-fraction tag: fraction match: From aa173225c18c88fb2aad47d1e29fae2a04e9e8ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Wed, 14 Jan 2026 17:22:42 +0100 Subject: [PATCH 2/4] Fix misleading line numbers for misaligned structure differences --- PythonScripts/audit_translations/auditor.py | 107 ++++++++++++-- .../fixtures/de/structure_misaligned.yaml | 15 ++ .../fixtures/de/structure_missing_else.yaml | 7 + .../fixtures/en/structure_misaligned.yaml | 18 +++ .../fixtures/en/structure_missing_else.yaml | 8 ++ .../tests/golden/jsonl/de.json | 36 +++++ .../audit_translations/tests/test_auditor.py | 133 ++++++++++++++++++ 7 files changed, 309 insertions(+), 15 deletions(-) create mode 100644 PythonScripts/audit_translations/tests/fixtures/de/structure_misaligned.yaml create mode 100644 PythonScripts/audit_translations/tests/fixtures/de/structure_missing_else.yaml create mode 100644 PythonScripts/audit_translations/tests/fixtures/en/structure_misaligned.yaml create mode 100644 PythonScripts/audit_translations/tests/fixtures/en/structure_missing_else.yaml diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index dd0c68ae..0fac4d2a 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -191,19 +191,67 @@ def issue_base(rule: RuleInfo, file_name: str, language: str) -> dict: def first_structure_mismatch( english_tokens: List[str], translated_tokens: List[str], -) -> Tuple[Optional[str], Optional[str]]: +) -> Tuple[Optional[str], Optional[str], int]: + """ + Find the first structural mismatch between two token lists. + + Returns (en_token, tr_token, mismatch_position). + Position is the index in the token list where they first differ. + """ min_len = min(len(english_tokens), len(translated_tokens)) for idx in range(min_len): if english_tokens[idx] != translated_tokens[idx]: - return english_tokens[idx], translated_tokens[idx] + return english_tokens[idx], translated_tokens[idx], idx if len(english_tokens) > min_len: - return english_tokens[min_len], None + return english_tokens[min_len], None, min_len if len(translated_tokens) > min_len: - return None, translated_tokens[min_len] - return None, None + return None, translated_tokens[min_len], min_len + return None, None, -1 + + +def resolve_issue_line_at_position( + rule: RuleInfo, + kind: str, + token: Optional[str] = None, + position: int = 0 +) -> Optional[int]: + """ + Resolve the line number for a specific occurrence of an element within a rule. + + Args: + rule: The rule to search in + kind: The kind of element ('match', 'condition', 'variables', 'structure') + token: For structure kind, the specific token to find + position: The occurrence index (0 for first, 1 for second, etc.) + + Returns: + The line number if found, None if the element doesn't exist at that position. + """ + if kind == "match": + lines = rule.line_map.get("match", []) + elif kind == "condition": + lines = rule.line_map.get("condition", []) + elif kind == "variables": + lines = rule.line_map.get("variables", []) + elif kind == "structure" and token: + token_key = f"structure:{token.rstrip(':')}" + lines = rule.line_map.get(token_key, []) + else: + lines = [] + if position < len(lines): + return lines[position] + return None -def resolve_issue_line(rule: RuleInfo, kind: str, token: Optional[str] = None) -> int: + +def resolve_issue_line(rule: RuleInfo, kind: str, token: Optional[str] = None) -> Optional[int]: + """ + Resolve the line number for an issue within a rule. + + Returns the line number if found, None if the element doesn't exist in the rule. + For 'structure' kind with a missing token, returns None instead of falling back + to rule.line_number to avoid misleading line numbers when elements are missing. + """ if kind == "match": lines = rule.line_map.get("match", []) elif kind == "condition": @@ -213,6 +261,9 @@ def resolve_issue_line(rule: RuleInfo, kind: str, token: Optional[str] = None) - elif kind == "structure" and token: token_key = f"structure:{token.rstrip(':')}" lines = rule.line_map.get(token_key, []) + # For structure differences, if the token doesn't exist, return None + # rather than falling back to rule.line_number which is misleading + return lines[0] if lines else None else: lines = [] return lines[0] if lines else rule.line_number @@ -274,9 +325,21 @@ def collect_issues( if diff.diff_type == "structure": en_tokens = extract_structure_elements(diff.english_rule.data) tr_tokens = extract_structure_elements(diff.translated_rule.data) - en_token, tr_token = first_structure_mismatch(en_tokens, tr_tokens) + en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens) + + # Skip reporting when tokens are misaligned (both exist but differ) + # This avoids misleading line numbers when entire blocks are missing/added + # We only report when one is None (clear case of missing element) + if en_token is not None and tr_token is not None and en_token != tr_token: + continue + issue_line_en = resolve_issue_line(diff.english_rule, "structure", en_token) issue_line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token) + + # Skip reporting structure differences where we can't find both tokens + # This avoids misleading line numbers when blocks are missing + if issue_line_en is None or issue_line_tr is None: + continue else: issue_line_en = resolve_issue_line(diff.english_rule, diff.diff_type) issue_line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type) @@ -340,23 +403,37 @@ def print_warnings(result: ComparisonResult, file_name: str, verbose: bool = Fal issues += 1 if result.rule_differences: - total_diffs = len(result.rule_differences) - console.print( - f"\n [magenta]≠[/] [bold]Rule Differences[/] " - f"[[magenta]{total_diffs}[/]] [dim](structural differences between en and translation)[/]" - ) + # Count only diffs that will actually be displayed + displayable_diffs = [] for diff in result.rule_differences: if diff.diff_type == "structure": en_tokens = extract_structure_elements(diff.english_rule.data) tr_tokens = extract_structure_elements(diff.translated_rule.data) - en_token, tr_token = first_structure_mismatch(en_tokens, tr_tokens) + en_token, tr_token, mismatch_pos = first_structure_mismatch(en_tokens, tr_tokens) + + # Skip reporting when tokens are misaligned (both exist but differ) + # This avoids misleading line numbers when entire blocks are missing/added + if en_token is not None and tr_token is not None and en_token != tr_token: + continue + line_en = resolve_issue_line(diff.english_rule, "structure", en_token) line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token) + # Skip structure diffs where we can't find both tokens + if line_en is None or line_tr is None: + continue else: line_en = resolve_issue_line(diff.english_rule, diff.diff_type) line_tr = resolve_issue_line(diff.translated_rule, diff.diff_type) - print_diff_item(diff, line_en=line_en, line_tr=line_tr, verbose=verbose) - issues += 1 + displayable_diffs.append((diff, line_en, line_tr)) + + if displayable_diffs: + console.print( + f"\n [magenta]≠[/] [bold]Rule Differences[/] " + f"[[magenta]{len(displayable_diffs)}[/]] [dim](structural differences between en and translation)[/]" + ) + for diff, line_en, line_tr in displayable_diffs: + print_diff_item(diff, line_en=line_en, line_tr=line_tr, verbose=verbose) + issues += 1 if result.extra_rules: console.print(f"\n [blue]ℹ[/] [bold]Extra Rules[/] [[blue]{len(result.extra_rules)}[/]] [dim](may be intentional)[/]") diff --git a/PythonScripts/audit_translations/tests/fixtures/de/structure_misaligned.yaml b/PythonScripts/audit_translations/tests/fixtures/de/structure_misaligned.yaml new file mode 100644 index 00000000..36e3097d --- /dev/null +++ b/PythonScripts/audit_translations/tests/fixtures/de/structure_misaligned.yaml @@ -0,0 +1,15 @@ +- name: misaligned-structure + tag: root + match: "." + replace: + - test: + if: $Setting = 'Value' + then: + - test: + if: parent::m:minus + then: [T: "negativ"] + else: [T: "positiv"] + - test: + if: "*[2][.='2']" + then: [T: "quadrat"] + else: [T: "andere"] diff --git a/PythonScripts/audit_translations/tests/fixtures/de/structure_missing_else.yaml b/PythonScripts/audit_translations/tests/fixtures/de/structure_missing_else.yaml new file mode 100644 index 00000000..445f0670 --- /dev/null +++ b/PythonScripts/audit_translations/tests/fixtures/de/structure_missing_else.yaml @@ -0,0 +1,7 @@ +- name: missing-else-block + tag: root + match: "." + replace: + - test: + if: "@intent='foo'" + then: [T: "hat foo"] diff --git a/PythonScripts/audit_translations/tests/fixtures/en/structure_misaligned.yaml b/PythonScripts/audit_translations/tests/fixtures/en/structure_misaligned.yaml new file mode 100644 index 00000000..e4999d95 --- /dev/null +++ b/PythonScripts/audit_translations/tests/fixtures/en/structure_misaligned.yaml @@ -0,0 +1,18 @@ +- name: misaligned-structure + tag: root + match: "." + replace: + - test: + if: "$Verbosity!='Terse'" + then: [t: "the"] + - test: + if: "$Setting = 'Value'" + then: + - test: + if: "parent::m:minus" + then: [t: "negative"] + else: [t: "positive"] + - test: + if: "*[2][.='2']" + then: [t: "square"] + else: [t: "other"] diff --git a/PythonScripts/audit_translations/tests/fixtures/en/structure_missing_else.yaml b/PythonScripts/audit_translations/tests/fixtures/en/structure_missing_else.yaml new file mode 100644 index 00000000..a1016856 --- /dev/null +++ b/PythonScripts/audit_translations/tests/fixtures/en/structure_missing_else.yaml @@ -0,0 +1,8 @@ +- name: missing-else-block + tag: root + match: "." + replace: + - test: + if: "@intent='foo'" + then: [t: "has foo"] + else: [t: "no foo"] diff --git a/PythonScripts/audit_translations/tests/golden/jsonl/de.json b/PythonScripts/audit_translations/tests/golden/jsonl/de.json index f5c4600f..4c6373f0 100644 --- a/PythonScripts/audit_translations/tests/golden/jsonl/de.json +++ b/PythonScripts/audit_translations/tests/golden/jsonl/de.json @@ -145,6 +145,42 @@ "untranslated_texts": [], "_explanation": "rule_difference structure_empty.yaml struct-empty|mi" }, + { + "_explanation": "structure_misaligned.yaml: English has extra test block causing misalignment. Fix filters out misleading structure differences but reports condition difference.", + "language": "de", + "file": "structure_misaligned.yaml", + "rule_name": "misaligned-structure", + "rule_tag": "root", + "rule_key": "misaligned-structure|root", + "issue_line_en": 6, + "issue_line_tr": 6, + "rule_line_en": 1, + "rule_line_tr": 1, + "issue_type": "rule_difference", + "diff_type": "condition", + "description": "Conditions differ", + "english_snippet": "$Setting = 'Value', $Verbosity!='Terse', *[2][.='2'], parent::m:minus", + "translated_snippet": "$Setting = 'Value', *[2][.='2'], parent::m:minus", + "untranslated_texts": [] + }, + { + "_explanation": "structure_missing_else.yaml: Verifies fix still reports legitimate missing else blocks. Unlike misaligned case, this has clear missing element.", + "language": "de", + "file": "structure_missing_else.yaml", + "rule_name": "missing-else-block", + "rule_tag": "root", + "rule_key": "missing-else-block|root", + "issue_line_en": 8, + "issue_line_tr": 1, + "rule_line_en": 1, + "rule_line_tr": 1, + "issue_type": "rule_difference", + "diff_type": "structure", + "description": "Rule structure differs (test/if/then/else blocks)", + "english_snippet": "replace: test: if: then: else:", + "translated_snippet": "replace: test: if: then:", + "untranslated_texts": [] + }, { "language": "de", "file": "unicode.yaml", diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index 6b452014..6fcc5e07 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -219,3 +219,136 @@ def test_print_warnings_includes_snippets_when_verbose() -> None: output = capture.get() assert output == golden_path.read_text(encoding="utf-8") + + +def test_misaligned_structure_differences_are_skipped() -> None: + """ + Test that structure differences with misaligned tokens are skipped. + + When English has a "test" block that Norwegian doesn't have (and vice versa), + the structural tokens become misaligned. The fix skips reporting these + to avoid showing confusing line numbers. + """ + base_dir = Path(__file__).parent + fixtures_dir = base_dir / "fixtures" + + result = compare_files( + str(fixtures_dir / "en" / "structure_misaligned.yaml"), + str(fixtures_dir / "de" / "structure_misaligned.yaml"), + ) + + # The result should detect that structures differ + assert len(result.rule_differences) > 0 + assert any(diff.diff_type == "structure" for diff in result.rule_differences) + + # But when collecting issues, misaligned structure diffs should be filtered out + issues = collect_issues(result, "structure_misaligned.yaml", "de") + structure_issues = [i for i in issues if i["diff_type"] == "structure"] + + # CRITICAL: Before the fix, this would have structure issues with misleading line numbers + # After the fix, misaligned structures are skipped, so we should have 0 structure issues + assert len(structure_issues) == 0, ( + "Expected misaligned structure differences to be filtered out, " + f"but found {len(structure_issues)} structure issues" + ) + + # Other differences (like conditions) should still be reported + condition_issues = [i for i in issues if i["diff_type"] == "condition"] + assert len(condition_issues) > 0, "Expected condition differences to still be reported" + + +def test_missing_else_block_is_still_reported() -> None: + """ + Test that legitimate missing structural elements are still reported. + + When one file has an 'else' block and the other doesn't, this is a clear + structural difference that should be reported with accurate line numbers. + """ + base_dir = Path(__file__).parent + fixtures_dir = base_dir / "fixtures" + + result = compare_files( + str(fixtures_dir / "en" / "structure_missing_else.yaml"), + str(fixtures_dir / "de" / "structure_missing_else.yaml"), + ) + + # Should detect structure difference + assert len(result.rule_differences) > 0 + structure_diffs = [diff for diff in result.rule_differences if diff.diff_type == "structure"] + assert len(structure_diffs) == 1 + + # This case has one token None (missing else), so it should still be reported + issues = collect_issues(result, "structure_missing_else.yaml", "de") + structure_issues = [i for i in issues if i["diff_type"] == "structure"] + + # CRITICAL: This legitimate difference should still be reported + # One file has else:, the other doesn't - a clear missing element + assert len(structure_issues) == 1, ( + "Expected missing else block to be reported, " + f"but found {len(structure_issues)} structure issues" + ) + + # Verify the issue has proper line numbers + issue = structure_issues[0] + assert issue["issue_line_en"] is not None + # When else: doesn't exist in translation, we fall back to the rule line number + assert issue["issue_line_tr"] == 1 # start of the rule + + +def test_print_warnings_skips_misaligned_structures() -> None: + """ + Test that print_warnings doesn't display misaligned structure differences. + """ + base_dir = Path(__file__).parent + fixtures_dir = base_dir / "fixtures" + + result = compare_files( + str(fixtures_dir / "en" / "structure_misaligned.yaml"), + str(fixtures_dir / "de" / "structure_misaligned.yaml"), + ) + + # Raw result should have structure differences detected + structure_diffs = [diff for diff in result.rule_differences if diff.diff_type == "structure"] + assert len(structure_diffs) > 0 + + with console.capture() as capture: + issues_count = print_warnings(result, "structure_misaligned.yaml", verbose=False) + output = capture.get() + + # CRITICAL: The output should not contain "Rule structure differs" + # because misaligned structures are filtered during display + assert "Rule structure differs" not in output, ( + "Expected misaligned structure differences to be filtered from display" + ) + + # The issues count should not include filtered structure differences + # It should only count the condition differences + condition_diffs = [diff for diff in result.rule_differences if diff.diff_type == "condition"] + assert issues_count == len(condition_diffs), ( + f"Expected issues_count ({issues_count}) to equal condition_diffs ({len(condition_diffs)})" + ) + + +def test_print_warnings_still_shows_missing_else() -> None: + """ + Test that print_warnings still displays legitimate missing elements. + """ + base_dir = Path(__file__).parent + fixtures_dir = base_dir / "fixtures" + + result = compare_files( + str(fixtures_dir / "en" / "structure_missing_else.yaml"), + str(fixtures_dir / "de" / "structure_missing_else.yaml"), + ) + + with console.capture() as capture: + issues_count = print_warnings(result, "structure_missing_else.yaml", verbose=False) + output = capture.get() + + # CRITICAL: This legitimate difference should appear in output + assert "Rule structure differs" in output, ( + "Expected missing else block to be shown in output" + ) + + # Should report exactly 1 issue (the structure difference) + assert issues_count == 1, f"Expected 1 issue but got {issues_count}" From a8dc226d73ddc9b1878fba771bfdcd8f132f4905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Wed, 14 Jan 2026 17:23:23 +0100 Subject: [PATCH 3/4] Revert "." This reverts commit 7cff90920124a01a1e90690ce2651844a835aef3. --- Rules/Languages/sv/ClearSpeak_Rules.yaml | 15 ---- Rules/Languages/sv/SharedRules/calculus.yaml | 28 ++---- Rules/Languages/sv/SharedRules/general.yaml | 95 -------------------- Rules/Languages/sv/SharedRules/geometry.yaml | 16 ---- Rules/Languages/sv/SimpleSpeak_Rules.yaml | 26 ------ 5 files changed, 6 insertions(+), 174 deletions(-) diff --git a/Rules/Languages/sv/ClearSpeak_Rules.yaml b/Rules/Languages/sv/ClearSpeak_Rules.yaml index bbec4476..12c90c02 100644 --- a/Rules/Languages/sv/ClearSpeak_Rules.yaml +++ b/Rules/Languages/sv/ClearSpeak_Rules.yaml @@ -1,19 +1,4 @@ --- -- name: pause - tag: "!*" - match: "not(self::m:math) and not($MatchingPause) and @data-intent-property[contains(., ':pause')]" - replace: - - with: - variables: [MatchingPause: "true()"] - replace: - - test: - - if: "contains(@data-intent-property, ':pause-long')" - then: [pause: long] - - else_if: "contains(@data-intent-property, ':pause-short')" - then: [pause: short] - else: [pause: medium] - - x: "." - - name: intent-literal-silent tag: [mi, mo, mn] match: "contains(@data-intent-property, ':silent:')" diff --git a/Rules/Languages/sv/SharedRules/calculus.yaml b/Rules/Languages/sv/SharedRules/calculus.yaml index 4e6a9e9c..20fcd91c 100644 --- a/Rules/Languages/sv/SharedRules/calculus.yaml +++ b/Rules/Languages/sv/SharedRules/calculus.yaml @@ -1,30 +1,14 @@ --- -- name: laplacian - tag: laplacian - match: "count(*) <= 1" # can be on ∇^2 or on enclosing mrow - replace: - - T: "Laplaceoperatorn" # phrase('laplacian' of x) - - test: - if: "count(*) = 1" - then: - - test: - if: "$Verbosity!='Terse'" - then: [T: "av"] # phrase(function 'of' one variable) - - test: - if: "not(IsNode(*[1], 'leaf'))" - then: [pause: short] - - x: "*[1]" - - name: divergence tag: divergence match: "." replace: - test: if: "$Verbosity='Verbose'" - then: [T: "divergens"] # phrase('divergence' from the mean) - else: [T: "div"] # phrase('divergence' from the mean) - - T: "av" # phrase(systems 'of' linear equations) + then: [t: "divergence"] # phrase('divergence' from the mean) + else: [t: "div"] # phrase('divergence' from the mean) + - t: "of" # phrase(systems 'of' linear equations) - test: if: "not(IsNode(*[1], 'leaf'))" then: [pause: short] @@ -34,7 +18,7 @@ tag: curl match: "." replace: - - T: "rotationen av" # phrase(the 'curl of' a field) + - t: "curl of" # phrase(the 'curl of' a field) - test: if: "not(IsNode(*[1], 'leaf'))" then: [pause: short] @@ -46,8 +30,8 @@ replace: - test: if: "$Verbosity!='Terse'" - then: [T: "gradienten av"] # phrase('divergence' from the mean) - else: [T: "grad"] # phrase('divergence' from the mean) + then: [t: "gradient of"] # phrase('divergence' from the mean) + else: [t: "del"] # phrase('divergence' from the mean) - test: if: "not(IsNode(*[1], 'leaf'))" then: [pause: short] diff --git a/Rules/Languages/sv/SharedRules/general.yaml b/Rules/Languages/sv/SharedRules/general.yaml index d6b4d420..6f23a162 100644 --- a/Rules/Languages/sv/SharedRules/general.yaml +++ b/Rules/Languages/sv/SharedRules/general.yaml @@ -213,14 +213,6 @@ - T: "av" # phrase(the square root 'of' 25 equals 5) - x: "*[2]" -- name: repeating-decimal - tag: repeating-decimal - match: "." - replace: - - x: "*[1]" - - T: "med repeterande siffror" # phrase('with repeating digits') - - spell: "*[2]" - - name: msubsup-skip-super # handles single, double, etc., prime tag: [skip-super, say-super] @@ -283,14 +275,6 @@ replace: - x: "IfThenElse($NeuterUnitFollows, 'ett', 1)" -# in terse mode, we just say "m" or "s", etc., not meters or seconds -- name: unit-terse - tag: unit - match: "$Verbosity = 'Terse' and string-length(.)=1" - replace: - - bookmark: "@id" - - spell: "text()" - # the order of matching is # 1. does it match the base of an SI unit # 2. does it match an English unit (if in an English language) @@ -514,26 +498,6 @@ then: [T: "cotan hype"] # phrase('cotanch' is shorthand for hyperbolic cotangent) else: [T: "cotangens hyperbolicus"] # phrase(the 'hyperbolic cotangent' is a mathematical function) -- name: exponential - tag: mi - match: ".='exp'" - replace: - - bookmark: "@id" - - test: - if: "$Verbosity='Terse'" - then: [T: "exp"] # phrase('exp' means exponential function) - else: [T: "exponentialfunktionen"] # phrase('exponential' function) - -- name: covariance - tag: mi - match: ".='Cov'" - replace: - - bookmark: "@id" - - test: - if: "$Verbosity='Terse'" - then: [T: "Cov"] # phrase('Cov' is shorthand for the covariance function) - else: [T: "kovarians"] # phrase('covariance' function) - - # handle both log and ln name: log tag: mi @@ -667,57 +631,6 @@ - T: "med element" # phrase(the 2 by 2 matrix 'with entry' x) - x: "*[1]/*" -# enklare uppläsning för specialfall av matriser -- name: zero-matrix - tag: matrix - # select all the non-zero entries -- if there are none of them, then it is a zero matrix - match: "not( */*/*[not(self::m:mn and .= 0)] )" - replace: - - x: count(*) - - T: "gånger" # phrase(the 1 'by' 2 matrix) - - x: count(*[self::m:mtr][1]/*) - - T: "nollmatris" # phrase(the 2 by 2 'zero matrix') - - pause: long - -- name: identity-matrix - tag: matrix - # select all the non-zero entries...if they are not on the diagonal, or are != 1 - # if there are any of them, then this isn't an identity matrix - match: - - "count(*) = count(*[1]/*) and " - - "not( */*/*[not(self::m:mn and .= 0)]" - - " [count(../preceding-sibling::*)!=count(../../preceding-sibling::*) or .!= 1]" - - " )" - replace: - - x: count(*) - - T: "gånger" # phrase(the 1 'by' 2 matrix) - - x: count(*[self::m:mtr][1]/*) - - T: "identitetsmatris" # phrase(the 2 by 2 'identity matrix') - - pause: long - -- name: diagonal-matrix - tag: matrix - # select all the non-zero entries...if they are not on the diagonal - # if there are any of them, then this isn't an identity matrix - match: - - "count(*) = count(*[1]/*) and " - - "not( */*/*[not(self::m:mn and .= 0)]" - - " [count(../preceding-sibling::*)!=count(../../preceding-sibling::*)]" - - " )" - replace: - - x: count(*) - - T: "gånger" # phrase(the 1 'by' 2 matrix) - - x: count(*[self::m:mtr][1]/*) - - T: "diagonalmatris" # phrase(the 2 by 2 'diagonal matrix') - - pause: long - - insert: - # this lists the diagonal 'mtd's to be read, and they say "column nnn" before reading the contents - # there seems to be an xpath bug -- without the parens, the match fails for the - # test Languages::en::mtable::diagonal_matrix due to match failure (the third matching element seems to be missing) - nodes: "(*/*/*[not(self::m:mn and .= 0)]/..)" - replace: [pause: auto] - - pause: long - # simpler reading methods for smaller matrices if the entries are simple - name: 2-or-3x1-matrix tag: matrix @@ -878,14 +791,6 @@ then: [T: "nedsänkt"] # phrase(H 'sub' 2) - x: "*[3]" -- name: dimension-by - tag: mrow - match: dimension-product - replace: - - insert: - nodes: "*" - replace: [T: "gånger", pause: auto] # phrase(3 'by' 5 matrix) - - name: chemistry-msup tag: [chemical-formula] match: "count(*)=3 and *[1][.='msup']" diff --git a/Rules/Languages/sv/SharedRules/geometry.yaml b/Rules/Languages/sv/SharedRules/geometry.yaml index 7de43acb..b6393d37 100644 --- a/Rules/Languages/sv/SharedRules/geometry.yaml +++ b/Rules/Languages/sv/SharedRules/geometry.yaml @@ -54,19 +54,3 @@ - x: "*[2]" - x: "*[3]" -- name: coordinate - tag: coordinate - match: "." - replace: - - T: "punkten" # phrase(the 'point' at 1, 2) - - test: - if: "$Verbosity='Verbose'" - then: [T: "i"] # phrase(the point 'at' 1, 2) - - pause: short - - insert: - nodes: "*" - replace: [T: "komma", pause: auto] # phrase(f of x 'comma' y) - - pause: short - - test: - if: "($SpeechStyle='ClearSpeak' and $Verbosity='Verbose') or not(IsNode(*[last()],'leaf'))" - then: [T: "slutpunkt"] # phrase(start point, 'end point') diff --git a/Rules/Languages/sv/SimpleSpeak_Rules.yaml b/Rules/Languages/sv/SimpleSpeak_Rules.yaml index eee7d080..36caefc2 100644 --- a/Rules/Languages/sv/SimpleSpeak_Rules.yaml +++ b/Rules/Languages/sv/SimpleSpeak_Rules.yaml @@ -1,20 +1,5 @@ --- -- name: pause - tag: "!*" - match: "not(self::m:math) and not($MatchingPause) and @data-intent-property[contains(., ':pause')]" - replace: - - with: - variables: [MatchingPause: "true()"] - replace: - - test: - - if: "contains(@data-intent-property, ':pause-long')" - then: [pause: long] - - else_if: "contains(@data-intent-property, ':pause-short')" - then: [pause: short] - else: [pause: medium] - - x: "." - - name: intent-literal-silent tag: [mi, mo, mn] match: "contains(@data-intent-property, ':silent:')" @@ -71,17 +56,6 @@ # Fraction rules # Mixed numbers mostly "just work" because the invisible char reads as "and" and other parts read properly on their own # Note: if the bounds change for what is a common fraction, change them in general.yaml for units -- name: common-fraction-mixed-number - tag: fraction - match: - - "preceding-sibling::*[1][self::m:mo][.='⁤'] and" # preceding element is invisible plus - - "*[1][self::m:mn][not(contains(., $DecimalSeparators))] and" - - "*[2][self::m:mn][not(contains(., $DecimalSeparators))]" - variables: [IsPlural: "*[1]!=1"] - replace: - - x: "*[1]" - - x: "ToOrdinal(*[2], true(), $IsPlural)" # extra args specify fractional ordinal and whether it is plural - - name: common-fraction tag: fraction match: From c13f3c7b1da0444d0070f5b4140c7eab058fbd23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Sun, 18 Jan 2026 17:42:11 +0100 Subject: [PATCH 4/4] . --- .../audit_translations/tests/golden/jsonl/de.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/PythonScripts/audit_translations/tests/golden/jsonl/de.json b/PythonScripts/audit_translations/tests/golden/jsonl/de.json index 4c6373f0..66cb0214 100644 --- a/PythonScripts/audit_translations/tests/golden/jsonl/de.json +++ b/PythonScripts/audit_translations/tests/golden/jsonl/de.json @@ -146,7 +146,6 @@ "_explanation": "rule_difference structure_empty.yaml struct-empty|mi" }, { - "_explanation": "structure_misaligned.yaml: English has extra test block causing misalignment. Fix filters out misleading structure differences but reports condition difference.", "language": "de", "file": "structure_misaligned.yaml", "rule_name": "misaligned-structure", @@ -161,10 +160,10 @@ "description": "Conditions differ", "english_snippet": "$Setting = 'Value', $Verbosity!='Terse', *[2][.='2'], parent::m:minus", "translated_snippet": "$Setting = 'Value', *[2][.='2'], parent::m:minus", - "untranslated_texts": [] + "untranslated_texts": [], + "_explanation": "structure_misaligned.yaml: English has extra test block causing misalignment. Fix filters out misleading structure differences but reports condition difference." }, { - "_explanation": "structure_missing_else.yaml: Verifies fix still reports legitimate missing else blocks. Unlike misaligned case, this has clear missing element.", "language": "de", "file": "structure_missing_else.yaml", "rule_name": "missing-else-block", @@ -179,7 +178,8 @@ "description": "Rule structure differs (test/if/then/else blocks)", "english_snippet": "replace: test: if: then: else:", "translated_snippet": "replace: test: if: then:", - "untranslated_texts": [] + "untranslated_texts": [], + "_explanation": "structure_missing_else.yaml: Verifies fix still reports legitimate missing else blocks. Unlike misaligned case, this has clear missing element." }, { "language": "de",