Skip to content

Commit 87edf05

Browse files
authored
Case ignore (#3272)
1 parent f4697e8 commit 87edf05

File tree

3 files changed

+94
-12
lines changed

3 files changed

+94
-12
lines changed

codespell_lib/_codespell.py

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,24 @@
2020
import configparser
2121
import ctypes
2222
import fnmatch
23+
import itertools
2324
import os
2425
import re
2526
import sys
2627
import textwrap
2728
from ctypes import wintypes
28-
from typing import Any, Dict, List, Match, Optional, Pattern, Sequence, Set, Tuple
29+
from typing import (
30+
Any,
31+
Dict,
32+
Iterable,
33+
List,
34+
Match,
35+
Optional,
36+
Pattern,
37+
Sequence,
38+
Set,
39+
Tuple,
40+
)
2941

3042
# autogenerated by setuptools_scm
3143
from ._version import ( # type: ignore[import-not-found]
@@ -655,24 +667,44 @@ def parse_options(
655667
return options, parser, used_cfg_files
656668

657669

658-
def parse_ignore_words_option(ignore_words_option: List[str]) -> Set[str]:
def process_ignore_words(
    words: Iterable[str], ignore_words: Set[str], ignore_words_cased: Set[str]
) -> None:
    """Sort *words* into the lowercase and the cased ignore sets.

    Each word is stripped of surrounding whitespace first. A word that is
    equal to its own lowercase form is added to *ignore_words*; a word that
    contains at least one uppercase character is added to
    *ignore_words_cased* with its spelling preserved.

    Blank entries (an empty line in an ignore file, a trailing comma in a
    comma-separated list) are skipped instead of being stored as ``""``.

    Args:
        words: Candidate words; may be any iterable, including a generator.
        ignore_words: Set updated in place with the all-lowercase words.
        ignore_words_cased: Set updated in place with the mixed/upper-case
            words.
    """
    for raw_word in words:
        cleaned = raw_word.strip()
        if not cleaned:
            # Nothing left after stripping: not a word, so do not record it.
            continue
        if cleaned == cleaned.lower():
            ignore_words.add(cleaned)
        else:
            ignore_words_cased.add(cleaned)
679+
680+
def parse_ignore_words_option(
    ignore_words_option: List[str],
) -> Tuple[Set[str], Set[str]]:
    """Turn the values of an ignore-words-list option into two word sets.

    Every entry of *ignore_words_option* is a comma-separated string. Its
    pieces are stripped and passed to ``process_ignore_words``, which files
    each one into the lowercase set or the cased set.

    Args:
        ignore_words_option: The option values; a falsy value yields two
            empty sets.

    Returns:
        A ``(ignore_words, ignore_words_cased)`` tuple.
    """
    lowercase_words: Set[str] = set()
    cased_words: Set[str] = set()
    # ``or ()`` covers the "option not given" case without a separate branch.
    for entry in ignore_words_option or ():
        pieces = (token.strip() for token in entry.split(","))
        process_ignore_words(pieces, lowercase_words, cased_words)
    return lowercase_words, cased_words
666694

667695

def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None:
    """Add each line of *filename* to *exclude_lines*.

    The file is read as UTF-8 and every line has its trailing whitespace
    (including the newline) removed before it is stored. The set is updated
    in place.
    """
    with open(filename, encoding="utf-8") as handle:
        for raw_line in handle:
            exclude_lines.add(raw_line.rstrip())
671699

672700

def build_ignore_words(
    filename: str, ignore_words: Set[str], ignore_words_cased: Set[str]
) -> None:
    """Read ignore words from *filename*, one per line, into the two sets.

    The file is read as UTF-8. Each stripped line is handed to
    ``process_ignore_words``, which adds it to *ignore_words* or to
    *ignore_words_cased*; both sets are updated in place.
    """
    with open(filename, encoding="utf-8") as handle:
        # ``map`` is lazy, so it must be consumed while the file is open.
        stripped_lines = map(str.strip, handle)
        process_ignore_words(stripped_lines, ignore_words, ignore_words_cased)
676708

677709

678710
def add_misspelling(
@@ -865,6 +897,7 @@ def parse_file(
865897
colors: TermColors,
866898
summary: Optional[Summary],
867899
misspellings: Dict[str, Misspelling],
900+
ignore_words_cased: Set[str],
868901
exclude_lines: Set[str],
869902
file_opener: FileOpener,
870903
word_regex: Pattern[str],
@@ -885,6 +918,8 @@ def parse_file(
885918
else:
886919
if options.check_filenames:
887920
for word in extract_words(filename, word_regex, ignore_word_regex):
921+
if word in ignore_words_cased:
922+
continue
888923
lword = word.lower()
889924
if lword not in misspellings:
890925
continue
@@ -958,6 +993,8 @@ def parse_file(
958993
)
959994
for match in check_matches:
960995
word = match.group()
996+
if word in ignore_words_cased:
997+
continue
961998
lword = word.lower()
962999
if lword in misspellings:
9631000
# Sometimes we find a 'misspelling' which is actually a valid word
@@ -1112,7 +1149,10 @@ def main(*args: str) -> int:
11121149
ignore_word_regex = None
11131150

11141151
ignore_words_files = options.ignore_words or []
1115-
ignore_words = parse_ignore_words_option(options.ignore_words_list)
1152+
ignore_words, ignore_words_cased = parse_ignore_words_option(
1153+
options.ignore_words_list
1154+
)
1155+
11161156
for ignore_words_file in ignore_words_files:
11171157
if not os.path.isfile(ignore_words_file):
11181158
print(
@@ -1121,7 +1161,7 @@ def main(*args: str) -> int:
11211161
)
11221162
parser.print_help()
11231163
return EX_USAGE
1124-
build_ignore_words(ignore_words_file, ignore_words)
1164+
build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased)
11251165

11261166
uri_regex = options.uri_regex or uri_regex_def
11271167
try:
@@ -1133,7 +1173,10 @@ def main(*args: str) -> int:
11331173
)
11341174
parser.print_help()
11351175
return EX_USAGE
1136-
uri_ignore_words = parse_ignore_words_option(options.uri_ignore_words_list)
1176+
1177+
uri_ignore_words = set(
1178+
itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list))
1179+
)
11371180

11381181
dictionaries = options.dictionary or ["-"]
11391182

@@ -1242,6 +1285,7 @@ def main(*args: str) -> int:
12421285
colors,
12431286
summary,
12441287
misspellings,
1288+
ignore_words_cased,
12451289
exclude_lines,
12461290
file_opener,
12471291
word_regex,
@@ -1266,6 +1310,7 @@ def main(*args: str) -> int:
12661310
colors,
12671311
summary,
12681312
misspellings,
1313+
ignore_words_cased,
12691314
exclude_lines,
12701315
file_opener,
12711316
word_regex,

codespell_lib/tests/test_basic.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,43 @@ def test_ignore_dictionary(
343343
assert cs.main("-I", fname, bad_name) == 1
344344

345345

def test_ignore_words_with_cases(
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Test case-sensitivity implemented for -I and -L options in #3272."""
    bad_name = tmp_path / "MIS.txt"
    bad_name.write_text(
        "1 MIS (Management Information System) 1\n"
        "2 Les Mis (1980 musical) 2\n"
        "3 mis 3\n"
    )
    # Baseline counts with nothing ignored, without and with -f.
    assert cs.main(bad_name) == 3
    assert cs.main(bad_name, "-f") == 4

    fname = tmp_path / "ignore.txt"
    # Each row: (words to ignore, expected result, expected result with -f).
    # The same words are supplied once through an -I file (one per line) and
    # once through a comma-separated -L list.
    scenarios = (
        (("miS",), 3, 4),
        (("MIS",), 2, 2),
        (("MIS", "Mis"), 1, 1),
        (("mis",), 0, 0),
    )
    for words, expected, expected_with_f in scenarios:
        fname.write_text("\n".join(words))
        list_option = "-L" + ",".join(words)
        assert cs.main("-I", fname, bad_name) == expected
        assert cs.main(list_option, bad_name) == expected
        assert cs.main("-I", fname, "-f", bad_name) == expected_with_f
        assert cs.main(list_option, "-f", bad_name) == expected_with_f
381+
382+
346383
def test_ignore_word_list(
347384
tmp_path: Path,
348385
capsys: pytest.CaptureFixture[str],

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ max-complexity = 45
160160

161161
[tool.ruff.lint.pylint]
162162
allow-magic-value-types = ["bytes", "int", "str",]
163-
max-args = 12
163+
max-args = 13
164164
max-branches = 49
165165
max-returns = 11
166-
max-statements = 111
166+
max-statements = 113

0 commit comments

Comments
 (0)