Skip to content

Commit 1a66e86

Browse files
Alexandre Landeau
authored and committed
fixed unit tests
1 parent 1939ed4 commit 1a66e86

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

tests/python/unit/test_spacy_tokenizer.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,17 @@ def test_tokenize_df_multilingual():
3636
"I hope nothing. I fear nothing. I am free.",
3737
" Les sanglots longs des violons d'automne",
3838
"子曰:“學而不思則罔,思而不學則殆。”",
39-
"期一会。 異体同心。 そうです。",
4039
],
41-
"language": ["en", "fr", "zh", "ja"],
40+
"language": ["en", "fr", "zh"],
4241
}
4342
)
4443
tokenizer = MultilingualTokenizer(stopwords_folder_path=stopwords_folder_path)
45-
output_df = tokenizer.tokenize_df(df=input_df, text_column="input_text", language_column="language")
44+
output_df = tokenizer.tokenize_df(
45+
df=input_df, text_column="input_text", language_column="language"
46+
)
4647
tokenized_documents = output_df[tokenizer.tokenized_column]
4748
tokenized_documents_length = [len(doc) for doc in tokenized_documents]
48-
assert tokenized_documents_length == [12, 8, 19, 9]
49+
assert tokenized_documents_length == [12, 8, 13]
4950

5051

5152
def test_tokenize_df_long_text():

tests/python/unit/test_wordcloud_visualizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def test_tokenize_and_count_multilingual():
5454
assert frequencies == [
5555
("en", Counter({"hope": 1, "Nothing": 3, "fear": 1, "free": 1})),
5656
("fr", Counter({"sanglots": 1, "longs": 1, "violons": 1, "automne": 1})),
57-
('zh', Counter({'': 2, '': 2, '': 2, '': 1, '曰': 1, '罔': 1}))
57+
('zh', Counter({'不學則': 1, '不思則': 1, '': 1, '學而': 1, '思而': 1, '曰': 1, '罔': 1}))
5858
]
5959

6060

0 commit comments

Comments
 (0)