Skip to content

Commit 4c1346e

Browse files
committed
feat: support sentence split
1 parent eb13cc6 commit 4c1346e

File tree

4 files changed

+95
-22
lines changed

4 files changed

+95
-22
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@
5959
},
6060
"dependencies": {
6161
"kuromojin": "^3.0.0",
62+
"sentence-splitter": "^3.2.1",
6263
"textlint-util-to-string": "^3.1.1"
6364
}
6465
}

src/textlint-rule-no-filler.ts

Lines changed: 26 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
1-
import type { TxtParentNode, TxtTextNode } from "@textlint/ast-node-types";
1+
import type { TxtTextNode } from "@textlint/ast-node-types";
22
import type { TextlintRuleReporter } from "@textlint/types";
33
import { StringSource } from "textlint-util-to-string";
44
import { tokenize } from "kuromojin";
5+
import { splitAST, Syntax as SentenceSyntax, SentenceNode } from "sentence-splitter";
56

67
export type Options = {};
78
/**
@@ -17,7 +18,7 @@ const maskCodeNode = (codeNode: TxtTextNode) => {
1718
value: codeNode.value.replace(/./g, "X")
1819
};
1920
};
20-
const sourceWithoutStyle = (node: TxtParentNode) => {
21+
const sourceWithoutStyle = (node: SentenceNode) => {
2122
const nodeMaskedCode = {
2223
...node,
2324
children: node.children.map((childNode) => {
@@ -34,25 +35,31 @@ const report: TextlintRuleReporter<Options> = (context) => {
3435
const { Syntax, RuleError, report } = context;
3536
return {
3637
async [Syntax.Paragraph](node) {
37-
const source = sourceWithoutStyle(node);
38-
const tokens = await tokenize(source.toString());
39-
tokens.forEach((token) => {
40-
if (token.pos === "フィラー") {
41-
const index = token.word_position - 1;
42-
const originalIndex = source.originalIndexFromIndex(index);
43-
report(
44-
node,
45-
new RuleError(
46-
`フィラー(つなぎ表現)である「${token.surface_form}」を検知しました。
38+
const splitNode = splitAST(node);
39+
const sentences = splitNode.children.filter(
40+
(node) => node.type === SentenceSyntax.Sentence
41+
) as SentenceNode[];
42+
for (const sentence of sentences) {
43+
const source = sourceWithoutStyle(sentence);
44+
const tokens = await tokenize(source.toString());
45+
tokens.forEach((token) => {
46+
if (token.pos === "フィラー") {
47+
const index = token.word_position - 1;
48+
const originalIndex = source.originalIndexFromIndex(index);
49+
report(
50+
sentence,
51+
new RuleError(
52+
`フィラー(つなぎ表現)である「${token.surface_form}」を検知しました。
4753
4854
「えーと」「あの」「まあ」などのつなぎ表現は話し言葉(口語)であるため、文章を読みにくくします。`,
49-
{
50-
index: originalIndex
51-
}
52-
)
53-
);
54-
}
55-
});
55+
{
56+
index: originalIndex
57+
}
58+
)
59+
);
60+
}
61+
});
62+
}
5663
}
5764
};
5865
};

test/textlint-rule-no-filler.test.ts

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ const errorMessage = (word: string) => {
88
「えーと」「あの」「まあ」などのつなぎ表現は話し言葉(口語)であるため、文章を読みにくくします。`;
99
};
1010
tester.run("textlint-rule-no-filler", rule, {
11-
valid: ["これは問題ない文章です。"],
11+
valid: ["これは問題ない文章です。", "`code`と`code`"],
1212
invalid: [
1313
{
1414
text: "えーと、フィラーについてですね。",
@@ -54,6 +54,25 @@ tester.run("textlint-rule-no-filler", rule, {
5454
index: 0
5555
}
5656
]
57+
},
58+
// multiline
59+
{
60+
text: `なんか、これは問題あるかも。
61+
サーバとサーバーの表記揺れがある。
62+
この雇入と雇入れの違いを見つける。
63+
なんか、これは問題あるかも。
64+
`,
65+
errors: [
66+
{
67+
message: errorMessage("なんか"),
68+
index: 0
69+
},
70+
{
71+
message: errorMessage("なんか"),
72+
line: 4,
73+
column: 1
74+
}
75+
]
5776
}
5877
]
5978
});

yarn.lock

Lines changed: 48 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -948,7 +948,7 @@
948948
readdirp "^2.2.1"
949949
upath "^1.1.1"
950950

951-
"@textlint/ast-node-types@^4.2.4", "@textlint/ast-node-types@^4.4.3":
951+
"@textlint/ast-node-types@^4.2.4", "@textlint/ast-node-types@^4.4.2", "@textlint/ast-node-types@^4.4.3":
952952
version "4.4.3"
953953
resolved "https://registry.yarnpkg.com/@textlint/ast-node-types/-/ast-node-types-4.4.3.tgz#fdba16e8126cddc50f45433ce7f6c55e7829566c"
954954
integrity sha512-qi2jjgO6Tn3KNPGnm6B7p6QTEPvY95NFsIAaJuwbulur8iJUEenp1OnoUfiDaC/g2WPPEFkcfXpmnu8XEMFo2A==
@@ -1664,6 +1664,16 @@ concat-stream@^1.6.2:
16641664
readable-stream "^2.2.2"
16651665
typedarray "^0.0.6"
16661666

1667+
concat-stream@^2.0.0:
1668+
version "2.0.0"
1669+
resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-2.0.0.tgz#414cf5af790a48c60ab9be4527d56d5e41133cb1"
1670+
integrity sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==
1671+
dependencies:
1672+
buffer-from "^1.0.0"
1673+
inherits "^2.0.3"
1674+
readable-stream "^3.0.2"
1675+
typedarray "^0.0.6"
1676+
16671677
confirmer@^1.1.2:
16681678
version "1.1.2"
16691679
resolved "https://registry.yarnpkg.com/confirmer/-/confirmer-1.1.2.tgz#df36b3eb5ca5992750de8eea9db24781bb4cc254"
@@ -3238,6 +3248,11 @@ object.pick@^1.3.0:
32383248
dependencies:
32393249
isobject "^3.0.1"
32403250

3251+
object_values@^0.1.2:
3252+
version "0.1.2"
3253+
resolved "https://registry.yarnpkg.com/object_values/-/object_values-0.1.2.tgz#f8fbc31d2e537170a4cbcfb28dd61501b3207334"
3254+
integrity sha512-tZgUiKLraVH+4OAedBYrr4/K6KmAQw2RPNd1AuNdhLsuz5WP3VB7WuiKBWbOcjeqqAjus2ChIIWC8dSfmg7ReA==
3255+
32413256
once@^1.3.0:
32423257
version "1.4.0"
32433258
resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
@@ -3575,6 +3590,15 @@ readable-stream@^2.0.2, readable-stream@^2.2.2:
35753590
string_decoder "~1.1.1"
35763591
util-deprecate "~1.0.1"
35773592

3593+
readable-stream@^3.0.2:
3594+
version "3.6.0"
3595+
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198"
3596+
integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==
3597+
dependencies:
3598+
inherits "^2.0.3"
3599+
string_decoder "^1.1.1"
3600+
util-deprecate "^1.0.1"
3601+
35783602
readdirp@^2.2.1:
35793603
version "2.2.1"
35803604
resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-2.2.1.tgz#0e87622a3325aa33e892285caf8b4e846529a525"
@@ -3798,6 +3822,11 @@ safe-buffer@~5.1.0, safe-buffer@~5.1.1:
37983822
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d"
37993823
integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==
38003824

3825+
safe-buffer@~5.2.0:
3826+
version "5.2.1"
3827+
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
3828+
integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
3829+
38013830
safe-regex@^1.1.0:
38023831
version "1.1.0"
38033832
resolved "https://registry.yarnpkg.com/safe-regex/-/safe-regex-1.1.0.tgz#40a3669f3b077d1e943d44629e157dd48023bf2e"
@@ -3825,6 +3854,16 @@ semver@^6.1.1, semver@^6.1.2, semver@^6.3.0:
38253854
resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d"
38263855
integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==
38273856

3857+
sentence-splitter@^3.2.1:
3858+
version "3.2.1"
3859+
resolved "https://registry.yarnpkg.com/sentence-splitter/-/sentence-splitter-3.2.1.tgz#d6f5b66f4ab130f511d50e41e6f42070e2936ffb"
3860+
integrity sha512-aG+Tf8M1wVUd2uPSUtdMXdJlKZLcdh+oVE8iEn8KwfxYZ87qDpe7+o0nGZdr+96g2H76Qz/8TrG9dIxyp7c70w==
3861+
dependencies:
3862+
"@textlint/ast-node-types" "^4.4.2"
3863+
concat-stream "^2.0.0"
3864+
object_values "^0.1.2"
3865+
structured-source "^3.0.2"
3866+
38283867
set-blocking@^2.0.0:
38293868
version "2.0.0"
38303869
resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7"
@@ -4068,6 +4107,13 @@ string.prototype.trimstart@^1.0.4:
40684107
call-bind "^1.0.2"
40694108
define-properties "^1.1.3"
40704109

4110+
string_decoder@^1.1.1:
4111+
version "1.3.0"
4112+
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e"
4113+
integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==
4114+
dependencies:
4115+
safe-buffer "~5.2.0"
4116+
40714117
string_decoder@~1.1.1:
40724118
version "1.1.1"
40734119
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8"
@@ -4514,7 +4560,7 @@ use@^3.1.0:
45144560
resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f"
45154561
integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==
45164562

4517-
util-deprecate@~1.0.1:
4563+
util-deprecate@^1.0.1, util-deprecate@~1.0.1:
45184564
version "1.0.2"
45194565
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
45204566
integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=

0 commit comments

Comments
 (0)