From f32fa0e45314cfecbbb0a9ac44103a97917e7b53 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Tue, 7 Aug 2018 12:09:32 +0200 Subject: [PATCH 1/6] switch to fnv --- Cargo.toml | 21 +++++----- src/constants.rs | 105 ++++++++++++++++++++++++++++------------------- src/lib.rs | 5 +-- 3 files changed, 74 insertions(+), 57 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2a2a036..ccfce3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,19 +1,18 @@ [package] -name = "tinysegmenter" -version = "0.1.1" -authors = [ "woxtu " ] +authors = ["woxtu "] description = "Compact Japanese tokenizer" -repository = "https://github.com/woxtu/rust-tinysegmenter" license = "MIT" - -[dependencies] -lazy_static = "1.0" -maplit = "1.0" +name = "tinysegmenter" +repository = "https://github.com/woxtu/rust-tinysegmenter" +version = "0.1.1" +[[bench]] +name = "benchmark" +path = "benchmark/benchmark.rs" [[test]] name = "test" path = "test/test.rs" -[[bench]] -name = "benchmark" -path = "benchmark/benchmark.rs" +[dependencies] +fnv = "1.0.6" +lazy_static = "1.0" diff --git a/src/constants.rs b/src/constants.rs index d40d615..31bfb77 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -1,3 +1,4 @@ +use fnv::FnvHashMap; const BIAS: i32 = -332; lazy_static! { @@ -9,53 +10,71 @@ lazy_static! { static ref E3: char = unsafe { char::from_u32_unchecked(0x110006) }; } -lazy_static! { - static ref BC1: HashMap<(char, char), i32> = hashmap! { ('H', 'H') => 6, ('I', 'I') => 2461, ('K', 'H') => 406, ('O', 'H') => -1378, }; - static ref BC2: HashMap<(char, char), i32> = hashmap! { ('A', 'A') => -3267, ('A', 'I') => 2744, ('A', 'N') => -878, ('H', 'H') => -4070, ('H', 'M') => -1711, ('H', 'N') => 4012, ('H', 'O') => 3761, ('I', 'A') => 1327, ('I', 'H') => -1184, ('I', 'I') => -1332, ('I', 'K') => 1721, ('I', 'O') => 5492, ('K', 'I') => 3831, ('K', 'K') => -8741, ('M', 'H') => -3132, ('M', 'K') => 3334, ('O', 'O') => -2920, }; - static ref BC3: HashMap<(char, char), i32> = hashmap! 
{ ('H', 'H') => 996, ('H', 'I') => 626, ('H', 'K') => -721, ('H', 'N') => -1307, ('H', 'O') => -836, ('I', 'H') => -301, ('K', 'K') => 2762, ('M', 'K') => 1079, ('M', 'M') => 4034, ('O', 'A') => -1652, ('O', 'H') => 266, }; - static ref BP1: HashMap<(char, char), i32> = hashmap! { ('B', 'B') => 295, ('O', 'B') => 304, ('O', 'O') => -125, ('U', 'B') => 352, }; - static ref BP2: HashMap<(char, char), i32> = hashmap! { ('B', 'O') => 60, ('O', 'O') => -1762, }; - static ref BQ1: HashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => 1150, ('B', 'H', 'M') => 1521, ('B', 'I', 'I') => -1158, ('B', 'I', 'M') => 886, ('B', 'M', 'H') => 1208, ('B', 'N', 'H') => 449, ('B', 'O', 'H') => -91, ('B', 'O', 'O') => -2597, ('O', 'H', 'I') => 451, ('O', 'I', 'H') => -296, ('O', 'K', 'A') => 1851, ('O', 'K', 'H') => -1020, ('O', 'K', 'K') => 904, ('O', 'O', 'O') => 2965, }; - static ref BQ2: HashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => 118, ('B', 'H', 'I') => -1159, ('B', 'H', 'M') => 466, ('B', 'I', 'H') => -919, ('B', 'K', 'K') => -1720, ('B', 'K', 'O') => 864, ('O', 'H', 'H') => -1139, ('O', 'H', 'M') => -181, ('O', 'I', 'H') => 153, ('U', 'H', 'I') => -1146, }; - static ref BQ3: HashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => -792, ('B', 'H', 'I') => 2664, ('B', 'I', 'I') => -299, ('B', 'K', 'I') => 419, ('B', 'M', 'H') => 937, ('B', 'M', 'M') => 8335, ('B', 'N', 'N') => 998, ('B', 'O', 'H') => 775, ('O', 'H', 'H') => 2174, ('O', 'H', 'M') => 439, ('O', 'I', 'I') => 280, ('O', 'K', 'H') => 1798, ('O', 'K', 'I') => -793, ('O', 'K', 'O') => -2242, ('O', 'M', 'H') => -2402, ('O', 'O', 'O') => 11699, }; - static ref BQ4: HashMap<(char, char, char), i32> = hashmap! 
{ ('B', 'H', 'H') => -3895, ('B', 'I', 'H') => 3761, ('B', 'I', 'I') => -4654, ('B', 'I', 'K') => 1348, ('B', 'K', 'K') => -1806, ('B', 'M', 'I') => -3385, ('B', 'O', 'O') => -12396, ('O', 'A', 'H') => 926, ('O', 'H', 'H') => 266, ('O', 'H', 'K') => -2036, ('O', 'N', 'N') => -973, }; - static ref BW1: HashMap<(char, char), i32> = hashmap! { (',', 'と') => 660, (',', '同') => 727, (*B1, 'あ') => 1404, (*B1, '同') => 542, ('、', 'と') => 660, ('、', '同') => 727, ('」', 'と') => 1682, ('あ', 'っ') => 1505, ('い', 'う') => 1743, ('い', 'っ') => -2055, ('い', 'る') => 672, ('う', 'し') => -4817, ('う', 'ん') => 665, ('か', 'ら') => 3472, ('が', 'ら') => 600, ('こ', 'う') => -790, ('こ', 'と') => 2083, ('こ', 'ん') => -1262, ('さ', 'ら') => -4143, ('さ', 'ん') => 4573, ('し', 'た') => 2641, ('し', 'て') => 1104, ('す', 'で') => -3399, ('そ', 'こ') => 1977, ('そ', 'れ') => -871, ('た', 'ち') => 1122, ('た', 'め') => 601, ('っ', 'た') => 3463, ('つ', 'い') => -802, ('て', 'い') => 805, ('て', 'き') => 1249, ('で', 'き') => 1127, ('で', 'す') => 3445, ('で', 'は') => 844, ('と', 'い') => -4915, ('と', 'み') => 1922, ('ど', 'こ') => 3887, ('な', 'い') => 5713, ('な', 'っ') => 3015, ('な', 'ど') => 7379, ('な', 'ん') => -1113, ('に', 'し') => 2468, ('に', 'は') => 1498, ('に', 'も') => 1671, ('に', '対') => -912, ('の', '一') => -501, ('の', '中') => 741, ('ま', 'せ') => 2448, ('ま', 'で') => 1711, ('ま', 'ま') => 2600, ('ま', 'る') => -2155, ('や', 'む') => -1947, ('よ', 'っ') => -2565, ('れ', 'た') => 2369, ('れ', 'で') => -913, ('を', 'し') => 1860, ('を', '見') => 731, ('亡', 'く') => -1886, ('京', '都') => 2558, ('取', 'り') => -2784, ('大', 'き') => -2604, ('大', '阪') => 1497, ('平', '方') => -2314, ('引', 'き') => -1336, ('日', '本') => -195, ('本', '当') => -2423, ('毎', '日') => -2113, ('目', '指') => -724, ('」', 'と') => 1682, }; - static ref BW2: HashMap<(char, char), i32> = hashmap! 
{ ('.', '.') => -11822, ('1', '1') => -669, ('―', '―') => -5730, ('−', '−') => -13175, ('い', 'う') => -1609, ('う', 'か') => 2490, ('か', 'し') => -1350, ('か', 'も') => -602, ('か', 'ら') => -7194, ('か', 'れ') => 4612, ('が', 'い') => 853, ('が', 'ら') => -3198, ('き', 'た') => 1941, ('く', 'な') => -1597, ('こ', 'と') => -8392, ('こ', 'の') => -4193, ('さ', 'せ') => 4533, ('さ', 'れ') => 13168, ('さ', 'ん') => -3977, ('し', 'い') => -1819, ('し', 'か') => -545, ('し', 'た') => 5078, ('し', 'て') => 972, ('し', 'な') => 939, ('そ', 'の') => -3744, ('た', 'い') => -1253, ('た', 'た') => -662, ('た', 'だ') => -3857, ('た', 'ち') => -786, ('た', 'と') => 1224, ('た', 'は') => -939, ('っ', 'た') => 4589, ('っ', 'て') => 1647, ('っ', 'と') => -2094, ('て', 'い') => 6144, ('て', 'き') => 3640, ('て', 'く') => 2551, ('て', 'は') => -3110, ('て', 'も') => -3065, ('で', 'い') => 2666, ('で', 'き') => -1528, ('で', 'し') => -3828, ('で', 'す') => -4761, ('で', 'も') => -4203, ('と', 'い') => 1890, ('と', 'こ') => -1746, ('と', 'と') => -2279, ('と', 'の') => 720, ('と', 'み') => 5168, ('と', 'も') => -3941, ('な', 'い') => -2488, ('な', 'が') => -1313, ('な', 'ど') => -6509, ('な', 'の') => 2614, ('な', 'ん') => 3099, ('に', 'お') => -1615, ('に', 'し') => 2748, ('に', 'な') => 2454, ('に', 'よ') => -7236, ('に', '対') => -14943, ('に', '従') => -4688, ('に', '関') => -11388, ('の', 'か') => 2093, ('の', 'で') => -7059, ('の', 'に') => -6041, ('の', 'の') => -6125, ('は', 'い') => 1073, ('は', 'が') => -1033, ('は', 'ず') => -2532, ('ば', 'れ') => 1813, ('ま', 'し') => -1316, ('ま', 'で') => -6621, ('ま', 'れ') => 5409, ('め', 'て') => -3153, ('も', 'い') => 2230, ('も', 'の') => -10713, ('ら', 'か') => -944, ('ら', 'し') => -1611, ('ら', 'に') => -1897, ('り', 'し') => 651, ('り', 'ま') => 1620, ('れ', 'た') => 4270, ('れ', 'て') => 849, ('れ', 'ば') => 4114, ('ろ', 'う') => 6067, ('わ', 'れ') => 7901, ('を', '通') => -11877, ('ん', 'だ') => 728, ('ん', 'な') => -4115, ('一', '人') => 602, ('一', '方') => -1375, ('一', '日') => 970, ('一', '部') => -1051, ('上', 'が') => -4479, ('会', '社') => -1116, ('出', 'て') => 2163, ('分', 'の') => -7758, ('同', 
'党') => 970, ('同', '日') => -913, ('大', '阪') => -2471, ('委', '員') => -1250, ('少', 'な') => -1050, ('年', '度') => -8669, ('年', '間') => -1626, ('府', '県') => -2363, ('手', '権') => -1982, ('新', '聞') => -4066, ('日', '新') => -722, ('日', '本') => -7068, ('日', '米') => 3372, ('曜', '日') => -601, ('朝', '鮮') => -2355, ('本', '人') => -2697, ('東', '京') => -1543, ('然', 'と') => -1384, ('社', '会') => -1276, ('立', 'て') => -990, ('第', 'に') => -1612, ('米', '国') => -4268, ('1', '1') => -669, ('ク', '゙') => 1319,}; - static ref BW3: HashMap<(char, char), i32> = hashmap! { ('あ', 'た') => -2194, ('あ', 'り') => 719, ('あ', 'る') => 3846, ('い', '.') => -1185, ('い', '。') => -1185, ('い', 'い') => 5308, ('い', 'え') => 2079, ('い', 'く') => 3029, ('い', 'た') => 2056, ('い', 'っ') => 1883, ('い', 'る') => 5600, ('い', 'わ') => 1527, ('う', 'ち') => 1117, ('う', 'と') => 4798, ('え', 'と') => 1454, ('か', '.') => 2857, ('か', '。') => 2857, ('か', 'け') => -743, ('か', 'っ') => -4098, ('か', 'に') => -669, ('か', 'ら') => 6520, ('か', 'り') => -2670, ('が', ',') => 1816, ('が', '、') => 1816, ('が', 'き') => -4855, ('が', 'け') => -1127, ('が', 'っ') => -913, ('が', 'ら') => -4977, ('が', 'り') => -2064, ('き', 'た') => 1645, ('け', 'ど') => 1374, ('こ', 'と') => 7397, ('こ', 'の') => 1542, ('こ', 'ろ') => -2757, ('さ', 'い') => -714, ('さ', 'を') => 976, ('し', ',') => 1557, ('し', '、') => 1557, ('し', 'い') => -3714, ('し', 'た') => 3562, ('し', 'て') => 1449, ('し', 'な') => 2608, ('し', 'ま') => 1200, ('す', '.') => -1310, ('す', '。') => -1310, ('す', 'る') => 6521, ('ず', ',') => 3426, ('ず', '、') => 3426, ('ず', 'に') => 841, ('そ', 'う') => 428, ('た', '.') => 8875, ('た', '。') => 8875, ('た', 'い') => -594, ('た', 'の') => 812, ('た', 'り') => -1183, ('た', 'る') => -853, ('だ', '.') => 4098, ('だ', '。') => 4098, ('だ', 'っ') => 1004, ('っ', 'た') => -4748, ('っ', 'て') => 300, ('て', 'い') => 6240, ('て', 'お') => 855, ('て', 'も') => 302, ('で', 'す') => 1437, ('で', 'に') => -1482, ('で', 'は') => 2295, ('と', 'う') => -1387, ('と', 'し') => 2266, ('と', 'の') => 541, ('と', 'も') => -3543, ('ど', 'う') => 4664, 
('な', 'い') => 1796, ('な', 'く') => -903, ('な', 'ど') => 2135, ('に', ',') => -1021, ('に', '、') => -1021, ('に', 'し') => 1771, ('に', 'な') => 1906, ('に', 'は') => 2644, ('の', ',') => -724, ('の', '、') => -724, ('の', '子') => -1000, ('は', ',') => 1337, ('は', '、') => 1337, ('べ', 'き') => 2181, ('ま', 'し') => 1113, ('ま', 'す') => 6943, ('ま', 'っ') => -1549, ('ま', 'で') => 6154, ('ま', 'れ') => -793, ('ら', 'し') => 1479, ('ら', 'れ') => 6820, ('る', 'る') => 3818, ('れ', ',') => 854, ('れ', '、') => 854, ('れ', 'た') => 1850, ('れ', 'て') => 1375, ('れ', 'ば') => -3246, ('れ', 'る') => 1091, ('わ', 'れ') => -605, ('ん', 'だ') => 606, ('ん', 'で') => 798, ('カ', '月') => 990, ('会', '議') => 860, ('入', 'り') => 1232, ('大', '会') => 2217, ('始', 'め') => 1681, ('市', ' ') => 965, ('新', '聞') => -5055, ('日', ',') => 974, ('日', '、') => 974, ('社', '会') => 2024, ('カ', '月') => 990, }; +macro_rules! hashmap { + (@single $($x:tt)*) => (()); + (@count $($rest:expr),*) => (<[()]>::len(&[$(hashmap!(@single $rest)),*])); + + ($($key:expr => $value:expr,)+) => { hashmap!($($key => $value),+) }; + ($($key:expr => $value:expr),*) => { + { + let _cap = hashmap!(@count $($key),*); + let mut _map = ::fnv::FnvHashMap::with_capacity_and_hasher(_cap, Default::default()); + $( + let _ = _map.insert($key, $value); + )* + _map + } + }; +} + + +lazy_static! { + static ref BC1: FnvHashMap<(char, char), i32> = hashmap! { ('H', 'H') => 6, ('I', 'I') => 2461, ('K', 'H') => 406, ('O', 'H') => -1378, }; + static ref BC2: FnvHashMap<(char, char), i32> = hashmap! { ('A', 'A') => -3267, ('A', 'I') => 2744, ('A', 'N') => -878, ('H', 'H') => -4070, ('H', 'M') => -1711, ('H', 'N') => 4012, ('H', 'O') => 3761, ('I', 'A') => 1327, ('I', 'H') => -1184, ('I', 'I') => -1332, ('I', 'K') => 1721, ('I', 'O') => 5492, ('K', 'I') => 3831, ('K', 'K') => -8741, ('M', 'H') => -3132, ('M', 'K') => 3334, ('O', 'O') => -2920, }; + static ref BC3: FnvHashMap<(char, char), i32> = hashmap! 
{ ('H', 'H') => 996, ('H', 'I') => 626, ('H', 'K') => -721, ('H', 'N') => -1307, ('H', 'O') => -836, ('I', 'H') => -301, ('K', 'K') => 2762, ('M', 'K') => 1079, ('M', 'M') => 4034, ('O', 'A') => -1652, ('O', 'H') => 266, }; + static ref BP1: FnvHashMap<(char, char), i32> = hashmap! { ('B', 'B') => 295, ('O', 'B') => 304, ('O', 'O') => -125, ('U', 'B') => 352, }; + static ref BP2: FnvHashMap<(char, char), i32> = hashmap! { ('B', 'O') => 60, ('O', 'O') => -1762, }; + static ref BQ1: FnvHashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => 1150, ('B', 'H', 'M') => 1521, ('B', 'I', 'I') => -1158, ('B', 'I', 'M') => 886, ('B', 'M', 'H') => 1208, ('B', 'N', 'H') => 449, ('B', 'O', 'H') => -91, ('B', 'O', 'O') => -2597, ('O', 'H', 'I') => 451, ('O', 'I', 'H') => -296, ('O', 'K', 'A') => 1851, ('O', 'K', 'H') => -1020, ('O', 'K', 'K') => 904, ('O', 'O', 'O') => 2965, }; + static ref BQ2: FnvHashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => 118, ('B', 'H', 'I') => -1159, ('B', 'H', 'M') => 466, ('B', 'I', 'H') => -919, ('B', 'K', 'K') => -1720, ('B', 'K', 'O') => 864, ('O', 'H', 'H') => -1139, ('O', 'H', 'M') => -181, ('O', 'I', 'H') => 153, ('U', 'H', 'I') => -1146, }; + static ref BQ3: FnvHashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => -792, ('B', 'H', 'I') => 2664, ('B', 'I', 'I') => -299, ('B', 'K', 'I') => 419, ('B', 'M', 'H') => 937, ('B', 'M', 'M') => 8335, ('B', 'N', 'N') => 998, ('B', 'O', 'H') => 775, ('O', 'H', 'H') => 2174, ('O', 'H', 'M') => 439, ('O', 'I', 'I') => 280, ('O', 'K', 'H') => 1798, ('O', 'K', 'I') => -793, ('O', 'K', 'O') => -2242, ('O', 'M', 'H') => -2402, ('O', 'O', 'O') => 11699, }; + static ref BQ4: FnvHashMap<(char, char, char), i32> = hashmap! 
{ ('B', 'H', 'H') => -3895, ('B', 'I', 'H') => 3761, ('B', 'I', 'I') => -4654, ('B', 'I', 'K') => 1348, ('B', 'K', 'K') => -1806, ('B', 'M', 'I') => -3385, ('B', 'O', 'O') => -12396, ('O', 'A', 'H') => 926, ('O', 'H', 'H') => 266, ('O', 'H', 'K') => -2036, ('O', 'N', 'N') => -973, }; + static ref BW1: FnvHashMap<(char, char), i32> = hashmap! { (',', 'と') => 660, (',', '同') => 727, (*B1, 'あ') => 1404, (*B1, '同') => 542, ('、', 'と') => 660, ('、', '同') => 727, ('」', 'と') => 1682, ('あ', 'っ') => 1505, ('い', 'う') => 1743, ('い', 'っ') => -2055, ('い', 'る') => 672, ('う', 'し') => -4817, ('う', 'ん') => 665, ('か', 'ら') => 3472, ('が', 'ら') => 600, ('こ', 'う') => -790, ('こ', 'と') => 2083, ('こ', 'ん') => -1262, ('さ', 'ら') => -4143, ('さ', 'ん') => 4573, ('し', 'た') => 2641, ('し', 'て') => 1104, ('す', 'で') => -3399, ('そ', 'こ') => 1977, ('そ', 'れ') => -871, ('た', 'ち') => 1122, ('た', 'め') => 601, ('っ', 'た') => 3463, ('つ', 'い') => -802, ('て', 'い') => 805, ('て', 'き') => 1249, ('で', 'き') => 1127, ('で', 'す') => 3445, ('で', 'は') => 844, ('と', 'い') => -4915, ('と', 'み') => 1922, ('ど', 'こ') => 3887, ('な', 'い') => 5713, ('な', 'っ') => 3015, ('な', 'ど') => 7379, ('な', 'ん') => -1113, ('に', 'し') => 2468, ('に', 'は') => 1498, ('に', 'も') => 1671, ('に', '対') => -912, ('の', '一') => -501, ('の', '中') => 741, ('ま', 'せ') => 2448, ('ま', 'で') => 1711, ('ま', 'ま') => 2600, ('ま', 'る') => -2155, ('や', 'む') => -1947, ('よ', 'っ') => -2565, ('れ', 'た') => 2369, ('れ', 'で') => -913, ('を', 'し') => 1860, ('を', '見') => 731, ('亡', 'く') => -1886, ('京', '都') => 2558, ('取', 'り') => -2784, ('大', 'き') => -2604, ('大', '阪') => 1497, ('平', '方') => -2314, ('引', 'き') => -1336, ('日', '本') => -195, ('本', '当') => -2423, ('毎', '日') => -2113, ('目', '指') => -724, ('」', 'と') => 1682, }; + static ref BW2: FnvHashMap<(char, char), i32> = hashmap! 
{ ('.', '.') => -11822, ('1', '1') => -669, ('―', '―') => -5730, ('−', '−') => -13175, ('い', 'う') => -1609, ('う', 'か') => 2490, ('か', 'し') => -1350, ('か', 'も') => -602, ('か', 'ら') => -7194, ('か', 'れ') => 4612, ('が', 'い') => 853, ('が', 'ら') => -3198, ('き', 'た') => 1941, ('く', 'な') => -1597, ('こ', 'と') => -8392, ('こ', 'の') => -4193, ('さ', 'せ') => 4533, ('さ', 'れ') => 13168, ('さ', 'ん') => -3977, ('し', 'い') => -1819, ('し', 'か') => -545, ('し', 'た') => 5078, ('し', 'て') => 972, ('し', 'な') => 939, ('そ', 'の') => -3744, ('た', 'い') => -1253, ('た', 'た') => -662, ('た', 'だ') => -3857, ('た', 'ち') => -786, ('た', 'と') => 1224, ('た', 'は') => -939, ('っ', 'た') => 4589, ('っ', 'て') => 1647, ('っ', 'と') => -2094, ('て', 'い') => 6144, ('て', 'き') => 3640, ('て', 'く') => 2551, ('て', 'は') => -3110, ('て', 'も') => -3065, ('で', 'い') => 2666, ('で', 'き') => -1528, ('で', 'し') => -3828, ('で', 'す') => -4761, ('で', 'も') => -4203, ('と', 'い') => 1890, ('と', 'こ') => -1746, ('と', 'と') => -2279, ('と', 'の') => 720, ('と', 'み') => 5168, ('と', 'も') => -3941, ('な', 'い') => -2488, ('な', 'が') => -1313, ('な', 'ど') => -6509, ('な', 'の') => 2614, ('な', 'ん') => 3099, ('に', 'お') => -1615, ('に', 'し') => 2748, ('に', 'な') => 2454, ('に', 'よ') => -7236, ('に', '対') => -14943, ('に', '従') => -4688, ('に', '関') => -11388, ('の', 'か') => 2093, ('の', 'で') => -7059, ('の', 'に') => -6041, ('の', 'の') => -6125, ('は', 'い') => 1073, ('は', 'が') => -1033, ('は', 'ず') => -2532, ('ば', 'れ') => 1813, ('ま', 'し') => -1316, ('ま', 'で') => -6621, ('ま', 'れ') => 5409, ('め', 'て') => -3153, ('も', 'い') => 2230, ('も', 'の') => -10713, ('ら', 'か') => -944, ('ら', 'し') => -1611, ('ら', 'に') => -1897, ('り', 'し') => 651, ('り', 'ま') => 1620, ('れ', 'た') => 4270, ('れ', 'て') => 849, ('れ', 'ば') => 4114, ('ろ', 'う') => 6067, ('わ', 'れ') => 7901, ('を', '通') => -11877, ('ん', 'だ') => 728, ('ん', 'な') => -4115, ('一', '人') => 602, ('一', '方') => -1375, ('一', '日') => 970, ('一', '部') => -1051, ('上', 'が') => -4479, ('会', '社') => -1116, ('出', 'て') => 2163, ('分', 'の') => -7758, ('同', 
'党') => 970, ('同', '日') => -913, ('大', '阪') => -2471, ('委', '員') => -1250, ('少', 'な') => -1050, ('年', '度') => -8669, ('年', '間') => -1626, ('府', '県') => -2363, ('手', '権') => -1982, ('新', '聞') => -4066, ('日', '新') => -722, ('日', '本') => -7068, ('日', '米') => 3372, ('曜', '日') => -601, ('朝', '鮮') => -2355, ('本', '人') => -2697, ('東', '京') => -1543, ('然', 'と') => -1384, ('社', '会') => -1276, ('立', 'て') => -990, ('第', 'に') => -1612, ('米', '国') => -4268, ('1', '1') => -669, ('ク', '゙') => 1319,}; + static ref BW3: FnvHashMap<(char, char), i32> = hashmap! { ('あ', 'た') => -2194, ('あ', 'り') => 719, ('あ', 'る') => 3846, ('い', '.') => -1185, ('い', '。') => -1185, ('い', 'い') => 5308, ('い', 'え') => 2079, ('い', 'く') => 3029, ('い', 'た') => 2056, ('い', 'っ') => 1883, ('い', 'る') => 5600, ('い', 'わ') => 1527, ('う', 'ち') => 1117, ('う', 'と') => 4798, ('え', 'と') => 1454, ('か', '.') => 2857, ('か', '。') => 2857, ('か', 'け') => -743, ('か', 'っ') => -4098, ('か', 'に') => -669, ('か', 'ら') => 6520, ('か', 'り') => -2670, ('が', ',') => 1816, ('が', '、') => 1816, ('が', 'き') => -4855, ('が', 'け') => -1127, ('が', 'っ') => -913, ('が', 'ら') => -4977, ('が', 'り') => -2064, ('き', 'た') => 1645, ('け', 'ど') => 1374, ('こ', 'と') => 7397, ('こ', 'の') => 1542, ('こ', 'ろ') => -2757, ('さ', 'い') => -714, ('さ', 'を') => 976, ('し', ',') => 1557, ('し', '、') => 1557, ('し', 'い') => -3714, ('し', 'た') => 3562, ('し', 'て') => 1449, ('し', 'な') => 2608, ('し', 'ま') => 1200, ('す', '.') => -1310, ('す', '。') => -1310, ('す', 'る') => 6521, ('ず', ',') => 3426, ('ず', '、') => 3426, ('ず', 'に') => 841, ('そ', 'う') => 428, ('た', '.') => 8875, ('た', '。') => 8875, ('た', 'い') => -594, ('た', 'の') => 812, ('た', 'り') => -1183, ('た', 'る') => -853, ('だ', '.') => 4098, ('だ', '。') => 4098, ('だ', 'っ') => 1004, ('っ', 'た') => -4748, ('っ', 'て') => 300, ('て', 'い') => 6240, ('て', 'お') => 855, ('て', 'も') => 302, ('で', 'す') => 1437, ('で', 'に') => -1482, ('で', 'は') => 2295, ('と', 'う') => -1387, ('と', 'し') => 2266, ('と', 'の') => 541, ('と', 'も') => -3543, ('ど', 'う') => 
4664, ('な', 'い') => 1796, ('な', 'く') => -903, ('な', 'ど') => 2135, ('に', ',') => -1021, ('に', '、') => -1021, ('に', 'し') => 1771, ('に', 'な') => 1906, ('に', 'は') => 2644, ('の', ',') => -724, ('の', '、') => -724, ('の', '子') => -1000, ('は', ',') => 1337, ('は', '、') => 1337, ('べ', 'き') => 2181, ('ま', 'し') => 1113, ('ま', 'す') => 6943, ('ま', 'っ') => -1549, ('ま', 'で') => 6154, ('ま', 'れ') => -793, ('ら', 'し') => 1479, ('ら', 'れ') => 6820, ('る', 'る') => 3818, ('れ', ',') => 854, ('れ', '、') => 854, ('れ', 'た') => 1850, ('れ', 'て') => 1375, ('れ', 'ば') => -3246, ('れ', 'る') => 1091, ('わ', 'れ') => -605, ('ん', 'だ') => 606, ('ん', 'で') => 798, ('カ', '月') => 990, ('会', '議') => 860, ('入', 'り') => 1232, ('大', '会') => 2217, ('始', 'め') => 1681, ('市', ' ') => 965, ('新', '聞') => -5055, ('日', ',') => 974, ('日', '、') => 974, ('社', '会') => 2024, ('カ', '月') => 990, }; } lazy_static! { - static ref TC1: HashMap<(char, char, char), i32> = hashmap! { ('A', 'A', 'A') => 1093, ('H', 'H', 'H') => 1029, ('H', 'H', 'M') => 580, ('H', 'I', 'I') => 998, ('H', 'O', 'H') => -390, ('H', 'O', 'M') => -331, ('I', 'H', 'I') => 1169, ('I', 'O', 'H') => -142, ('I', 'O', 'I') => -1015, ('I', 'O', 'M') => 467, ('M', 'M', 'H') => 187, ('O', 'O', 'I') => -1832, }; - static ref TC2: HashMap<(char, char, char), i32> = hashmap! { ('H', 'H', 'O') => 2088, ('H', 'I', 'I') => -1023, ('H', 'M', 'M') => -1154, ('I', 'H', 'I') => -1965, ('K', 'K', 'H') => 703, ('O', 'I', 'I') => -2649, }; - static ref TC3: HashMap<(char, char, char), i32> = hashmap! 
{ ('A', 'A', 'A') => -294, ('H', 'H', 'H') => 346, ('H', 'H', 'I') => -341, ('H', 'I', 'I') => -1088, ('H', 'I', 'K') => 731, ('H', 'O', 'H') => -1486, ('I', 'H', 'H') => 128, ('I', 'H', 'I') => -3041, ('I', 'H', 'O') => -1935, ('I', 'I', 'H') => -825, ('I', 'I', 'M') => -1035, ('I', 'O', 'I') => -542, ('K', 'H', 'H') => -1216, ('K', 'K', 'A') => 491, ('K', 'K', 'H') => -1217, ('K', 'O', 'K') => -1009, ('M', 'H', 'H') => -2694, ('M', 'H', 'M') => -457, ('M', 'H', 'O') => 123, ('M', 'M', 'H') => -471, ('N', 'N', 'H') => -1689, ('N', 'N', 'O') => 662, ('O', 'H', 'O') => -3393, }; - static ref TC4: HashMap<(char, char, char), i32> = hashmap! { ('H', 'H', 'H') => -203, ('H', 'H', 'I') => 1344, ('H', 'H', 'K') => 365, ('H', 'H', 'M') => -122, ('H', 'H', 'N') => 182, ('H', 'H', 'O') => 669, ('H', 'I', 'H') => 804, ('H', 'I', 'I') => 679, ('H', 'O', 'H') => 446, ('I', 'H', 'H') => 695, ('I', 'H', 'O') => -2324, ('I', 'I', 'H') => 321, ('I', 'I', 'I') => 1497, ('I', 'I', 'O') => 656, ('I', 'O', 'O') => 54, ('K', 'A', 'K') => 4845, ('K', 'K', 'A') => 3386, ('K', 'K', 'K') => 3065, ('M', 'H', 'H') => -405, ('M', 'H', 'I') => 201, ('M', 'M', 'H') => -241, ('M', 'M', 'M') => 661, ('M', 'O', 'M') => 841, }; - static ref TQ1: HashMap<(char, char, char, char), i32> = hashmap! { ('B', 'H', 'H', 'H') => -227, ('B', 'H', 'H', 'I') => 316, ('B', 'H', 'I', 'H') => -132, ('B', 'I', 'H', 'H') => 60, ('B', 'I', 'I', 'I') => 1595, ('B', 'N', 'H', 'H') => -744, ('B', 'O', 'H', 'H') => 225, ('B', 'O', 'O', 'O') => -908, ('O', 'A', 'K', 'K') => 482, ('O', 'H', 'H', 'H') => 281, ('O', 'H', 'I', 'H') => 249, ('O', 'I', 'H', 'I') => 200, ('O', 'I', 'I', 'H') => -68, }; - static ref TQ2: HashMap<(char, char, char, char), i32> = hashmap! { ('B', 'I', 'H', 'H') => -1401, ('B', 'I', 'I', 'I') => -1033, ('B', 'K', 'A', 'K') => -543, ('B', 'O', 'O', 'O') => -5591, }; - static ref TQ3: HashMap<(char, char, char, char), i32> = hashmap! 
{ ('B', 'H', 'H', 'H') => 478, ('B', 'H', 'H', 'M') => -1073, ('B', 'H', 'I', 'H') => 222, ('B', 'H', 'I', 'I') => -504, ('B', 'I', 'I', 'H') => -116, ('B', 'I', 'I', 'I') => -105, ('B', 'M', 'H', 'I') => -863, ('B', 'M', 'H', 'M') => -464, ('B', 'O', 'M', 'H') => 620, ('O', 'H', 'H', 'H') => 346, ('O', 'H', 'H', 'I') => 1729, ('O', 'H', 'I', 'I') => 997, ('O', 'H', 'M', 'H') => 481, ('O', 'I', 'H', 'H') => 623, ('O', 'I', 'I', 'H') => 1344, ('O', 'K', 'A', 'K') => 2792, ('O', 'K', 'H', 'H') => 587, ('O', 'K', 'K', 'A') => 679, ('O', 'O', 'H', 'H') => 110, ('O', 'O', 'I', 'I') => -685, }; - static ref TQ4: HashMap<(char, char, char, char), i32> = hashmap! { ('B', 'H', 'H', 'H') => -721, ('B', 'H', 'H', 'M') => -3604, ('B', 'H', 'I', 'I') => -966, ('B', 'I', 'I', 'H') => -607, ('B', 'I', 'I', 'I') => -2181, ('O', 'A', 'A', 'A') => -2763, ('O', 'A', 'K', 'K') => 180, ('O', 'H', 'H', 'H') => -294, ('O', 'H', 'H', 'I') => 2446, ('O', 'H', 'H', 'O') => 480, ('O', 'H', 'I', 'H') => -1573, ('O', 'I', 'H', 'H') => 1935, ('O', 'I', 'H', 'I') => -493, ('O', 'I', 'I', 'H') => 626, ('O', 'I', 'I', 'I') => -4007, ('O', 'K', 'A', 'K') => -8156, }; - static ref TW1: HashMap<(char, char, char), i32> = hashmap! { ('に', 'つ', 'い') => -4681, ('東', '京', '都') => 2026, }; - static ref TW2: HashMap<(char, char, char), i32> = hashmap! { ('あ', 'る', '程') => -2049, ('い', 'っ', 'た') => -1256, ('こ', 'ろ', 'が') => -2434, ('し', 'ょ', 'う') => 3873, ('そ', 'の', '後') => -4430, ('だ', 'っ', 'て') => -1049, ('て', 'い', 'た') => 1833, ('と', 'し', 'て') => -4657, ('と', 'も', 'に') => -4517, ('も', 'の', 'で') => 1882, ('一', '気', 'に') => -792, ('初', 'め', 'て') => -1512, ('同', '時', 'に') => -8097, ('大', 'き', 'な') => -1255, ('対', 'し', 'て') => -2721, ('社', '会', '党') => -3216, }; - static ref TW3: HashMap<(char, char, char), i32> = hashmap! 
{ ('い', 'た', 'だ') => -1734, ('し', 'て', 'い') => 1314, ('と', 'し', 'て') => -4314, ('に', 'つ', 'い') => -5483, ('に', 'と', 'っ') => -5989, ('に', '当', 'た') => -6247, ('の', 'で', ',') => -727, ('の', 'で', '、') => -727, ('の', 'も', 'の') => -600, ('れ', 'か', 'ら') => -3752, ('十', '二', '月') => -2287, }; - static ref TW4: HashMap<(char, char, char), i32> = hashmap! { ('い', 'う', '.') => 8576, ('い', 'う', '。') => 8576, ('か', 'ら', 'な') => -2348, ('し', 'て', 'い') => 2958, ('た', 'が', ',') => 1516, ('た', 'が', '、') => 1516, ('て', 'い', 'る') => 1538, ('と', 'い', 'う') => 1349, ('ま', 'し', 'た') => 5543, ('ま', 'せ', 'ん') => 1097, ('よ', 'う', 'と') => -4258, ('よ', 'る', 'と') => 5865, }; + static ref TC1: FnvHashMap<(char, char, char), i32> = hashmap! { ('A', 'A', 'A') => 1093, ('H', 'H', 'H') => 1029, ('H', 'H', 'M') => 580, ('H', 'I', 'I') => 998, ('H', 'O', 'H') => -390, ('H', 'O', 'M') => -331, ('I', 'H', 'I') => 1169, ('I', 'O', 'H') => -142, ('I', 'O', 'I') => -1015, ('I', 'O', 'M') => 467, ('M', 'M', 'H') => 187, ('O', 'O', 'I') => -1832, }; + static ref TC2: FnvHashMap<(char, char, char), i32> = hashmap! { ('H', 'H', 'O') => 2088, ('H', 'I', 'I') => -1023, ('H', 'M', 'M') => -1154, ('I', 'H', 'I') => -1965, ('K', 'K', 'H') => 703, ('O', 'I', 'I') => -2649, }; + static ref TC3: FnvHashMap<(char, char, char), i32> = hashmap! { ('A', 'A', 'A') => -294, ('H', 'H', 'H') => 346, ('H', 'H', 'I') => -341, ('H', 'I', 'I') => -1088, ('H', 'I', 'K') => 731, ('H', 'O', 'H') => -1486, ('I', 'H', 'H') => 128, ('I', 'H', 'I') => -3041, ('I', 'H', 'O') => -1935, ('I', 'I', 'H') => -825, ('I', 'I', 'M') => -1035, ('I', 'O', 'I') => -542, ('K', 'H', 'H') => -1216, ('K', 'K', 'A') => 491, ('K', 'K', 'H') => -1217, ('K', 'O', 'K') => -1009, ('M', 'H', 'H') => -2694, ('M', 'H', 'M') => -457, ('M', 'H', 'O') => 123, ('M', 'M', 'H') => -471, ('N', 'N', 'H') => -1689, ('N', 'N', 'O') => 662, ('O', 'H', 'O') => -3393, }; + static ref TC4: FnvHashMap<(char, char, char), i32> = hashmap! 
{ ('H', 'H', 'H') => -203, ('H', 'H', 'I') => 1344, ('H', 'H', 'K') => 365, ('H', 'H', 'M') => -122, ('H', 'H', 'N') => 182, ('H', 'H', 'O') => 669, ('H', 'I', 'H') => 804, ('H', 'I', 'I') => 679, ('H', 'O', 'H') => 446, ('I', 'H', 'H') => 695, ('I', 'H', 'O') => -2324, ('I', 'I', 'H') => 321, ('I', 'I', 'I') => 1497, ('I', 'I', 'O') => 656, ('I', 'O', 'O') => 54, ('K', 'A', 'K') => 4845, ('K', 'K', 'A') => 3386, ('K', 'K', 'K') => 3065, ('M', 'H', 'H') => -405, ('M', 'H', 'I') => 201, ('M', 'M', 'H') => -241, ('M', 'M', 'M') => 661, ('M', 'O', 'M') => 841, }; + static ref TQ1: FnvHashMap<(char, char, char, char), i32> = hashmap! { ('B', 'H', 'H', 'H') => -227, ('B', 'H', 'H', 'I') => 316, ('B', 'H', 'I', 'H') => -132, ('B', 'I', 'H', 'H') => 60, ('B', 'I', 'I', 'I') => 1595, ('B', 'N', 'H', 'H') => -744, ('B', 'O', 'H', 'H') => 225, ('B', 'O', 'O', 'O') => -908, ('O', 'A', 'K', 'K') => 482, ('O', 'H', 'H', 'H') => 281, ('O', 'H', 'I', 'H') => 249, ('O', 'I', 'H', 'I') => 200, ('O', 'I', 'I', 'H') => -68, }; + static ref TQ2: FnvHashMap<(char, char, char, char), i32> = hashmap! { ('B', 'I', 'H', 'H') => -1401, ('B', 'I', 'I', 'I') => -1033, ('B', 'K', 'A', 'K') => -543, ('B', 'O', 'O', 'O') => -5591, }; + static ref TQ3: FnvHashMap<(char, char, char, char), i32> = hashmap! { ('B', 'H', 'H', 'H') => 478, ('B', 'H', 'H', 'M') => -1073, ('B', 'H', 'I', 'H') => 222, ('B', 'H', 'I', 'I') => -504, ('B', 'I', 'I', 'H') => -116, ('B', 'I', 'I', 'I') => -105, ('B', 'M', 'H', 'I') => -863, ('B', 'M', 'H', 'M') => -464, ('B', 'O', 'M', 'H') => 620, ('O', 'H', 'H', 'H') => 346, ('O', 'H', 'H', 'I') => 1729, ('O', 'H', 'I', 'I') => 997, ('O', 'H', 'M', 'H') => 481, ('O', 'I', 'H', 'H') => 623, ('O', 'I', 'I', 'H') => 1344, ('O', 'K', 'A', 'K') => 2792, ('O', 'K', 'H', 'H') => 587, ('O', 'K', 'K', 'A') => 679, ('O', 'O', 'H', 'H') => 110, ('O', 'O', 'I', 'I') => -685, }; + static ref TQ4: FnvHashMap<(char, char, char, char), i32> = hashmap! 
{ ('B', 'H', 'H', 'H') => -721, ('B', 'H', 'H', 'M') => -3604, ('B', 'H', 'I', 'I') => -966, ('B', 'I', 'I', 'H') => -607, ('B', 'I', 'I', 'I') => -2181, ('O', 'A', 'A', 'A') => -2763, ('O', 'A', 'K', 'K') => 180, ('O', 'H', 'H', 'H') => -294, ('O', 'H', 'H', 'I') => 2446, ('O', 'H', 'H', 'O') => 480, ('O', 'H', 'I', 'H') => -1573, ('O', 'I', 'H', 'H') => 1935, ('O', 'I', 'H', 'I') => -493, ('O', 'I', 'I', 'H') => 626, ('O', 'I', 'I', 'I') => -4007, ('O', 'K', 'A', 'K') => -8156, }; + static ref TW1: FnvHashMap<(char, char, char), i32> = hashmap! { ('に', 'つ', 'い') => -4681, ('東', '京', '都') => 2026, }; + static ref TW2: FnvHashMap<(char, char, char), i32> = hashmap! { ('あ', 'る', '程') => -2049, ('い', 'っ', 'た') => -1256, ('こ', 'ろ', 'が') => -2434, ('し', 'ょ', 'う') => 3873, ('そ', 'の', '後') => -4430, ('だ', 'っ', 'て') => -1049, ('て', 'い', 'た') => 1833, ('と', 'し', 'て') => -4657, ('と', 'も', 'に') => -4517, ('も', 'の', 'で') => 1882, ('一', '気', 'に') => -792, ('初', 'め', 'て') => -1512, ('同', '時', 'に') => -8097, ('大', 'き', 'な') => -1255, ('対', 'し', 'て') => -2721, ('社', '会', '党') => -3216, }; + static ref TW3: FnvHashMap<(char, char, char), i32> = hashmap! { ('い', 'た', 'だ') => -1734, ('し', 'て', 'い') => 1314, ('と', 'し', 'て') => -4314, ('に', 'つ', 'い') => -5483, ('に', 'と', 'っ') => -5989, ('に', '当', 'た') => -6247, ('の', 'で', ',') => -727, ('の', 'で', '、') => -727, ('の', 'も', 'の') => -600, ('れ', 'か', 'ら') => -3752, ('十', '二', '月') => -2287, }; + static ref TW4: FnvHashMap<(char, char, char), i32> = hashmap! { ('い', 'う', '.') => 8576, ('い', 'う', '。') => 8576, ('か', 'ら', 'な') => -2348, ('し', 'て', 'い') => 2958, ('た', 'が', ',') => 1516, ('た', 'が', '、') => 1516, ('て', 'い', 'る') => 1538, ('と', 'い', 'う') => 1349, ('ま', 'し', 'た') => 5543, ('ま', 'せ', 'ん') => 1097, ('よ', 'う', 'と') => -4258, ('よ', 'る', 'と') => 5865, }; } lazy_static! { - static ref UC1: HashMap = hashmap! { 'A' => 484, 'K' => 93, 'M' => 645, 'O' => -505, }; - static ref UC2: HashMap = hashmap! 
{ 'A' => 819, 'H' => 1059, 'I' => 409, 'M' => 3987, 'N' => 5775, 'O' => 646, }; - static ref UC3: HashMap = hashmap! { 'A' => -1370, 'I' => 2311, }; - static ref UC4: HashMap = hashmap! { 'A' => -2643, 'H' => 1809, 'I' => -1032, 'K' => -3450, 'M' => 3565, 'N' => 3876, 'O' => 6646, }; - static ref UC5: HashMap = hashmap! { 'H' => 313, 'I' => -1238, 'K' => -799, 'M' => 539, 'O' => -831, }; - static ref UC6: HashMap = hashmap! { 'H' => -506, 'I' => -253, 'K' => 87, 'M' => 247, 'O' => -387, }; - static ref UP1: HashMap = hashmap! { 'O' => -214, }; - static ref UP2: HashMap = hashmap! { 'B' => 69, 'O' => 935, }; - static ref UP3: HashMap = hashmap! { 'B' => 189, }; - static ref UQ1: HashMap<(char, char), i32> = hashmap! { ('B', 'H') => 21, ('B', 'I') => -12, ('B', 'K') => -99, ('B', 'N') => 142, ('B', 'O') => -56, ('O', 'H') => -95, ('O', 'I') => 477, ('O', 'K') => 410, ('O', 'O') => -2422, }; - static ref UQ2: HashMap<(char, char), i32> = hashmap! { ('B', 'H') => 216, ('B', 'I') => 113, ('O', 'K') => 1759, }; - static ref UQ3: HashMap<(char, char), i32> = hashmap! { ('B', 'A') => -479, ('B', 'H') => 42, ('B', 'I') => 1913, ('B', 'K') => -7198, ('B', 'M') => 3160, ('B', 'N') => 6427, ('B', 'O') => 14761, ('O', 'I') => -827, ('O', 'N') => -3212, }; - static ref UW1: HashMap = hashmap! { ',' => 156, '、' => 156, '「' => -463, 'あ' => -941, 'う' => -127, 'が' => -553, 'き' => 121, 'こ' => 505, 'で' => -201, 'と' => -547, 'ど' => -123, 'に' => -789, 'の' => -185, 'は' => -847, 'も' => -466, 'や' => -470, 'よ' => 182, 'ら' => -292, 'り' => 208, 'れ' => 169, 'を' => -446, 'ん' => -137, '・' => -135, '主' => -402, '京' => -268, '区' => -912, '午' => 871, '国' => -460, '大' => 561, '委' => 729, '市' => -411, '日' => -141, '理' => 361, '生' => -408, '県' => -386, '都' => -718, '「' => -463, '・' => -135, }; - static ref UW2: HashMap = hashmap! 
{ ',' => -829, '、' => -829, '〇' => 892, '「' => -645, '」' => 3145, 'あ' => -538, 'い' => 505, 'う' => 134, 'お' => -502, 'か' => 1454, 'が' => -856, 'く' => -412, 'こ' => 1141, 'さ' => 878, 'ざ' => 540, 'し' => 1529, 'す' => -675, 'せ' => 300, 'そ' => -1011, 'た' => 188, 'だ' => 1837, 'つ' => -949, 'て' => -291, 'で' => -268, 'と' => -981, 'ど' => 1273, 'な' => 1063, 'に' => -1764, 'の' => 130, 'は' => -409, 'ひ' => -1273, 'べ' => 1261, 'ま' => 600, 'も' => -1263, 'や' => -402, 'よ' => 1639, 'り' => -579, 'る' => -694, 'れ' => 571, 'を' => -2516, 'ん' => 2095, 'ア' => -587, 'カ' => 306, 'キ' => 568, 'ッ' => 831, '三' => -758, '不' => -2150, '世' => -302, '中' => -968, '主' => -861, '事' => 492, '人' => -123, '会' => 978, '保' => 362, '入' => 548, '初' => -3025, '副' => -1566, '北' => -3414, '区' => -422, '大' => -1769, '天' => -865, '太' => -483, '子' => -1519, '学' => 760, '実' => 1023, '小' => -2009, '市' => -813, '年' => -1060, '強' => 1067, '手' => -1519, '揺' => -1033, '政' => 1522, '文' => -1355, '新' => -1682, '日' => -1815, '明' => -1462, '最' => -630, '朝' => -1843, '本' => -1650, '東' => -931, '果' => -665, '次' => -2378, '民' => -180, '気' => -1740, '理' => 752, '発' => 529, '目' => -1584, '相' => -242, '県' => -1165, '立' => -763, '第' => 810, '米' => 509, '自' => -1353, '行' => 838, '西' => -744, '見' => -3874, '調' => 1010, '議' => 1198, '込' => 3041, '開' => 1758, '間' => -1257, '「' => -645, '」' => 3145, 'ッ' => 831, 'ア' => -587, 'カ' => 306, 'キ' => 568, }; - static ref UW3: HashMap = hashmap! 
{ ',' => 4889, '1' => -800, '−' => -1723, '、' => 4889, '々' => -2311, '〇' => 5827, '」' => 2670, '〓' => -3573, 'あ' => -2696, 'い' => 1006, 'う' => 2342, 'え' => 1983, 'お' => -4864, 'か' => -1163, 'が' => 3271, 'く' => 1004, 'け' => 388, 'げ' => 401, 'こ' => -3552, 'ご' => -3116, 'さ' => -1058, 'し' => -395, 'す' => 584, 'せ' => 3685, 'そ' => -5228, 'た' => 842, 'ち' => -521, 'っ' => -1444, 'つ' => -1081, 'て' => 6167, 'で' => 2318, 'と' => 1691, 'ど' => -899, 'な' => -2788, 'に' => 2745, 'の' => 4056, 'は' => 4555, 'ひ' => -2171, 'ふ' => -1798, 'へ' => 1199, 'ほ' => -5516, 'ま' => -4384, 'み' => -120, 'め' => 1205, 'も' => 2323, 'や' => -788, 'よ' => -202, 'ら' => 727, 'り' => 649, 'る' => 5905, 'れ' => 2773, 'わ' => -1207, 'を' => 6620, 'ん' => -518, 'ア' => 551, 'グ' => 1319, 'ス' => 874, 'ッ' => -1350, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, '・' => -3794, '一' => -1619, '下' => -1759, '世' => -2087, '両' => 3815, '中' => 653, '主' => -758, '予' => -1193, '二' => 974, '人' => 2742, '今' => 792, '他' => 1889, '以' => -1368, '低' => 811, '何' => 4265, '作' => -361, '保' => -2439, '元' => 4858, '党' => 3593, '全' => 1574, '公' => -3030, '六' => 755, '共' => -1880, '円' => 5807, '再' => 3095, '分' => 457, '初' => 2475, '別' => 1129, '前' => 2286, '副' => 4437, '力' => 365, '動' => -949, '務' => -1872, '化' => 1327, '北' => -1038, '区' => 4646, '千' => -2309, '午' => -783, '協' => -1006, '口' => 483, '右' => 1233, '各' => 3588, '合' => -241, '同' => 3906, '和' => -837, '員' => 4513, '国' => 642, '型' => 1389, '場' => 1219, '外' => -241, '妻' => 2016, '学' => -1356, '安' => -423, '実' => -1008, '家' => 1078, '小' => -513, '少' => -3102, '州' => 1155, '市' => 3197, '平' => -1804, '年' => 2416, '広' => -1030, '府' => 1605, '度' => 1452, '建' => -2352, '当' => -3885, '得' => 1905, '思' => -1291, '性' => 1822, '戸' => -488, '指' => -3973, '政' => -2013, '教' => -1479, '数' => 3222, '文' => -1489, '新' => 1764, '日' => 2099, '旧' => 5792, '昨' => -661, '時' => -1248, '曜' => -951, '最' => -937, '月' => 4125, '期' => 360, '李' => 3094, '村' => 364, '東' => -805, '核' => 5156, '森' => 
2438, '業' => 484, '氏' => 2613, '民' => -1694, '決' => -1073, '法' => 1868, '海' => -495, '無' => 979, '物' => 461, '特' => -3850, '生' => -273, '用' => 914, '町' => 1215, '的' => 7313, '直' => -1835, '省' => 792, '県' => 6293, '知' => -1528, '私' => 4231, '税' => 401, '立' => -960, '第' => 1201, '米' => 7767, '系' => 3066, '約' => 3663, '級' => 1384, '統' => -4229, '総' => 1163, '線' => 1255, '者' => 6457, '能' => 725, '自' => -2869, '英' => 785, '見' => 1044, '調' => -562, '財' => -733, '費' => 1777, '車' => 1835, '軍' => 1375, '込' => -1504, '通' => -1136, '選' => -681, '郎' => 1026, '郡' => 4404, '部' => 1200, '金' => 2163, '長' => 421, '開' => -1432, '間' => 1302, '関' => -1282, '雨' => 2009, '電' => -1045, '非' => 2066, '駅' => 1620, '1' => -800, '」' => 2670, '・' => -3794, 'ッ' => -1350, 'ア' => 551, 'ス' => 874, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, }; - static ref UW4: HashMap = hashmap! { ',' => 3930, '.' => 3508, '―' => -4841, '、' => 3930, '。' => 3508, '〇' => 4999, '「' => 1895, '」' => 3798, '〓' => -5156, 'あ' => 4752, 'い' => -3435, 'う' => -640, 'え' => -2514, 'お' => 2405, 'か' => 530, 'が' => 6006, 'き' => -4482, 'ぎ' => -3821, 'く' => -3788, 'け' => -4376, 'げ' => -4734, 'こ' => 2255, 'ご' => 1979, 'さ' => 2864, 'し' => -843, 'じ' => -2506, 'す' => -731, 'ず' => 1251, 'せ' => 181, 'そ' => 4091, 'た' => 5034, 'だ' => 5408, 'ち' => -3654, 'っ' => -5882, 'つ' => -1659, 'て' => 3994, 'で' => 7410, 'と' => 4547, 'な' => 5433, 'に' => 6499, 'ぬ' => 1853, 'ね' => 1413, 'の' => 7396, 'は' => 8578, 'ば' => 1940, 'ひ' => 4249, 'び' => -4134, 'ふ' => 1345, 'へ' => 6665, 'べ' => -744, 'ほ' => 1464, 'ま' => 1051, 'み' => -2082, 'む' => -882, 'め' => -5046, 'も' => 4169, 'ゃ' => -2666, 'や' => 2795, 'ょ' => -1544, 'よ' => 3351, 'ら' => -2922, 'り' => -9726, 'る' => -14896, 'れ' => -2613, 'ろ' => -4570, 'わ' => -1783, 'を' => 13150, 'ん' => -2352, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ッ' => -724, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, '・' => -4371, 'ー' => -11870, '一' => -2069, '中' => 2210, '予' => 782, '事' => 
-190, '井' => -1768, '人' => 1036, '以' => 544, '会' => 950, '体' => -1286, '作' => 530, '側' => 4292, '先' => 601, '党' => -2006, '共' => -1212, '内' => 584, '円' => 788, '初' => 1347, '前' => 1623, '副' => 3879, '力' => -302, '動' => -740, '務' => -2715, '化' => 776, '区' => 4517, '協' => 1013, '参' => 1555, '合' => -1834, '和' => -681, '員' => -910, '器' => -851, '回' => 1500, '国' => -619, '園' => -1200, '地' => 866, '場' => -1410, '塁' => -2094, '士' => -1413, '多' => 1067, '大' => 571, '子' => -4802, '学' => -1397, '定' => -1057, '寺' => -809, '小' => 1910, '屋' => -1328, '山' => -1500, '島' => -2056, '川' => -2667, '市' => 2771, '年' => 374, '庁' => -4556, '後' => 456, '性' => 553, '感' => 916, '所' => -1566, '支' => 856, '改' => 787, '政' => 2182, '教' => 704, '文' => 522, '方' => -856, '日' => 1798, '時' => 1829, '最' => 845, '月' => -9066, '木' => -485, '来' => -442, '校' => -360, '業' => -1043, '氏' => 5388, '民' => -2716, '気' => -910, '沢' => -939, '済' => -543, '物' => -735, '率' => 672, '球' => -1267, '生' => -1286, '産' => -1101, '田' => -2900, '町' => 1826, '的' => 2586, '目' => 922, '省' => -3485, '県' => 2997, '空' => -867, '立' => -2112, '第' => 788, '米' => 2937, '系' => 786, '約' => 2171, '経' => 1146, '統' => -1169, '総' => 940, '線' => -994, '署' => 749, '者' => 2145, '能' => -730, '般' => -852, '行' => -792, '規' => 792, '警' => -1184, '議' => -244, '谷' => -1000, '賞' => 730, '車' => -1481, '軍' => 1158, '輪' => -1433, '込' => -3370, '近' => 929, '道' => -1291, '選' => 2596, '郎' => -4866, '都' => 1192, '野' => -1100, '銀' => -2213, '長' => 357, '間' => -2344, '院' => -2297, '際' => -2604, '電' => -878, '領' => -1659, '題' => -792, '館' => -1984, '首' => 1749, '高' => 2120, '「' => 1895, '」' => 3798, '・' => -4371, 'ッ' => -724, 'ー' => -11870, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, }; - static ref UW5: HashMap = hashmap! { ',' => 465, '.' 
=> -299, '1' => -514, *E2 => -32768, ']' => -2762, '、' => 465, '。' => -299, '「' => 363, 'あ' => 1655, 'い' => 331, 'う' => -503, 'え' => 1199, 'お' => 527, 'か' => 647, 'が' => -421, 'き' => 1624, 'ぎ' => 1971, 'く' => 312, 'げ' => -983, 'さ' => -1537, 'し' => -1371, 'す' => -852, 'だ' => -1186, 'ち' => 1093, 'っ' => 52, 'つ' => 921, 'て' => -18, 'で' => -850, 'と' => -127, 'ど' => 1682, 'な' => -787, 'に' => -1224, 'の' => -635, 'は' => -578, 'べ' => 1001, 'み' => 502, 'め' => 865, 'ゃ' => 3350, 'ょ' => 854, 'り' => -208, 'る' => 429, 'れ' => 504, 'わ' => 419, 'を' => -1264, 'ん' => 327, 'イ' => 241, 'ル' => 451, 'ン' => -343, '中' => -871, '京' => 722, '会' => -1153, '党' => -654, '務' => 3519, '区' => -901, '告' => 848, '員' => 2104, '大' => -1296, '学' => -548, '定' => 1785, '嵐' => -1304, '市' => -2991, '席' => 921, '年' => 1763, '思' => 872, '所' => -814, '挙' => 1618, '新' => -1682, '日' => 218, '月' => -4353, '査' => 932, '格' => 1356, '機' => -1508, '氏' => -1347, '田' => 240, '町' => -3912, '的' => -3149, '相' => 1319, '省' => -1052, '県' => -4003, '研' => -997, '社' => -278, '空' => -813, '統' => 1955, '者' => -2233, '表' => 663, '語' => -1073, '議' => 1219, '選' => -1018, '郎' => -368, '長' => 786, '間' => 1191, '題' => 2368, '館' => -689, '1' => -514, '「' => 363, 'イ' => 241, 'ル' => 451, 'ン' => -343, }; - static ref UW6: HashMap = hashmap! { ',' => 227, '.' => 808, '1' => -270, *E1 => 306, '、' => 227, '。' => 808, 'あ' => -307, 'う' => 189, 'か' => 241, 'が' => -73, 'く' => -121, 'こ' => -200, 'じ' => 1782, 'す' => 383, 'た' => -428, 'っ' => 573, 'て' => -1014, 'で' => 101, 'と' => -105, 'な' => -253, 'に' => -149, 'の' => -417, 'は' => -236, 'も' => -206, 'り' => 187, 'る' => -135, 'を' => 195, 'ル' => -673, 'ン' => -496, '一' => -277, '中' => 201, '件' => -800, '会' => 624, '前' => 302, '区' => 1792, '員' => -1212, '委' => 798, '学' => -960, '市' => 887, '広' => -695, '後' => 535, '業' => -697, '相' => 753, '社' => -507, '福' => 974, '空' => -822, '者' => 1811, '連' => 463, '郎' => 1082, '1' => -270, 'ル' => -673, 'ン' => -496, }; + static ref UC1: FnvHashMap = hashmap! 
{ 'A' => 484, 'K' => 93, 'M' => 645, 'O' => -505, }; + static ref UC2: FnvHashMap = hashmap! { 'A' => 819, 'H' => 1059, 'I' => 409, 'M' => 3987, 'N' => 5775, 'O' => 646, }; + static ref UC3: FnvHashMap = hashmap! { 'A' => -1370, 'I' => 2311, }; + static ref UC4: FnvHashMap = hashmap! { 'A' => -2643, 'H' => 1809, 'I' => -1032, 'K' => -3450, 'M' => 3565, 'N' => 3876, 'O' => 6646, }; + static ref UC5: FnvHashMap = hashmap! { 'H' => 313, 'I' => -1238, 'K' => -799, 'M' => 539, 'O' => -831, }; + static ref UC6: FnvHashMap = hashmap! { 'H' => -506, 'I' => -253, 'K' => 87, 'M' => 247, 'O' => -387, }; + static ref UP1: FnvHashMap = hashmap! { 'O' => -214, }; + static ref UP2: FnvHashMap = hashmap! { 'B' => 69, 'O' => 935, }; + static ref UP3: FnvHashMap = hashmap! { 'B' => 189, }; + static ref UQ1: FnvHashMap<(char, char), i32> = hashmap! { ('B', 'H') => 21, ('B', 'I') => -12, ('B', 'K') => -99, ('B', 'N') => 142, ('B', 'O') => -56, ('O', 'H') => -95, ('O', 'I') => 477, ('O', 'K') => 410, ('O', 'O') => -2422, }; + static ref UQ2: FnvHashMap<(char, char), i32> = hashmap! { ('B', 'H') => 216, ('B', 'I') => 113, ('O', 'K') => 1759, }; + static ref UQ3: FnvHashMap<(char, char), i32> = hashmap! { ('B', 'A') => -479, ('B', 'H') => 42, ('B', 'I') => 1913, ('B', 'K') => -7198, ('B', 'M') => 3160, ('B', 'N') => 6427, ('B', 'O') => 14761, ('O', 'I') => -827, ('O', 'N') => -3212, }; + static ref UW1: FnvHashMap = hashmap! { ',' => 156, '、' => 156, '「' => -463, 'あ' => -941, 'う' => -127, 'が' => -553, 'き' => 121, 'こ' => 505, 'で' => -201, 'と' => -547, 'ど' => -123, 'に' => -789, 'の' => -185, 'は' => -847, 'も' => -466, 'や' => -470, 'よ' => 182, 'ら' => -292, 'り' => 208, 'れ' => 169, 'を' => -446, 'ん' => -137, '・' => -135, '主' => -402, '京' => -268, '区' => -912, '午' => 871, '国' => -460, '大' => 561, '委' => 729, '市' => -411, '日' => -141, '理' => 361, '生' => -408, '県' => -386, '都' => -718, '「' => -463, '・' => -135, }; + static ref UW2: FnvHashMap = hashmap! 
{ ',' => -829, '、' => -829, '〇' => 892, '「' => -645, '」' => 3145, 'あ' => -538, 'い' => 505, 'う' => 134, 'お' => -502, 'か' => 1454, 'が' => -856, 'く' => -412, 'こ' => 1141, 'さ' => 878, 'ざ' => 540, 'し' => 1529, 'す' => -675, 'せ' => 300, 'そ' => -1011, 'た' => 188, 'だ' => 1837, 'つ' => -949, 'て' => -291, 'で' => -268, 'と' => -981, 'ど' => 1273, 'な' => 1063, 'に' => -1764, 'の' => 130, 'は' => -409, 'ひ' => -1273, 'べ' => 1261, 'ま' => 600, 'も' => -1263, 'や' => -402, 'よ' => 1639, 'り' => -579, 'る' => -694, 'れ' => 571, 'を' => -2516, 'ん' => 2095, 'ア' => -587, 'カ' => 306, 'キ' => 568, 'ッ' => 831, '三' => -758, '不' => -2150, '世' => -302, '中' => -968, '主' => -861, '事' => 492, '人' => -123, '会' => 978, '保' => 362, '入' => 548, '初' => -3025, '副' => -1566, '北' => -3414, '区' => -422, '大' => -1769, '天' => -865, '太' => -483, '子' => -1519, '学' => 760, '実' => 1023, '小' => -2009, '市' => -813, '年' => -1060, '強' => 1067, '手' => -1519, '揺' => -1033, '政' => 1522, '文' => -1355, '新' => -1682, '日' => -1815, '明' => -1462, '最' => -630, '朝' => -1843, '本' => -1650, '東' => -931, '果' => -665, '次' => -2378, '民' => -180, '気' => -1740, '理' => 752, '発' => 529, '目' => -1584, '相' => -242, '県' => -1165, '立' => -763, '第' => 810, '米' => 509, '自' => -1353, '行' => 838, '西' => -744, '見' => -3874, '調' => 1010, '議' => 1198, '込' => 3041, '開' => 1758, '間' => -1257, '「' => -645, '」' => 3145, 'ッ' => 831, 'ア' => -587, 'カ' => 306, 'キ' => 568, }; + static ref UW3: FnvHashMap = hashmap! 
{ ',' => 4889, '1' => -800, '−' => -1723, '、' => 4889, '々' => -2311, '〇' => 5827, '」' => 2670, '〓' => -3573, 'あ' => -2696, 'い' => 1006, 'う' => 2342, 'え' => 1983, 'お' => -4864, 'か' => -1163, 'が' => 3271, 'く' => 1004, 'け' => 388, 'げ' => 401, 'こ' => -3552, 'ご' => -3116, 'さ' => -1058, 'し' => -395, 'す' => 584, 'せ' => 3685, 'そ' => -5228, 'た' => 842, 'ち' => -521, 'っ' => -1444, 'つ' => -1081, 'て' => 6167, 'で' => 2318, 'と' => 1691, 'ど' => -899, 'な' => -2788, 'に' => 2745, 'の' => 4056, 'は' => 4555, 'ひ' => -2171, 'ふ' => -1798, 'へ' => 1199, 'ほ' => -5516, 'ま' => -4384, 'み' => -120, 'め' => 1205, 'も' => 2323, 'や' => -788, 'よ' => -202, 'ら' => 727, 'り' => 649, 'る' => 5905, 'れ' => 2773, 'わ' => -1207, 'を' => 6620, 'ん' => -518, 'ア' => 551, 'グ' => 1319, 'ス' => 874, 'ッ' => -1350, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, '・' => -3794, '一' => -1619, '下' => -1759, '世' => -2087, '両' => 3815, '中' => 653, '主' => -758, '予' => -1193, '二' => 974, '人' => 2742, '今' => 792, '他' => 1889, '以' => -1368, '低' => 811, '何' => 4265, '作' => -361, '保' => -2439, '元' => 4858, '党' => 3593, '全' => 1574, '公' => -3030, '六' => 755, '共' => -1880, '円' => 5807, '再' => 3095, '分' => 457, '初' => 2475, '別' => 1129, '前' => 2286, '副' => 4437, '力' => 365, '動' => -949, '務' => -1872, '化' => 1327, '北' => -1038, '区' => 4646, '千' => -2309, '午' => -783, '協' => -1006, '口' => 483, '右' => 1233, '各' => 3588, '合' => -241, '同' => 3906, '和' => -837, '員' => 4513, '国' => 642, '型' => 1389, '場' => 1219, '外' => -241, '妻' => 2016, '学' => -1356, '安' => -423, '実' => -1008, '家' => 1078, '小' => -513, '少' => -3102, '州' => 1155, '市' => 3197, '平' => -1804, '年' => 2416, '広' => -1030, '府' => 1605, '度' => 1452, '建' => -2352, '当' => -3885, '得' => 1905, '思' => -1291, '性' => 1822, '戸' => -488, '指' => -3973, '政' => -2013, '教' => -1479, '数' => 3222, '文' => -1489, '新' => 1764, '日' => 2099, '旧' => 5792, '昨' => -661, '時' => -1248, '曜' => -951, '最' => -937, '月' => 4125, '期' => 360, '李' => 3094, '村' => 364, '東' => -805, '核' => 5156, '森' => 
2438, '業' => 484, '氏' => 2613, '民' => -1694, '決' => -1073, '法' => 1868, '海' => -495, '無' => 979, '物' => 461, '特' => -3850, '生' => -273, '用' => 914, '町' => 1215, '的' => 7313, '直' => -1835, '省' => 792, '県' => 6293, '知' => -1528, '私' => 4231, '税' => 401, '立' => -960, '第' => 1201, '米' => 7767, '系' => 3066, '約' => 3663, '級' => 1384, '統' => -4229, '総' => 1163, '線' => 1255, '者' => 6457, '能' => 725, '自' => -2869, '英' => 785, '見' => 1044, '調' => -562, '財' => -733, '費' => 1777, '車' => 1835, '軍' => 1375, '込' => -1504, '通' => -1136, '選' => -681, '郎' => 1026, '郡' => 4404, '部' => 1200, '金' => 2163, '長' => 421, '開' => -1432, '間' => 1302, '関' => -1282, '雨' => 2009, '電' => -1045, '非' => 2066, '駅' => 1620, '1' => -800, '」' => 2670, '・' => -3794, 'ッ' => -1350, 'ア' => 551, 'ス' => 874, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, }; + static ref UW4: FnvHashMap = hashmap! { ',' => 3930, '.' => 3508, '―' => -4841, '、' => 3930, '。' => 3508, '〇' => 4999, '「' => 1895, '」' => 3798, '〓' => -5156, 'あ' => 4752, 'い' => -3435, 'う' => -640, 'え' => -2514, 'お' => 2405, 'か' => 530, 'が' => 6006, 'き' => -4482, 'ぎ' => -3821, 'く' => -3788, 'け' => -4376, 'げ' => -4734, 'こ' => 2255, 'ご' => 1979, 'さ' => 2864, 'し' => -843, 'じ' => -2506, 'す' => -731, 'ず' => 1251, 'せ' => 181, 'そ' => 4091, 'た' => 5034, 'だ' => 5408, 'ち' => -3654, 'っ' => -5882, 'つ' => -1659, 'て' => 3994, 'で' => 7410, 'と' => 4547, 'な' => 5433, 'に' => 6499, 'ぬ' => 1853, 'ね' => 1413, 'の' => 7396, 'は' => 8578, 'ば' => 1940, 'ひ' => 4249, 'び' => -4134, 'ふ' => 1345, 'へ' => 6665, 'べ' => -744, 'ほ' => 1464, 'ま' => 1051, 'み' => -2082, 'む' => -882, 'め' => -5046, 'も' => 4169, 'ゃ' => -2666, 'や' => 2795, 'ょ' => -1544, 'よ' => 3351, 'ら' => -2922, 'り' => -9726, 'る' => -14896, 'れ' => -2613, 'ろ' => -4570, 'わ' => -1783, 'を' => 13150, 'ん' => -2352, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ッ' => -724, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, '・' => -4371, 'ー' => -11870, '一' => -2069, '中' => 2210, '予' => 782, '事' => 
-190, '井' => -1768, '人' => 1036, '以' => 544, '会' => 950, '体' => -1286, '作' => 530, '側' => 4292, '先' => 601, '党' => -2006, '共' => -1212, '内' => 584, '円' => 788, '初' => 1347, '前' => 1623, '副' => 3879, '力' => -302, '動' => -740, '務' => -2715, '化' => 776, '区' => 4517, '協' => 1013, '参' => 1555, '合' => -1834, '和' => -681, '員' => -910, '器' => -851, '回' => 1500, '国' => -619, '園' => -1200, '地' => 866, '場' => -1410, '塁' => -2094, '士' => -1413, '多' => 1067, '大' => 571, '子' => -4802, '学' => -1397, '定' => -1057, '寺' => -809, '小' => 1910, '屋' => -1328, '山' => -1500, '島' => -2056, '川' => -2667, '市' => 2771, '年' => 374, '庁' => -4556, '後' => 456, '性' => 553, '感' => 916, '所' => -1566, '支' => 856, '改' => 787, '政' => 2182, '教' => 704, '文' => 522, '方' => -856, '日' => 1798, '時' => 1829, '最' => 845, '月' => -9066, '木' => -485, '来' => -442, '校' => -360, '業' => -1043, '氏' => 5388, '民' => -2716, '気' => -910, '沢' => -939, '済' => -543, '物' => -735, '率' => 672, '球' => -1267, '生' => -1286, '産' => -1101, '田' => -2900, '町' => 1826, '的' => 2586, '目' => 922, '省' => -3485, '県' => 2997, '空' => -867, '立' => -2112, '第' => 788, '米' => 2937, '系' => 786, '約' => 2171, '経' => 1146, '統' => -1169, '総' => 940, '線' => -994, '署' => 749, '者' => 2145, '能' => -730, '般' => -852, '行' => -792, '規' => 792, '警' => -1184, '議' => -244, '谷' => -1000, '賞' => 730, '車' => -1481, '軍' => 1158, '輪' => -1433, '込' => -3370, '近' => 929, '道' => -1291, '選' => 2596, '郎' => -4866, '都' => 1192, '野' => -1100, '銀' => -2213, '長' => 357, '間' => -2344, '院' => -2297, '際' => -2604, '電' => -878, '領' => -1659, '題' => -792, '館' => -1984, '首' => 1749, '高' => 2120, '「' => 1895, '」' => 3798, '・' => -4371, 'ッ' => -724, 'ー' => -11870, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, }; + static ref UW5: FnvHashMap = hashmap! { ',' => 465, '.' 
=> -299, '1' => -514, *E2 => -32768, ']' => -2762, '、' => 465, '。' => -299, '「' => 363, 'あ' => 1655, 'い' => 331, 'う' => -503, 'え' => 1199, 'お' => 527, 'か' => 647, 'が' => -421, 'き' => 1624, 'ぎ' => 1971, 'く' => 312, 'げ' => -983, 'さ' => -1537, 'し' => -1371, 'す' => -852, 'だ' => -1186, 'ち' => 1093, 'っ' => 52, 'つ' => 921, 'て' => -18, 'で' => -850, 'と' => -127, 'ど' => 1682, 'な' => -787, 'に' => -1224, 'の' => -635, 'は' => -578, 'べ' => 1001, 'み' => 502, 'め' => 865, 'ゃ' => 3350, 'ょ' => 854, 'り' => -208, 'る' => 429, 'れ' => 504, 'わ' => 419, 'を' => -1264, 'ん' => 327, 'イ' => 241, 'ル' => 451, 'ン' => -343, '中' => -871, '京' => 722, '会' => -1153, '党' => -654, '務' => 3519, '区' => -901, '告' => 848, '員' => 2104, '大' => -1296, '学' => -548, '定' => 1785, '嵐' => -1304, '市' => -2991, '席' => 921, '年' => 1763, '思' => 872, '所' => -814, '挙' => 1618, '新' => -1682, '日' => 218, '月' => -4353, '査' => 932, '格' => 1356, '機' => -1508, '氏' => -1347, '田' => 240, '町' => -3912, '的' => -3149, '相' => 1319, '省' => -1052, '県' => -4003, '研' => -997, '社' => -278, '空' => -813, '統' => 1955, '者' => -2233, '表' => 663, '語' => -1073, '議' => 1219, '選' => -1018, '郎' => -368, '長' => 786, '間' => 1191, '題' => 2368, '館' => -689, '1' => -514, '「' => 363, 'イ' => 241, 'ル' => 451, 'ン' => -343, }; + static ref UW6: FnvHashMap = hashmap! { ',' => 227, '.' 
=> 808, '1' => -270, *E1 => 306, '、' => 227, '。' => 808, 'あ' => -307, 'う' => 189, 'か' => 241, 'が' => -73, 'く' => -121, 'こ' => -200, 'じ' => 1782, 'す' => 383, 'た' => -428, 'っ' => 573, 'て' => -1014, 'で' => 101, 'と' => -105, 'な' => -253, 'に' => -149, 'の' => -417, 'は' => -236, 'も' => -206, 'り' => 187, 'る' => -135, 'を' => 195, 'ル' => -673, 'ン' => -496, '一' => -277, '中' => 201, '件' => -800, '会' => 624, '前' => 302, '区' => 1792, '員' => -1212, '委' => 798, '学' => -960, '市' => 887, '広' => -695, '後' => 535, '業' => -697, '相' => 753, '社' => -507, '福' => 974, '空' => -822, '者' => 1811, '連' => 463, '郎' => 1082, '1' => -270, 'ル' => -673, 'ン' => -496, }; } \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 85e033e..90ed5df 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,12 @@ #[macro_use] extern crate lazy_static; -#[macro_use] extern crate maplit; +extern crate fnv; use std::char; -use std::collections::HashMap; use std::hash::Hash; include!("constants.rs"); -fn get_score(d: &HashMap, s: &T) -> i32 { +fn get_score(d: &FnvHashMap, s: &T) -> i32 { d.get(s).cloned().unwrap_or(0) } From 4c66bb1319f163c7fe52ae248a359f5e0f3154d1 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 15 Aug 2018 17:10:22 +0900 Subject: [PATCH 2/6] add small benchmark --- benchmark/benchmark.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmark/benchmark.rs b/benchmark/benchmark.rs index 1f77c12..39da21b 100644 --- a/benchmark/benchmark.rs +++ b/benchmark/benchmark.rs @@ -18,3 +18,7 @@ fn run(b: &mut Bencher) { b.iter(|| tinysegmenter::tokenize(&s)); } +#[bench] +fn test_small(b: &mut test::Bencher) { + b.iter(|| tinysegmenter::tokenize("私はおでぶです")) +} \ No newline at end of file From 0cd81f3bbdd7bfdc5c24df2ac6fbf750fa60dcb8 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 16 Aug 2018 09:03:31 +0900 Subject: [PATCH 3/6] switch to match --- src/constants.rs | 71 +------------------------------ src/lib.rs | 108 ++++++++++++++++++++++++++--------------------- 2 files 
changed, 60 insertions(+), 119 deletions(-) diff --git a/src/constants.rs b/src/constants.rs index 31bfb77..97c7023 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -1,6 +1,6 @@ -use fnv::FnvHashMap; const BIAS: i32 = -332; + lazy_static! { static ref B1: char = unsafe { char::from_u32_unchecked(0x110001) }; static ref B2: char = unsafe { char::from_u32_unchecked(0x110002) }; @@ -8,73 +8,4 @@ lazy_static! { static ref E1: char = unsafe { char::from_u32_unchecked(0x110004) }; static ref E2: char = unsafe { char::from_u32_unchecked(0x110005) }; static ref E3: char = unsafe { char::from_u32_unchecked(0x110006) }; -} - -macro_rules! hashmap { - (@single $($x:tt)*) => (()); - (@count $($rest:expr),*) => (<[()]>::len(&[$(hashmap!(@single $rest)),*])); - - ($($key:expr => $value:expr,)+) => { hashmap!($($key => $value),+) }; - ($($key:expr => $value:expr),*) => { - { - let _cap = hashmap!(@count $($key),*); - let mut _map = ::fnv::FnvHashMap::with_capacity_and_hasher(_cap, Default::default()); - $( - let _ = _map.insert($key, $value); - )* - _map - } - }; -} - - -lazy_static! { - static ref BC1: FnvHashMap<(char, char), i32> = hashmap! { ('H', 'H') => 6, ('I', 'I') => 2461, ('K', 'H') => 406, ('O', 'H') => -1378, }; - static ref BC2: FnvHashMap<(char, char), i32> = hashmap! { ('A', 'A') => -3267, ('A', 'I') => 2744, ('A', 'N') => -878, ('H', 'H') => -4070, ('H', 'M') => -1711, ('H', 'N') => 4012, ('H', 'O') => 3761, ('I', 'A') => 1327, ('I', 'H') => -1184, ('I', 'I') => -1332, ('I', 'K') => 1721, ('I', 'O') => 5492, ('K', 'I') => 3831, ('K', 'K') => -8741, ('M', 'H') => -3132, ('M', 'K') => 3334, ('O', 'O') => -2920, }; - static ref BC3: FnvHashMap<(char, char), i32> = hashmap! { ('H', 'H') => 996, ('H', 'I') => 626, ('H', 'K') => -721, ('H', 'N') => -1307, ('H', 'O') => -836, ('I', 'H') => -301, ('K', 'K') => 2762, ('M', 'K') => 1079, ('M', 'M') => 4034, ('O', 'A') => -1652, ('O', 'H') => 266, }; - static ref BP1: FnvHashMap<(char, char), i32> = hashmap! 
{ ('B', 'B') => 295, ('O', 'B') => 304, ('O', 'O') => -125, ('U', 'B') => 352, }; - static ref BP2: FnvHashMap<(char, char), i32> = hashmap! { ('B', 'O') => 60, ('O', 'O') => -1762, }; - static ref BQ1: FnvHashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => 1150, ('B', 'H', 'M') => 1521, ('B', 'I', 'I') => -1158, ('B', 'I', 'M') => 886, ('B', 'M', 'H') => 1208, ('B', 'N', 'H') => 449, ('B', 'O', 'H') => -91, ('B', 'O', 'O') => -2597, ('O', 'H', 'I') => 451, ('O', 'I', 'H') => -296, ('O', 'K', 'A') => 1851, ('O', 'K', 'H') => -1020, ('O', 'K', 'K') => 904, ('O', 'O', 'O') => 2965, }; - static ref BQ2: FnvHashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => 118, ('B', 'H', 'I') => -1159, ('B', 'H', 'M') => 466, ('B', 'I', 'H') => -919, ('B', 'K', 'K') => -1720, ('B', 'K', 'O') => 864, ('O', 'H', 'H') => -1139, ('O', 'H', 'M') => -181, ('O', 'I', 'H') => 153, ('U', 'H', 'I') => -1146, }; - static ref BQ3: FnvHashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => -792, ('B', 'H', 'I') => 2664, ('B', 'I', 'I') => -299, ('B', 'K', 'I') => 419, ('B', 'M', 'H') => 937, ('B', 'M', 'M') => 8335, ('B', 'N', 'N') => 998, ('B', 'O', 'H') => 775, ('O', 'H', 'H') => 2174, ('O', 'H', 'M') => 439, ('O', 'I', 'I') => 280, ('O', 'K', 'H') => 1798, ('O', 'K', 'I') => -793, ('O', 'K', 'O') => -2242, ('O', 'M', 'H') => -2402, ('O', 'O', 'O') => 11699, }; - static ref BQ4: FnvHashMap<(char, char, char), i32> = hashmap! { ('B', 'H', 'H') => -3895, ('B', 'I', 'H') => 3761, ('B', 'I', 'I') => -4654, ('B', 'I', 'K') => 1348, ('B', 'K', 'K') => -1806, ('B', 'M', 'I') => -3385, ('B', 'O', 'O') => -12396, ('O', 'A', 'H') => 926, ('O', 'H', 'H') => 266, ('O', 'H', 'K') => -2036, ('O', 'N', 'N') => -973, }; - static ref BW1: FnvHashMap<(char, char), i32> = hashmap! 
{ (',', 'と') => 660, (',', '同') => 727, (*B1, 'あ') => 1404, (*B1, '同') => 542, ('、', 'と') => 660, ('、', '同') => 727, ('」', 'と') => 1682, ('あ', 'っ') => 1505, ('い', 'う') => 1743, ('い', 'っ') => -2055, ('い', 'る') => 672, ('う', 'し') => -4817, ('う', 'ん') => 665, ('か', 'ら') => 3472, ('が', 'ら') => 600, ('こ', 'う') => -790, ('こ', 'と') => 2083, ('こ', 'ん') => -1262, ('さ', 'ら') => -4143, ('さ', 'ん') => 4573, ('し', 'た') => 2641, ('し', 'て') => 1104, ('す', 'で') => -3399, ('そ', 'こ') => 1977, ('そ', 'れ') => -871, ('た', 'ち') => 1122, ('た', 'め') => 601, ('っ', 'た') => 3463, ('つ', 'い') => -802, ('て', 'い') => 805, ('て', 'き') => 1249, ('で', 'き') => 1127, ('で', 'す') => 3445, ('で', 'は') => 844, ('と', 'い') => -4915, ('と', 'み') => 1922, ('ど', 'こ') => 3887, ('な', 'い') => 5713, ('な', 'っ') => 3015, ('な', 'ど') => 7379, ('な', 'ん') => -1113, ('に', 'し') => 2468, ('に', 'は') => 1498, ('に', 'も') => 1671, ('に', '対') => -912, ('の', '一') => -501, ('の', '中') => 741, ('ま', 'せ') => 2448, ('ま', 'で') => 1711, ('ま', 'ま') => 2600, ('ま', 'る') => -2155, ('や', 'む') => -1947, ('よ', 'っ') => -2565, ('れ', 'た') => 2369, ('れ', 'で') => -913, ('を', 'し') => 1860, ('を', '見') => 731, ('亡', 'く') => -1886, ('京', '都') => 2558, ('取', 'り') => -2784, ('大', 'き') => -2604, ('大', '阪') => 1497, ('平', '方') => -2314, ('引', 'き') => -1336, ('日', '本') => -195, ('本', '当') => -2423, ('毎', '日') => -2113, ('目', '指') => -724, ('」', 'と') => 1682, }; - static ref BW2: FnvHashMap<(char, char), i32> = hashmap! 
{ ('.', '.') => -11822, ('1', '1') => -669, ('―', '―') => -5730, ('−', '−') => -13175, ('い', 'う') => -1609, ('う', 'か') => 2490, ('か', 'し') => -1350, ('か', 'も') => -602, ('か', 'ら') => -7194, ('か', 'れ') => 4612, ('が', 'い') => 853, ('が', 'ら') => -3198, ('き', 'た') => 1941, ('く', 'な') => -1597, ('こ', 'と') => -8392, ('こ', 'の') => -4193, ('さ', 'せ') => 4533, ('さ', 'れ') => 13168, ('さ', 'ん') => -3977, ('し', 'い') => -1819, ('し', 'か') => -545, ('し', 'た') => 5078, ('し', 'て') => 972, ('し', 'な') => 939, ('そ', 'の') => -3744, ('た', 'い') => -1253, ('た', 'た') => -662, ('た', 'だ') => -3857, ('た', 'ち') => -786, ('た', 'と') => 1224, ('た', 'は') => -939, ('っ', 'た') => 4589, ('っ', 'て') => 1647, ('っ', 'と') => -2094, ('て', 'い') => 6144, ('て', 'き') => 3640, ('て', 'く') => 2551, ('て', 'は') => -3110, ('て', 'も') => -3065, ('で', 'い') => 2666, ('で', 'き') => -1528, ('で', 'し') => -3828, ('で', 'す') => -4761, ('で', 'も') => -4203, ('と', 'い') => 1890, ('と', 'こ') => -1746, ('と', 'と') => -2279, ('と', 'の') => 720, ('と', 'み') => 5168, ('と', 'も') => -3941, ('な', 'い') => -2488, ('な', 'が') => -1313, ('な', 'ど') => -6509, ('な', 'の') => 2614, ('な', 'ん') => 3099, ('に', 'お') => -1615, ('に', 'し') => 2748, ('に', 'な') => 2454, ('に', 'よ') => -7236, ('に', '対') => -14943, ('に', '従') => -4688, ('に', '関') => -11388, ('の', 'か') => 2093, ('の', 'で') => -7059, ('の', 'に') => -6041, ('の', 'の') => -6125, ('は', 'い') => 1073, ('は', 'が') => -1033, ('は', 'ず') => -2532, ('ば', 'れ') => 1813, ('ま', 'し') => -1316, ('ま', 'で') => -6621, ('ま', 'れ') => 5409, ('め', 'て') => -3153, ('も', 'い') => 2230, ('も', 'の') => -10713, ('ら', 'か') => -944, ('ら', 'し') => -1611, ('ら', 'に') => -1897, ('り', 'し') => 651, ('り', 'ま') => 1620, ('れ', 'た') => 4270, ('れ', 'て') => 849, ('れ', 'ば') => 4114, ('ろ', 'う') => 6067, ('わ', 'れ') => 7901, ('を', '通') => -11877, ('ん', 'だ') => 728, ('ん', 'な') => -4115, ('一', '人') => 602, ('一', '方') => -1375, ('一', '日') => 970, ('一', '部') => -1051, ('上', 'が') => -4479, ('会', '社') => -1116, ('出', 'て') => 2163, ('分', 'の') => -7758, ('同', 
'党') => 970, ('同', '日') => -913, ('大', '阪') => -2471, ('委', '員') => -1250, ('少', 'な') => -1050, ('年', '度') => -8669, ('年', '間') => -1626, ('府', '県') => -2363, ('手', '権') => -1982, ('新', '聞') => -4066, ('日', '新') => -722, ('日', '本') => -7068, ('日', '米') => 3372, ('曜', '日') => -601, ('朝', '鮮') => -2355, ('本', '人') => -2697, ('東', '京') => -1543, ('然', 'と') => -1384, ('社', '会') => -1276, ('立', 'て') => -990, ('第', 'に') => -1612, ('米', '国') => -4268, ('1', '1') => -669, ('ク', '゙') => 1319,}; - static ref BW3: FnvHashMap<(char, char), i32> = hashmap! { ('あ', 'た') => -2194, ('あ', 'り') => 719, ('あ', 'る') => 3846, ('い', '.') => -1185, ('い', '。') => -1185, ('い', 'い') => 5308, ('い', 'え') => 2079, ('い', 'く') => 3029, ('い', 'た') => 2056, ('い', 'っ') => 1883, ('い', 'る') => 5600, ('い', 'わ') => 1527, ('う', 'ち') => 1117, ('う', 'と') => 4798, ('え', 'と') => 1454, ('か', '.') => 2857, ('か', '。') => 2857, ('か', 'け') => -743, ('か', 'っ') => -4098, ('か', 'に') => -669, ('か', 'ら') => 6520, ('か', 'り') => -2670, ('が', ',') => 1816, ('が', '、') => 1816, ('が', 'き') => -4855, ('が', 'け') => -1127, ('が', 'っ') => -913, ('が', 'ら') => -4977, ('が', 'り') => -2064, ('き', 'た') => 1645, ('け', 'ど') => 1374, ('こ', 'と') => 7397, ('こ', 'の') => 1542, ('こ', 'ろ') => -2757, ('さ', 'い') => -714, ('さ', 'を') => 976, ('し', ',') => 1557, ('し', '、') => 1557, ('し', 'い') => -3714, ('し', 'た') => 3562, ('し', 'て') => 1449, ('し', 'な') => 2608, ('し', 'ま') => 1200, ('す', '.') => -1310, ('す', '。') => -1310, ('す', 'る') => 6521, ('ず', ',') => 3426, ('ず', '、') => 3426, ('ず', 'に') => 841, ('そ', 'う') => 428, ('た', '.') => 8875, ('た', '。') => 8875, ('た', 'い') => -594, ('た', 'の') => 812, ('た', 'り') => -1183, ('た', 'る') => -853, ('だ', '.') => 4098, ('だ', '。') => 4098, ('だ', 'っ') => 1004, ('っ', 'た') => -4748, ('っ', 'て') => 300, ('て', 'い') => 6240, ('て', 'お') => 855, ('て', 'も') => 302, ('で', 'す') => 1437, ('で', 'に') => -1482, ('で', 'は') => 2295, ('と', 'う') => -1387, ('と', 'し') => 2266, ('と', 'の') => 541, ('と', 'も') => -3543, ('ど', 'う') => 
4664, ('な', 'い') => 1796, ('な', 'く') => -903, ('な', 'ど') => 2135, ('に', ',') => -1021, ('に', '、') => -1021, ('に', 'し') => 1771, ('に', 'な') => 1906, ('に', 'は') => 2644, ('の', ',') => -724, ('の', '、') => -724, ('の', '子') => -1000, ('は', ',') => 1337, ('は', '、') => 1337, ('べ', 'き') => 2181, ('ま', 'し') => 1113, ('ま', 'す') => 6943, ('ま', 'っ') => -1549, ('ま', 'で') => 6154, ('ま', 'れ') => -793, ('ら', 'し') => 1479, ('ら', 'れ') => 6820, ('る', 'る') => 3818, ('れ', ',') => 854, ('れ', '、') => 854, ('れ', 'た') => 1850, ('れ', 'て') => 1375, ('れ', 'ば') => -3246, ('れ', 'る') => 1091, ('わ', 'れ') => -605, ('ん', 'だ') => 606, ('ん', 'で') => 798, ('カ', '月') => 990, ('会', '議') => 860, ('入', 'り') => 1232, ('大', '会') => 2217, ('始', 'め') => 1681, ('市', ' ') => 965, ('新', '聞') => -5055, ('日', ',') => 974, ('日', '、') => 974, ('社', '会') => 2024, ('カ', '月') => 990, }; -} - -lazy_static! { - static ref TC1: FnvHashMap<(char, char, char), i32> = hashmap! { ('A', 'A', 'A') => 1093, ('H', 'H', 'H') => 1029, ('H', 'H', 'M') => 580, ('H', 'I', 'I') => 998, ('H', 'O', 'H') => -390, ('H', 'O', 'M') => -331, ('I', 'H', 'I') => 1169, ('I', 'O', 'H') => -142, ('I', 'O', 'I') => -1015, ('I', 'O', 'M') => 467, ('M', 'M', 'H') => 187, ('O', 'O', 'I') => -1832, }; - static ref TC2: FnvHashMap<(char, char, char), i32> = hashmap! { ('H', 'H', 'O') => 2088, ('H', 'I', 'I') => -1023, ('H', 'M', 'M') => -1154, ('I', 'H', 'I') => -1965, ('K', 'K', 'H') => 703, ('O', 'I', 'I') => -2649, }; - static ref TC3: FnvHashMap<(char, char, char), i32> = hashmap! 
{ ('A', 'A', 'A') => -294, ('H', 'H', 'H') => 346, ('H', 'H', 'I') => -341, ('H', 'I', 'I') => -1088, ('H', 'I', 'K') => 731, ('H', 'O', 'H') => -1486, ('I', 'H', 'H') => 128, ('I', 'H', 'I') => -3041, ('I', 'H', 'O') => -1935, ('I', 'I', 'H') => -825, ('I', 'I', 'M') => -1035, ('I', 'O', 'I') => -542, ('K', 'H', 'H') => -1216, ('K', 'K', 'A') => 491, ('K', 'K', 'H') => -1217, ('K', 'O', 'K') => -1009, ('M', 'H', 'H') => -2694, ('M', 'H', 'M') => -457, ('M', 'H', 'O') => 123, ('M', 'M', 'H') => -471, ('N', 'N', 'H') => -1689, ('N', 'N', 'O') => 662, ('O', 'H', 'O') => -3393, }; - static ref TC4: FnvHashMap<(char, char, char), i32> = hashmap! { ('H', 'H', 'H') => -203, ('H', 'H', 'I') => 1344, ('H', 'H', 'K') => 365, ('H', 'H', 'M') => -122, ('H', 'H', 'N') => 182, ('H', 'H', 'O') => 669, ('H', 'I', 'H') => 804, ('H', 'I', 'I') => 679, ('H', 'O', 'H') => 446, ('I', 'H', 'H') => 695, ('I', 'H', 'O') => -2324, ('I', 'I', 'H') => 321, ('I', 'I', 'I') => 1497, ('I', 'I', 'O') => 656, ('I', 'O', 'O') => 54, ('K', 'A', 'K') => 4845, ('K', 'K', 'A') => 3386, ('K', 'K', 'K') => 3065, ('M', 'H', 'H') => -405, ('M', 'H', 'I') => 201, ('M', 'M', 'H') => -241, ('M', 'M', 'M') => 661, ('M', 'O', 'M') => 841, }; - static ref TQ1: FnvHashMap<(char, char, char, char), i32> = hashmap! { ('B', 'H', 'H', 'H') => -227, ('B', 'H', 'H', 'I') => 316, ('B', 'H', 'I', 'H') => -132, ('B', 'I', 'H', 'H') => 60, ('B', 'I', 'I', 'I') => 1595, ('B', 'N', 'H', 'H') => -744, ('B', 'O', 'H', 'H') => 225, ('B', 'O', 'O', 'O') => -908, ('O', 'A', 'K', 'K') => 482, ('O', 'H', 'H', 'H') => 281, ('O', 'H', 'I', 'H') => 249, ('O', 'I', 'H', 'I') => 200, ('O', 'I', 'I', 'H') => -68, }; - static ref TQ2: FnvHashMap<(char, char, char, char), i32> = hashmap! { ('B', 'I', 'H', 'H') => -1401, ('B', 'I', 'I', 'I') => -1033, ('B', 'K', 'A', 'K') => -543, ('B', 'O', 'O', 'O') => -5591, }; - static ref TQ3: FnvHashMap<(char, char, char, char), i32> = hashmap! 
{ ('B', 'H', 'H', 'H') => 478, ('B', 'H', 'H', 'M') => -1073, ('B', 'H', 'I', 'H') => 222, ('B', 'H', 'I', 'I') => -504, ('B', 'I', 'I', 'H') => -116, ('B', 'I', 'I', 'I') => -105, ('B', 'M', 'H', 'I') => -863, ('B', 'M', 'H', 'M') => -464, ('B', 'O', 'M', 'H') => 620, ('O', 'H', 'H', 'H') => 346, ('O', 'H', 'H', 'I') => 1729, ('O', 'H', 'I', 'I') => 997, ('O', 'H', 'M', 'H') => 481, ('O', 'I', 'H', 'H') => 623, ('O', 'I', 'I', 'H') => 1344, ('O', 'K', 'A', 'K') => 2792, ('O', 'K', 'H', 'H') => 587, ('O', 'K', 'K', 'A') => 679, ('O', 'O', 'H', 'H') => 110, ('O', 'O', 'I', 'I') => -685, }; - static ref TQ4: FnvHashMap<(char, char, char, char), i32> = hashmap! { ('B', 'H', 'H', 'H') => -721, ('B', 'H', 'H', 'M') => -3604, ('B', 'H', 'I', 'I') => -966, ('B', 'I', 'I', 'H') => -607, ('B', 'I', 'I', 'I') => -2181, ('O', 'A', 'A', 'A') => -2763, ('O', 'A', 'K', 'K') => 180, ('O', 'H', 'H', 'H') => -294, ('O', 'H', 'H', 'I') => 2446, ('O', 'H', 'H', 'O') => 480, ('O', 'H', 'I', 'H') => -1573, ('O', 'I', 'H', 'H') => 1935, ('O', 'I', 'H', 'I') => -493, ('O', 'I', 'I', 'H') => 626, ('O', 'I', 'I', 'I') => -4007, ('O', 'K', 'A', 'K') => -8156, }; - static ref TW1: FnvHashMap<(char, char, char), i32> = hashmap! { ('に', 'つ', 'い') => -4681, ('東', '京', '都') => 2026, }; - static ref TW2: FnvHashMap<(char, char, char), i32> = hashmap! { ('あ', 'る', '程') => -2049, ('い', 'っ', 'た') => -1256, ('こ', 'ろ', 'が') => -2434, ('し', 'ょ', 'う') => 3873, ('そ', 'の', '後') => -4430, ('だ', 'っ', 'て') => -1049, ('て', 'い', 'た') => 1833, ('と', 'し', 'て') => -4657, ('と', 'も', 'に') => -4517, ('も', 'の', 'で') => 1882, ('一', '気', 'に') => -792, ('初', 'め', 'て') => -1512, ('同', '時', 'に') => -8097, ('大', 'き', 'な') => -1255, ('対', 'し', 'て') => -2721, ('社', '会', '党') => -3216, }; - static ref TW3: FnvHashMap<(char, char, char), i32> = hashmap! 
{ ('い', 'た', 'だ') => -1734, ('し', 'て', 'い') => 1314, ('と', 'し', 'て') => -4314, ('に', 'つ', 'い') => -5483, ('に', 'と', 'っ') => -5989, ('に', '当', 'た') => -6247, ('の', 'で', ',') => -727, ('の', 'で', '、') => -727, ('の', 'も', 'の') => -600, ('れ', 'か', 'ら') => -3752, ('十', '二', '月') => -2287, }; - static ref TW4: FnvHashMap<(char, char, char), i32> = hashmap! { ('い', 'う', '.') => 8576, ('い', 'う', '。') => 8576, ('か', 'ら', 'な') => -2348, ('し', 'て', 'い') => 2958, ('た', 'が', ',') => 1516, ('た', 'が', '、') => 1516, ('て', 'い', 'る') => 1538, ('と', 'い', 'う') => 1349, ('ま', 'し', 'た') => 5543, ('ま', 'せ', 'ん') => 1097, ('よ', 'う', 'と') => -4258, ('よ', 'る', 'と') => 5865, }; -} - -lazy_static! { - static ref UC1: FnvHashMap = hashmap! { 'A' => 484, 'K' => 93, 'M' => 645, 'O' => -505, }; - static ref UC2: FnvHashMap = hashmap! { 'A' => 819, 'H' => 1059, 'I' => 409, 'M' => 3987, 'N' => 5775, 'O' => 646, }; - static ref UC3: FnvHashMap = hashmap! { 'A' => -1370, 'I' => 2311, }; - static ref UC4: FnvHashMap = hashmap! { 'A' => -2643, 'H' => 1809, 'I' => -1032, 'K' => -3450, 'M' => 3565, 'N' => 3876, 'O' => 6646, }; - static ref UC5: FnvHashMap = hashmap! { 'H' => 313, 'I' => -1238, 'K' => -799, 'M' => 539, 'O' => -831, }; - static ref UC6: FnvHashMap = hashmap! { 'H' => -506, 'I' => -253, 'K' => 87, 'M' => 247, 'O' => -387, }; - static ref UP1: FnvHashMap = hashmap! { 'O' => -214, }; - static ref UP2: FnvHashMap = hashmap! { 'B' => 69, 'O' => 935, }; - static ref UP3: FnvHashMap = hashmap! { 'B' => 189, }; - static ref UQ1: FnvHashMap<(char, char), i32> = hashmap! { ('B', 'H') => 21, ('B', 'I') => -12, ('B', 'K') => -99, ('B', 'N') => 142, ('B', 'O') => -56, ('O', 'H') => -95, ('O', 'I') => 477, ('O', 'K') => 410, ('O', 'O') => -2422, }; - static ref UQ2: FnvHashMap<(char, char), i32> = hashmap! { ('B', 'H') => 216, ('B', 'I') => 113, ('O', 'K') => 1759, }; - static ref UQ3: FnvHashMap<(char, char), i32> = hashmap! 
{ ('B', 'A') => -479, ('B', 'H') => 42, ('B', 'I') => 1913, ('B', 'K') => -7198, ('B', 'M') => 3160, ('B', 'N') => 6427, ('B', 'O') => 14761, ('O', 'I') => -827, ('O', 'N') => -3212, }; - static ref UW1: FnvHashMap = hashmap! { ',' => 156, '、' => 156, '「' => -463, 'あ' => -941, 'う' => -127, 'が' => -553, 'き' => 121, 'こ' => 505, 'で' => -201, 'と' => -547, 'ど' => -123, 'に' => -789, 'の' => -185, 'は' => -847, 'も' => -466, 'や' => -470, 'よ' => 182, 'ら' => -292, 'り' => 208, 'れ' => 169, 'を' => -446, 'ん' => -137, '・' => -135, '主' => -402, '京' => -268, '区' => -912, '午' => 871, '国' => -460, '大' => 561, '委' => 729, '市' => -411, '日' => -141, '理' => 361, '生' => -408, '県' => -386, '都' => -718, '「' => -463, '・' => -135, }; - static ref UW2: FnvHashMap = hashmap! { ',' => -829, '、' => -829, '〇' => 892, '「' => -645, '」' => 3145, 'あ' => -538, 'い' => 505, 'う' => 134, 'お' => -502, 'か' => 1454, 'が' => -856, 'く' => -412, 'こ' => 1141, 'さ' => 878, 'ざ' => 540, 'し' => 1529, 'す' => -675, 'せ' => 300, 'そ' => -1011, 'た' => 188, 'だ' => 1837, 'つ' => -949, 'て' => -291, 'で' => -268, 'と' => -981, 'ど' => 1273, 'な' => 1063, 'に' => -1764, 'の' => 130, 'は' => -409, 'ひ' => -1273, 'べ' => 1261, 'ま' => 600, 'も' => -1263, 'や' => -402, 'よ' => 1639, 'り' => -579, 'る' => -694, 'れ' => 571, 'を' => -2516, 'ん' => 2095, 'ア' => -587, 'カ' => 306, 'キ' => 568, 'ッ' => 831, '三' => -758, '不' => -2150, '世' => -302, '中' => -968, '主' => -861, '事' => 492, '人' => -123, '会' => 978, '保' => 362, '入' => 548, '初' => -3025, '副' => -1566, '北' => -3414, '区' => -422, '大' => -1769, '天' => -865, '太' => -483, '子' => -1519, '学' => 760, '実' => 1023, '小' => -2009, '市' => -813, '年' => -1060, '強' => 1067, '手' => -1519, '揺' => -1033, '政' => 1522, '文' => -1355, '新' => -1682, '日' => -1815, '明' => -1462, '最' => -630, '朝' => -1843, '本' => -1650, '東' => -931, '果' => -665, '次' => -2378, '民' => -180, '気' => -1740, '理' => 752, '発' => 529, '目' => -1584, '相' => -242, '県' => -1165, '立' => -763, '第' => 810, '米' => 509, '自' => -1353, '行' => 838, '西' => -744, '見' 
=> -3874, '調' => 1010, '議' => 1198, '込' => 3041, '開' => 1758, '間' => -1257, '「' => -645, '」' => 3145, 'ッ' => 831, 'ア' => -587, 'カ' => 306, 'キ' => 568, }; - static ref UW3: FnvHashMap = hashmap! { ',' => 4889, '1' => -800, '−' => -1723, '、' => 4889, '々' => -2311, '〇' => 5827, '」' => 2670, '〓' => -3573, 'あ' => -2696, 'い' => 1006, 'う' => 2342, 'え' => 1983, 'お' => -4864, 'か' => -1163, 'が' => 3271, 'く' => 1004, 'け' => 388, 'げ' => 401, 'こ' => -3552, 'ご' => -3116, 'さ' => -1058, 'し' => -395, 'す' => 584, 'せ' => 3685, 'そ' => -5228, 'た' => 842, 'ち' => -521, 'っ' => -1444, 'つ' => -1081, 'て' => 6167, 'で' => 2318, 'と' => 1691, 'ど' => -899, 'な' => -2788, 'に' => 2745, 'の' => 4056, 'は' => 4555, 'ひ' => -2171, 'ふ' => -1798, 'へ' => 1199, 'ほ' => -5516, 'ま' => -4384, 'み' => -120, 'め' => 1205, 'も' => 2323, 'や' => -788, 'よ' => -202, 'ら' => 727, 'り' => 649, 'る' => 5905, 'れ' => 2773, 'わ' => -1207, 'を' => 6620, 'ん' => -518, 'ア' => 551, 'グ' => 1319, 'ス' => 874, 'ッ' => -1350, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, '・' => -3794, '一' => -1619, '下' => -1759, '世' => -2087, '両' => 3815, '中' => 653, '主' => -758, '予' => -1193, '二' => 974, '人' => 2742, '今' => 792, '他' => 1889, '以' => -1368, '低' => 811, '何' => 4265, '作' => -361, '保' => -2439, '元' => 4858, '党' => 3593, '全' => 1574, '公' => -3030, '六' => 755, '共' => -1880, '円' => 5807, '再' => 3095, '分' => 457, '初' => 2475, '別' => 1129, '前' => 2286, '副' => 4437, '力' => 365, '動' => -949, '務' => -1872, '化' => 1327, '北' => -1038, '区' => 4646, '千' => -2309, '午' => -783, '協' => -1006, '口' => 483, '右' => 1233, '各' => 3588, '合' => -241, '同' => 3906, '和' => -837, '員' => 4513, '国' => 642, '型' => 1389, '場' => 1219, '外' => -241, '妻' => 2016, '学' => -1356, '安' => -423, '実' => -1008, '家' => 1078, '小' => -513, '少' => -3102, '州' => 1155, '市' => 3197, '平' => -1804, '年' => 2416, '広' => -1030, '府' => 1605, '度' => 1452, '建' => -2352, '当' => -3885, '得' => 1905, '思' => -1291, '性' => 1822, '戸' => -488, '指' => -3973, '政' => -2013, '教' => -1479, '数' => 
3222, '文' => -1489, '新' => 1764, '日' => 2099, '旧' => 5792, '昨' => -661, '時' => -1248, '曜' => -951, '最' => -937, '月' => 4125, '期' => 360, '李' => 3094, '村' => 364, '東' => -805, '核' => 5156, '森' => 2438, '業' => 484, '氏' => 2613, '民' => -1694, '決' => -1073, '法' => 1868, '海' => -495, '無' => 979, '物' => 461, '特' => -3850, '生' => -273, '用' => 914, '町' => 1215, '的' => 7313, '直' => -1835, '省' => 792, '県' => 6293, '知' => -1528, '私' => 4231, '税' => 401, '立' => -960, '第' => 1201, '米' => 7767, '系' => 3066, '約' => 3663, '級' => 1384, '統' => -4229, '総' => 1163, '線' => 1255, '者' => 6457, '能' => 725, '自' => -2869, '英' => 785, '見' => 1044, '調' => -562, '財' => -733, '費' => 1777, '車' => 1835, '軍' => 1375, '込' => -1504, '通' => -1136, '選' => -681, '郎' => 1026, '郡' => 4404, '部' => 1200, '金' => 2163, '長' => 421, '開' => -1432, '間' => 1302, '関' => -1282, '雨' => 2009, '電' => -1045, '非' => 2066, '駅' => 1620, '1' => -800, '」' => 2670, '・' => -3794, 'ッ' => -1350, 'ア' => 551, 'ス' => 874, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, }; - static ref UW4: FnvHashMap = hashmap! { ',' => 3930, '.' 
=> 3508, '―' => -4841, '、' => 3930, '。' => 3508, '〇' => 4999, '「' => 1895, '」' => 3798, '〓' => -5156, 'あ' => 4752, 'い' => -3435, 'う' => -640, 'え' => -2514, 'お' => 2405, 'か' => 530, 'が' => 6006, 'き' => -4482, 'ぎ' => -3821, 'く' => -3788, 'け' => -4376, 'げ' => -4734, 'こ' => 2255, 'ご' => 1979, 'さ' => 2864, 'し' => -843, 'じ' => -2506, 'す' => -731, 'ず' => 1251, 'せ' => 181, 'そ' => 4091, 'た' => 5034, 'だ' => 5408, 'ち' => -3654, 'っ' => -5882, 'つ' => -1659, 'て' => 3994, 'で' => 7410, 'と' => 4547, 'な' => 5433, 'に' => 6499, 'ぬ' => 1853, 'ね' => 1413, 'の' => 7396, 'は' => 8578, 'ば' => 1940, 'ひ' => 4249, 'び' => -4134, 'ふ' => 1345, 'へ' => 6665, 'べ' => -744, 'ほ' => 1464, 'ま' => 1051, 'み' => -2082, 'む' => -882, 'め' => -5046, 'も' => 4169, 'ゃ' => -2666, 'や' => 2795, 'ょ' => -1544, 'よ' => 3351, 'ら' => -2922, 'り' => -9726, 'る' => -14896, 'れ' => -2613, 'ろ' => -4570, 'わ' => -1783, 'を' => 13150, 'ん' => -2352, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ッ' => -724, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, '・' => -4371, 'ー' => -11870, '一' => -2069, '中' => 2210, '予' => 782, '事' => -190, '井' => -1768, '人' => 1036, '以' => 544, '会' => 950, '体' => -1286, '作' => 530, '側' => 4292, '先' => 601, '党' => -2006, '共' => -1212, '内' => 584, '円' => 788, '初' => 1347, '前' => 1623, '副' => 3879, '力' => -302, '動' => -740, '務' => -2715, '化' => 776, '区' => 4517, '協' => 1013, '参' => 1555, '合' => -1834, '和' => -681, '員' => -910, '器' => -851, '回' => 1500, '国' => -619, '園' => -1200, '地' => 866, '場' => -1410, '塁' => -2094, '士' => -1413, '多' => 1067, '大' => 571, '子' => -4802, '学' => -1397, '定' => -1057, '寺' => -809, '小' => 1910, '屋' => -1328, '山' => -1500, '島' => -2056, '川' => -2667, '市' => 2771, '年' => 374, '庁' => -4556, '後' => 456, '性' => 553, '感' => 916, '所' => -1566, '支' => 856, '改' => 787, '政' => 2182, '教' => 704, '文' => 522, '方' => -856, '日' => 1798, '時' => 1829, '最' => 845, '月' => -9066, '木' => -485, '来' => -442, '校' => -360, '業' => -1043, '氏' => 5388, '民' => -2716, '気' => -910, '沢' => 
-939, '済' => -543, '物' => -735, '率' => 672, '球' => -1267, '生' => -1286, '産' => -1101, '田' => -2900, '町' => 1826, '的' => 2586, '目' => 922, '省' => -3485, '県' => 2997, '空' => -867, '立' => -2112, '第' => 788, '米' => 2937, '系' => 786, '約' => 2171, '経' => 1146, '統' => -1169, '総' => 940, '線' => -994, '署' => 749, '者' => 2145, '能' => -730, '般' => -852, '行' => -792, '規' => 792, '警' => -1184, '議' => -244, '谷' => -1000, '賞' => 730, '車' => -1481, '軍' => 1158, '輪' => -1433, '込' => -3370, '近' => 929, '道' => -1291, '選' => 2596, '郎' => -4866, '都' => 1192, '野' => -1100, '銀' => -2213, '長' => 357, '間' => -2344, '院' => -2297, '際' => -2604, '電' => -878, '領' => -1659, '題' => -792, '館' => -1984, '首' => 1749, '高' => 2120, '「' => 1895, '」' => 3798, '・' => -4371, 'ッ' => -724, 'ー' => -11870, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, }; - static ref UW5: FnvHashMap = hashmap! { ',' => 465, '.' => -299, '1' => -514, *E2 => -32768, ']' => -2762, '、' => 465, '。' => -299, '「' => 363, 'あ' => 1655, 'い' => 331, 'う' => -503, 'え' => 1199, 'お' => 527, 'か' => 647, 'が' => -421, 'き' => 1624, 'ぎ' => 1971, 'く' => 312, 'げ' => -983, 'さ' => -1537, 'し' => -1371, 'す' => -852, 'だ' => -1186, 'ち' => 1093, 'っ' => 52, 'つ' => 921, 'て' => -18, 'で' => -850, 'と' => -127, 'ど' => 1682, 'な' => -787, 'に' => -1224, 'の' => -635, 'は' => -578, 'べ' => 1001, 'み' => 502, 'め' => 865, 'ゃ' => 3350, 'ょ' => 854, 'り' => -208, 'る' => 429, 'れ' => 504, 'わ' => 419, 'を' => -1264, 'ん' => 327, 'イ' => 241, 'ル' => 451, 'ン' => -343, '中' => -871, '京' => 722, '会' => -1153, '党' => -654, '務' => 3519, '区' => -901, '告' => 848, '員' => 2104, '大' => -1296, '学' => -548, '定' => 1785, '嵐' => -1304, '市' => -2991, '席' => 921, '年' => 1763, '思' => 872, '所' => -814, '挙' => 1618, '新' => -1682, '日' => 218, '月' => -4353, '査' => 932, '格' => 1356, '機' => -1508, '氏' => -1347, '田' => 240, '町' => -3912, '的' => -3149, '相' => 1319, '省' => -1052, '県' => -4003, '研' => -997, '社' => -278, '空' => -813, 
'統' => 1955, '者' => -2233, '表' => 663, '語' => -1073, '議' => 1219, '選' => -1018, '郎' => -368, '長' => 786, '間' => 1191, '題' => 2368, '館' => -689, '1' => -514, '「' => 363, 'イ' => 241, 'ル' => 451, 'ン' => -343, }; - static ref UW6: FnvHashMap = hashmap! { ',' => 227, '.' => 808, '1' => -270, *E1 => 306, '、' => 227, '。' => 808, 'あ' => -307, 'う' => 189, 'か' => 241, 'が' => -73, 'く' => -121, 'こ' => -200, 'じ' => 1782, 'す' => 383, 'た' => -428, 'っ' => 573, 'て' => -1014, 'で' => 101, 'と' => -105, 'な' => -253, 'に' => -149, 'の' => -417, 'は' => -236, 'も' => -206, 'り' => 187, 'る' => -135, 'を' => 195, 'ル' => -673, 'ン' => -496, '一' => -277, '中' => 201, '件' => -800, '会' => 624, '前' => 302, '区' => 1792, '員' => -1212, '委' => 798, '学' => -960, '市' => 887, '広' => -695, '後' => 535, '業' => -697, '相' => 753, '社' => -507, '福' => 974, '空' => -822, '者' => 1811, '連' => 463, '郎' => 1082, '1' => -270, 'ル' => -673, 'ン' => -496, }; } \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 90ed5df..3ef28d7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,8 @@ #[macro_use] extern crate lazy_static; -extern crate fnv; - use std::char; -use std::hash::Hash; include!("constants.rs"); -fn get_score(d: &FnvHashMap, s: &T) -> i32 { - d.get(s).cloned().unwrap_or(0) -} - fn get_ctype(c: char) -> char { match c as u32 { 0x4E00|0x4E8C|0x4E09|0x56DB|0x4E94|0x516D|0x4E03|0x516B|0x4E5D|0x5341 => 'M', @@ -50,48 +43,64 @@ pub fn tokenize(s: &str) -> Vec { let w = &segments[index - 3 .. index + 3]; let c = &ctypes[index - 3 .. 
index + 3]; - score = score + get_score(&*UP1, &p[0]); - score = score + get_score(&*UP2, &p[1]); - score = score + get_score(&*UP3, &p[2]); - score = score + get_score(&*BP1, &(p[0], p[1])); - score = score + get_score(&*BP2, &(p[1], p[2])); - score = score + get_score(&*UW1, &w[0]); - score = score + get_score(&*UW2, &w[1]); - score = score + get_score(&*UW3, &w[2]); - score = score + get_score(&*UW4, &w[3]); - score = score + get_score(&*UW5, &w[4]); - score = score + get_score(&*UW6, &w[5]); - score = score + get_score(&*BW1, &(w[1], w[2])); - score = score + get_score(&*BW2, &(w[2], w[3])); - score = score + get_score(&*BW3, &(w[3], w[4])); - score = score + get_score(&*TW1, &(w[0], w[1], w[2])); - score = score + get_score(&*TW2, &(w[1], w[2], w[3])); - score = score + get_score(&*TW3, &(w[2], w[3], w[4])); - score = score + get_score(&*TW4, &(w[3], w[4], w[5])); - score = score + get_score(&*UC1, &c[0]); - score = score + get_score(&*UC2, &c[1]); - score = score + get_score(&*UC3, &c[2]); - score = score + get_score(&*UC4, &c[3]); - score = score + get_score(&*UC5, &c[4]); - score = score + get_score(&*UC6, &c[5]); - score = score + get_score(&*BC1, &(c[1], c[2])); - score = score + get_score(&*BC2, &(c[2], c[3])); - score = score + get_score(&*BC3, &(c[3], c[4])); - score = score + get_score(&*TC1, &(c[0], c[1], c[2])); - score = score + get_score(&*TC2, &(c[1], c[2], c[3])); - score = score + get_score(&*TC3, &(c[2], c[3], c[4])); - score = score + get_score(&*TC4, &(c[3], c[4], c[5])); - score = score + get_score(&*UQ1, &(p[0], c[0])); - score = score + get_score(&*UQ2, &(p[1], c[1])); - score = score + get_score(&*UQ3, &(p[2], c[2])); - score = score + get_score(&*BQ1, &(p[1], c[1], c[2])); - score = score + get_score(&*BQ2, &(p[1], c[2], c[3])); - score = score + get_score(&*BQ3, &(p[2], c[1], c[2])); - score = score + get_score(&*BQ4, &(p[2], c[2], c[3])); - score = score + get_score(&*TQ1, &(p[1], c[0], c[1], c[2])); - score = score + get_score(&*TQ2, 
&(p[1], c[1], c[2], c[3])); - score = score + get_score(&*TQ3, &(p[2], c[0], c[1], c[2])); - score = score + get_score(&*TQ4, &(p[2], c[1], c[2], c[3])); + score += match &(c[0], c[1], c[2]) { ('A', 'A', 'A') => 1093, ('H', 'H', 'H') => 1029, ('H', 'H', 'M') => 580, ('H', 'I', 'I') => 998, ('H', 'O', 'H') => -390, ('H', 'O', 'M') => -331, ('I', 'H', 'I') => 1169, ('I', 'O', 'H') => -142, ('I', 'O', 'I') => -1015, ('I', 'O', 'M') => 467, ('M', 'M', 'H') => 187, ('O', 'O', 'I') => -1832, _ => 0}; + score += match &(c[1], c[2], c[3]) { ('H', 'H', 'O') => 2088, ('H', 'I', 'I') => -1023, ('H', 'M', 'M') => -1154, ('I', 'H', 'I') => -1965, ('K', 'K', 'H') => 703, ('O', 'I', 'I') => -2649, _ => 0}; + score += match &(c[2], c[3], c[4]) { ('A', 'A', 'A') => -294, ('H', 'H', 'H') => 346, ('H', 'H', 'I') => -341, ('H', 'I', 'I') => -1088, ('H', 'I', 'K') => 731, ('H', 'O', 'H') => -1486, ('I', 'H', 'H') => 128, ('I', 'H', 'I') => -3041, ('I', 'H', 'O') => -1935, ('I', 'I', 'H') => -825, ('I', 'I', 'M') => -1035, ('I', 'O', 'I') => -542, ('K', 'H', 'H') => -1216, ('K', 'K', 'A') => 491, ('K', 'K', 'H') => -1217, ('K', 'O', 'K') => -1009, ('M', 'H', 'H') => -2694, ('M', 'H', 'M') => -457, ('M', 'H', 'O') => 123, ('M', 'M', 'H') => -471, ('N', 'N', 'H') => -1689, ('N', 'N', 'O') => 662, ('O', 'H', 'O') => -3393, _ => 0}; + score += match &(c[3], c[4], c[5]) { ('H', 'H', 'H') => -203, ('H', 'H', 'I') => 1344, ('H', 'H', 'K') => 365, ('H', 'H', 'M') => -122, ('H', 'H', 'N') => 182, ('H', 'H', 'O') => 669, ('H', 'I', 'H') => 804, ('H', 'I', 'I') => 679, ('H', 'O', 'H') => 446, ('I', 'H', 'H') => 695, ('I', 'H', 'O') => -2324, ('I', 'I', 'H') => 321, ('I', 'I', 'I') => 1497, ('I', 'I', 'O') => 656, ('I', 'O', 'O') => 54, ('K', 'A', 'K') => 4845, ('K', 'K', 'A') => 3386, ('K', 'K', 'K') => 3065, ('M', 'H', 'H') => -405, ('M', 'H', 'I') => 201, ('M', 'M', 'H') => -241, ('M', 'M', 'M') => 661, ('M', 'O', 'M') => 841, _ => 0}; + score += match &(p[1], c[0], c[1], c[2]) { ('B', 'H', 'H', 
'H') => -227, ('B', 'H', 'H', 'I') => 316, ('B', 'H', 'I', 'H') => -132, ('B', 'I', 'H', 'H') => 60, ('B', 'I', 'I', 'I') => 1595, ('B', 'N', 'H', 'H') => -744, ('B', 'O', 'H', 'H') => 225, ('B', 'O', 'O', 'O') => -908, ('O', 'A', 'K', 'K') => 482, ('O', 'H', 'H', 'H') => 281, ('O', 'H', 'I', 'H') => 249, ('O', 'I', 'H', 'I') => 200, ('O', 'I', 'I', 'H') => -68, _ => 0}; + score += match &(p[1], c[1], c[2], c[3]) { ('B', 'I', 'H', 'H') => -1401, ('B', 'I', 'I', 'I') => -1033, ('B', 'K', 'A', 'K') => -543, ('B', 'O', 'O', 'O') => -5591, _ => 0}; + score += match &(p[2], c[0], c[1], c[2]) { ('B', 'H', 'H', 'H') => 478, ('B', 'H', 'H', 'M') => -1073, ('B', 'H', 'I', 'H') => 222, ('B', 'H', 'I', 'I') => -504, ('B', 'I', 'I', 'H') => -116, ('B', 'I', 'I', 'I') => -105, ('B', 'M', 'H', 'I') => -863, ('B', 'M', 'H', 'M') => -464, ('B', 'O', 'M', 'H') => 620, ('O', 'H', 'H', 'H') => 346, ('O', 'H', 'H', 'I') => 1729, ('O', 'H', 'I', 'I') => 997, ('O', 'H', 'M', 'H') => 481, ('O', 'I', 'H', 'H') => 623, ('O', 'I', 'I', 'H') => 1344, ('O', 'K', 'A', 'K') => 2792, ('O', 'K', 'H', 'H') => 587, ('O', 'K', 'K', 'A') => 679, ('O', 'O', 'H', 'H') => 110, ('O', 'O', 'I', 'I') => -685, _ => 0}; + score += match &(p[2], c[1], c[2], c[3]) { ('B', 'H', 'H', 'H') => -721, ('B', 'H', 'H', 'M') => -3604, ('B', 'H', 'I', 'I') => -966, ('B', 'I', 'I', 'H') => -607, ('B', 'I', 'I', 'I') => -2181, ('O', 'A', 'A', 'A') => -2763, ('O', 'A', 'K', 'K') => 180, ('O', 'H', 'H', 'H') => -294, ('O', 'H', 'H', 'I') => 2446, ('O', 'H', 'H', 'O') => 480, ('O', 'H', 'I', 'H') => -1573, ('O', 'I', 'H', 'H') => 1935, ('O', 'I', 'H', 'I') => -493, ('O', 'I', 'I', 'H') => 626, ('O', 'I', 'I', 'I') => -4007, ('O', 'K', 'A', 'K') => -8156, _ => 0}; + score += match &(w[0], w[1], w[2]) { ('に', 'つ', 'い') => -4681, ('東', '京', '都') => 2026, _ => 0}; + score += match &(w[1], w[2], w[3]) { ('あ', 'る', '程') => -2049, ('い', 'っ', 'た') => -1256, ('こ', 'ろ', 'が') => -2434, ('し', 'ょ', 'う') => 3873, ('そ', 'の', '後') => -4430, 
('だ', 'っ', 'て') => -1049, ('て', 'い', 'た') => 1833, ('と', 'し', 'て') => -4657, ('と', 'も', 'に') => -4517, ('も', 'の', 'で') => 1882, ('一', '気', 'に') => -792, ('初', 'め', 'て') => -1512, ('同', '時', 'に') => -8097, ('大', 'き', 'な') => -1255, ('対', 'し', 'て') => -2721, ('社', '会', '党') => -3216, _ => 0}; + score += match &(w[2], w[3], w[4]) { ('い', 'た', 'だ') => -1734, ('し', 'て', 'い') => 1314, ('と', 'し', 'て') => -4314, ('に', 'つ', 'い') => -5483, ('に', 'と', 'っ') => -5989, ('に', '当', 'た') => -6247, ('の', 'で', ',') => -727, ('の', 'で', '、') => -727, ('の', 'も', 'の') => -600, ('れ', 'か', 'ら') => -3752, ('十', '二', '月') => -2287, _ => 0}; + score += match &(w[3], w[4], w[5]) { ('い', 'う', '.') => 8576, ('い', 'う', '。') => 8576, ('か', 'ら', 'な') => -2348, ('し', 'て', 'い') => 2958, ('た', 'が', ',') => 1516, ('た', 'が', '、') => 1516, ('て', 'い', 'る') => 1538, ('と', 'い', 'う') => 1349, ('ま', 'し', 'た') => 5543, ('ま', 'せ', 'ん') => 1097, ('よ', 'う', 'と') => -4258, ('よ', 'る', 'と') => 5865, _ => 0}; + + score += match &c[0] { 'A' => 484, 'K' => 93, 'M' => 645, 'O' => -505, _ => 0 }; + score += match &c[1] { 'A' => 819, 'H' => 1059, 'I' => 409, 'M' => 3987, 'N' => 5775, 'O' => 646, _ => 0 }; + score += match &c[2] { 'A' => -1370, 'I' => 2311, _ => 0 }; + score += match &c[3] { 'A' => -2643, 'H' => 1809, 'I' => -1032, 'K' => -3450, 'M' => 3565, 'N' => 3876, 'O' => 6646, _ => 0 }; + score += match &c[4] { 'H' => 313, 'I' => -1238, 'K' => -799, 'M' => 539, 'O' => -831, _ => 0 }; + score += match &c[5] { 'H' => -506, 'I' => -253, 'K' => 87, 'M' => 247, 'O' => -387, _ => 0 }; + + score += match &p[0] { 'O' => -214, _ => 0}; + score += match &p[1] { 'B' => 69, 'O' => 935, _ => 0}; + score += match &p[2] { 'B' => 189, _ => 0}; + score += match &(p[0], c[0]) { ('B', 'H') => 21, ('B', 'I') => -12, ('B', 'K') => -99, ('B', 'N') => 142, ('B', 'O') => -56, ('O', 'H') => -95, ('O', 'I') => 477, ('O', 'K') => 410, ('O', 'O') => -2422, _ => 0}; + score += match &(p[1], c[1]) { ('B', 'H') => 216, ('B', 'I') => 113, ('O', 
'K') => 1759, _ => 0}; + score += match &(p[2], c[2]) { ('B', 'A') => -479, ('B', 'H') => 42, ('B', 'I') => 1913, ('B', 'K') => -7198, ('B', 'M') => 3160, ('B', 'N') => 6427, ('B', 'O') => 14761, ('O', 'I') => -827, ('O', 'N') => -3212, _ => 0}; + score += match &w[0] { ',' => 156, '、' => 156, '「' => -463, 'あ' => -941, 'う' => -127, 'が' => -553, 'き' => 121, 'こ' => 505, 'で' => -201, 'と' => -547, 'ど' => -123, 'に' => -789, 'の' => -185, 'は' => -847, 'も' => -466, 'や' => -470, 'よ' => 182, 'ら' => -292, 'り' => 208, 'れ' => 169, 'を' => -446, 'ん' => -137, '・' => -135, '主' => -402, '京' => -268, '区' => -912, '午' => 871, '国' => -460, '大' => 561, '委' => 729, '市' => -411, '日' => -141, '理' => 361, '生' => -408, '県' => -386, '都' => -718, '「' => -463, '・' => -135, _ => 0}; + score += match &w[1] { ',' => -829, '、' => -829, '〇' => 892, '「' => -645, '」' => 3145, 'あ' => -538, 'い' => 505, 'う' => 134, 'お' => -502, 'か' => 1454, 'が' => -856, 'く' => -412, 'こ' => 1141, 'さ' => 878, 'ざ' => 540, 'し' => 1529, 'す' => -675, 'せ' => 300, 'そ' => -1011, 'た' => 188, 'だ' => 1837, 'つ' => -949, 'て' => -291, 'で' => -268, 'と' => -981, 'ど' => 1273, 'な' => 1063, 'に' => -1764, 'の' => 130, 'は' => -409, 'ひ' => -1273, 'べ' => 1261, 'ま' => 600, 'も' => -1263, 'や' => -402, 'よ' => 1639, 'り' => -579, 'る' => -694, 'れ' => 571, 'を' => -2516, 'ん' => 2095, 'ア' => -587, 'カ' => 306, 'キ' => 568, 'ッ' => 831, '三' => -758, '不' => -2150, '世' => -302, '中' => -968, '主' => -861, '事' => 492, '人' => -123, '会' => 978, '保' => 362, '入' => 548, '初' => -3025, '副' => -1566, '北' => -3414, '区' => -422, '大' => -1769, '天' => -865, '太' => -483, '子' => -1519, '学' => 760, '実' => 1023, '小' => -2009, '市' => -813, '年' => -1060, '強' => 1067, '手' => -1519, '揺' => -1033, '政' => 1522, '文' => -1355, '新' => -1682, '日' => -1815, '明' => -1462, '最' => -630, '朝' => -1843, '本' => -1650, '東' => -931, '果' => -665, '次' => -2378, '民' => -180, '気' => -1740, '理' => 752, '発' => 529, '目' => -1584, '相' => -242, '県' => -1165, '立' => -763, '第' => 810, '米' => 509, '自' => 
-1353, '行' => 838, '西' => -744, '見' => -3874, '調' => 1010, '議' => 1198, '込' => 3041, '開' => 1758, '間' => -1257, '「' => -645, '」' => 3145, 'ッ' => 831, 'ア' => -587, 'カ' => 306, 'キ' => 568, _ => 0}; + score += match &w[2] { ',' => 4889, '1' => -800, '−' => -1723, '、' => 4889, '々' => -2311, '〇' => 5827, '」' => 2670, '〓' => -3573, 'あ' => -2696, 'い' => 1006, 'う' => 2342, 'え' => 1983, 'お' => -4864, 'か' => -1163, 'が' => 3271, 'く' => 1004, 'け' => 388, 'げ' => 401, 'こ' => -3552, 'ご' => -3116, 'さ' => -1058, 'し' => -395, 'す' => 584, 'せ' => 3685, 'そ' => -5228, 'た' => 842, 'ち' => -521, 'っ' => -1444, 'つ' => -1081, 'て' => 6167, 'で' => 2318, 'と' => 1691, 'ど' => -899, 'な' => -2788, 'に' => 2745, 'の' => 4056, 'は' => 4555, 'ひ' => -2171, 'ふ' => -1798, 'へ' => 1199, 'ほ' => -5516, 'ま' => -4384, 'み' => -120, 'め' => 1205, 'も' => 2323, 'や' => -788, 'よ' => -202, 'ら' => 727, 'り' => 649, 'る' => 5905, 'れ' => 2773, 'わ' => -1207, 'を' => 6620, 'ん' => -518, 'ア' => 551, 'グ' => 1319, 'ス' => 874, 'ッ' => -1350, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, '・' => -3794, '一' => -1619, '下' => -1759, '世' => -2087, '両' => 3815, '中' => 653, '主' => -758, '予' => -1193, '二' => 974, '人' => 2742, '今' => 792, '他' => 1889, '以' => -1368, '低' => 811, '何' => 4265, '作' => -361, '保' => -2439, '元' => 4858, '党' => 3593, '全' => 1574, '公' => -3030, '六' => 755, '共' => -1880, '円' => 5807, '再' => 3095, '分' => 457, '初' => 2475, '別' => 1129, '前' => 2286, '副' => 4437, '力' => 365, '動' => -949, '務' => -1872, '化' => 1327, '北' => -1038, '区' => 4646, '千' => -2309, '午' => -783, '協' => -1006, '口' => 483, '右' => 1233, '各' => 3588, '合' => -241, '同' => 3906, '和' => -837, '員' => 4513, '国' => 642, '型' => 1389, '場' => 1219, '外' => -241, '妻' => 2016, '学' => -1356, '安' => -423, '実' => -1008, '家' => 1078, '小' => -513, '少' => -3102, '州' => 1155, '市' => 3197, '平' => -1804, '年' => 2416, '広' => -1030, '府' => 1605, '度' => 1452, '建' => -2352, '当' => -3885, '得' => 1905, '思' => -1291, '性' => 1822, '戸' => -488, '指' => -3973, '政' => -2013, 
'教' => -1479, '数' => 3222, '文' => -1489, '新' => 1764, '日' => 2099, '旧' => 5792, '昨' => -661, '時' => -1248, '曜' => -951, '最' => -937, '月' => 4125, '期' => 360, '李' => 3094, '村' => 364, '東' => -805, '核' => 5156, '森' => 2438, '業' => 484, '氏' => 2613, '民' => -1694, '決' => -1073, '法' => 1868, '海' => -495, '無' => 979, '物' => 461, '特' => -3850, '生' => -273, '用' => 914, '町' => 1215, '的' => 7313, '直' => -1835, '省' => 792, '県' => 6293, '知' => -1528, '私' => 4231, '税' => 401, '立' => -960, '第' => 1201, '米' => 7767, '系' => 3066, '約' => 3663, '級' => 1384, '統' => -4229, '総' => 1163, '線' => 1255, '者' => 6457, '能' => 725, '自' => -2869, '英' => 785, '見' => 1044, '調' => -562, '財' => -733, '費' => 1777, '車' => 1835, '軍' => 1375, '込' => -1504, '通' => -1136, '選' => -681, '郎' => 1026, '郡' => 4404, '部' => 1200, '金' => 2163, '長' => 421, '開' => -1432, '間' => 1302, '関' => -1282, '雨' => 2009, '電' => -1045, '非' => 2066, '駅' => 1620, '1' => -800, '」' => 2670, '・' => -3794, 'ッ' => -1350, 'ア' => 551, 'ス' => 874, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, _ => 0}; + score += match &w[3] { ',' => 3930, '.' 
=> 3508, '―' => -4841, '、' => 3930, '。' => 3508, '〇' => 4999, '「' => 1895, '」' => 3798, '〓' => -5156, 'あ' => 4752, 'い' => -3435, 'う' => -640, 'え' => -2514, 'お' => 2405, 'か' => 530, 'が' => 6006, 'き' => -4482, 'ぎ' => -3821, 'く' => -3788, 'け' => -4376, 'げ' => -4734, 'こ' => 2255, 'ご' => 1979, 'さ' => 2864, 'し' => -843, 'じ' => -2506, 'す' => -731, 'ず' => 1251, 'せ' => 181, 'そ' => 4091, 'た' => 5034, 'だ' => 5408, 'ち' => -3654, 'っ' => -5882, 'つ' => -1659, 'て' => 3994, 'で' => 7410, 'と' => 4547, 'な' => 5433, 'に' => 6499, 'ぬ' => 1853, 'ね' => 1413, 'の' => 7396, 'は' => 8578, 'ば' => 1940, 'ひ' => 4249, 'び' => -4134, 'ふ' => 1345, 'へ' => 6665, 'べ' => -744, 'ほ' => 1464, 'ま' => 1051, 'み' => -2082, 'む' => -882, 'め' => -5046, 'も' => 4169, 'ゃ' => -2666, 'や' => 2795, 'ょ' => -1544, 'よ' => 3351, 'ら' => -2922, 'り' => -9726, 'る' => -14896, 'れ' => -2613, 'ろ' => -4570, 'わ' => -1783, 'を' => 13150, 'ん' => -2352, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ッ' => -724, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, '・' => -4371, 'ー' => -11870, '一' => -2069, '中' => 2210, '予' => 782, '事' => -190, '井' => -1768, '人' => 1036, '以' => 544, '会' => 950, '体' => -1286, '作' => 530, '側' => 4292, '先' => 601, '党' => -2006, '共' => -1212, '内' => 584, '円' => 788, '初' => 1347, '前' => 1623, '副' => 3879, '力' => -302, '動' => -740, '務' => -2715, '化' => 776, '区' => 4517, '協' => 1013, '参' => 1555, '合' => -1834, '和' => -681, '員' => -910, '器' => -851, '回' => 1500, '国' => -619, '園' => -1200, '地' => 866, '場' => -1410, '塁' => -2094, '士' => -1413, '多' => 1067, '大' => 571, '子' => -4802, '学' => -1397, '定' => -1057, '寺' => -809, '小' => 1910, '屋' => -1328, '山' => -1500, '島' => -2056, '川' => -2667, '市' => 2771, '年' => 374, '庁' => -4556, '後' => 456, '性' => 553, '感' => 916, '所' => -1566, '支' => 856, '改' => 787, '政' => 2182, '教' => 704, '文' => 522, '方' => -856, '日' => 1798, '時' => 1829, '最' => 845, '月' => -9066, '木' => -485, '来' => -442, '校' => -360, '業' => -1043, '氏' => 5388, '民' => -2716, '気' => -910, '沢' => 
-939, '済' => -543, '物' => -735, '率' => 672, '球' => -1267, '生' => -1286, '産' => -1101, '田' => -2900, '町' => 1826, '的' => 2586, '目' => 922, '省' => -3485, '県' => 2997, '空' => -867, '立' => -2112, '第' => 788, '米' => 2937, '系' => 786, '約' => 2171, '経' => 1146, '統' => -1169, '総' => 940, '線' => -994, '署' => 749, '者' => 2145, '能' => -730, '般' => -852, '行' => -792, '規' => 792, '警' => -1184, '議' => -244, '谷' => -1000, '賞' => 730, '車' => -1481, '軍' => 1158, '輪' => -1433, '込' => -3370, '近' => 929, '道' => -1291, '選' => 2596, '郎' => -4866, '都' => 1192, '野' => -1100, '銀' => -2213, '長' => 357, '間' => -2344, '院' => -2297, '際' => -2604, '電' => -878, '領' => -1659, '題' => -792, '館' => -1984, '首' => 1749, '高' => 2120, '「' => 1895, '」' => 3798, '・' => -4371, 'ッ' => -724, 'ー' => -11870, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, _ => 0}; + score += match &w[4] { ',' => 465, '.' => -299, '1' => -514, ']' => -2762, '、' => 465, '。' => -299, '「' => 363, 'あ' => 1655, 'い' => 331, 'う' => -503, 'え' => 1199, 'お' => 527, 'か' => 647, 'が' => -421, 'き' => 1624, 'ぎ' => 1971, 'く' => 312, 'げ' => -983, 'さ' => -1537, 'し' => -1371, 'す' => -852, 'だ' => -1186, 'ち' => 1093, 'っ' => 52, 'つ' => 921, 'て' => -18, 'で' => -850, 'と' => -127, 'ど' => 1682, 'な' => -787, 'に' => -1224, 'の' => -635, 'は' => -578, 'べ' => 1001, 'み' => 502, 'め' => 865, 'ゃ' => 3350, 'ょ' => 854, 'り' => -208, 'る' => 429, 'れ' => 504, 'わ' => 419, 'を' => -1264, 'ん' => 327, 'イ' => 241, 'ル' => 451, 'ン' => -343, '中' => -871, '京' => 722, '会' => -1153, '党' => -654, '務' => 3519, '区' => -901, '告' => 848, '員' => 2104, '大' => -1296, '学' => -548, '定' => 1785, '嵐' => -1304, '市' => -2991, '席' => 921, '年' => 1763, '思' => 872, '所' => -814, '挙' => 1618, '新' => -1682, '日' => 218, '月' => -4353, '査' => 932, '格' => 1356, '機' => -1508, '氏' => -1347, '田' => 240, '町' => -3912, '的' => -3149, '相' => 1319, '省' => -1052, '県' => -4003, '研' => -997, '社' => -278, '空' => -813, '統' => 1955, '者' => -2233, 
'表' => 663, '語' => -1073, '議' => 1219, '選' => -1018, '郎' => -368, '長' => 786, '間' => 1191, '題' => 2368, '館' => -689, '1' => -514, '「' => 363, 'イ' => 241, 'ル' => 451, 'ン' => -343, _ => 0}; + if w[4] == *E2 { + score += -32768; + } + score += match &w[5] { ',' => 227, '.' => 808, '1' => -270, '、' => 227, '。' => 808, 'あ' => -307, 'う' => 189, 'か' => 241, 'が' => -73, 'く' => -121, 'こ' => -200, 'じ' => 1782, 'す' => 383, 'た' => -428, 'っ' => 573, 'て' => -1014, 'で' => 101, 'と' => -105, 'な' => -253, 'に' => -149, 'の' => -417, 'は' => -236, 'も' => -206, 'り' => 187, 'る' => -135, 'を' => 195, 'ル' => -673, 'ン' => -496, '一' => -277, '中' => 201, '件' => -800, '会' => 624, '前' => 302, '区' => 1792, '員' => -1212, '委' => 798, '学' => -960, '市' => 887, '広' => -695, '後' => 535, '業' => -697, '相' => 753, '社' => -507, '福' => 974, '空' => -822, '者' => 1811, '連' => 463, '郎' => 1082, '1' => -270, 'ル' => -673, 'ン' => -496, _ => 0}; + if w[5] == *E1 { + score += 306; + } + + score += match &(c[1], c[2]) { ('H', 'H') => 6, ('I', 'I') => 2461, ('K', 'H') => 406, ('O', 'H') => -1378, _ => 0}; + score += match &(c[2], c[3]) { ('A', 'A') => -3267, ('A', 'I') => 2744, ('A', 'N') => -878, ('H', 'H') => -4070, ('H', 'M') => -1711, ('H', 'N') => 4012, ('H', 'O') => 3761, ('I', 'A') => 1327, ('I', 'H') => -1184, ('I', 'I') => -1332, ('I', 'K') => 1721, ('I', 'O') => 5492, ('K', 'I') => 3831, ('K', 'K') => -8741, ('M', 'H') => -3132, ('M', 'K') => 3334, ('O', 'O') => -2920, _ => 0}; + score += match &(c[3], c[4]) { ('H', 'H') => 996, ('H', 'I') => 626, ('H', 'K') => -721, ('H', 'N') => -1307, ('H', 'O') => -836, ('I', 'H') => -301, ('K', 'K') => 2762, ('M', 'K') => 1079, ('M', 'M') => 4034, ('O', 'A') => -1652, ('O', 'H') => 266, _ => 0}; + score += match &(p[0], p[1]) { ('B', 'B') => 295, ('O', 'B') => 304, ('O', 'O') => -125, ('U', 'B') => 352, _ => 0}; + score += match &(p[1], p[2]) { ('B', 'O') => 60, ('O', 'O') => -1762, _ => 0}; + score += match &(p[1], c[1], c[2]) { ('B', 'H', 'H') => 1150, ('B', 'H', 'M') 
=> 1521, ('B', 'I', 'I') => -1158, ('B', 'I', 'M') => 886, ('B', 'M', 'H') => 1208, ('B', 'N', 'H') => 449, ('B', 'O', 'H') => -91, ('B', 'O', 'O') => -2597, ('O', 'H', 'I') => 451, ('O', 'I', 'H') => -296, ('O', 'K', 'A') => 1851, ('O', 'K', 'H') => -1020, ('O', 'K', 'K') => 904, ('O', 'O', 'O') => 2965, _ => 0}; + score += match &(p[1], c[2], c[3]) { ('B', 'H', 'H') => 118, ('B', 'H', 'I') => -1159, ('B', 'H', 'M') => 466, ('B', 'I', 'H') => -919, ('B', 'K', 'K') => -1720, ('B', 'K', 'O') => 864, ('O', 'H', 'H') => -1139, ('O', 'H', 'M') => -181, ('O', 'I', 'H') => 153, ('U', 'H', 'I') => -1146, _ => 0}; + score += match &(p[2], c[1], c[2]) { ('B', 'H', 'H') => -792, ('B', 'H', 'I') => 2664, ('B', 'I', 'I') => -299, ('B', 'K', 'I') => 419, ('B', 'M', 'H') => 937, ('B', 'M', 'M') => 8335, ('B', 'N', 'N') => 998, ('B', 'O', 'H') => 775, ('O', 'H', 'H') => 2174, ('O', 'H', 'M') => 439, ('O', 'I', 'I') => 280, ('O', 'K', 'H') => 1798, ('O', 'K', 'I') => -793, ('O', 'K', 'O') => -2242, ('O', 'M', 'H') => -2402, ('O', 'O', 'O') => 11699, _ => 0}; + score += match &(p[2], c[2], c[3]) { ('B', 'H', 'H') => -3895, ('B', 'I', 'H') => 3761, ('B', 'I', 'I') => -4654, ('B', 'I', 'K') => 1348, ('B', 'K', 'K') => -1806, ('B', 'M', 'I') => -3385, ('B', 'O', 'O') => -12396, ('O', 'A', 'H') => 926, ('O', 'H', 'H') => 266, ('O', 'H', 'K') => -2036, ('O', 'N', 'N') => -973, _ => 0}; + score += match &(w[1], w[2]) { (',', 'と') => 660, (',', '同') => 727, ('、', 'と') => 660, ('、', '同') => 727, ('」', 'と') => 1682, ('あ', 'っ') => 1505, ('い', 'う') => 1743, ('い', 'っ') => -2055, ('い', 'る') => 672, ('う', 'し') => -4817, ('う', 'ん') => 665, ('か', 'ら') => 3472, ('が', 'ら') => 600, ('こ', 'う') => -790, ('こ', 'と') => 2083, ('こ', 'ん') => -1262, ('さ', 'ら') => -4143, ('さ', 'ん') => 4573, ('し', 'た') => 2641, ('し', 'て') => 1104, ('す', 'で') => -3399, ('そ', 'こ') => 1977, ('そ', 'れ') => -871, ('た', 'ち') => 1122, ('た', 'め') => 601, ('っ', 'た') => 3463, ('つ', 'い') => -802, ('て', 'い') => 805, ('て', 'き') => 1249, 
('で', 'き') => 1127, ('で', 'す') => 3445, ('で', 'は') => 844, ('と', 'い') => -4915, ('と', 'み') => 1922, ('ど', 'こ') => 3887, ('な', 'い') => 5713, ('な', 'っ') => 3015, ('な', 'ど') => 7379, ('な', 'ん') => -1113, ('に', 'し') => 2468, ('に', 'は') => 1498, ('に', 'も') => 1671, ('に', '対') => -912, ('の', '一') => -501, ('の', '中') => 741, ('ま', 'せ') => 2448, ('ま', 'で') => 1711, ('ま', 'ま') => 2600, ('ま', 'る') => -2155, ('や', 'む') => -1947, ('よ', 'っ') => -2565, ('れ', 'た') => 2369, ('れ', 'で') => -913, ('を', 'し') => 1860, ('を', '見') => 731, ('亡', 'く') => -1886, ('京', '都') => 2558, ('取', 'り') => -2784, ('大', 'き') => -2604, ('大', '阪') => 1497, ('平', '方') => -2314, ('引', 'き') => -1336, ('日', '本') => -195, ('本', '当') => -2423, ('毎', '日') => -2113, ('目', '指') => -724, ('」', 'と') => 1682, _ => 0}; + if (w[1], w[2]) == (*B1, 'あ') { + score += 1404; + } + if (w[1], w[2]) == (*B1, '同') { + score += 542; + } + + score += match &(w[2], w[3]) { ('.', '.') => -11822, ('1', '1') => -669, ('―', '―') => -5730, ('−', '−') => -13175, ('い', 'う') => -1609, ('う', 'か') => 2490, ('か', 'し') => -1350, ('か', 'も') => -602, ('か', 'ら') => -7194, ('か', 'れ') => 4612, ('が', 'い') => 853, ('が', 'ら') => -3198, ('き', 'た') => 1941, ('く', 'な') => -1597, ('こ', 'と') => -8392, ('こ', 'の') => -4193, ('さ', 'せ') => 4533, ('さ', 'れ') => 13168, ('さ', 'ん') => -3977, ('し', 'い') => -1819, ('し', 'か') => -545, ('し', 'た') => 5078, ('し', 'て') => 972, ('し', 'な') => 939, ('そ', 'の') => -3744, ('た', 'い') => -1253, ('た', 'た') => -662, ('た', 'だ') => -3857, ('た', 'ち') => -786, ('た', 'と') => 1224, ('た', 'は') => -939, ('っ', 'た') => 4589, ('っ', 'て') => 1647, ('っ', 'と') => -2094, ('て', 'い') => 6144, ('て', 'き') => 3640, ('て', 'く') => 2551, ('て', 'は') => -3110, ('て', 'も') => -3065, ('で', 'い') => 2666, ('で', 'き') => -1528, ('で', 'し') => -3828, ('で', 'す') => -4761, ('で', 'も') => -4203, ('と', 'い') => 1890, ('と', 'こ') => -1746, ('と', 'と') => -2279, ('と', 'の') => 720, ('と', 'み') => 5168, ('と', 'も') => -3941, ('な', 'い') => -2488, ('な', 'が') => -1313, ('な', 'ど') 
=> -6509, ('な', 'の') => 2614, ('な', 'ん') => 3099, ('に', 'お') => -1615, ('に', 'し') => 2748, ('に', 'な') => 2454, ('に', 'よ') => -7236, ('に', '対') => -14943, ('に', '従') => -4688, ('に', '関') => -11388, ('の', 'か') => 2093, ('の', 'で') => -7059, ('の', 'に') => -6041, ('の', 'の') => -6125, ('は', 'い') => 1073, ('は', 'が') => -1033, ('は', 'ず') => -2532, ('ば', 'れ') => 1813, ('ま', 'し') => -1316, ('ま', 'で') => -6621, ('ま', 'れ') => 5409, ('め', 'て') => -3153, ('も', 'い') => 2230, ('も', 'の') => -10713, ('ら', 'か') => -944, ('ら', 'し') => -1611, ('ら', 'に') => -1897, ('り', 'し') => 651, ('り', 'ま') => 1620, ('れ', 'た') => 4270, ('れ', 'て') => 849, ('れ', 'ば') => 4114, ('ろ', 'う') => 6067, ('わ', 'れ') => 7901, ('を', '通') => -11877, ('ん', 'だ') => 728, ('ん', 'な') => -4115, ('一', '人') => 602, ('一', '方') => -1375, ('一', '日') => 970, ('一', '部') => -1051, ('上', 'が') => -4479, ('会', '社') => -1116, ('出', 'て') => 2163, ('分', 'の') => -7758, ('同', '党') => 970, ('同', '日') => -913, ('大', '阪') => -2471, ('委', '員') => -1250, ('少', 'な') => -1050, ('年', '度') => -8669, ('年', '間') => -1626, ('府', '県') => -2363, ('手', '権') => -1982, ('新', '聞') => -4066, ('日', '新') => -722, ('日', '本') => -7068, ('日', '米') => 3372, ('曜', '日') => -601, ('朝', '鮮') => -2355, ('本', '人') => -2697, ('東', '京') => -1543, ('然', 'と') => -1384, ('社', '会') => -1276, ('立', 'て') => -990, ('第', 'に') => -1612, ('米', '国') => -4268, ('1', '1') => -669, ('ク', '゙') => 1319,_ => 0}; + score += match &(w[3], w[4]) { ('あ', 'た') => -2194, ('あ', 'り') => 719, ('あ', 'る') => 3846, ('い', '.') => -1185, ('い', '。') => -1185, ('い', 'い') => 5308, ('い', 'え') => 2079, ('い', 'く') => 3029, ('い', 'た') => 2056, ('い', 'っ') => 1883, ('い', 'る') => 5600, ('い', 'わ') => 1527, ('う', 'ち') => 1117, ('う', 'と') => 4798, ('え', 'と') => 1454, ('か', '.') => 2857, ('か', '。') => 2857, ('か', 'け') => -743, ('か', 'っ') => -4098, ('か', 'に') => -669, ('か', 'ら') => 6520, ('か', 'り') => -2670, ('が', ',') => 1816, ('が', '、') => 1816, ('が', 'き') => -4855, ('が', 'け') => -1127, ('が', 'っ') => -913, ('が', 
'ら') => -4977, ('が', 'り') => -2064, ('き', 'た') => 1645, ('け', 'ど') => 1374, ('こ', 'と') => 7397, ('こ', 'の') => 1542, ('こ', 'ろ') => -2757, ('さ', 'い') => -714, ('さ', 'を') => 976, ('し', ',') => 1557, ('し', '、') => 1557, ('し', 'い') => -3714, ('し', 'た') => 3562, ('し', 'て') => 1449, ('し', 'な') => 2608, ('し', 'ま') => 1200, ('す', '.') => -1310, ('す', '。') => -1310, ('す', 'る') => 6521, ('ず', ',') => 3426, ('ず', '、') => 3426, ('ず', 'に') => 841, ('そ', 'う') => 428, ('た', '.') => 8875, ('た', '。') => 8875, ('た', 'い') => -594, ('た', 'の') => 812, ('た', 'り') => -1183, ('た', 'る') => -853, ('だ', '.') => 4098, ('だ', '。') => 4098, ('だ', 'っ') => 1004, ('っ', 'た') => -4748, ('っ', 'て') => 300, ('て', 'い') => 6240, ('て', 'お') => 855, ('て', 'も') => 302, ('で', 'す') => 1437, ('で', 'に') => -1482, ('で', 'は') => 2295, ('と', 'う') => -1387, ('と', 'し') => 2266, ('と', 'の') => 541, ('と', 'も') => -3543, ('ど', 'う') => 4664, ('な', 'い') => 1796, ('な', 'く') => -903, ('な', 'ど') => 2135, ('に', ',') => -1021, ('に', '、') => -1021, ('に', 'し') => 1771, ('に', 'な') => 1906, ('に', 'は') => 2644, ('の', ',') => -724, ('の', '、') => -724, ('の', '子') => -1000, ('は', ',') => 1337, ('は', '、') => 1337, ('べ', 'き') => 2181, ('ま', 'し') => 1113, ('ま', 'す') => 6943, ('ま', 'っ') => -1549, ('ま', 'で') => 6154, ('ま', 'れ') => -793, ('ら', 'し') => 1479, ('ら', 'れ') => 6820, ('る', 'る') => 3818, ('れ', ',') => 854, ('れ', '、') => 854, ('れ', 'た') => 1850, ('れ', 'て') => 1375, ('れ', 'ば') => -3246, ('れ', 'る') => 1091, ('わ', 'れ') => -605, ('ん', 'だ') => 606, ('ん', 'で') => 798, ('カ', '月') => 990, ('会', '議') => 860, ('入', 'り') => 1232, ('大', '会') => 2217, ('始', 'め') => 1681, ('市', ' ') => 965, ('新', '聞') => -5055, ('日', ',') => 974, ('日', '、') => 974, ('社', '会') => 2024, ('カ', '月') => 990, _ => 0}; p.remove(0); p.push(if score < 0 { 'O' } else { 'B' }); @@ -99,6 +108,7 @@ pub fn tokenize(s: &str) -> Vec { if 0 < score { result.push(word.clone()); word.clear(); + } word.push(segments[index]); } From ca8039fc651aa122e7f959d3d62e681501679b76 Mon Sep 17 
00:00:00 2001 From: Pascal Seitz Date: Thu, 16 Aug 2018 16:37:56 +0900 Subject: [PATCH 4/6] cargo fmt --- benchmark/benchmark.rs | 19 +- src/constants.rs | 11 - src/lib.rs | 1631 +++++++++++++++++++++++++++++++++++++--- test/test.rs | 29 +- 4 files changed, 1560 insertions(+), 130 deletions(-) delete mode 100644 src/constants.rs diff --git a/benchmark/benchmark.rs b/benchmark/benchmark.rs index 39da21b..54cd818 100644 --- a/benchmark/benchmark.rs +++ b/benchmark/benchmark.rs @@ -3,22 +3,21 @@ extern crate test; extern crate tinysegmenter; -use std::io::prelude::*; use std::fs::File; +use std::io::prelude::*; use test::Bencher; #[bench] fn run(b: &mut Bencher) { - // http://www.genpaku.org/timemachine/timemachineu8j.txt - let mut f = - File::open("benchmark/timemachineu8j.txt") - .expect("Failed to read a benchmark text."); - let mut s = String::new(); - let _ = f.read_to_string(&mut s); + // http://www.genpaku.org/timemachine/timemachineu8j.txt + let mut f = + File::open("benchmark/timemachineu8j.txt").expect("Failed to read a benchmark text."); + let mut s = String::new(); + let _ = f.read_to_string(&mut s); - b.iter(|| tinysegmenter::tokenize(&s)); + b.iter(|| tinysegmenter::tokenize(&s)); } #[bench] fn test_small(b: &mut test::Bencher) { - b.iter(|| tinysegmenter::tokenize("私はおでぶです")) -} \ No newline at end of file + b.iter(|| tinysegmenter::tokenize("私はおでぶです")) +} diff --git a/src/constants.rs b/src/constants.rs deleted file mode 100644 index 97c7023..0000000 --- a/src/constants.rs +++ /dev/null @@ -1,11 +0,0 @@ -const BIAS: i32 = -332; - - -lazy_static! 
{ - static ref B1: char = unsafe { char::from_u32_unchecked(0x110001) }; - static ref B2: char = unsafe { char::from_u32_unchecked(0x110002) }; - static ref B3: char = unsafe { char::from_u32_unchecked(0x110003) }; - static ref E1: char = unsafe { char::from_u32_unchecked(0x110004) }; - static ref E2: char = unsafe { char::from_u32_unchecked(0x110005) }; - static ref E3: char = unsafe { char::from_u32_unchecked(0x110006) }; -} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 3ef28d7..1520672 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,118 +1,1545 @@ -#[macro_use] extern crate lazy_static; +#[macro_use] +extern crate lazy_static; use std::char; -include!("constants.rs"); +const BIAS: i32 = -332; + +lazy_static! { + static ref B1: char = unsafe { char::from_u32_unchecked(0x110001) }; + static ref B2: char = unsafe { char::from_u32_unchecked(0x110002) }; + static ref B3: char = unsafe { char::from_u32_unchecked(0x110003) }; + static ref E1: char = unsafe { char::from_u32_unchecked(0x110004) }; + static ref E2: char = unsafe { char::from_u32_unchecked(0x110005) }; + static ref E3: char = unsafe { char::from_u32_unchecked(0x110006) }; +} fn get_ctype(c: char) -> char { - match c as u32 { - 0x4E00|0x4E8C|0x4E09|0x56DB|0x4E94|0x516D|0x4E03|0x516B|0x4E5D|0x5341 => 'M', - 0x767E|0x5343|0x4E07|0x5104|0x5146 => 'M', - 0x4E00...0x9FA0|0x3005|0x3006|0x30F5|0x30F6 => 'H', - 0x3041...0x3093 => 'I', - 0x30A1...0x30F4|0x30FC|0xFF71...0xFF9D|0xFF9E|0xFF70 => 'K', - 0x61...0x7A|0x41...0x5A|0xFF41...0xFF5A|0xFF21...0xFF3A => 'A', - 0x30...0x3a|0xFF10...0xFF19 => 'N', - _ => 'O', - } + match c as u32 { + 0x4E00 | 0x4E8C | 0x4E09 | 0x56DB | 0x4E94 | 0x516D | 0x4E03 | 0x516B | 0x4E5D | 0x5341 => { + 'M' + } + 0x767E | 0x5343 | 0x4E07 | 0x5104 | 0x5146 => 'M', + 0x4E00...0x9FA0 | 0x3005 | 0x3006 | 0x30F5 | 0x30F6 => 'H', + 0x3041...0x3093 => 'I', + 0x30A1...0x30F4 | 0x30FC | 0xFF71...0xFF9D | 0xFF9E | 0xFF70 => 'K', + 0x61...0x7A | 0x41...0x5A | 0xFF41...0xFF5A | 
0xFF21...0xFF3A => 'A', + 0x30...0x3a | 0xFF10...0xFF19 => 'N', + _ => 'O', + } } pub fn tokenize(s: &str) -> Vec { - if s.is_empty() { - return Vec::new(); - } - - let mut result = Vec::with_capacity(s.chars().count()); - - let segments = - vec!(*B3, *B2, *B1).into_iter() - .chain(s.chars()) - .chain(vec!(*E1, *E2, *E3).into_iter()) - .collect::>(); - - let ctypes = - vec!('O'; 3).into_iter() - .chain(s.chars().map(get_ctype)) - .chain(vec!('O'; 3).into_iter()) - .collect::>(); - - let mut word = segments[3].to_string(); - let mut p = vec!('U'; 3); - - for index in 4 .. segments.len() - 3 { - let mut score = BIAS; - let w = &segments[index - 3 .. index + 3]; - let c = &ctypes[index - 3 .. index + 3]; - - score += match &(c[0], c[1], c[2]) { ('A', 'A', 'A') => 1093, ('H', 'H', 'H') => 1029, ('H', 'H', 'M') => 580, ('H', 'I', 'I') => 998, ('H', 'O', 'H') => -390, ('H', 'O', 'M') => -331, ('I', 'H', 'I') => 1169, ('I', 'O', 'H') => -142, ('I', 'O', 'I') => -1015, ('I', 'O', 'M') => 467, ('M', 'M', 'H') => 187, ('O', 'O', 'I') => -1832, _ => 0}; - score += match &(c[1], c[2], c[3]) { ('H', 'H', 'O') => 2088, ('H', 'I', 'I') => -1023, ('H', 'M', 'M') => -1154, ('I', 'H', 'I') => -1965, ('K', 'K', 'H') => 703, ('O', 'I', 'I') => -2649, _ => 0}; - score += match &(c[2], c[3], c[4]) { ('A', 'A', 'A') => -294, ('H', 'H', 'H') => 346, ('H', 'H', 'I') => -341, ('H', 'I', 'I') => -1088, ('H', 'I', 'K') => 731, ('H', 'O', 'H') => -1486, ('I', 'H', 'H') => 128, ('I', 'H', 'I') => -3041, ('I', 'H', 'O') => -1935, ('I', 'I', 'H') => -825, ('I', 'I', 'M') => -1035, ('I', 'O', 'I') => -542, ('K', 'H', 'H') => -1216, ('K', 'K', 'A') => 491, ('K', 'K', 'H') => -1217, ('K', 'O', 'K') => -1009, ('M', 'H', 'H') => -2694, ('M', 'H', 'M') => -457, ('M', 'H', 'O') => 123, ('M', 'M', 'H') => -471, ('N', 'N', 'H') => -1689, ('N', 'N', 'O') => 662, ('O', 'H', 'O') => -3393, _ => 0}; - score += match &(c[3], c[4], c[5]) { ('H', 'H', 'H') => -203, ('H', 'H', 'I') => 1344, ('H', 'H', 'K') => 
365, ('H', 'H', 'M') => -122, ('H', 'H', 'N') => 182, ('H', 'H', 'O') => 669, ('H', 'I', 'H') => 804, ('H', 'I', 'I') => 679, ('H', 'O', 'H') => 446, ('I', 'H', 'H') => 695, ('I', 'H', 'O') => -2324, ('I', 'I', 'H') => 321, ('I', 'I', 'I') => 1497, ('I', 'I', 'O') => 656, ('I', 'O', 'O') => 54, ('K', 'A', 'K') => 4845, ('K', 'K', 'A') => 3386, ('K', 'K', 'K') => 3065, ('M', 'H', 'H') => -405, ('M', 'H', 'I') => 201, ('M', 'M', 'H') => -241, ('M', 'M', 'M') => 661, ('M', 'O', 'M') => 841, _ => 0}; - score += match &(p[1], c[0], c[1], c[2]) { ('B', 'H', 'H', 'H') => -227, ('B', 'H', 'H', 'I') => 316, ('B', 'H', 'I', 'H') => -132, ('B', 'I', 'H', 'H') => 60, ('B', 'I', 'I', 'I') => 1595, ('B', 'N', 'H', 'H') => -744, ('B', 'O', 'H', 'H') => 225, ('B', 'O', 'O', 'O') => -908, ('O', 'A', 'K', 'K') => 482, ('O', 'H', 'H', 'H') => 281, ('O', 'H', 'I', 'H') => 249, ('O', 'I', 'H', 'I') => 200, ('O', 'I', 'I', 'H') => -68, _ => 0}; - score += match &(p[1], c[1], c[2], c[3]) { ('B', 'I', 'H', 'H') => -1401, ('B', 'I', 'I', 'I') => -1033, ('B', 'K', 'A', 'K') => -543, ('B', 'O', 'O', 'O') => -5591, _ => 0}; - score += match &(p[2], c[0], c[1], c[2]) { ('B', 'H', 'H', 'H') => 478, ('B', 'H', 'H', 'M') => -1073, ('B', 'H', 'I', 'H') => 222, ('B', 'H', 'I', 'I') => -504, ('B', 'I', 'I', 'H') => -116, ('B', 'I', 'I', 'I') => -105, ('B', 'M', 'H', 'I') => -863, ('B', 'M', 'H', 'M') => -464, ('B', 'O', 'M', 'H') => 620, ('O', 'H', 'H', 'H') => 346, ('O', 'H', 'H', 'I') => 1729, ('O', 'H', 'I', 'I') => 997, ('O', 'H', 'M', 'H') => 481, ('O', 'I', 'H', 'H') => 623, ('O', 'I', 'I', 'H') => 1344, ('O', 'K', 'A', 'K') => 2792, ('O', 'K', 'H', 'H') => 587, ('O', 'K', 'K', 'A') => 679, ('O', 'O', 'H', 'H') => 110, ('O', 'O', 'I', 'I') => -685, _ => 0}; - score += match &(p[2], c[1], c[2], c[3]) { ('B', 'H', 'H', 'H') => -721, ('B', 'H', 'H', 'M') => -3604, ('B', 'H', 'I', 'I') => -966, ('B', 'I', 'I', 'H') => -607, ('B', 'I', 'I', 'I') => -2181, ('O', 'A', 'A', 'A') => -2763, ('O', 'A', 
'K', 'K') => 180, ('O', 'H', 'H', 'H') => -294, ('O', 'H', 'H', 'I') => 2446, ('O', 'H', 'H', 'O') => 480, ('O', 'H', 'I', 'H') => -1573, ('O', 'I', 'H', 'H') => 1935, ('O', 'I', 'H', 'I') => -493, ('O', 'I', 'I', 'H') => 626, ('O', 'I', 'I', 'I') => -4007, ('O', 'K', 'A', 'K') => -8156, _ => 0}; - score += match &(w[0], w[1], w[2]) { ('に', 'つ', 'い') => -4681, ('東', '京', '都') => 2026, _ => 0}; - score += match &(w[1], w[2], w[3]) { ('あ', 'る', '程') => -2049, ('い', 'っ', 'た') => -1256, ('こ', 'ろ', 'が') => -2434, ('し', 'ょ', 'う') => 3873, ('そ', 'の', '後') => -4430, ('だ', 'っ', 'て') => -1049, ('て', 'い', 'た') => 1833, ('と', 'し', 'て') => -4657, ('と', 'も', 'に') => -4517, ('も', 'の', 'で') => 1882, ('一', '気', 'に') => -792, ('初', 'め', 'て') => -1512, ('同', '時', 'に') => -8097, ('大', 'き', 'な') => -1255, ('対', 'し', 'て') => -2721, ('社', '会', '党') => -3216, _ => 0}; - score += match &(w[2], w[3], w[4]) { ('い', 'た', 'だ') => -1734, ('し', 'て', 'い') => 1314, ('と', 'し', 'て') => -4314, ('に', 'つ', 'い') => -5483, ('に', 'と', 'っ') => -5989, ('に', '当', 'た') => -6247, ('の', 'で', ',') => -727, ('の', 'で', '、') => -727, ('の', 'も', 'の') => -600, ('れ', 'か', 'ら') => -3752, ('十', '二', '月') => -2287, _ => 0}; - score += match &(w[3], w[4], w[5]) { ('い', 'う', '.') => 8576, ('い', 'う', '。') => 8576, ('か', 'ら', 'な') => -2348, ('し', 'て', 'い') => 2958, ('た', 'が', ',') => 1516, ('た', 'が', '、') => 1516, ('て', 'い', 'る') => 1538, ('と', 'い', 'う') => 1349, ('ま', 'し', 'た') => 5543, ('ま', 'せ', 'ん') => 1097, ('よ', 'う', 'と') => -4258, ('よ', 'る', 'と') => 5865, _ => 0}; - - score += match &c[0] { 'A' => 484, 'K' => 93, 'M' => 645, 'O' => -505, _ => 0 }; - score += match &c[1] { 'A' => 819, 'H' => 1059, 'I' => 409, 'M' => 3987, 'N' => 5775, 'O' => 646, _ => 0 }; - score += match &c[2] { 'A' => -1370, 'I' => 2311, _ => 0 }; - score += match &c[3] { 'A' => -2643, 'H' => 1809, 'I' => -1032, 'K' => -3450, 'M' => 3565, 'N' => 3876, 'O' => 6646, _ => 0 }; - score += match &c[4] { 'H' => 313, 'I' => -1238, 'K' => -799, 'M' => 539, 
'O' => -831, _ => 0 }; - score += match &c[5] { 'H' => -506, 'I' => -253, 'K' => 87, 'M' => 247, 'O' => -387, _ => 0 }; - - score += match &p[0] { 'O' => -214, _ => 0}; - score += match &p[1] { 'B' => 69, 'O' => 935, _ => 0}; - score += match &p[2] { 'B' => 189, _ => 0}; - score += match &(p[0], c[0]) { ('B', 'H') => 21, ('B', 'I') => -12, ('B', 'K') => -99, ('B', 'N') => 142, ('B', 'O') => -56, ('O', 'H') => -95, ('O', 'I') => 477, ('O', 'K') => 410, ('O', 'O') => -2422, _ => 0}; - score += match &(p[1], c[1]) { ('B', 'H') => 216, ('B', 'I') => 113, ('O', 'K') => 1759, _ => 0}; - score += match &(p[2], c[2]) { ('B', 'A') => -479, ('B', 'H') => 42, ('B', 'I') => 1913, ('B', 'K') => -7198, ('B', 'M') => 3160, ('B', 'N') => 6427, ('B', 'O') => 14761, ('O', 'I') => -827, ('O', 'N') => -3212, _ => 0}; - score += match &w[0] { ',' => 156, '、' => 156, '「' => -463, 'あ' => -941, 'う' => -127, 'が' => -553, 'き' => 121, 'こ' => 505, 'で' => -201, 'と' => -547, 'ど' => -123, 'に' => -789, 'の' => -185, 'は' => -847, 'も' => -466, 'や' => -470, 'よ' => 182, 'ら' => -292, 'り' => 208, 'れ' => 169, 'を' => -446, 'ん' => -137, '・' => -135, '主' => -402, '京' => -268, '区' => -912, '午' => 871, '国' => -460, '大' => 561, '委' => 729, '市' => -411, '日' => -141, '理' => 361, '生' => -408, '県' => -386, '都' => -718, '「' => -463, '・' => -135, _ => 0}; - score += match &w[1] { ',' => -829, '、' => -829, '〇' => 892, '「' => -645, '」' => 3145, 'あ' => -538, 'い' => 505, 'う' => 134, 'お' => -502, 'か' => 1454, 'が' => -856, 'く' => -412, 'こ' => 1141, 'さ' => 878, 'ざ' => 540, 'し' => 1529, 'す' => -675, 'せ' => 300, 'そ' => -1011, 'た' => 188, 'だ' => 1837, 'つ' => -949, 'て' => -291, 'で' => -268, 'と' => -981, 'ど' => 1273, 'な' => 1063, 'に' => -1764, 'の' => 130, 'は' => -409, 'ひ' => -1273, 'べ' => 1261, 'ま' => 600, 'も' => -1263, 'や' => -402, 'よ' => 1639, 'り' => -579, 'る' => -694, 'れ' => 571, 'を' => -2516, 'ん' => 2095, 'ア' => -587, 'カ' => 306, 'キ' => 568, 'ッ' => 831, '三' => -758, '不' => -2150, '世' => -302, '中' => -968, '主' => -861, '事' 
=> 492, '人' => -123, '会' => 978, '保' => 362, '入' => 548, '初' => -3025, '副' => -1566, '北' => -3414, '区' => -422, '大' => -1769, '天' => -865, '太' => -483, '子' => -1519, '学' => 760, '実' => 1023, '小' => -2009, '市' => -813, '年' => -1060, '強' => 1067, '手' => -1519, '揺' => -1033, '政' => 1522, '文' => -1355, '新' => -1682, '日' => -1815, '明' => -1462, '最' => -630, '朝' => -1843, '本' => -1650, '東' => -931, '果' => -665, '次' => -2378, '民' => -180, '気' => -1740, '理' => 752, '発' => 529, '目' => -1584, '相' => -242, '県' => -1165, '立' => -763, '第' => 810, '米' => 509, '自' => -1353, '行' => 838, '西' => -744, '見' => -3874, '調' => 1010, '議' => 1198, '込' => 3041, '開' => 1758, '間' => -1257, '「' => -645, '」' => 3145, 'ッ' => 831, 'ア' => -587, 'カ' => 306, 'キ' => 568, _ => 0}; - score += match &w[2] { ',' => 4889, '1' => -800, '−' => -1723, '、' => 4889, '々' => -2311, '〇' => 5827, '」' => 2670, '〓' => -3573, 'あ' => -2696, 'い' => 1006, 'う' => 2342, 'え' => 1983, 'お' => -4864, 'か' => -1163, 'が' => 3271, 'く' => 1004, 'け' => 388, 'げ' => 401, 'こ' => -3552, 'ご' => -3116, 'さ' => -1058, 'し' => -395, 'す' => 584, 'せ' => 3685, 'そ' => -5228, 'た' => 842, 'ち' => -521, 'っ' => -1444, 'つ' => -1081, 'て' => 6167, 'で' => 2318, 'と' => 1691, 'ど' => -899, 'な' => -2788, 'に' => 2745, 'の' => 4056, 'は' => 4555, 'ひ' => -2171, 'ふ' => -1798, 'へ' => 1199, 'ほ' => -5516, 'ま' => -4384, 'み' => -120, 'め' => 1205, 'も' => 2323, 'や' => -788, 'よ' => -202, 'ら' => 727, 'り' => 649, 'る' => 5905, 'れ' => 2773, 'わ' => -1207, 'を' => 6620, 'ん' => -518, 'ア' => 551, 'グ' => 1319, 'ス' => 874, 'ッ' => -1350, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, '・' => -3794, '一' => -1619, '下' => -1759, '世' => -2087, '両' => 3815, '中' => 653, '主' => -758, '予' => -1193, '二' => 974, '人' => 2742, '今' => 792, '他' => 1889, '以' => -1368, '低' => 811, '何' => 4265, '作' => -361, '保' => -2439, '元' => 4858, '党' => 3593, '全' => 1574, '公' => -3030, '六' => 755, '共' => -1880, '円' => 5807, '再' => 3095, '分' => 457, '初' => 2475, '別' => 1129, '前' => 2286, '副' => 
4437, '力' => 365, '動' => -949, '務' => -1872, '化' => 1327, '北' => -1038, '区' => 4646, '千' => -2309, '午' => -783, '協' => -1006, '口' => 483, '右' => 1233, '各' => 3588, '合' => -241, '同' => 3906, '和' => -837, '員' => 4513, '国' => 642, '型' => 1389, '場' => 1219, '外' => -241, '妻' => 2016, '学' => -1356, '安' => -423, '実' => -1008, '家' => 1078, '小' => -513, '少' => -3102, '州' => 1155, '市' => 3197, '平' => -1804, '年' => 2416, '広' => -1030, '府' => 1605, '度' => 1452, '建' => -2352, '当' => -3885, '得' => 1905, '思' => -1291, '性' => 1822, '戸' => -488, '指' => -3973, '政' => -2013, '教' => -1479, '数' => 3222, '文' => -1489, '新' => 1764, '日' => 2099, '旧' => 5792, '昨' => -661, '時' => -1248, '曜' => -951, '最' => -937, '月' => 4125, '期' => 360, '李' => 3094, '村' => 364, '東' => -805, '核' => 5156, '森' => 2438, '業' => 484, '氏' => 2613, '民' => -1694, '決' => -1073, '法' => 1868, '海' => -495, '無' => 979, '物' => 461, '特' => -3850, '生' => -273, '用' => 914, '町' => 1215, '的' => 7313, '直' => -1835, '省' => 792, '県' => 6293, '知' => -1528, '私' => 4231, '税' => 401, '立' => -960, '第' => 1201, '米' => 7767, '系' => 3066, '約' => 3663, '級' => 1384, '統' => -4229, '総' => 1163, '線' => 1255, '者' => 6457, '能' => 725, '自' => -2869, '英' => 785, '見' => 1044, '調' => -562, '財' => -733, '費' => 1777, '車' => 1835, '軍' => 1375, '込' => -1504, '通' => -1136, '選' => -681, '郎' => 1026, '郡' => 4404, '部' => 1200, '金' => 2163, '長' => 421, '開' => -1432, '間' => 1302, '関' => -1282, '雨' => 2009, '電' => -1045, '非' => 2066, '駅' => 1620, '1' => -800, '」' => 2670, '・' => -3794, 'ッ' => -1350, 'ア' => 551, 'ス' => 874, 'ト' => 521, 'ム' => 1109, 'ル' => 1591, 'ロ' => 2201, 'ン' => 278, _ => 0}; - score += match &w[3] { ',' => 3930, '.' 
=> 3508, '―' => -4841, '、' => 3930, '。' => 3508, '〇' => 4999, '「' => 1895, '」' => 3798, '〓' => -5156, 'あ' => 4752, 'い' => -3435, 'う' => -640, 'え' => -2514, 'お' => 2405, 'か' => 530, 'が' => 6006, 'き' => -4482, 'ぎ' => -3821, 'く' => -3788, 'け' => -4376, 'げ' => -4734, 'こ' => 2255, 'ご' => 1979, 'さ' => 2864, 'し' => -843, 'じ' => -2506, 'す' => -731, 'ず' => 1251, 'せ' => 181, 'そ' => 4091, 'た' => 5034, 'だ' => 5408, 'ち' => -3654, 'っ' => -5882, 'つ' => -1659, 'て' => 3994, 'で' => 7410, 'と' => 4547, 'な' => 5433, 'に' => 6499, 'ぬ' => 1853, 'ね' => 1413, 'の' => 7396, 'は' => 8578, 'ば' => 1940, 'ひ' => 4249, 'び' => -4134, 'ふ' => 1345, 'へ' => 6665, 'べ' => -744, 'ほ' => 1464, 'ま' => 1051, 'み' => -2082, 'む' => -882, 'め' => -5046, 'も' => 4169, 'ゃ' => -2666, 'や' => 2795, 'ょ' => -1544, 'よ' => 3351, 'ら' => -2922, 'り' => -9726, 'る' => -14896, 'れ' => -2613, 'ろ' => -4570, 'わ' => -1783, 'を' => 13150, 'ん' => -2352, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ッ' => -724, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, '・' => -4371, 'ー' => -11870, '一' => -2069, '中' => 2210, '予' => 782, '事' => -190, '井' => -1768, '人' => 1036, '以' => 544, '会' => 950, '体' => -1286, '作' => 530, '側' => 4292, '先' => 601, '党' => -2006, '共' => -1212, '内' => 584, '円' => 788, '初' => 1347, '前' => 1623, '副' => 3879, '力' => -302, '動' => -740, '務' => -2715, '化' => 776, '区' => 4517, '協' => 1013, '参' => 1555, '合' => -1834, '和' => -681, '員' => -910, '器' => -851, '回' => 1500, '国' => -619, '園' => -1200, '地' => 866, '場' => -1410, '塁' => -2094, '士' => -1413, '多' => 1067, '大' => 571, '子' => -4802, '学' => -1397, '定' => -1057, '寺' => -809, '小' => 1910, '屋' => -1328, '山' => -1500, '島' => -2056, '川' => -2667, '市' => 2771, '年' => 374, '庁' => -4556, '後' => 456, '性' => 553, '感' => 916, '所' => -1566, '支' => 856, '改' => 787, '政' => 2182, '教' => 704, '文' => 522, '方' => -856, '日' => 1798, '時' => 1829, '最' => 845, '月' => -9066, '木' => -485, '来' => -442, '校' => -360, '業' => -1043, '氏' => 5388, '民' => -2716, '気' => -910, '沢' => 
-939, '済' => -543, '物' => -735, '率' => 672, '球' => -1267, '生' => -1286, '産' => -1101, '田' => -2900, '町' => 1826, '的' => 2586, '目' => 922, '省' => -3485, '県' => 2997, '空' => -867, '立' => -2112, '第' => 788, '米' => 2937, '系' => 786, '約' => 2171, '経' => 1146, '統' => -1169, '総' => 940, '線' => -994, '署' => 749, '者' => 2145, '能' => -730, '般' => -852, '行' => -792, '規' => 792, '警' => -1184, '議' => -244, '谷' => -1000, '賞' => 730, '車' => -1481, '軍' => 1158, '輪' => -1433, '込' => -3370, '近' => 929, '道' => -1291, '選' => 2596, '郎' => -4866, '都' => 1192, '野' => -1100, '銀' => -2213, '長' => 357, '間' => -2344, '院' => -2297, '際' => -2604, '電' => -878, '領' => -1659, '題' => -792, '館' => -1984, '首' => 1749, '高' => 2120, '「' => 1895, '」' => 3798, '・' => -4371, 'ッ' => -724, 'ー' => -11870, 'カ' => 2145, 'コ' => 1789, 'セ' => 1287, 'ト' => -403, 'メ' => -1635, 'ラ' => -881, 'リ' => -541, 'ル' => -856, 'ン' => -3637, _ => 0}; - score += match &w[4] { ',' => 465, '.' => -299, '1' => -514, ']' => -2762, '、' => 465, '。' => -299, '「' => 363, 'あ' => 1655, 'い' => 331, 'う' => -503, 'え' => 1199, 'お' => 527, 'か' => 647, 'が' => -421, 'き' => 1624, 'ぎ' => 1971, 'く' => 312, 'げ' => -983, 'さ' => -1537, 'し' => -1371, 'す' => -852, 'だ' => -1186, 'ち' => 1093, 'っ' => 52, 'つ' => 921, 'て' => -18, 'で' => -850, 'と' => -127, 'ど' => 1682, 'な' => -787, 'に' => -1224, 'の' => -635, 'は' => -578, 'べ' => 1001, 'み' => 502, 'め' => 865, 'ゃ' => 3350, 'ょ' => 854, 'り' => -208, 'る' => 429, 'れ' => 504, 'わ' => 419, 'を' => -1264, 'ん' => 327, 'イ' => 241, 'ル' => 451, 'ン' => -343, '中' => -871, '京' => 722, '会' => -1153, '党' => -654, '務' => 3519, '区' => -901, '告' => 848, '員' => 2104, '大' => -1296, '学' => -548, '定' => 1785, '嵐' => -1304, '市' => -2991, '席' => 921, '年' => 1763, '思' => 872, '所' => -814, '挙' => 1618, '新' => -1682, '日' => 218, '月' => -4353, '査' => 932, '格' => 1356, '機' => -1508, '氏' => -1347, '田' => 240, '町' => -3912, '的' => -3149, '相' => 1319, '省' => -1052, '県' => -4003, '研' => -997, '社' => -278, '空' => -813, '統' => 1955, '者' => -2233, 
'表' => 663, '語' => -1073, '議' => 1219, '選' => -1018, '郎' => -368, '長' => 786, '間' => 1191, '題' => 2368, '館' => -689, '1' => -514, '「' => 363, 'イ' => 241, 'ル' => 451, 'ン' => -343, _ => 0}; - if w[4] == *E2 { - score += -32768; - } - score += match &w[5] { ',' => 227, '.' => 808, '1' => -270, '、' => 227, '。' => 808, 'あ' => -307, 'う' => 189, 'か' => 241, 'が' => -73, 'く' => -121, 'こ' => -200, 'じ' => 1782, 'す' => 383, 'た' => -428, 'っ' => 573, 'て' => -1014, 'で' => 101, 'と' => -105, 'な' => -253, 'に' => -149, 'の' => -417, 'は' => -236, 'も' => -206, 'り' => 187, 'る' => -135, 'を' => 195, 'ル' => -673, 'ン' => -496, '一' => -277, '中' => 201, '件' => -800, '会' => 624, '前' => 302, '区' => 1792, '員' => -1212, '委' => 798, '学' => -960, '市' => 887, '広' => -695, '後' => 535, '業' => -697, '相' => 753, '社' => -507, '福' => 974, '空' => -822, '者' => 1811, '連' => 463, '郎' => 1082, '1' => -270, 'ル' => -673, 'ン' => -496, _ => 0}; - if w[5] == *E1 { - score += 306; + if s.is_empty() { + return Vec::new(); } - score += match &(c[1], c[2]) { ('H', 'H') => 6, ('I', 'I') => 2461, ('K', 'H') => 406, ('O', 'H') => -1378, _ => 0}; - score += match &(c[2], c[3]) { ('A', 'A') => -3267, ('A', 'I') => 2744, ('A', 'N') => -878, ('H', 'H') => -4070, ('H', 'M') => -1711, ('H', 'N') => 4012, ('H', 'O') => 3761, ('I', 'A') => 1327, ('I', 'H') => -1184, ('I', 'I') => -1332, ('I', 'K') => 1721, ('I', 'O') => 5492, ('K', 'I') => 3831, ('K', 'K') => -8741, ('M', 'H') => -3132, ('M', 'K') => 3334, ('O', 'O') => -2920, _ => 0}; - score += match &(c[3], c[4]) { ('H', 'H') => 996, ('H', 'I') => 626, ('H', 'K') => -721, ('H', 'N') => -1307, ('H', 'O') => -836, ('I', 'H') => -301, ('K', 'K') => 2762, ('M', 'K') => 1079, ('M', 'M') => 4034, ('O', 'A') => -1652, ('O', 'H') => 266, _ => 0}; - score += match &(p[0], p[1]) { ('B', 'B') => 295, ('O', 'B') => 304, ('O', 'O') => -125, ('U', 'B') => 352, _ => 0}; - score += match &(p[1], p[2]) { ('B', 'O') => 60, ('O', 'O') => -1762, _ => 0}; - score += match &(p[1], c[1], c[2]) { 
('B', 'H', 'H') => 1150, ('B', 'H', 'M') => 1521, ('B', 'I', 'I') => -1158, ('B', 'I', 'M') => 886, ('B', 'M', 'H') => 1208, ('B', 'N', 'H') => 449, ('B', 'O', 'H') => -91, ('B', 'O', 'O') => -2597, ('O', 'H', 'I') => 451, ('O', 'I', 'H') => -296, ('O', 'K', 'A') => 1851, ('O', 'K', 'H') => -1020, ('O', 'K', 'K') => 904, ('O', 'O', 'O') => 2965, _ => 0}; - score += match &(p[1], c[2], c[3]) { ('B', 'H', 'H') => 118, ('B', 'H', 'I') => -1159, ('B', 'H', 'M') => 466, ('B', 'I', 'H') => -919, ('B', 'K', 'K') => -1720, ('B', 'K', 'O') => 864, ('O', 'H', 'H') => -1139, ('O', 'H', 'M') => -181, ('O', 'I', 'H') => 153, ('U', 'H', 'I') => -1146, _ => 0}; - score += match &(p[2], c[1], c[2]) { ('B', 'H', 'H') => -792, ('B', 'H', 'I') => 2664, ('B', 'I', 'I') => -299, ('B', 'K', 'I') => 419, ('B', 'M', 'H') => 937, ('B', 'M', 'M') => 8335, ('B', 'N', 'N') => 998, ('B', 'O', 'H') => 775, ('O', 'H', 'H') => 2174, ('O', 'H', 'M') => 439, ('O', 'I', 'I') => 280, ('O', 'K', 'H') => 1798, ('O', 'K', 'I') => -793, ('O', 'K', 'O') => -2242, ('O', 'M', 'H') => -2402, ('O', 'O', 'O') => 11699, _ => 0}; - score += match &(p[2], c[2], c[3]) { ('B', 'H', 'H') => -3895, ('B', 'I', 'H') => 3761, ('B', 'I', 'I') => -4654, ('B', 'I', 'K') => 1348, ('B', 'K', 'K') => -1806, ('B', 'M', 'I') => -3385, ('B', 'O', 'O') => -12396, ('O', 'A', 'H') => 926, ('O', 'H', 'H') => 266, ('O', 'H', 'K') => -2036, ('O', 'N', 'N') => -973, _ => 0}; - score += match &(w[1], w[2]) { (',', 'と') => 660, (',', '同') => 727, ('、', 'と') => 660, ('、', '同') => 727, ('」', 'と') => 1682, ('あ', 'っ') => 1505, ('い', 'う') => 1743, ('い', 'っ') => -2055, ('い', 'る') => 672, ('う', 'し') => -4817, ('う', 'ん') => 665, ('か', 'ら') => 3472, ('が', 'ら') => 600, ('こ', 'う') => -790, ('こ', 'と') => 2083, ('こ', 'ん') => -1262, ('さ', 'ら') => -4143, ('さ', 'ん') => 4573, ('し', 'た') => 2641, ('し', 'て') => 1104, ('す', 'で') => -3399, ('そ', 'こ') => 1977, ('そ', 'れ') => -871, ('た', 'ち') => 1122, ('た', 'め') => 601, ('っ', 'た') => 3463, ('つ', 'い') => -802, 
('て', 'い') => 805, ('て', 'き') => 1249, ('で', 'き') => 1127, ('で', 'す') => 3445, ('で', 'は') => 844, ('と', 'い') => -4915, ('と', 'み') => 1922, ('ど', 'こ') => 3887, ('な', 'い') => 5713, ('な', 'っ') => 3015, ('な', 'ど') => 7379, ('な', 'ん') => -1113, ('に', 'し') => 2468, ('に', 'は') => 1498, ('に', 'も') => 1671, ('に', '対') => -912, ('の', '一') => -501, ('の', '中') => 741, ('ま', 'せ') => 2448, ('ま', 'で') => 1711, ('ま', 'ま') => 2600, ('ま', 'る') => -2155, ('や', 'む') => -1947, ('よ', 'っ') => -2565, ('れ', 'た') => 2369, ('れ', 'で') => -913, ('を', 'し') => 1860, ('を', '見') => 731, ('亡', 'く') => -1886, ('京', '都') => 2558, ('取', 'り') => -2784, ('大', 'き') => -2604, ('大', '阪') => 1497, ('平', '方') => -2314, ('引', 'き') => -1336, ('日', '本') => -195, ('本', '当') => -2423, ('毎', '日') => -2113, ('目', '指') => -724, ('」', 'と') => 1682, _ => 0}; - if (w[1], w[2]) == (*B1, 'あ') { - score += 1404; - } - if (w[1], w[2]) == (*B1, '同') { - score += 542; - } + let mut result = Vec::with_capacity(s.chars().count()); + + let segments = vec![*B3, *B2, *B1] + .into_iter() + .chain(s.chars()) + .chain(vec![*E1, *E2, *E3].into_iter()) + .collect::>(); + + let ctypes = vec!['O'; 3] + .into_iter() + .chain(s.chars().map(get_ctype)) + .chain(vec!['O'; 3].into_iter()) + .collect::>(); + + let mut word = segments[3].to_string(); + let mut p = vec!['U'; 3]; + + for index in 4..segments.len() - 3 { + let mut score = BIAS; + let w = &segments[index - 3..index + 3]; + let c = &ctypes[index - 3..index + 3]; + + score += match &(c[0], c[1], c[2]) { + ('A', 'A', 'A') => 1093, + ('H', 'H', 'H') => 1029, + ('H', 'H', 'M') => 580, + ('H', 'I', 'I') => 998, + ('H', 'O', 'H') => -390, + ('H', 'O', 'M') => -331, + ('I', 'H', 'I') => 1169, + ('I', 'O', 'H') => -142, + ('I', 'O', 'I') => -1015, + ('I', 'O', 'M') => 467, + ('M', 'M', 'H') => 187, + ('O', 'O', 'I') => -1832, + _ => 0, + }; + score += match &(c[1], c[2], c[3]) { + ('H', 'H', 'O') => 2088, + ('H', 'I', 'I') => -1023, + ('H', 'M', 'M') => -1154, + ('I', 'H', 'I') => -1965, + 
('K', 'K', 'H') => 703, + ('O', 'I', 'I') => -2649, + _ => 0, + }; + score += match &(c[2], c[3], c[4]) { + ('A', 'A', 'A') => -294, + ('H', 'H', 'H') => 346, + ('H', 'H', 'I') => -341, + ('H', 'I', 'I') => -1088, + ('H', 'I', 'K') => 731, + ('H', 'O', 'H') => -1486, + ('I', 'H', 'H') => 128, + ('I', 'H', 'I') => -3041, + ('I', 'H', 'O') => -1935, + ('I', 'I', 'H') => -825, + ('I', 'I', 'M') => -1035, + ('I', 'O', 'I') => -542, + ('K', 'H', 'H') => -1216, + ('K', 'K', 'A') => 491, + ('K', 'K', 'H') => -1217, + ('K', 'O', 'K') => -1009, + ('M', 'H', 'H') => -2694, + ('M', 'H', 'M') => -457, + ('M', 'H', 'O') => 123, + ('M', 'M', 'H') => -471, + ('N', 'N', 'H') => -1689, + ('N', 'N', 'O') => 662, + ('O', 'H', 'O') => -3393, + _ => 0, + }; + score += match &(c[3], c[4], c[5]) { + ('H', 'H', 'H') => -203, + ('H', 'H', 'I') => 1344, + ('H', 'H', 'K') => 365, + ('H', 'H', 'M') => -122, + ('H', 'H', 'N') => 182, + ('H', 'H', 'O') => 669, + ('H', 'I', 'H') => 804, + ('H', 'I', 'I') => 679, + ('H', 'O', 'H') => 446, + ('I', 'H', 'H') => 695, + ('I', 'H', 'O') => -2324, + ('I', 'I', 'H') => 321, + ('I', 'I', 'I') => 1497, + ('I', 'I', 'O') => 656, + ('I', 'O', 'O') => 54, + ('K', 'A', 'K') => 4845, + ('K', 'K', 'A') => 3386, + ('K', 'K', 'K') => 3065, + ('M', 'H', 'H') => -405, + ('M', 'H', 'I') => 201, + ('M', 'M', 'H') => -241, + ('M', 'M', 'M') => 661, + ('M', 'O', 'M') => 841, + _ => 0, + }; + score += match &(p[1], c[0], c[1], c[2]) { + ('B', 'H', 'H', 'H') => -227, + ('B', 'H', 'H', 'I') => 316, + ('B', 'H', 'I', 'H') => -132, + ('B', 'I', 'H', 'H') => 60, + ('B', 'I', 'I', 'I') => 1595, + ('B', 'N', 'H', 'H') => -744, + ('B', 'O', 'H', 'H') => 225, + ('B', 'O', 'O', 'O') => -908, + ('O', 'A', 'K', 'K') => 482, + ('O', 'H', 'H', 'H') => 281, + ('O', 'H', 'I', 'H') => 249, + ('O', 'I', 'H', 'I') => 200, + ('O', 'I', 'I', 'H') => -68, + _ => 0, + }; + score += match &(p[1], c[1], c[2], c[3]) { + ('B', 'I', 'H', 'H') => -1401, + ('B', 'I', 'I', 'I') => -1033, + ('B', 'K', 
'A', 'K') => -543, + ('B', 'O', 'O', 'O') => -5591, + _ => 0, + }; + score += match &(p[2], c[0], c[1], c[2]) { + ('B', 'H', 'H', 'H') => 478, + ('B', 'H', 'H', 'M') => -1073, + ('B', 'H', 'I', 'H') => 222, + ('B', 'H', 'I', 'I') => -504, + ('B', 'I', 'I', 'H') => -116, + ('B', 'I', 'I', 'I') => -105, + ('B', 'M', 'H', 'I') => -863, + ('B', 'M', 'H', 'M') => -464, + ('B', 'O', 'M', 'H') => 620, + ('O', 'H', 'H', 'H') => 346, + ('O', 'H', 'H', 'I') => 1729, + ('O', 'H', 'I', 'I') => 997, + ('O', 'H', 'M', 'H') => 481, + ('O', 'I', 'H', 'H') => 623, + ('O', 'I', 'I', 'H') => 1344, + ('O', 'K', 'A', 'K') => 2792, + ('O', 'K', 'H', 'H') => 587, + ('O', 'K', 'K', 'A') => 679, + ('O', 'O', 'H', 'H') => 110, + ('O', 'O', 'I', 'I') => -685, + _ => 0, + }; + score += match &(p[2], c[1], c[2], c[3]) { + ('B', 'H', 'H', 'H') => -721, + ('B', 'H', 'H', 'M') => -3604, + ('B', 'H', 'I', 'I') => -966, + ('B', 'I', 'I', 'H') => -607, + ('B', 'I', 'I', 'I') => -2181, + ('O', 'A', 'A', 'A') => -2763, + ('O', 'A', 'K', 'K') => 180, + ('O', 'H', 'H', 'H') => -294, + ('O', 'H', 'H', 'I') => 2446, + ('O', 'H', 'H', 'O') => 480, + ('O', 'H', 'I', 'H') => -1573, + ('O', 'I', 'H', 'H') => 1935, + ('O', 'I', 'H', 'I') => -493, + ('O', 'I', 'I', 'H') => 626, + ('O', 'I', 'I', 'I') => -4007, + ('O', 'K', 'A', 'K') => -8156, + _ => 0, + }; + score += match &(w[0], w[1], w[2]) { + ('に', 'つ', 'い') => -4681, + ('東', '京', '都') => 2026, + _ => 0, + }; + score += match &(w[1], w[2], w[3]) { + ('あ', 'る', '程') => -2049, + ('い', 'っ', 'た') => -1256, + ('こ', 'ろ', 'が') => -2434, + ('し', 'ょ', 'う') => 3873, + ('そ', 'の', '後') => -4430, + ('だ', 'っ', 'て') => -1049, + ('て', 'い', 'た') => 1833, + ('と', 'し', 'て') => -4657, + ('と', 'も', 'に') => -4517, + ('も', 'の', 'で') => 1882, + ('一', '気', 'に') => -792, + ('初', 'め', 'て') => -1512, + ('同', '時', 'に') => -8097, + ('大', 'き', 'な') => -1255, + ('対', 'し', 'て') => -2721, + ('社', '会', '党') => -3216, + _ => 0, + }; + score += match &(w[2], w[3], w[4]) { + ('い', 'た', 'だ') => 
-1734, + ('し', 'て', 'い') => 1314, + ('と', 'し', 'て') => -4314, + ('に', 'つ', 'い') => -5483, + ('に', 'と', 'っ') => -5989, + ('に', '当', 'た') => -6247, + ('の', 'で', ',') => -727, + ('の', 'で', '、') => -727, + ('の', 'も', 'の') => -600, + ('れ', 'か', 'ら') => -3752, + ('十', '二', '月') => -2287, + _ => 0, + }; + score += match &(w[3], w[4], w[5]) { + ('い', 'う', '.') => 8576, + ('い', 'う', '。') => 8576, + ('か', 'ら', 'な') => -2348, + ('し', 'て', 'い') => 2958, + ('た', 'が', ',') => 1516, + ('た', 'が', '、') => 1516, + ('て', 'い', 'る') => 1538, + ('と', 'い', 'う') => 1349, + ('ま', 'し', 'た') => 5543, + ('ま', 'せ', 'ん') => 1097, + ('よ', 'う', 'と') => -4258, + ('よ', 'る', 'と') => 5865, + _ => 0, + }; + + score += match &c[0] { + 'A' => 484, + 'K' => 93, + 'M' => 645, + 'O' => -505, + _ => 0, + }; + score += match &c[1] { + 'A' => 819, + 'H' => 1059, + 'I' => 409, + 'M' => 3987, + 'N' => 5775, + 'O' => 646, + _ => 0, + }; + score += match &c[2] { + 'A' => -1370, + 'I' => 2311, + _ => 0, + }; + score += match &c[3] { + 'A' => -2643, + 'H' => 1809, + 'I' => -1032, + 'K' => -3450, + 'M' => 3565, + 'N' => 3876, + 'O' => 6646, + _ => 0, + }; + score += match &c[4] { + 'H' => 313, + 'I' => -1238, + 'K' => -799, + 'M' => 539, + 'O' => -831, + _ => 0, + }; + score += match &c[5] { + 'H' => -506, + 'I' => -253, + 'K' => 87, + 'M' => 247, + 'O' => -387, + _ => 0, + }; + + score += match &p[0] { + 'O' => -214, + _ => 0, + }; + score += match &p[1] { + 'B' => 69, + 'O' => 935, + _ => 0, + }; + score += match &p[2] { + 'B' => 189, + _ => 0, + }; + score += match &(p[0], c[0]) { + ('B', 'H') => 21, + ('B', 'I') => -12, + ('B', 'K') => -99, + ('B', 'N') => 142, + ('B', 'O') => -56, + ('O', 'H') => -95, + ('O', 'I') => 477, + ('O', 'K') => 410, + ('O', 'O') => -2422, + _ => 0, + }; + score += match &(p[1], c[1]) { + ('B', 'H') => 216, + ('B', 'I') => 113, + ('O', 'K') => 1759, + _ => 0, + }; + score += match &(p[2], c[2]) { + ('B', 'A') => -479, + ('B', 'H') => 42, + ('B', 'I') => 1913, + ('B', 'K') => -7198, + 
('B', 'M') => 3160, + ('B', 'N') => 6427, + ('B', 'O') => 14761, + ('O', 'I') => -827, + ('O', 'N') => -3212, + _ => 0, + }; + score += match &w[0] { + ',' => 156, + '、' => 156, + '「' => -463, + 'あ' => -941, + 'う' => -127, + 'が' => -553, + 'き' => 121, + 'こ' => 505, + 'で' => -201, + 'と' => -547, + 'ど' => -123, + 'に' => -789, + 'の' => -185, + 'は' => -847, + 'も' => -466, + 'や' => -470, + 'よ' => 182, + 'ら' => -292, + 'り' => 208, + 'れ' => 169, + 'を' => -446, + 'ん' => -137, + '・' => -135, + '主' => -402, + '京' => -268, + '区' => -912, + '午' => 871, + '国' => -460, + '大' => 561, + '委' => 729, + '市' => -411, + '日' => -141, + '理' => 361, + '生' => -408, + '県' => -386, + '都' => -718, + '「' => -463, + '・' => -135, + _ => 0, + }; + score += match &w[1] { + ',' => -829, + '、' => -829, + '〇' => 892, + '「' => -645, + '」' => 3145, + 'あ' => -538, + 'い' => 505, + 'う' => 134, + 'お' => -502, + 'か' => 1454, + 'が' => -856, + 'く' => -412, + 'こ' => 1141, + 'さ' => 878, + 'ざ' => 540, + 'し' => 1529, + 'す' => -675, + 'せ' => 300, + 'そ' => -1011, + 'た' => 188, + 'だ' => 1837, + 'つ' => -949, + 'て' => -291, + 'で' => -268, + 'と' => -981, + 'ど' => 1273, + 'な' => 1063, + 'に' => -1764, + 'の' => 130, + 'は' => -409, + 'ひ' => -1273, + 'べ' => 1261, + 'ま' => 600, + 'も' => -1263, + 'や' => -402, + 'よ' => 1639, + 'り' => -579, + 'る' => -694, + 'れ' => 571, + 'を' => -2516, + 'ん' => 2095, + 'ア' => -587, + 'カ' => 306, + 'キ' => 568, + 'ッ' => 831, + '三' => -758, + '不' => -2150, + '世' => -302, + '中' => -968, + '主' => -861, + '事' => 492, + '人' => -123, + '会' => 978, + '保' => 362, + '入' => 548, + '初' => -3025, + '副' => -1566, + '北' => -3414, + '区' => -422, + '大' => -1769, + '天' => -865, + '太' => -483, + '子' => -1519, + '学' => 760, + '実' => 1023, + '小' => -2009, + '市' => -813, + '年' => -1060, + '強' => 1067, + '手' => -1519, + '揺' => -1033, + '政' => 1522, + '文' => -1355, + '新' => -1682, + '日' => -1815, + '明' => -1462, + '最' => -630, + '朝' => -1843, + '本' => -1650, + '東' => -931, + '果' => -665, + '次' => -2378, + '民' => -180, + 
'気' => -1740, + '理' => 752, + '発' => 529, + '目' => -1584, + '相' => -242, + '県' => -1165, + '立' => -763, + '第' => 810, + '米' => 509, + '自' => -1353, + '行' => 838, + '西' => -744, + '見' => -3874, + '調' => 1010, + '議' => 1198, + '込' => 3041, + '開' => 1758, + '間' => -1257, + '「' => -645, + '」' => 3145, + 'ッ' => 831, + 'ア' => -587, + 'カ' => 306, + 'キ' => 568, + _ => 0, + }; + score += match &w[2] { + ',' => 4889, + '1' => -800, + '−' => -1723, + '、' => 4889, + '々' => -2311, + '〇' => 5827, + '」' => 2670, + '〓' => -3573, + 'あ' => -2696, + 'い' => 1006, + 'う' => 2342, + 'え' => 1983, + 'お' => -4864, + 'か' => -1163, + 'が' => 3271, + 'く' => 1004, + 'け' => 388, + 'げ' => 401, + 'こ' => -3552, + 'ご' => -3116, + 'さ' => -1058, + 'し' => -395, + 'す' => 584, + 'せ' => 3685, + 'そ' => -5228, + 'た' => 842, + 'ち' => -521, + 'っ' => -1444, + 'つ' => -1081, + 'て' => 6167, + 'で' => 2318, + 'と' => 1691, + 'ど' => -899, + 'な' => -2788, + 'に' => 2745, + 'の' => 4056, + 'は' => 4555, + 'ひ' => -2171, + 'ふ' => -1798, + 'へ' => 1199, + 'ほ' => -5516, + 'ま' => -4384, + 'み' => -120, + 'め' => 1205, + 'も' => 2323, + 'や' => -788, + 'よ' => -202, + 'ら' => 727, + 'り' => 649, + 'る' => 5905, + 'れ' => 2773, + 'わ' => -1207, + 'を' => 6620, + 'ん' => -518, + 'ア' => 551, + 'グ' => 1319, + 'ス' => 874, + 'ッ' => -1350, + 'ト' => 521, + 'ム' => 1109, + 'ル' => 1591, + 'ロ' => 2201, + 'ン' => 278, + '・' => -3794, + '一' => -1619, + '下' => -1759, + '世' => -2087, + '両' => 3815, + '中' => 653, + '主' => -758, + '予' => -1193, + '二' => 974, + '人' => 2742, + '今' => 792, + '他' => 1889, + '以' => -1368, + '低' => 811, + '何' => 4265, + '作' => -361, + '保' => -2439, + '元' => 4858, + '党' => 3593, + '全' => 1574, + '公' => -3030, + '六' => 755, + '共' => -1880, + '円' => 5807, + '再' => 3095, + '分' => 457, + '初' => 2475, + '別' => 1129, + '前' => 2286, + '副' => 4437, + '力' => 365, + '動' => -949, + '務' => -1872, + '化' => 1327, + '北' => -1038, + '区' => 4646, + '千' => -2309, + '午' => -783, + '協' => -1006, + '口' => 483, + '右' => 1233, + '各' => 3588, + '合' => -241, 
+ '同' => 3906, + '和' => -837, + '員' => 4513, + '国' => 642, + '型' => 1389, + '場' => 1219, + '外' => -241, + '妻' => 2016, + '学' => -1356, + '安' => -423, + '実' => -1008, + '家' => 1078, + '小' => -513, + '少' => -3102, + '州' => 1155, + '市' => 3197, + '平' => -1804, + '年' => 2416, + '広' => -1030, + '府' => 1605, + '度' => 1452, + '建' => -2352, + '当' => -3885, + '得' => 1905, + '思' => -1291, + '性' => 1822, + '戸' => -488, + '指' => -3973, + '政' => -2013, + '教' => -1479, + '数' => 3222, + '文' => -1489, + '新' => 1764, + '日' => 2099, + '旧' => 5792, + '昨' => -661, + '時' => -1248, + '曜' => -951, + '最' => -937, + '月' => 4125, + '期' => 360, + '李' => 3094, + '村' => 364, + '東' => -805, + '核' => 5156, + '森' => 2438, + '業' => 484, + '氏' => 2613, + '民' => -1694, + '決' => -1073, + '法' => 1868, + '海' => -495, + '無' => 979, + '物' => 461, + '特' => -3850, + '生' => -273, + '用' => 914, + '町' => 1215, + '的' => 7313, + '直' => -1835, + '省' => 792, + '県' => 6293, + '知' => -1528, + '私' => 4231, + '税' => 401, + '立' => -960, + '第' => 1201, + '米' => 7767, + '系' => 3066, + '約' => 3663, + '級' => 1384, + '統' => -4229, + '総' => 1163, + '線' => 1255, + '者' => 6457, + '能' => 725, + '自' => -2869, + '英' => 785, + '見' => 1044, + '調' => -562, + '財' => -733, + '費' => 1777, + '車' => 1835, + '軍' => 1375, + '込' => -1504, + '通' => -1136, + '選' => -681, + '郎' => 1026, + '郡' => 4404, + '部' => 1200, + '金' => 2163, + '長' => 421, + '開' => -1432, + '間' => 1302, + '関' => -1282, + '雨' => 2009, + '電' => -1045, + '非' => 2066, + '駅' => 1620, + '1' => -800, + '」' => 2670, + '・' => -3794, + 'ッ' => -1350, + 'ア' => 551, + 'ス' => 874, + 'ト' => 521, + 'ム' => 1109, + 'ル' => 1591, + 'ロ' => 2201, + 'ン' => 278, + _ => 0, + }; + score += match &w[3] { + ',' => 3930, + '.' 
=> 3508, + '―' => -4841, + '、' => 3930, + '。' => 3508, + '〇' => 4999, + '「' => 1895, + '」' => 3798, + '〓' => -5156, + 'あ' => 4752, + 'い' => -3435, + 'う' => -640, + 'え' => -2514, + 'お' => 2405, + 'か' => 530, + 'が' => 6006, + 'き' => -4482, + 'ぎ' => -3821, + 'く' => -3788, + 'け' => -4376, + 'げ' => -4734, + 'こ' => 2255, + 'ご' => 1979, + 'さ' => 2864, + 'し' => -843, + 'じ' => -2506, + 'す' => -731, + 'ず' => 1251, + 'せ' => 181, + 'そ' => 4091, + 'た' => 5034, + 'だ' => 5408, + 'ち' => -3654, + 'っ' => -5882, + 'つ' => -1659, + 'て' => 3994, + 'で' => 7410, + 'と' => 4547, + 'な' => 5433, + 'に' => 6499, + 'ぬ' => 1853, + 'ね' => 1413, + 'の' => 7396, + 'は' => 8578, + 'ば' => 1940, + 'ひ' => 4249, + 'び' => -4134, + 'ふ' => 1345, + 'へ' => 6665, + 'べ' => -744, + 'ほ' => 1464, + 'ま' => 1051, + 'み' => -2082, + 'む' => -882, + 'め' => -5046, + 'も' => 4169, + 'ゃ' => -2666, + 'や' => 2795, + 'ょ' => -1544, + 'よ' => 3351, + 'ら' => -2922, + 'り' => -9726, + 'る' => -14896, + 'れ' => -2613, + 'ろ' => -4570, + 'わ' => -1783, + 'を' => 13150, + 'ん' => -2352, + 'カ' => 2145, + 'コ' => 1789, + 'セ' => 1287, + 'ッ' => -724, + 'ト' => -403, + 'メ' => -1635, + 'ラ' => -881, + 'リ' => -541, + 'ル' => -856, + 'ン' => -3637, + '・' => -4371, + 'ー' => -11870, + '一' => -2069, + '中' => 2210, + '予' => 782, + '事' => -190, + '井' => -1768, + '人' => 1036, + '以' => 544, + '会' => 950, + '体' => -1286, + '作' => 530, + '側' => 4292, + '先' => 601, + '党' => -2006, + '共' => -1212, + '内' => 584, + '円' => 788, + '初' => 1347, + '前' => 1623, + '副' => 3879, + '力' => -302, + '動' => -740, + '務' => -2715, + '化' => 776, + '区' => 4517, + '協' => 1013, + '参' => 1555, + '合' => -1834, + '和' => -681, + '員' => -910, + '器' => -851, + '回' => 1500, + '国' => -619, + '園' => -1200, + '地' => 866, + '場' => -1410, + '塁' => -2094, + '士' => -1413, + '多' => 1067, + '大' => 571, + '子' => -4802, + '学' => -1397, + '定' => -1057, + '寺' => -809, + '小' => 1910, + '屋' => -1328, + '山' => -1500, + '島' => -2056, + '川' => -2667, + '市' => 2771, + '年' => 374, + '庁' => -4556, + '後' => 456, + 
'性' => 553, + '感' => 916, + '所' => -1566, + '支' => 856, + '改' => 787, + '政' => 2182, + '教' => 704, + '文' => 522, + '方' => -856, + '日' => 1798, + '時' => 1829, + '最' => 845, + '月' => -9066, + '木' => -485, + '来' => -442, + '校' => -360, + '業' => -1043, + '氏' => 5388, + '民' => -2716, + '気' => -910, + '沢' => -939, + '済' => -543, + '物' => -735, + '率' => 672, + '球' => -1267, + '生' => -1286, + '産' => -1101, + '田' => -2900, + '町' => 1826, + '的' => 2586, + '目' => 922, + '省' => -3485, + '県' => 2997, + '空' => -867, + '立' => -2112, + '第' => 788, + '米' => 2937, + '系' => 786, + '約' => 2171, + '経' => 1146, + '統' => -1169, + '総' => 940, + '線' => -994, + '署' => 749, + '者' => 2145, + '能' => -730, + '般' => -852, + '行' => -792, + '規' => 792, + '警' => -1184, + '議' => -244, + '谷' => -1000, + '賞' => 730, + '車' => -1481, + '軍' => 1158, + '輪' => -1433, + '込' => -3370, + '近' => 929, + '道' => -1291, + '選' => 2596, + '郎' => -4866, + '都' => 1192, + '野' => -1100, + '銀' => -2213, + '長' => 357, + '間' => -2344, + '院' => -2297, + '際' => -2604, + '電' => -878, + '領' => -1659, + '題' => -792, + '館' => -1984, + '首' => 1749, + '高' => 2120, + '「' => 1895, + '」' => 3798, + '・' => -4371, + 'ッ' => -724, + 'ー' => -11870, + 'カ' => 2145, + 'コ' => 1789, + 'セ' => 1287, + 'ト' => -403, + 'メ' => -1635, + 'ラ' => -881, + 'リ' => -541, + 'ル' => -856, + 'ン' => -3637, + _ => 0, + }; + score += match &w[4] { + ',' => 465, + '.' 
=> -299, + '1' => -514, + ']' => -2762, + '、' => 465, + '。' => -299, + '「' => 363, + 'あ' => 1655, + 'い' => 331, + 'う' => -503, + 'え' => 1199, + 'お' => 527, + 'か' => 647, + 'が' => -421, + 'き' => 1624, + 'ぎ' => 1971, + 'く' => 312, + 'げ' => -983, + 'さ' => -1537, + 'し' => -1371, + 'す' => -852, + 'だ' => -1186, + 'ち' => 1093, + 'っ' => 52, + 'つ' => 921, + 'て' => -18, + 'で' => -850, + 'と' => -127, + 'ど' => 1682, + 'な' => -787, + 'に' => -1224, + 'の' => -635, + 'は' => -578, + 'べ' => 1001, + 'み' => 502, + 'め' => 865, + 'ゃ' => 3350, + 'ょ' => 854, + 'り' => -208, + 'る' => 429, + 'れ' => 504, + 'わ' => 419, + 'を' => -1264, + 'ん' => 327, + 'イ' => 241, + 'ル' => 451, + 'ン' => -343, + '中' => -871, + '京' => 722, + '会' => -1153, + '党' => -654, + '務' => 3519, + '区' => -901, + '告' => 848, + '員' => 2104, + '大' => -1296, + '学' => -548, + '定' => 1785, + '嵐' => -1304, + '市' => -2991, + '席' => 921, + '年' => 1763, + '思' => 872, + '所' => -814, + '挙' => 1618, + '新' => -1682, + '日' => 218, + '月' => -4353, + '査' => 932, + '格' => 1356, + '機' => -1508, + '氏' => -1347, + '田' => 240, + '町' => -3912, + '的' => -3149, + '相' => 1319, + '省' => -1052, + '県' => -4003, + '研' => -997, + '社' => -278, + '空' => -813, + '統' => 1955, + '者' => -2233, + '表' => 663, + '語' => -1073, + '議' => 1219, + '選' => -1018, + '郎' => -368, + '長' => 786, + '間' => 1191, + '題' => 2368, + '館' => -689, + '1' => -514, + '「' => 363, + 'イ' => 241, + 'ル' => 451, + 'ン' => -343, + _ => 0, + }; + if w[4] == *E2 { + score += -32768; + } + score += match &w[5] { + ',' => 227, + '.' 
=> 808, + '1' => -270, + '、' => 227, + '。' => 808, + 'あ' => -307, + 'う' => 189, + 'か' => 241, + 'が' => -73, + 'く' => -121, + 'こ' => -200, + 'じ' => 1782, + 'す' => 383, + 'た' => -428, + 'っ' => 573, + 'て' => -1014, + 'で' => 101, + 'と' => -105, + 'な' => -253, + 'に' => -149, + 'の' => -417, + 'は' => -236, + 'も' => -206, + 'り' => 187, + 'る' => -135, + 'を' => 195, + 'ル' => -673, + 'ン' => -496, + '一' => -277, + '中' => 201, + '件' => -800, + '会' => 624, + '前' => 302, + '区' => 1792, + '員' => -1212, + '委' => 798, + '学' => -960, + '市' => 887, + '広' => -695, + '後' => 535, + '業' => -697, + '相' => 753, + '社' => -507, + '福' => 974, + '空' => -822, + '者' => 1811, + '連' => 463, + '郎' => 1082, + '1' => -270, + 'ル' => -673, + 'ン' => -496, + _ => 0, + }; + if w[5] == *E1 { + score += 306; + } - score += match &(w[2], w[3]) { ('.', '.') => -11822, ('1', '1') => -669, ('―', '―') => -5730, ('−', '−') => -13175, ('い', 'う') => -1609, ('う', 'か') => 2490, ('か', 'し') => -1350, ('か', 'も') => -602, ('か', 'ら') => -7194, ('か', 'れ') => 4612, ('が', 'い') => 853, ('が', 'ら') => -3198, ('き', 'た') => 1941, ('く', 'な') => -1597, ('こ', 'と') => -8392, ('こ', 'の') => -4193, ('さ', 'せ') => 4533, ('さ', 'れ') => 13168, ('さ', 'ん') => -3977, ('し', 'い') => -1819, ('し', 'か') => -545, ('し', 'た') => 5078, ('し', 'て') => 972, ('し', 'な') => 939, ('そ', 'の') => -3744, ('た', 'い') => -1253, ('た', 'た') => -662, ('た', 'だ') => -3857, ('た', 'ち') => -786, ('た', 'と') => 1224, ('た', 'は') => -939, ('っ', 'た') => 4589, ('っ', 'て') => 1647, ('っ', 'と') => -2094, ('て', 'い') => 6144, ('て', 'き') => 3640, ('て', 'く') => 2551, ('て', 'は') => -3110, ('て', 'も') => -3065, ('で', 'い') => 2666, ('で', 'き') => -1528, ('で', 'し') => -3828, ('で', 'す') => -4761, ('で', 'も') => -4203, ('と', 'い') => 1890, ('と', 'こ') => -1746, ('と', 'と') => -2279, ('と', 'の') => 720, ('と', 'み') => 5168, ('と', 'も') => -3941, ('な', 'い') => -2488, ('な', 'が') => -1313, ('な', 'ど') => -6509, ('な', 'の') => 2614, ('な', 'ん') => 3099, ('に', 'お') => -1615, ('に', 'し') => 2748, ('に', 'な') => 
2454, ('に', 'よ') => -7236, ('に', '対') => -14943, ('に', '従') => -4688, ('に', '関') => -11388, ('の', 'か') => 2093, ('の', 'で') => -7059, ('の', 'に') => -6041, ('の', 'の') => -6125, ('は', 'い') => 1073, ('は', 'が') => -1033, ('は', 'ず') => -2532, ('ば', 'れ') => 1813, ('ま', 'し') => -1316, ('ま', 'で') => -6621, ('ま', 'れ') => 5409, ('め', 'て') => -3153, ('も', 'い') => 2230, ('も', 'の') => -10713, ('ら', 'か') => -944, ('ら', 'し') => -1611, ('ら', 'に') => -1897, ('り', 'し') => 651, ('り', 'ま') => 1620, ('れ', 'た') => 4270, ('れ', 'て') => 849, ('れ', 'ば') => 4114, ('ろ', 'う') => 6067, ('わ', 'れ') => 7901, ('を', '通') => -11877, ('ん', 'だ') => 728, ('ん', 'な') => -4115, ('一', '人') => 602, ('一', '方') => -1375, ('一', '日') => 970, ('一', '部') => -1051, ('上', 'が') => -4479, ('会', '社') => -1116, ('出', 'て') => 2163, ('分', 'の') => -7758, ('同', '党') => 970, ('同', '日') => -913, ('大', '阪') => -2471, ('委', '員') => -1250, ('少', 'な') => -1050, ('年', '度') => -8669, ('年', '間') => -1626, ('府', '県') => -2363, ('手', '権') => -1982, ('新', '聞') => -4066, ('日', '新') => -722, ('日', '本') => -7068, ('日', '米') => 3372, ('曜', '日') => -601, ('朝', '鮮') => -2355, ('本', '人') => -2697, ('東', '京') => -1543, ('然', 'と') => -1384, ('社', '会') => -1276, ('立', 'て') => -990, ('第', 'に') => -1612, ('米', '国') => -4268, ('1', '1') => -669, ('ク', '゙') => 1319,_ => 0}; - score += match &(w[3], w[4]) { ('あ', 'た') => -2194, ('あ', 'り') => 719, ('あ', 'る') => 3846, ('い', '.') => -1185, ('い', '。') => -1185, ('い', 'い') => 5308, ('い', 'え') => 2079, ('い', 'く') => 3029, ('い', 'た') => 2056, ('い', 'っ') => 1883, ('い', 'る') => 5600, ('い', 'わ') => 1527, ('う', 'ち') => 1117, ('う', 'と') => 4798, ('え', 'と') => 1454, ('か', '.') => 2857, ('か', '。') => 2857, ('か', 'け') => -743, ('か', 'っ') => -4098, ('か', 'に') => -669, ('か', 'ら') => 6520, ('か', 'り') => -2670, ('が', ',') => 1816, ('が', '、') => 1816, ('が', 'き') => -4855, ('が', 'け') => -1127, ('が', 'っ') => -913, ('が', 'ら') => -4977, ('が', 'り') => -2064, ('き', 'た') => 1645, ('け', 'ど') => 1374, ('こ', 'と') => 7397, ('こ', 
'の') => 1542, ('こ', 'ろ') => -2757, ('さ', 'い') => -714, ('さ', 'を') => 976, ('し', ',') => 1557, ('し', '、') => 1557, ('し', 'い') => -3714, ('し', 'た') => 3562, ('し', 'て') => 1449, ('し', 'な') => 2608, ('し', 'ま') => 1200, ('す', '.') => -1310, ('す', '。') => -1310, ('す', 'る') => 6521, ('ず', ',') => 3426, ('ず', '、') => 3426, ('ず', 'に') => 841, ('そ', 'う') => 428, ('た', '.') => 8875, ('た', '。') => 8875, ('た', 'い') => -594, ('た', 'の') => 812, ('た', 'り') => -1183, ('た', 'る') => -853, ('だ', '.') => 4098, ('だ', '。') => 4098, ('だ', 'っ') => 1004, ('っ', 'た') => -4748, ('っ', 'て') => 300, ('て', 'い') => 6240, ('て', 'お') => 855, ('て', 'も') => 302, ('で', 'す') => 1437, ('で', 'に') => -1482, ('で', 'は') => 2295, ('と', 'う') => -1387, ('と', 'し') => 2266, ('と', 'の') => 541, ('と', 'も') => -3543, ('ど', 'う') => 4664, ('な', 'い') => 1796, ('な', 'く') => -903, ('な', 'ど') => 2135, ('に', ',') => -1021, ('に', '、') => -1021, ('に', 'し') => 1771, ('に', 'な') => 1906, ('に', 'は') => 2644, ('の', ',') => -724, ('の', '、') => -724, ('の', '子') => -1000, ('は', ',') => 1337, ('は', '、') => 1337, ('べ', 'き') => 2181, ('ま', 'し') => 1113, ('ま', 'す') => 6943, ('ま', 'っ') => -1549, ('ま', 'で') => 6154, ('ま', 'れ') => -793, ('ら', 'し') => 1479, ('ら', 'れ') => 6820, ('る', 'る') => 3818, ('れ', ',') => 854, ('れ', '、') => 854, ('れ', 'た') => 1850, ('れ', 'て') => 1375, ('れ', 'ば') => -3246, ('れ', 'る') => 1091, ('わ', 'れ') => -605, ('ん', 'だ') => 606, ('ん', 'で') => 798, ('カ', '月') => 990, ('会', '議') => 860, ('入', 'り') => 1232, ('大', '会') => 2217, ('始', 'め') => 1681, ('市', ' ') => 965, ('新', '聞') => -5055, ('日', ',') => 974, ('日', '、') => 974, ('社', '会') => 2024, ('カ', '月') => 990, _ => 0}; + score += match &(c[1], c[2]) { + ('H', 'H') => 6, + ('I', 'I') => 2461, + ('K', 'H') => 406, + ('O', 'H') => -1378, + _ => 0, + }; + score += match &(c[2], c[3]) { + ('A', 'A') => -3267, + ('A', 'I') => 2744, + ('A', 'N') => -878, + ('H', 'H') => -4070, + ('H', 'M') => -1711, + ('H', 'N') => 4012, + ('H', 'O') => 3761, + ('I', 'A') => 1327, + ('I', 'H') 
=> -1184, + ('I', 'I') => -1332, + ('I', 'K') => 1721, + ('I', 'O') => 5492, + ('K', 'I') => 3831, + ('K', 'K') => -8741, + ('M', 'H') => -3132, + ('M', 'K') => 3334, + ('O', 'O') => -2920, + _ => 0, + }; + score += match &(c[3], c[4]) { + ('H', 'H') => 996, + ('H', 'I') => 626, + ('H', 'K') => -721, + ('H', 'N') => -1307, + ('H', 'O') => -836, + ('I', 'H') => -301, + ('K', 'K') => 2762, + ('M', 'K') => 1079, + ('M', 'M') => 4034, + ('O', 'A') => -1652, + ('O', 'H') => 266, + _ => 0, + }; + score += match &(p[0], p[1]) { + ('B', 'B') => 295, + ('O', 'B') => 304, + ('O', 'O') => -125, + ('U', 'B') => 352, + _ => 0, + }; + score += match &(p[1], p[2]) { + ('B', 'O') => 60, + ('O', 'O') => -1762, + _ => 0, + }; + score += match &(p[1], c[1], c[2]) { + ('B', 'H', 'H') => 1150, + ('B', 'H', 'M') => 1521, + ('B', 'I', 'I') => -1158, + ('B', 'I', 'M') => 886, + ('B', 'M', 'H') => 1208, + ('B', 'N', 'H') => 449, + ('B', 'O', 'H') => -91, + ('B', 'O', 'O') => -2597, + ('O', 'H', 'I') => 451, + ('O', 'I', 'H') => -296, + ('O', 'K', 'A') => 1851, + ('O', 'K', 'H') => -1020, + ('O', 'K', 'K') => 904, + ('O', 'O', 'O') => 2965, + _ => 0, + }; + score += match &(p[1], c[2], c[3]) { + ('B', 'H', 'H') => 118, + ('B', 'H', 'I') => -1159, + ('B', 'H', 'M') => 466, + ('B', 'I', 'H') => -919, + ('B', 'K', 'K') => -1720, + ('B', 'K', 'O') => 864, + ('O', 'H', 'H') => -1139, + ('O', 'H', 'M') => -181, + ('O', 'I', 'H') => 153, + ('U', 'H', 'I') => -1146, + _ => 0, + }; + score += match &(p[2], c[1], c[2]) { + ('B', 'H', 'H') => -792, + ('B', 'H', 'I') => 2664, + ('B', 'I', 'I') => -299, + ('B', 'K', 'I') => 419, + ('B', 'M', 'H') => 937, + ('B', 'M', 'M') => 8335, + ('B', 'N', 'N') => 998, + ('B', 'O', 'H') => 775, + ('O', 'H', 'H') => 2174, + ('O', 'H', 'M') => 439, + ('O', 'I', 'I') => 280, + ('O', 'K', 'H') => 1798, + ('O', 'K', 'I') => -793, + ('O', 'K', 'O') => -2242, + ('O', 'M', 'H') => -2402, + ('O', 'O', 'O') => 11699, + _ => 0, + }; + score += match &(p[2], c[2], c[3]) { + 
('B', 'H', 'H') => -3895, + ('B', 'I', 'H') => 3761, + ('B', 'I', 'I') => -4654, + ('B', 'I', 'K') => 1348, + ('B', 'K', 'K') => -1806, + ('B', 'M', 'I') => -3385, + ('B', 'O', 'O') => -12396, + ('O', 'A', 'H') => 926, + ('O', 'H', 'H') => 266, + ('O', 'H', 'K') => -2036, + ('O', 'N', 'N') => -973, + _ => 0, + }; + score += match &(w[1], w[2]) { + (',', 'と') => 660, + (',', '同') => 727, + ('、', 'と') => 660, + ('、', '同') => 727, + ('」', 'と') => 1682, + ('あ', 'っ') => 1505, + ('い', 'う') => 1743, + ('い', 'っ') => -2055, + ('い', 'る') => 672, + ('う', 'し') => -4817, + ('う', 'ん') => 665, + ('か', 'ら') => 3472, + ('が', 'ら') => 600, + ('こ', 'う') => -790, + ('こ', 'と') => 2083, + ('こ', 'ん') => -1262, + ('さ', 'ら') => -4143, + ('さ', 'ん') => 4573, + ('し', 'た') => 2641, + ('し', 'て') => 1104, + ('す', 'で') => -3399, + ('そ', 'こ') => 1977, + ('そ', 'れ') => -871, + ('た', 'ち') => 1122, + ('た', 'め') => 601, + ('っ', 'た') => 3463, + ('つ', 'い') => -802, + ('て', 'い') => 805, + ('て', 'き') => 1249, + ('で', 'き') => 1127, + ('で', 'す') => 3445, + ('で', 'は') => 844, + ('と', 'い') => -4915, + ('と', 'み') => 1922, + ('ど', 'こ') => 3887, + ('な', 'い') => 5713, + ('な', 'っ') => 3015, + ('な', 'ど') => 7379, + ('な', 'ん') => -1113, + ('に', 'し') => 2468, + ('に', 'は') => 1498, + ('に', 'も') => 1671, + ('に', '対') => -912, + ('の', '一') => -501, + ('の', '中') => 741, + ('ま', 'せ') => 2448, + ('ま', 'で') => 1711, + ('ま', 'ま') => 2600, + ('ま', 'る') => -2155, + ('や', 'む') => -1947, + ('よ', 'っ') => -2565, + ('れ', 'た') => 2369, + ('れ', 'で') => -913, + ('を', 'し') => 1860, + ('を', '見') => 731, + ('亡', 'く') => -1886, + ('京', '都') => 2558, + ('取', 'り') => -2784, + ('大', 'き') => -2604, + ('大', '阪') => 1497, + ('平', '方') => -2314, + ('引', 'き') => -1336, + ('日', '本') => -195, + ('本', '当') => -2423, + ('毎', '日') => -2113, + ('目', '指') => -724, + ('」', 'と') => 1682, + _ => 0, + }; + if (w[1], w[2]) == (*B1, 'あ') { + score += 1404; + } + if (w[1], w[2]) == (*B1, '同') { + score += 542; + } - p.remove(0); - p.push(if score < 0 { 'O' } 
else { 'B' }); + score += match &(w[2], w[3]) { + ('.', '.') => -11822, + ('1', '1') => -669, + ('―', '―') => -5730, + ('−', '−') => -13175, + ('い', 'う') => -1609, + ('う', 'か') => 2490, + ('か', 'し') => -1350, + ('か', 'も') => -602, + ('か', 'ら') => -7194, + ('か', 'れ') => 4612, + ('が', 'い') => 853, + ('が', 'ら') => -3198, + ('き', 'た') => 1941, + ('く', 'な') => -1597, + ('こ', 'と') => -8392, + ('こ', 'の') => -4193, + ('さ', 'せ') => 4533, + ('さ', 'れ') => 13168, + ('さ', 'ん') => -3977, + ('し', 'い') => -1819, + ('し', 'か') => -545, + ('し', 'た') => 5078, + ('し', 'て') => 972, + ('し', 'な') => 939, + ('そ', 'の') => -3744, + ('た', 'い') => -1253, + ('た', 'た') => -662, + ('た', 'だ') => -3857, + ('た', 'ち') => -786, + ('た', 'と') => 1224, + ('た', 'は') => -939, + ('っ', 'た') => 4589, + ('っ', 'て') => 1647, + ('っ', 'と') => -2094, + ('て', 'い') => 6144, + ('て', 'き') => 3640, + ('て', 'く') => 2551, + ('て', 'は') => -3110, + ('て', 'も') => -3065, + ('で', 'い') => 2666, + ('で', 'き') => -1528, + ('で', 'し') => -3828, + ('で', 'す') => -4761, + ('で', 'も') => -4203, + ('と', 'い') => 1890, + ('と', 'こ') => -1746, + ('と', 'と') => -2279, + ('と', 'の') => 720, + ('と', 'み') => 5168, + ('と', 'も') => -3941, + ('な', 'い') => -2488, + ('な', 'が') => -1313, + ('な', 'ど') => -6509, + ('な', 'の') => 2614, + ('な', 'ん') => 3099, + ('に', 'お') => -1615, + ('に', 'し') => 2748, + ('に', 'な') => 2454, + ('に', 'よ') => -7236, + ('に', '対') => -14943, + ('に', '従') => -4688, + ('に', '関') => -11388, + ('の', 'か') => 2093, + ('の', 'で') => -7059, + ('の', 'に') => -6041, + ('の', 'の') => -6125, + ('は', 'い') => 1073, + ('は', 'が') => -1033, + ('は', 'ず') => -2532, + ('ば', 'れ') => 1813, + ('ま', 'し') => -1316, + ('ま', 'で') => -6621, + ('ま', 'れ') => 5409, + ('め', 'て') => -3153, + ('も', 'い') => 2230, + ('も', 'の') => -10713, + ('ら', 'か') => -944, + ('ら', 'し') => -1611, + ('ら', 'に') => -1897, + ('り', 'し') => 651, + ('り', 'ま') => 1620, + ('れ', 'た') => 4270, + ('れ', 'て') => 849, + ('れ', 'ば') => 4114, + ('ろ', 'う') => 6067, + ('わ', 'れ') => 7901, + ('を', '通') => 
-11877, + ('ん', 'だ') => 728, + ('ん', 'な') => -4115, + ('一', '人') => 602, + ('一', '方') => -1375, + ('一', '日') => 970, + ('一', '部') => -1051, + ('上', 'が') => -4479, + ('会', '社') => -1116, + ('出', 'て') => 2163, + ('分', 'の') => -7758, + ('同', '党') => 970, + ('同', '日') => -913, + ('大', '阪') => -2471, + ('委', '員') => -1250, + ('少', 'な') => -1050, + ('年', '度') => -8669, + ('年', '間') => -1626, + ('府', '県') => -2363, + ('手', '権') => -1982, + ('新', '聞') => -4066, + ('日', '新') => -722, + ('日', '本') => -7068, + ('日', '米') => 3372, + ('曜', '日') => -601, + ('朝', '鮮') => -2355, + ('本', '人') => -2697, + ('東', '京') => -1543, + ('然', 'と') => -1384, + ('社', '会') => -1276, + ('立', 'て') => -990, + ('第', 'に') => -1612, + ('米', '国') => -4268, + ('1', '1') => -669, + ('ク', '゙') => 1319, + _ => 0, + }; + score += match &(w[3], w[4]) { + ('あ', 'た') => -2194, + ('あ', 'り') => 719, + ('あ', 'る') => 3846, + ('い', '.') => -1185, + ('い', '。') => -1185, + ('い', 'い') => 5308, + ('い', 'え') => 2079, + ('い', 'く') => 3029, + ('い', 'た') => 2056, + ('い', 'っ') => 1883, + ('い', 'る') => 5600, + ('い', 'わ') => 1527, + ('う', 'ち') => 1117, + ('う', 'と') => 4798, + ('え', 'と') => 1454, + ('か', '.') => 2857, + ('か', '。') => 2857, + ('か', 'け') => -743, + ('か', 'っ') => -4098, + ('か', 'に') => -669, + ('か', 'ら') => 6520, + ('か', 'り') => -2670, + ('が', ',') => 1816, + ('が', '、') => 1816, + ('が', 'き') => -4855, + ('が', 'け') => -1127, + ('が', 'っ') => -913, + ('が', 'ら') => -4977, + ('が', 'り') => -2064, + ('き', 'た') => 1645, + ('け', 'ど') => 1374, + ('こ', 'と') => 7397, + ('こ', 'の') => 1542, + ('こ', 'ろ') => -2757, + ('さ', 'い') => -714, + ('さ', 'を') => 976, + ('し', ',') => 1557, + ('し', '、') => 1557, + ('し', 'い') => -3714, + ('し', 'た') => 3562, + ('し', 'て') => 1449, + ('し', 'な') => 2608, + ('し', 'ま') => 1200, + ('す', '.') => -1310, + ('す', '。') => -1310, + ('す', 'る') => 6521, + ('ず', ',') => 3426, + ('ず', '、') => 3426, + ('ず', 'に') => 841, + ('そ', 'う') => 428, + ('た', '.') => 8875, + ('た', '。') => 8875, + ('た', 'い') => -594, + 
('た', 'の') => 812, + ('た', 'り') => -1183, + ('た', 'る') => -853, + ('だ', '.') => 4098, + ('だ', '。') => 4098, + ('だ', 'っ') => 1004, + ('っ', 'た') => -4748, + ('っ', 'て') => 300, + ('て', 'い') => 6240, + ('て', 'お') => 855, + ('て', 'も') => 302, + ('で', 'す') => 1437, + ('で', 'に') => -1482, + ('で', 'は') => 2295, + ('と', 'う') => -1387, + ('と', 'し') => 2266, + ('と', 'の') => 541, + ('と', 'も') => -3543, + ('ど', 'う') => 4664, + ('な', 'い') => 1796, + ('な', 'く') => -903, + ('な', 'ど') => 2135, + ('に', ',') => -1021, + ('に', '、') => -1021, + ('に', 'し') => 1771, + ('に', 'な') => 1906, + ('に', 'は') => 2644, + ('の', ',') => -724, + ('の', '、') => -724, + ('の', '子') => -1000, + ('は', ',') => 1337, + ('は', '、') => 1337, + ('べ', 'き') => 2181, + ('ま', 'し') => 1113, + ('ま', 'す') => 6943, + ('ま', 'っ') => -1549, + ('ま', 'で') => 6154, + ('ま', 'れ') => -793, + ('ら', 'し') => 1479, + ('ら', 'れ') => 6820, + ('る', 'る') => 3818, + ('れ', ',') => 854, + ('れ', '、') => 854, + ('れ', 'た') => 1850, + ('れ', 'て') => 1375, + ('れ', 'ば') => -3246, + ('れ', 'る') => 1091, + ('わ', 'れ') => -605, + ('ん', 'だ') => 606, + ('ん', 'で') => 798, + ('カ', '月') => 990, + ('会', '議') => 860, + ('入', 'り') => 1232, + ('大', '会') => 2217, + ('始', 'め') => 1681, + ('市', ' ') => 965, + ('新', '聞') => -5055, + ('日', ',') => 974, + ('日', '、') => 974, + ('社', '会') => 2024, + ('カ', '月') => 990, + _ => 0, + }; - if 0 < score { - result.push(word.clone()); - word.clear(); + p.remove(0); + p.push(if score < 0 { 'O' } else { 'B' }); + if 0 < score { + result.push(word.clone()); + word.clear(); + } + word.push(segments[index]); } - word.push(segments[index]); - } - result.push(word.clone()); - result + result.push(word.clone()); + result } diff --git a/test/test.rs b/test/test.rs index 33381de..8b17792 100644 --- a/test/test.rs +++ b/test/test.rs @@ -2,13 +2,28 @@ extern crate tinysegmenter; #[test] fn tokenize() { - assert_eq!( - tinysegmenter::tokenize("私の名前は中野です"), - ["私", "の", "名前", "は", "中野", "です"]); + assert_eq!( + 
tinysegmenter::tokenize("私の名前は中野です"), + ["私", "の", "名前", "は", "中野", "です"] + ); - assert_eq!( - tinysegmenter::tokenize("TinySegmenterは25kBで書かれています。"), - ["TinySegmenter", "は", "2", "5", "kB", "で", "書か", "れ", "て", "い", "ます", "。"]); + assert_eq!( + tinysegmenter::tokenize("TinySegmenterは25kBで書かれています。"), + [ + "TinySegmenter", + "は", + "2", + "5", + "kB", + "で", + "書か", + "れ", + "て", + "い", + "ます", + "。" + ] + ); - assert_eq!(tinysegmenter::tokenize(""), [] as [&str; 0]); + assert_eq!(tinysegmenter::tokenize(""), [] as [&str; 0]); } From 628ef4e75eaec6fbce2626627ddb450c27ef8412 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 16 Aug 2018 18:05:17 +0900 Subject: [PATCH 5/6] switch char to enum --- src/lib.rs | 565 +++++++++++++++++++++++++++-------------------------- 1 file changed, 292 insertions(+), 273 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1520672..6ad8f64 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ use std::char; const BIAS: i32 = -332; +// B1-B3 and E1-E3 are begin and end markers; they are set to invalid characters so no character collision can occur lazy_static! { static ref B1: char = unsafe { char::from_u32_unchecked(0x110001) }; static ref B2: char = unsafe { char::from_u32_unchecked(0x110002) }; @@ -13,21 +14,39 @@ lazy_static!
{ static ref E3: char = unsafe { char::from_u32_unchecked(0x110006) }; } -fn get_ctype(c: char) -> char { +fn get_ctype(c: char) -> Ctype { match c as u32 { 0x4E00 | 0x4E8C | 0x4E09 | 0x56DB | 0x4E94 | 0x516D | 0x4E03 | 0x516B | 0x4E5D | 0x5341 => { - 'M' + Ctype::M } - 0x767E | 0x5343 | 0x4E07 | 0x5104 | 0x5146 => 'M', - 0x4E00...0x9FA0 | 0x3005 | 0x3006 | 0x30F5 | 0x30F6 => 'H', - 0x3041...0x3093 => 'I', - 0x30A1...0x30F4 | 0x30FC | 0xFF71...0xFF9D | 0xFF9E | 0xFF70 => 'K', - 0x61...0x7A | 0x41...0x5A | 0xFF41...0xFF5A | 0xFF21...0xFF3A => 'A', - 0x30...0x3a | 0xFF10...0xFF19 => 'N', - _ => 'O', + 0x767E | 0x5343 | 0x4E07 | 0x5104 | 0x5146 => Ctype::M, + 0x4E00...0x9FA0 | 0x3005 | 0x3006 | 0x30F5 | 0x30F6 => Ctype::H, + 0x3041...0x3093 => Ctype::I, + 0x30A1...0x30F4 | 0x30FC | 0xFF71...0xFF9D | 0xFF9E | 0xFF70 => Ctype::K, + 0x61...0x7A | 0x41...0x5A | 0xFF41...0xFF5A | 0xFF21...0xFF3A => Ctype::A, + 0x30...0x3a | 0xFF10...0xFF19 => Ctype::N, + _ => Ctype::O, } } +#[derive(Debug, Clone, Copy)] +enum Marker { + U, + O, + B, +} + +#[derive(Debug, Clone, Copy)] +enum Ctype { + M, + H, + I, + K, + A, + N, + O +} + pub fn tokenize(s: &str) -> Vec { if s.is_empty() { return Vec::new(); @@ -41,14 +60,14 @@ pub fn tokenize(s: &str) -> Vec { .chain(vec![*E1, *E2, *E3].into_iter()) .collect::>(); - let ctypes = vec!['O'; 3] + let ctypes = vec![Ctype::O; 3] .into_iter() .chain(s.chars().map(get_ctype)) - .chain(vec!['O'; 3].into_iter()) + .chain(vec![Ctype::O; 3].into_iter()) .collect::>(); let mut word = segments[3].to_string(); - let mut p = vec!['U'; 3]; + let mut p = vec![Marker::U; 3]; for index in 4..segments.len() - 3 { let mut score = BIAS; @@ -56,144 +75,144 @@ pub fn tokenize(s: &str) -> Vec { let c = &ctypes[index - 3..index + 3]; score += match &(c[0], c[1], c[2]) { - ('A', 'A', 'A') => 1093, - ('H', 'H', 'H') => 1029, - ('H', 'H', 'M') => 580, - ('H', 'I', 'I') => 998, - ('H', 'O', 'H') => -390, - ('H', 'O', 'M') => -331, - ('I', 'H', 'I') => 1169, - ('I', 'O', 
'H') => -142, - ('I', 'O', 'I') => -1015, - ('I', 'O', 'M') => 467, - ('M', 'M', 'H') => 187, - ('O', 'O', 'I') => -1832, + (Ctype::A, Ctype::A, Ctype::A) => 1093, + (Ctype::H, Ctype::H, Ctype::H) => 1029, + (Ctype::H, Ctype::H, Ctype::M) => 580, + (Ctype::H, Ctype::I, Ctype::I) => 998, + (Ctype::H, Ctype::O, Ctype::H) => -390, + (Ctype::H, Ctype::O, Ctype::M) => -331, + (Ctype::I, Ctype::H, Ctype::I) => 1169, + (Ctype::I, Ctype::O, Ctype::H) => -142, + (Ctype::I, Ctype::O, Ctype::I) => -1015, + (Ctype::I, Ctype::O, Ctype::M) => 467, + (Ctype::M, Ctype::M, Ctype::H) => 187, + (Ctype::O, Ctype::O, Ctype::I) => -1832, _ => 0, }; score += match &(c[1], c[2], c[3]) { - ('H', 'H', 'O') => 2088, - ('H', 'I', 'I') => -1023, - ('H', 'M', 'M') => -1154, - ('I', 'H', 'I') => -1965, - ('K', 'K', 'H') => 703, - ('O', 'I', 'I') => -2649, + (Ctype::H, Ctype::H, Ctype::O) => 2088, + (Ctype::H, Ctype::I, Ctype::I) => -1023, + (Ctype::H, Ctype::M, Ctype::M) => -1154, + (Ctype::I, Ctype::H, Ctype::I) => -1965, + (Ctype::K, Ctype::K, Ctype::H) => 703, + (Ctype::O, Ctype::I, Ctype::I) => -2649, _ => 0, }; score += match &(c[2], c[3], c[4]) { - ('A', 'A', 'A') => -294, - ('H', 'H', 'H') => 346, - ('H', 'H', 'I') => -341, - ('H', 'I', 'I') => -1088, - ('H', 'I', 'K') => 731, - ('H', 'O', 'H') => -1486, - ('I', 'H', 'H') => 128, - ('I', 'H', 'I') => -3041, - ('I', 'H', 'O') => -1935, - ('I', 'I', 'H') => -825, - ('I', 'I', 'M') => -1035, - ('I', 'O', 'I') => -542, - ('K', 'H', 'H') => -1216, - ('K', 'K', 'A') => 491, - ('K', 'K', 'H') => -1217, - ('K', 'O', 'K') => -1009, - ('M', 'H', 'H') => -2694, - ('M', 'H', 'M') => -457, - ('M', 'H', 'O') => 123, - ('M', 'M', 'H') => -471, - ('N', 'N', 'H') => -1689, - ('N', 'N', 'O') => 662, - ('O', 'H', 'O') => -3393, + (Ctype::A, Ctype::A, Ctype::A) => -294, + (Ctype::H, Ctype::H, Ctype::H) => 346, + (Ctype::H, Ctype::H, Ctype::I) => -341, + (Ctype::H, Ctype::I, Ctype::I) => -1088, + (Ctype::H, Ctype::I, Ctype::K) => 731, + (Ctype::H, Ctype::O, 
Ctype::H) => -1486, + (Ctype::I, Ctype::H, Ctype::H) => 128, + (Ctype::I, Ctype::H, Ctype::I) => -3041, + (Ctype::I, Ctype::H, Ctype::O) => -1935, + (Ctype::I, Ctype::I, Ctype::H) => -825, + (Ctype::I, Ctype::I, Ctype::M) => -1035, + (Ctype::I, Ctype::O, Ctype::I) => -542, + (Ctype::K, Ctype::H, Ctype::H) => -1216, + (Ctype::K, Ctype::K, Ctype::A) => 491, + (Ctype::K, Ctype::K, Ctype::H) => -1217, + (Ctype::K, Ctype::O, Ctype::K) => -1009, + (Ctype::M, Ctype::H, Ctype::H) => -2694, + (Ctype::M, Ctype::H, Ctype::M) => -457, + (Ctype::M, Ctype::H, Ctype::O) => 123, + (Ctype::M, Ctype::M, Ctype::H) => -471, + (Ctype::N, Ctype::N, Ctype::H) => -1689, + (Ctype::N, Ctype::N, Ctype::O) => 662, + (Ctype::O, Ctype::H, Ctype::O) => -3393, _ => 0, }; score += match &(c[3], c[4], c[5]) { - ('H', 'H', 'H') => -203, - ('H', 'H', 'I') => 1344, - ('H', 'H', 'K') => 365, - ('H', 'H', 'M') => -122, - ('H', 'H', 'N') => 182, - ('H', 'H', 'O') => 669, - ('H', 'I', 'H') => 804, - ('H', 'I', 'I') => 679, - ('H', 'O', 'H') => 446, - ('I', 'H', 'H') => 695, - ('I', 'H', 'O') => -2324, - ('I', 'I', 'H') => 321, - ('I', 'I', 'I') => 1497, - ('I', 'I', 'O') => 656, - ('I', 'O', 'O') => 54, - ('K', 'A', 'K') => 4845, - ('K', 'K', 'A') => 3386, - ('K', 'K', 'K') => 3065, - ('M', 'H', 'H') => -405, - ('M', 'H', 'I') => 201, - ('M', 'M', 'H') => -241, - ('M', 'M', 'M') => 661, - ('M', 'O', 'M') => 841, + (Ctype::H, Ctype::H, Ctype::H) => -203, + (Ctype::H, Ctype::H, Ctype::I) => 1344, + (Ctype::H, Ctype::H, Ctype::K) => 365, + (Ctype::H, Ctype::H, Ctype::M) => -122, + (Ctype::H, Ctype::H, Ctype::N) => 182, + (Ctype::H, Ctype::H, Ctype::O) => 669, + (Ctype::H, Ctype::I, Ctype::H) => 804, + (Ctype::H, Ctype::I, Ctype::I) => 679, + (Ctype::H, Ctype::O, Ctype::H) => 446, + (Ctype::I, Ctype::H, Ctype::H) => 695, + (Ctype::I, Ctype::H, Ctype::O) => -2324, + (Ctype::I, Ctype::I, Ctype::H) => 321, + (Ctype::I, Ctype::I, Ctype::I) => 1497, + (Ctype::I, Ctype::I, Ctype::O) => 656, + (Ctype::I, Ctype::O, 
Ctype::O) => 54, + (Ctype::K, Ctype::A, Ctype::K) => 4845, + (Ctype::K, Ctype::K, Ctype::A) => 3386, + (Ctype::K, Ctype::K, Ctype::K) => 3065, + (Ctype::M, Ctype::H, Ctype::H) => -405, + (Ctype::M, Ctype::H, Ctype::I) => 201, + (Ctype::M, Ctype::M, Ctype::H) => -241, + (Ctype::M, Ctype::M, Ctype::M) => 661, + (Ctype::M, Ctype::O, Ctype::M) => 841, _ => 0, }; score += match &(p[1], c[0], c[1], c[2]) { - ('B', 'H', 'H', 'H') => -227, - ('B', 'H', 'H', 'I') => 316, - ('B', 'H', 'I', 'H') => -132, - ('B', 'I', 'H', 'H') => 60, - ('B', 'I', 'I', 'I') => 1595, - ('B', 'N', 'H', 'H') => -744, - ('B', 'O', 'H', 'H') => 225, - ('B', 'O', 'O', 'O') => -908, - ('O', 'A', 'K', 'K') => 482, - ('O', 'H', 'H', 'H') => 281, - ('O', 'H', 'I', 'H') => 249, - ('O', 'I', 'H', 'I') => 200, - ('O', 'I', 'I', 'H') => -68, + (Marker::B, Ctype::H, Ctype::H, Ctype::H) => -227, + (Marker::B, Ctype::H, Ctype::H, Ctype::I) => 316, + (Marker::B, Ctype::H, Ctype::I, Ctype::H) => -132, + (Marker::B, Ctype::I, Ctype::H, Ctype::H) => 60, + (Marker::B, Ctype::I, Ctype::I, Ctype::I) => 1595, + (Marker::B, Ctype::N, Ctype::H, Ctype::H) => -744, + (Marker::B, Ctype::O, Ctype::H, Ctype::H) => 225, + (Marker::B, Ctype::O, Ctype::O, Ctype::O) => -908, + (Marker::O, Ctype::A, Ctype::K, Ctype::K) => 482, + (Marker::O, Ctype::H, Ctype::H, Ctype::H) => 281, + (Marker::O, Ctype::H, Ctype::I, Ctype::H) => 249, + (Marker::O, Ctype::I, Ctype::H, Ctype::I) => 200, + (Marker::O, Ctype::I, Ctype::I, Ctype::H) => -68, _ => 0, }; score += match &(p[1], c[1], c[2], c[3]) { - ('B', 'I', 'H', 'H') => -1401, - ('B', 'I', 'I', 'I') => -1033, - ('B', 'K', 'A', 'K') => -543, - ('B', 'O', 'O', 'O') => -5591, + (Marker::B, Ctype::I, Ctype::H, Ctype::H) => -1401, + (Marker::B, Ctype::I, Ctype::I, Ctype::I) => -1033, + (Marker::B, Ctype::K, Ctype::A, Ctype::K) => -543, + (Marker::B, Ctype::O, Ctype::O, Ctype::O) => -5591, _ => 0, }; score += match &(p[2], c[0], c[1], c[2]) { - ('B', 'H', 'H', 'H') => 478, - ('B', 'H', 'H', 'M') 
=> -1073, - ('B', 'H', 'I', 'H') => 222, - ('B', 'H', 'I', 'I') => -504, - ('B', 'I', 'I', 'H') => -116, - ('B', 'I', 'I', 'I') => -105, - ('B', 'M', 'H', 'I') => -863, - ('B', 'M', 'H', 'M') => -464, - ('B', 'O', 'M', 'H') => 620, - ('O', 'H', 'H', 'H') => 346, - ('O', 'H', 'H', 'I') => 1729, - ('O', 'H', 'I', 'I') => 997, - ('O', 'H', 'M', 'H') => 481, - ('O', 'I', 'H', 'H') => 623, - ('O', 'I', 'I', 'H') => 1344, - ('O', 'K', 'A', 'K') => 2792, - ('O', 'K', 'H', 'H') => 587, - ('O', 'K', 'K', 'A') => 679, - ('O', 'O', 'H', 'H') => 110, - ('O', 'O', 'I', 'I') => -685, + (Marker::B, Ctype::H, Ctype::H, Ctype::H) => 478, + (Marker::B, Ctype::H, Ctype::H, Ctype::M) => -1073, + (Marker::B, Ctype::H, Ctype::I, Ctype::H) => 222, + (Marker::B, Ctype::H, Ctype::I, Ctype::I) => -504, + (Marker::B, Ctype::I, Ctype::I, Ctype::H) => -116, + (Marker::B, Ctype::I, Ctype::I, Ctype::I) => -105, + (Marker::B, Ctype::M, Ctype::H, Ctype::I) => -863, + (Marker::B, Ctype::M, Ctype::H, Ctype::M) => -464, + (Marker::B, Ctype::O, Ctype::M, Ctype::H) => 620, + (Marker::O, Ctype::H, Ctype::H, Ctype::H) => 346, + (Marker::O, Ctype::H, Ctype::H, Ctype::I) => 1729, + (Marker::O, Ctype::H, Ctype::I, Ctype::I) => 997, + (Marker::O, Ctype::H, Ctype::M, Ctype::H) => 481, + (Marker::O, Ctype::I, Ctype::H, Ctype::H) => 623, + (Marker::O, Ctype::I, Ctype::I, Ctype::H) => 1344, + (Marker::O, Ctype::K, Ctype::A, Ctype::K) => 2792, + (Marker::O, Ctype::K, Ctype::H, Ctype::H) => 587, + (Marker::O, Ctype::K, Ctype::K, Ctype::A) => 679, + (Marker::O, Ctype::O, Ctype::H, Ctype::H) => 110, + (Marker::O, Ctype::O, Ctype::I, Ctype::I) => -685, _ => 0, }; score += match &(p[2], c[1], c[2], c[3]) { - ('B', 'H', 'H', 'H') => -721, - ('B', 'H', 'H', 'M') => -3604, - ('B', 'H', 'I', 'I') => -966, - ('B', 'I', 'I', 'H') => -607, - ('B', 'I', 'I', 'I') => -2181, - ('O', 'A', 'A', 'A') => -2763, - ('O', 'A', 'K', 'K') => 180, - ('O', 'H', 'H', 'H') => -294, - ('O', 'H', 'H', 'I') => 2446, - ('O', 'H', 'H', 'O') => 
480, - ('O', 'H', 'I', 'H') => -1573, - ('O', 'I', 'H', 'H') => 1935, - ('O', 'I', 'H', 'I') => -493, - ('O', 'I', 'I', 'H') => 626, - ('O', 'I', 'I', 'I') => -4007, - ('O', 'K', 'A', 'K') => -8156, + (Marker::B, Ctype::H, Ctype::H, Ctype::H) => -721, + (Marker::B, Ctype::H, Ctype::H, Ctype::M) => -3604, + (Marker::B, Ctype::H, Ctype::I, Ctype::I) => -966, + (Marker::B, Ctype::I, Ctype::I, Ctype::H) => -607, + (Marker::B, Ctype::I, Ctype::I, Ctype::I) => -2181, + (Marker::O, Ctype::A, Ctype::A, Ctype::A) => -2763, + (Marker::O, Ctype::A, Ctype::K, Ctype::K) => 180, + (Marker::O, Ctype::H, Ctype::H, Ctype::H) => -294, + (Marker::O, Ctype::H, Ctype::H, Ctype::I) => 2446, + (Marker::O, Ctype::H, Ctype::H, Ctype::O) => 480, + (Marker::O, Ctype::H, Ctype::I, Ctype::H) => -1573, + (Marker::O, Ctype::I, Ctype::H, Ctype::H) => 1935, + (Marker::O, Ctype::I, Ctype::H, Ctype::I) => -493, + (Marker::O, Ctype::I, Ctype::I, Ctype::H) => 626, + (Marker::O, Ctype::I, Ctype::I, Ctype::I) => -4007, + (Marker::O, Ctype::K, Ctype::A, Ctype::K) => -8156, _ => 0, }; score += match &(w[0], w[1], w[2]) { @@ -251,94 +270,94 @@ pub fn tokenize(s: &str) -> Vec { }; score += match &c[0] { - 'A' => 484, - 'K' => 93, - 'M' => 645, - 'O' => -505, + Ctype::A => 484, + Ctype::K => 93, + Ctype::M => 645, + Ctype::O => -505, _ => 0, }; score += match &c[1] { - 'A' => 819, - 'H' => 1059, - 'I' => 409, - 'M' => 3987, - 'N' => 5775, - 'O' => 646, + Ctype::A => 819, + Ctype::H => 1059, + Ctype::I => 409, + Ctype::M => 3987, + Ctype::N => 5775, + Ctype::O => 646, _ => 0, }; score += match &c[2] { - 'A' => -1370, - 'I' => 2311, + Ctype::A => -1370, + Ctype::I => 2311, _ => 0, }; score += match &c[3] { - 'A' => -2643, - 'H' => 1809, - 'I' => -1032, - 'K' => -3450, - 'M' => 3565, - 'N' => 3876, - 'O' => 6646, + Ctype::A => -2643, + Ctype::H => 1809, + Ctype::I => -1032, + Ctype::K => -3450, + Ctype::M => 3565, + Ctype::N => 3876, + Ctype::O => 6646, _ => 0, }; score += match &c[4] { - 'H' => 313, - 'I' => 
-1238, - 'K' => -799, - 'M' => 539, - 'O' => -831, + Ctype::H => 313, + Ctype::I => -1238, + Ctype::K => -799, + Ctype::M => 539, + Ctype::O => -831, _ => 0, }; score += match &c[5] { - 'H' => -506, - 'I' => -253, - 'K' => 87, - 'M' => 247, - 'O' => -387, + Ctype::H => -506, + Ctype::I => -253, + Ctype::K => 87, + Ctype::M => 247, + Ctype::O => -387, _ => 0, }; score += match &p[0] { - 'O' => -214, + Marker::O => -214, _ => 0, }; score += match &p[1] { - 'B' => 69, - 'O' => 935, + Marker::B => 69, + Marker::O => 935, _ => 0, }; score += match &p[2] { - 'B' => 189, + Marker::B => 189, _ => 0, }; score += match &(p[0], c[0]) { - ('B', 'H') => 21, - ('B', 'I') => -12, - ('B', 'K') => -99, - ('B', 'N') => 142, - ('B', 'O') => -56, - ('O', 'H') => -95, - ('O', 'I') => 477, - ('O', 'K') => 410, - ('O', 'O') => -2422, + (Marker::B, Ctype::H) => 21, + (Marker::B, Ctype::I) => -12, + (Marker::B, Ctype::K) => -99, + (Marker::B, Ctype::N) => 142, + (Marker::B, Ctype::O) => -56, + (Marker::O, Ctype::H) => -95, + (Marker::O, Ctype::I) => 477, + (Marker::O, Ctype::K) => 410, + (Marker::O, Ctype::O) => -2422, _ => 0, }; score += match &(p[1], c[1]) { - ('B', 'H') => 216, - ('B', 'I') => 113, - ('O', 'K') => 1759, + (Marker::B, Ctype::H) => 216, + (Marker::B, Ctype::I) => 113, + (Marker::O, Ctype::K) => 1759, _ => 0, }; score += match &(p[2], c[2]) { - ('B', 'A') => -479, - ('B', 'H') => 42, - ('B', 'I') => 1913, - ('B', 'K') => -7198, - ('B', 'M') => 3160, - ('B', 'N') => 6427, - ('B', 'O') => 14761, - ('O', 'I') => -827, - ('O', 'N') => -3212, + (Marker::B, Ctype::A) => -479, + (Marker::B, Ctype::H) => 42, + (Marker::B, Ctype::I) => 1913, + (Marker::B, Ctype::K) => -7198, + (Marker::B, Ctype::M) => 3160, + (Marker::B, Ctype::N) => 6427, + (Marker::B, Ctype::O) => 14761, + (Marker::O, Ctype::I) => -827, + (Marker::O, Ctype::N) => -3212, _ => 0, }; score += match &w[0] { @@ -1096,119 +1115,119 @@ pub fn tokenize(s: &str) -> Vec { } score += match &(c[1], c[2]) { - ('H', 'H') => 6, 
- ('I', 'I') => 2461, - ('K', 'H') => 406, - ('O', 'H') => -1378, + (Ctype::H, Ctype::H) => 6, + (Ctype::I, Ctype::I) => 2461, + (Ctype::K, Ctype::H) => 406, + (Ctype::O, Ctype::H) => -1378, _ => 0, }; score += match &(c[2], c[3]) { - ('A', 'A') => -3267, - ('A', 'I') => 2744, - ('A', 'N') => -878, - ('H', 'H') => -4070, - ('H', 'M') => -1711, - ('H', 'N') => 4012, - ('H', 'O') => 3761, - ('I', 'A') => 1327, - ('I', 'H') => -1184, - ('I', 'I') => -1332, - ('I', 'K') => 1721, - ('I', 'O') => 5492, - ('K', 'I') => 3831, - ('K', 'K') => -8741, - ('M', 'H') => -3132, - ('M', 'K') => 3334, - ('O', 'O') => -2920, + (Ctype::A, Ctype::A) => -3267, + (Ctype::A, Ctype::I) => 2744, + (Ctype::A, Ctype::N) => -878, + (Ctype::H, Ctype::H) => -4070, + (Ctype::H, Ctype::M) => -1711, + (Ctype::H, Ctype::N) => 4012, + (Ctype::H, Ctype::O) => 3761, + (Ctype::I, Ctype::A) => 1327, + (Ctype::I, Ctype::H) => -1184, + (Ctype::I, Ctype::I) => -1332, + (Ctype::I, Ctype::K) => 1721, + (Ctype::I, Ctype::O) => 5492, + (Ctype::K, Ctype::I) => 3831, + (Ctype::K, Ctype::K) => -8741, + (Ctype::M, Ctype::H) => -3132, + (Ctype::M, Ctype::K) => 3334, + (Ctype::O, Ctype::O) => -2920, _ => 0, }; score += match &(c[3], c[4]) { - ('H', 'H') => 996, - ('H', 'I') => 626, - ('H', 'K') => -721, - ('H', 'N') => -1307, - ('H', 'O') => -836, - ('I', 'H') => -301, - ('K', 'K') => 2762, - ('M', 'K') => 1079, - ('M', 'M') => 4034, - ('O', 'A') => -1652, - ('O', 'H') => 266, + (Ctype::H, Ctype::H) => 996, + (Ctype::H, Ctype::I) => 626, + (Ctype::H, Ctype::K) => -721, + (Ctype::H, Ctype::N) => -1307, + (Ctype::H, Ctype::O) => -836, + (Ctype::I, Ctype::H) => -301, + (Ctype::K, Ctype::K) => 2762, + (Ctype::M, Ctype::K) => 1079, + (Ctype::M, Ctype::M) => 4034, + (Ctype::O, Ctype::A) => -1652, + (Ctype::O, Ctype::H) => 266, _ => 0, }; score += match &(p[0], p[1]) { - ('B', 'B') => 295, - ('O', 'B') => 304, - ('O', 'O') => -125, - ('U', 'B') => 352, + (Marker::B, Marker::B) => 295, + (Marker::O, Marker::B) => 304, + 
(Marker::O, Marker::O) => -125, + (Marker::U, Marker::B) => 352, _ => 0, }; score += match &(p[1], p[2]) { - ('B', 'O') => 60, - ('O', 'O') => -1762, + (Marker::B, Marker::O) => 60, + (Marker::O, Marker::O) => -1762, _ => 0, }; score += match &(p[1], c[1], c[2]) { - ('B', 'H', 'H') => 1150, - ('B', 'H', 'M') => 1521, - ('B', 'I', 'I') => -1158, - ('B', 'I', 'M') => 886, - ('B', 'M', 'H') => 1208, - ('B', 'N', 'H') => 449, - ('B', 'O', 'H') => -91, - ('B', 'O', 'O') => -2597, - ('O', 'H', 'I') => 451, - ('O', 'I', 'H') => -296, - ('O', 'K', 'A') => 1851, - ('O', 'K', 'H') => -1020, - ('O', 'K', 'K') => 904, - ('O', 'O', 'O') => 2965, + (Marker::B, Ctype::H, Ctype::H) => 1150, + (Marker::B, Ctype::H, Ctype::M) => 1521, + (Marker::B, Ctype::I, Ctype::I) => -1158, + (Marker::B, Ctype::I, Ctype::M) => 886, + (Marker::B, Ctype::M, Ctype::H) => 1208, + (Marker::B, Ctype::N, Ctype::H) => 449, + (Marker::B, Ctype::O, Ctype::H) => -91, + (Marker::B, Ctype::O, Ctype::O) => -2597, + (Marker::O, Ctype::H, Ctype::I) => 451, + (Marker::O, Ctype::I, Ctype::H) => -296, + (Marker::O, Ctype::K, Ctype::A) => 1851, + (Marker::O, Ctype::K, Ctype::H) => -1020, + (Marker::O, Ctype::K, Ctype::K) => 904, + (Marker::O, Ctype::O, Ctype::O) => 2965, _ => 0, }; score += match &(p[1], c[2], c[3]) { - ('B', 'H', 'H') => 118, - ('B', 'H', 'I') => -1159, - ('B', 'H', 'M') => 466, - ('B', 'I', 'H') => -919, - ('B', 'K', 'K') => -1720, - ('B', 'K', 'O') => 864, - ('O', 'H', 'H') => -1139, - ('O', 'H', 'M') => -181, - ('O', 'I', 'H') => 153, - ('U', 'H', 'I') => -1146, + (Marker::B, Ctype::H, Ctype::H) => 118, + (Marker::B, Ctype::H, Ctype::I) => -1159, + (Marker::B, Ctype::H, Ctype::M) => 466, + (Marker::B, Ctype::I, Ctype::H) => -919, + (Marker::B, Ctype::K, Ctype::K) => -1720, + (Marker::B, Ctype::K, Ctype::O) => 864, + (Marker::O, Ctype::H, Ctype::H) => -1139, + (Marker::O, Ctype::H, Ctype::M) => -181, + (Marker::O, Ctype::I, Ctype::H) => 153, + (Marker::U, Ctype::H, Ctype::I) => -1146, _ => 0, }; 
score += match &(p[2], c[1], c[2]) { - ('B', 'H', 'H') => -792, - ('B', 'H', 'I') => 2664, - ('B', 'I', 'I') => -299, - ('B', 'K', 'I') => 419, - ('B', 'M', 'H') => 937, - ('B', 'M', 'M') => 8335, - ('B', 'N', 'N') => 998, - ('B', 'O', 'H') => 775, - ('O', 'H', 'H') => 2174, - ('O', 'H', 'M') => 439, - ('O', 'I', 'I') => 280, - ('O', 'K', 'H') => 1798, - ('O', 'K', 'I') => -793, - ('O', 'K', 'O') => -2242, - ('O', 'M', 'H') => -2402, - ('O', 'O', 'O') => 11699, + (Marker::B, Ctype::H, Ctype::H) => -792, + (Marker::B, Ctype::H, Ctype::I) => 2664, + (Marker::B, Ctype::I, Ctype::I) => -299, + (Marker::B, Ctype::K, Ctype::I) => 419, + (Marker::B, Ctype::M, Ctype::H) => 937, + (Marker::B, Ctype::M, Ctype::M) => 8335, + (Marker::B, Ctype::N, Ctype::N) => 998, + (Marker::B, Ctype::O, Ctype::H) => 775, + (Marker::O, Ctype::H, Ctype::H) => 2174, + (Marker::O, Ctype::H, Ctype::M) => 439, + (Marker::O, Ctype::I, Ctype::I) => 280, + (Marker::O, Ctype::K, Ctype::H) => 1798, + (Marker::O, Ctype::K, Ctype::I) => -793, + (Marker::O, Ctype::K, Ctype::O) => -2242, + (Marker::O, Ctype::M, Ctype::H) => -2402, + (Marker::O, Ctype::O, Ctype::O) => 11699, _ => 0, }; score += match &(p[2], c[2], c[3]) { - ('B', 'H', 'H') => -3895, - ('B', 'I', 'H') => 3761, - ('B', 'I', 'I') => -4654, - ('B', 'I', 'K') => 1348, - ('B', 'K', 'K') => -1806, - ('B', 'M', 'I') => -3385, - ('B', 'O', 'O') => -12396, - ('O', 'A', 'H') => 926, - ('O', 'H', 'H') => 266, - ('O', 'H', 'K') => -2036, - ('O', 'N', 'N') => -973, + (Marker::B, Ctype::H, Ctype::H) => -3895, + (Marker::B, Ctype::I, Ctype::H) => 3761, + (Marker::B, Ctype::I, Ctype::I) => -4654, + (Marker::B, Ctype::I, Ctype::K) => 1348, + (Marker::B, Ctype::K, Ctype::K) => -1806, + (Marker::B, Ctype::M, Ctype::I) => -3385, + (Marker::B, Ctype::O, Ctype::O) => -12396, + (Marker::O, Ctype::A, Ctype::H) => 926, + (Marker::O, Ctype::H, Ctype::H) => 266, + (Marker::O, Ctype::H, Ctype::K) => -2036, + (Marker::O, Ctype::N, Ctype::N) => -973, _ => 0, }; score += 
match &(w[1], w[2]) { @@ -1531,7 +1550,7 @@ pub fn tokenize(s: &str) -> Vec { }; p.remove(0); - p.push(if score < 0 { 'O' } else { 'B' }); + p.push(if score < 0 { Marker::O } else { Marker::B }); if 0 < score { result.push(word.clone()); From 518055ab9680d3d1c959860b9ab5778b3a5f174e Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 17 Aug 2018 12:24:02 +0900 Subject: [PATCH 6/6] fix invalid case --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 6ad8f64..b60501b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -298,7 +298,6 @@ pub fn tokenize(s: &str) -> Vec { Ctype::M => 3565, Ctype::N => 3876, Ctype::O => 6646, - _ => 0, }; score += match &c[4] { Ctype::H => 313,