Skip to content

Commit dc85298

Browse files
author
guoyongzhi
committed
split log norm
1 parent 7eb8fcd commit dc85298

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

src/detector.jl

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,8 +79,17 @@ function load_profile(lang, ngramrange::AbstractRange, cutoff, vocabularyrange)
7979
end
8080

8181
"""
    normalize_profile!(P)

Replace every value in the profile `P` with a log-scale score, in place, and return `P`.

Entries are grouped by `length(key)` (presumably the n-gram size; confirm against
`load_profile`). For each group the raw values are summed, the group receives the weight
`log1p(group_sum)`, and the weights are rescaled so that the mean weight over the
non-empty groups is 1. Each entry then becomes

    log(value) + log(group_weight / group_sum)

so that `exp` of the scores in one group adds up to that group's weight.

# Arguments
- `P`: a mutable dictionary-like collection whose keys support `length` and whose value
  type can hold the floating-point results (e.g. `Dict{String,Float64}`).

# Returns
The same object `P`, mutated. An empty `P` is returned untouched.
"""
function normalize_profile!(P)
    # Nothing to normalise; also keeps `maximum` below from throwing on an empty input.
    isempty(P) && return P

    # One accumulator slot per key length, sized from the data rather than a fixed
    # constant so that keys of any length are accepted.
    sums = zeros(maximum(length, keys(P)))
    for (k, v) in P
        sums[length(k)] += v
    end

    weights = log1p.(sums)
    # Rescale so that the mean over the non-zero weights equals 1; slots for key lengths
    # that never occur stay at zero and do not take part in the average.
    weights .*= count(!iszero, weights) / sum(weights)

    # Per-length additive correction in log space. Slots of unused lengths evaluate to
    # NaN (0/0) but are never looked up below.
    logscale = log.(weights ./ sums)

    # Only values of already-present keys are overwritten; no key is added or removed.
    for (k, v) in P
        P[k] = log(v) + logscale[length(k)]
    end
    return P
end
8695

0 commit comments

Comments
 (0)