11module . exports = ( data , fileInfo ) => {
2- const charRegex = new RegExp ( / \d | \n | \s | \- | \. | \, | \: | \; | \? | \! | \< | \> | \[ | \] | \{ | \} | \& | \= | \| / , "g" ) ;
3- const totalCharacters = data . content . replace ( charRegex , "" ) . length ;
4- const langArr = data . languageArr ;
5- const pos = data . pos ;
6- const testFilePath = data . testFilePath ;
2+ const charRegex = new RegExp (
3+ / \d | \n | \s | \- | \. | \, | \: | \; | \? | \! | \< | \> | \[ | \] | \{ | \} | \& | \= | \| / ,
4+ "g"
5+ ) ;
6+ const totalCharacters = data . content . replace ( charRegex , "" ) . length ;
7+ const langArr = data . languageArr ;
8+ const pos = data . pos ;
9+ const testFile = data . testFile ;
710
8- const secondLanguage = langArr . reduce ( ( acc , val ) => {
9- if ( acc . name === fileInfo . language ) return val ;
10- if ( val . name === fileInfo . language ) return acc ;
11+ const secondLanguage = langArr . reduce ( ( acc , val ) => {
12+ if ( acc . name === fileInfo . language ) return val ;
13+ if ( val . name === fileInfo . language ) return acc ;
1114
12- return acc . count >= val . count ? acc : val ;
13- } ) ;
15+ return acc . count >= val . count ? acc : val ;
16+ } ) ;
1417
15- const languageRatio = langArr [ pos ] . count / ( secondLanguage . count + langArr [ pos ] . count ) ;
16- const characterWordRatio = langArr [ pos ] . count / totalCharacters ;
18+ const languageRatio =
19+ langArr [ pos ] . count / ( secondLanguage . count + langArr [ pos ] . count ) ;
20+ const characterWordRatio = langArr [ pos ] . count / totalCharacters ;
1721
18- let lowerLimit = null ;
19- let upperLimit = null ;
20- const multiplier = 0.8 ;
22+ let lowerLimit = null ;
23+ let upperLimit = null ;
24+ const multiplier = 0.8 ;
2125
22- if ( data . utf8 ) {
23- lowerLimit = langArr [ pos ] . utfFrequency ? langArr [ pos ] . utfFrequency . low * multiplier : null ;
24- upperLimit = langArr [ pos ] . utfFrequency ? ( langArr [ pos ] . utfFrequency . low + langArr [ pos ] . utfFrequency . high ) / 2 : null ;
26+ if ( fileInfo . encoding === "UTF-8" || fileInfo . encoding === "UTF-16LE" ) {
27+ lowerLimit = langArr [ pos ] . utfFrequency
28+ ? langArr [ pos ] . utfFrequency . low * multiplier
29+ : null ;
30+ upperLimit = langArr [ pos ] . utfFrequency
31+ ? ( langArr [ pos ] . utfFrequency . low + langArr [ pos ] . utfFrequency . high ) / 2
32+ : null ;
33+ } else {
34+ lowerLimit = langArr [ pos ] . isoFrequency
35+ ? langArr [ pos ] . isoFrequency . low * multiplier
36+ : null ;
37+ upperLimit = langArr [ pos ] . isoFrequency
38+ ? ( langArr [ pos ] . isoFrequency . low + langArr [ pos ] . isoFrequency . high ) / 2
39+ : null ;
40+ }
2541
26- } else {
27- lowerLimit = langArr [ pos ] . isoFrequency ? langArr [ pos ] . isoFrequency . low * multiplier : null ;
28- upperLimit = langArr [ pos ] . isoFrequency ? ( langArr [ pos ] . isoFrequency . low + langArr [ pos ] . isoFrequency . high ) / 2 : null ;
29- }
42+ let confidenceScore ;
3043
31- let confidenceScore ;
44+ if ( ! lowerLimit || ! upperLimit ) {
45+ confidenceScore = null ;
46+ } else if ( characterWordRatio >= upperLimit ) {
47+ confidenceScore = 1 ;
48+ } else if ( characterWordRatio > lowerLimit ) {
49+ const range = upperLimit - lowerLimit ;
50+ const surplus = characterWordRatio - lowerLimit ;
51+ const confidenceRaisePercentage = surplus / range ;
52+ const confidenceRaise = ( 1 - languageRatio ) * confidenceRaisePercentage ;
53+ confidenceScore = Number ( ( languageRatio + confidenceRaise ) . toFixed ( 2 ) ) ;
54+ } else {
55+ confidenceScore = Number (
56+ ( languageRatio * ( characterWordRatio / lowerLimit ) ) . toFixed ( 2 )
57+ ) ;
58+ }
3259
33- if ( ! lowerLimit || ! upperLimit ) {
34- confidenceScore = null ;
60+ // If the test script is running
61+ if ( testFile ) {
62+ return {
63+ name : testFile . substr ( testFile . lastIndexOf ( "/" ) + 1 ) ,
64+ path : testFile ,
65+ encoding : fileInfo . encoding ,
66+ language : fileInfo . language ,
67+ languageConfidence : confidenceScore ,
68+ ratio : Number ( languageRatio . toFixed ( 2 ) ) ,
69+ count : langArr [ pos ] . count ,
70+ totalCharacters : totalCharacters ,
71+ characterWordRatio : characterWordRatio . toFixed ( 6 ) ,
72+ secondLanguage : {
73+ name : secondLanguage . name ,
74+ count : secondLanguage . count ,
75+ } ,
76+ } ;
77+ }
3578
36- } else if ( characterWordRatio >= upperLimit ) {
37- confidenceScore = 1 ;
38-
39- } else if ( characterWordRatio > lowerLimit ) {
40- const range = upperLimit - lowerLimit ;
41- const surplus = characterWordRatio - lowerLimit ;
42- const confidenceRaisePercentage = surplus / range ;
43- const confidenceRaise = ( 1 - languageRatio ) * confidenceRaisePercentage ;
44- confidenceScore = Number ( ( languageRatio + confidenceRaise ) . toFixed ( 2 ) ) ;
45-
46- } else {
47- confidenceScore = Number ( ( languageRatio * ( characterWordRatio / lowerLimit ) ) . toFixed ( 2 ) ) ;
48- }
49-
50- // If the test script is running
51- if ( testFilePath ) {
52- return {
53- name : testFilePath . substr ( testFilePath . lastIndexOf ( '/' ) + 1 ) ,
54- path : testFilePath ,
55- language : fileInfo . language ,
56- utf8 : data . utf8 ,
57- confidence : confidenceScore ,
58- ratio : Number ( languageRatio . toFixed ( 2 ) ) ,
59- count : langArr [ pos ] . count ,
60- totalCharacters : totalCharacters ,
61- characterWordRatio : characterWordRatio . toFixed ( 6 ) ,
62- secondLanguage : {
63- name : secondLanguage . name ,
64- count : secondLanguage . count
65- }
66- } ;
67- }
68-
69- return confidenceScore ;
70- } ;
79+ return confidenceScore ;
80+ } ;
0 commit comments