@@ -33,16 +33,23 @@ public static Optional<LdLocale> getRecognisedLanguage(String text) throws IOExc
3333 return languageDetector .detect (textObject );
3434 }
3535
36+ public static Optional <LdLocale > getRecognisedLanguage (WebDriver driver ) throws IOException {
37+ List <LanguageProfile > languageProfiles = new LanguageProfileReader ().readAllBuiltIn ();
38+
39+ LanguageDetector languageDetector = LanguageDetectorBuilder .create (NgramExtractors .standard ())
40+ .withProfiles (languageProfiles )
41+ .build ();
42+
43+ TextObjectFactory textObjectFactory = CommonTextObjectFactories .forDetectingOnLargeText ();
44+
45+ TextObject textObject = textObjectFactory .forText (getTextFromPage (driver ));
46+
47+ return languageDetector .detect (textObject );
48+ }
49+
3650 public static boolean isCorrectLanguageOnThePage (WebDriver driver , String lang ) throws IOException {
3751 boolean isCorrectLang = true ;
38- JavascriptExecutor jse = (JavascriptExecutor ) driver ;
39- String bodyText = jse .executeScript ("return document.body.innerHTML" , "" ).toString ();
40- bodyText = bodyText .replaceAll ("<script\\ b[^<]*(?:(?!<\\ /script>)<[^<]*)*<\\ /script>" , " " );
41- bodyText = bodyText .replaceAll ("<noscript\\ b[^<]*(?:(?!<\\ /noscript>)<[^<]*)*<\\ /noscript>" , " " );
42- bodyText = bodyText .replaceAll ("<style\\ b[^<]*(?:(?!<\\ /style>)<[^<]*)*<\\ /style>" , " " );
43- bodyText = bodyText .replaceAll ("<pre\\ b[^<]*(?:(?!<\\ /pre>)<[^<]*)*<\\ /pre>" , " " );
44- bodyText = bodyText .replaceAll ("<[^>]*>" , " " );
45- bodyText = bodyText .toLowerCase ().replaceAll ("[\\ t|\\ n|\\ r|\\ s]+" , " " ).replaceAll ("[\\ s]+" , " " );
52+ String bodyText = getTextFromPage (driver );
4653
4754 int textBlockLength = 300 ;
4855 int bodyTextLength = bodyText .length ();
@@ -53,7 +60,7 @@ public static boolean isCorrectLanguageOnThePage(WebDriver driver, String lang)
5360 } else {
5461 for (int i = 0 ; i < bodyTextLength ; i += textBlockLength ) {
5562 String tempString ;
56- if (bodyTextLength >= (i + textBlockLength ) ) {
63+ if (bodyTextLength >= (i + textBlockLength )) {
5764 tempString = bodyText .substring (i , i + textBlockLength );
5865 try {
5966 String detectedLanguage = getRecognisedLanguage (tempString ).get ().getLanguage ();
@@ -75,4 +82,17 @@ public static boolean isCorrectLanguageOnThePage(WebDriver driver, String lang)
7582 }
7683 return isCorrectLang ;
7784 }
85+
86+ private static String getTextFromPage (WebDriver driver ) {
87+ JavascriptExecutor jse = (JavascriptExecutor ) driver ;
88+ String bodyText = jse .executeScript ("return document.body.innerHTML" , "" ).toString ();
89+ bodyText = bodyText .replaceAll ("<script\\ b[^<]*(?:(?!</script>)<[^<]*)*</script>" , " " )
90+ .replaceAll ("<noscript\\ b[^<]*(?:(?!</noscript>)<[^<]*)*</noscript>" , " " )
91+ .replaceAll ("<style\\ b[^<]*(?:(?!</style>)<[^<]*)*</style>" , " " )
92+ .replaceAll ("<pre\\ b[^<]*(?:(?!</pre>)<[^<]*)*</pre>" , " " )
93+ .replaceAll ("<[^>]*>" , " " ).toLowerCase ()
94+ .replaceAll ("[\\ t|\\ n|\\ r|\\ s]+" , " " ).replaceAll ("[\\ s]+" , " " );
95+
96+ return bodyText ;
97+ }
7898}
0 commit comments