Remove redundant whitespace check in TokenizerStateMachine

schen149 · schen149 · commit be60f8907e73 · 2018-08-17T06:15:23.000-04:00
diff --git a/tokenizer/src/main/java/edu/illinois/cs/cogcomp/nlp/tokenizer/TokenizerStateMachine.java b/tokenizer/src/main/java/edu/illinois/cs/cogcomp/nlp/tokenizer/TokenizerStateMachine.java
@@ -336,7 +336,7 @@ public void process(char token) {
                                     if (getCurrent().isAbbr())
                                         return; // previous was upper case, acronym and word
                                                 // continues
-                                    else if (Character.isWhitespace(c) && Character.isLowerCase(nextnextChar))
+                                    else if (Character.isLowerCase(nextnextChar))
                                         return; // when the next char is white space and the next next char
                                                 // is lowercase, we know that the next word is not start of
                                                 // a sentence, so we continue.