Skip to content

Commit 2bd850e

Browse files
committed
Add more tests for the invertible CleanXML annotator. Make sure the `before` text of the next token is included in the `after` text of the previous token. Also addresses #1210.
1 parent 07b2cf1 commit 2bd850e

File tree

2 files changed: +14 −17 lines changed

2 files changed: +14 −17 lines changed

src/edu/stanford/nlp/pipeline/CleanXmlAnnotator.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -581,7 +581,8 @@ private List<CoreLabel> process(Annotation annotation, List<CoreLabel> tokens) {
581581
CoreLabel previous = newTokens.get(newTokens.size() - 2);
582582
// Note that the original AfterAnnotation is not needed.
583583
// The BeforeAnnotation of the removed tokens already covers that.
584-
previous.set(CoreAnnotations.AfterAnnotation.class, removedText.toString());
584+
// However, the `before` text of the next token needs to be included.
585+
previous.set(CoreAnnotations.AfterAnnotation.class, removedText + before);
585586
}
586587
removedText = new StringBuilder();
587588
}

test/src/edu/stanford/nlp/pipeline/CleanXmlAnnotatorTest.java

Lines changed: 12 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -219,29 +219,25 @@ public void testEarlyEnd() {
219219
@Test
220220
public void testInvertible() {
221221
String testNoTags = "This sentence should be invertible.";
222-
String testTags =
223-
" <xml> This sentence should be invertible. </xml> ";
224-
String testManyTags =
225-
" <xml> <foo> <bar>This sentence should " +
226-
" </bar>be invertible. </foo> </xml> ";
222+
String[] testInvertibles = { " <xml> This sentence should be invertible. </xml> ",
223+
" <xml> <foo> <bar>This sentence should </bar>be invertible. </foo> </xml> ",
224+
" This sentence <xml>should</xml> be invertible. ",
225+
" This sentence<xml> should </xml>be invertible. ",
226+
" This sentence <xml> should </xml> be invertible. " };
227227

228228
Annotation annotation = annotate(testNoTags, ptbInvertible,
229229
cleanXmlAllTags, wtsSplitter);
230230
checkResult(annotation, testNoTags);
231231
checkBeforeInvert(annotation, testNoTags);
232232
checkAfterInvert(annotation, testNoTags);
233233

234-
annotation = annotate(testTags, ptbInvertible,
235-
cleanXmlAllTags, wtsSplitter);
236-
checkResult(annotation, testNoTags);
237-
checkBeforeInvert(annotation, testTags);
238-
checkAfterInvert(annotation, testTags);
239-
240-
annotation = annotate(testManyTags, ptbInvertible,
241-
cleanXmlAllTags, wtsSplitter);
242-
checkResult(annotation, testNoTags);
243-
checkBeforeInvert(annotation, testManyTags);
244-
checkAfterInvert(annotation, testManyTags);
234+
for (String test : testInvertibles) {
235+
annotation = annotate(test, ptbInvertible,
236+
cleanXmlAllTags, wtsSplitter);
237+
checkResult(annotation, testNoTags);
238+
checkBeforeInvert(annotation, test);
239+
checkAfterInvert(annotation, test);
240+
}
245241
}
246242

247243
@Test

0 commit comments

Comments
 (0)