Skip to content

Commit ec9cce3

Browse files
authored
Merge pull request #1214 from stanfordnlp/fix_xml_invertible
More tests for invertible cleanxml. Make sure to include the before …
2 parents 07b2cf1 + 2bd850e commit ec9cce3

File tree

2 files changed: 14 additions and 17 deletions

2 files changed: 14 additions and 17 deletions

src/edu/stanford/nlp/pipeline/CleanXmlAnnotator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,8 @@ private List<CoreLabel> process(Annotation annotation, List<CoreLabel> tokens) {
581581
CoreLabel previous = newTokens.get(newTokens.size() - 2);
582582
// Note that the original AfterAnnotation is not needed.
583583
// The BeforeAnnotation of the removed tokens already covers that.
584-
previous.set(CoreAnnotations.AfterAnnotation.class, removedText.toString());
584+
// However, the `before` text of the next token needs to be included.
585+
previous.set(CoreAnnotations.AfterAnnotation.class, removedText + before);
585586
}
586587
removedText = new StringBuilder();
587588
}

test/src/edu/stanford/nlp/pipeline/CleanXmlAnnotatorTest.java

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -219,29 +219,25 @@ public void testEarlyEnd() {
219219
@Test
220220
public void testInvertible() {
221221
String testNoTags = "This sentence should be invertible.";
222-
String testTags =
223-
" <xml> This sentence should be invertible. </xml> ";
224-
String testManyTags =
225-
" <xml> <foo> <bar>This sentence should " +
226-
" </bar>be invertible. </foo> </xml> ";
222+
String[] testInvertibles = { " <xml> This sentence should be invertible. </xml> ",
223+
" <xml> <foo> <bar>This sentence should </bar>be invertible. </foo> </xml> ",
224+
" This sentence <xml>should</xml> be invertible. ",
225+
" This sentence<xml> should </xml>be invertible. ",
226+
" This sentence <xml> should </xml> be invertible. " };
227227

228228
Annotation annotation = annotate(testNoTags, ptbInvertible,
229229
cleanXmlAllTags, wtsSplitter);
230230
checkResult(annotation, testNoTags);
231231
checkBeforeInvert(annotation, testNoTags);
232232
checkAfterInvert(annotation, testNoTags);
233233

234-
annotation = annotate(testTags, ptbInvertible,
235-
cleanXmlAllTags, wtsSplitter);
236-
checkResult(annotation, testNoTags);
237-
checkBeforeInvert(annotation, testTags);
238-
checkAfterInvert(annotation, testTags);
239-
240-
annotation = annotate(testManyTags, ptbInvertible,
241-
cleanXmlAllTags, wtsSplitter);
242-
checkResult(annotation, testNoTags);
243-
checkBeforeInvert(annotation, testManyTags);
244-
checkAfterInvert(annotation, testManyTags);
234+
for (String test : testInvertibles) {
235+
annotation = annotate(test, ptbInvertible,
236+
cleanXmlAllTags, wtsSplitter);
237+
checkResult(annotation, testNoTags);
238+
checkBeforeInvert(annotation, test);
239+
checkAfterInvert(annotation, test);
240+
}
245241
}
246242

247243
@Test

0 commit comments

Comments
 (0)