Skip to content

Commit 1a898b6

Browse files
committed
Fixes the accumulation of AfterAnnotation text, as reported in #1210
1 parent 3b5f34b commit 1a898b6

File tree

2 files changed

+25
-11
lines changed

2 files changed

+25
-11
lines changed

src/edu/stanford/nlp/pipeline/CleanXmlAnnotator.java

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -579,12 +579,9 @@ private List<CoreLabel> process(Annotation annotation, List<CoreLabel> tokens) {
579579
}
580580
if (added && newTokens.size() > 1) {
581581
CoreLabel previous = newTokens.get(newTokens.size() - 2);
582-
String after = previous.get(CoreAnnotations.AfterAnnotation.class);
583-
if (after != null) {
584-
previous.set(CoreAnnotations.AfterAnnotation.class, after + removedText);
585-
} else {
586-
previous.set(CoreAnnotations.AfterAnnotation.class, removedText.toString());
587-
}
582+
// Note that the original AfterAnnotation is not needed.
583+
// The BeforeAnnotation of the removed tokens already covers that.
584+
previous.set(CoreAnnotations.AfterAnnotation.class, removedText.toString());
588585
}
589586
removedText = new StringBuilder();
590587
}
@@ -650,6 +647,7 @@ private List<CoreLabel> process(Annotation annotation, List<CoreLabel> tokens) {
650647
currentRemoval = token.get(CoreAnnotations.OriginalTextAnnotation.class);
651648
if (currentRemoval != null)
652649
removedText.append(currentRemoval);
650+
// We only process the after text if it's the last token
653651
if (token == tokens.get(tokens.size() - 1)) {
654652
currentRemoval = token.get(CoreAnnotations.AfterAnnotation.class);
655653
if (currentRemoval != null)

test/src/edu/stanford/nlp/pipeline/CleanXmlAnnotatorTest.java

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ private static void checkResult(Annotation annotation,
111111
}
112112
}
113113

114-
private static void checkInvert(Annotation annotation, String gold) {
114+
private static void checkBeforeInvert(Annotation annotation, String gold) {
115115
List<CoreLabel> annotationLabels =
116116
annotation.get(CoreAnnotations.TokensAnnotation.class);
117117
StringBuilder original = new StringBuilder();
@@ -124,6 +124,18 @@ private static void checkInvert(Annotation annotation, String gold) {
124124
assertEquals(gold, original.toString());
125125
}
126126

127+
private static void checkAfterInvert(Annotation annotation, String gold) {
128+
List<CoreLabel> annotationLabels =
129+
annotation.get(CoreAnnotations.TokensAnnotation.class);
130+
StringBuilder original = new StringBuilder();
131+
original.append(annotationLabels.get(0).get(CoreAnnotations.BeforeAnnotation.class));
132+
for (CoreLabel label : annotationLabels) {
133+
original.append(label.get(CoreAnnotations.OriginalTextAnnotation.class));
134+
original.append(label.get(CoreAnnotations.AfterAnnotation.class));
135+
}
136+
assertEquals(gold, original.toString());
137+
}
138+
127139
private static void checkContext(CoreLabel label, String... expectedContext) {
128140
List<String> xmlContext = label.get(CoreAnnotations.XmlContextAnnotation.class);
129141
assertEquals(expectedContext.length, xmlContext.size());
@@ -216,17 +228,20 @@ public void testInvertible() {
216228
Annotation annotation = annotate(testNoTags, ptbInvertible,
217229
cleanXmlAllTags, wtsSplitter);
218230
checkResult(annotation, testNoTags);
219-
checkInvert(annotation, testNoTags);
231+
checkBeforeInvert(annotation, testNoTags);
232+
checkAfterInvert(annotation, testNoTags);
220233

221234
annotation = annotate(testTags, ptbInvertible,
222235
cleanXmlAllTags, wtsSplitter);
223236
checkResult(annotation, testNoTags);
224-
checkInvert(annotation, testTags);
237+
checkBeforeInvert(annotation, testTags);
238+
checkAfterInvert(annotation, testTags);
225239

226240
annotation = annotate(testManyTags, ptbInvertible,
227241
cleanXmlAllTags, wtsSplitter);
228242
checkResult(annotation, testNoTags);
229-
checkInvert(annotation, testManyTags);
243+
checkBeforeInvert(annotation, testManyTags);
244+
checkAfterInvert(annotation, testManyTags);
230245
}
231246

232247
@Test
@@ -287,7 +302,8 @@ public void testViaCoreNlp() {
287302
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
288303
pipeline.annotate(anno);
289304

290-
checkInvert(anno, testManyTags);
305+
checkBeforeInvert(anno, testManyTags);
306+
checkAfterInvert(anno, testManyTags);
291307
List<CoreLabel> annotationLabels =
292308
anno.get(CoreAnnotations.TokensAnnotation.class);
293309
for (int i = 0; i < 3; ++i) {

0 commit comments

Comments
 (0)