|
| 1 | +package edu.stanford.nlp.pipeline; |
| 2 | + |
| 3 | +import static org.junit.Assert.assertEquals; |
| 4 | +import org.junit.Test; |
| 5 | + |
| 6 | +import java.util.*; |
| 7 | +import java.util.stream.*; |
| 8 | + |
| 9 | +public class PipelinePropertiesITest { |
| 10 | + |
| 11 | + |
| 12 | + /** Test options for building pipelines **/ |
| 13 | + @Test |
| 14 | + public void buildFrenchPipeline() { |
| 15 | + // expected output |
| 16 | + List<String> expectedTokens = Arrays.asList("Emmanuel", "Macron", "est", "le", "président", "de", "la", "France", "."); |
| 17 | + List<String> expectedTags = Arrays.asList("PROPN", "PROPN", "AUX", "DET", "NOUN", "ADP", "DET", "PROPN", "PUNCT"); |
| 18 | + List<String> expectedNER = Arrays.asList("I-PER", "I-PER", "O", "O", "O", "O", "I-LOC", "I-LOC", "O"); |
| 19 | + String expectedDependencyParse = "root(ROOT-0, président-5)\n" + |
| 20 | + "nsubj(président-5, Emmanuel-1)\n" + |
| 21 | + "flat:name(Emmanuel-1, Macron-2)\n" + |
| 22 | + "cop(président-5, est-3)\n" + |
| 23 | + "det(président-5, le-4)\n" + |
| 24 | + "case(France-8, de-6)\n" + |
| 25 | + "det(France-8, la-7)\n" + |
| 26 | + "nmod:de(président-5, France-8)\n" + |
| 27 | + "punct(président-5, .-9)\n"; |
| 28 | + // build doc |
| 29 | + CoreDocument doc = new CoreDocument("Emmanuel Macron est le président de la France."); |
| 30 | + // build pipeline with language name |
| 31 | + StanfordCoreNLP frenchPipeline = new StanfordCoreNLP("french"); |
| 32 | + // annotate |
| 33 | + frenchPipeline.annotate(doc); |
| 34 | + // compare results |
| 35 | + assertEquals(expectedTokens, doc.tokens().stream().map(w -> w.word()).collect(Collectors.toList())); |
| 36 | + assertEquals(expectedTags, doc.tokens().stream().map(w -> w.tag()).collect(Collectors.toList())); |
| 37 | + assertEquals(expectedNER, doc.tokens().stream().map(w -> w.ner()).collect(Collectors.toList())); |
| 38 | + assertEquals(expectedDependencyParse, doc.sentences().get(0).dependencyParse().toList()); |
| 39 | + } |
| 40 | +} |
0 commit comments