Skip to content

Commit 1d80d55

Browse files
committed
Merge branch 'dev' of https://github.com/stanfordnlp/CoreNLP into main
2 parents 1f2edb4 + 12a12e2 commit 1d80d55

File tree

46 files changed

+539
-180
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+539
-180
lines changed

.github/workflows/run-tests.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: Run Tests
2+
on: [push]
3+
jobs:
4+
Run-CoreNLP-Tests:
5+
runs-on: self-hosted
6+
steps:
7+
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
8+
- run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
9+
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
10+
- name: Check out repository code
11+
uses: actions/checkout@v2
12+
- run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
13+
- run: echo "🖥️ The workflow is now ready to test your code on the runner."
14+
- name: Setup
15+
run: |
16+
pwd
17+
ant clean compile
18+
echo $CLASSPATH
19+
- name: Run Unit Tests
20+
if: always()
21+
run: |
22+
ant test
23+
- name: Run Basic ITests
24+
if: always()
25+
run: |
26+
ant itest
27+
- run: echo "All tests finished!"
28+
- run: echo "🍏 This job's status is ${{ job.status }}."

doc/corenlp/pom-full.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
<dependency>
8787
<groupId>xom</groupId>
8888
<artifactId>xom</artifactId>
89-
<version>1.3.2</version>
89+
<version>1.3.7</version>
9090
</dependency>
9191

9292
<dependency>
@@ -128,7 +128,7 @@
128128
<dependency>
129129
<groupId>com.google.protobuf</groupId>
130130
<artifactId>protobuf-java</artifactId>
131-
<version>3.9.2</version>
131+
<version>3.11.4</version>
132132
</dependency>
133133

134134
<dependency>

itest/src/edu/stanford/nlp/dcoref/DcorefSlowITest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import junit.framework.TestCase;
1414

1515
import edu.stanford.nlp.util.StringUtils;
16-
16+
import edu.stanford.nlp.util.TestPaths;
1717

1818
public class DcorefSlowITest extends TestCase {
1919

@@ -29,19 +29,19 @@ protected void makePropsFile(String path, String workDir, String scorer) throws
2929
pw.println("dcoref.maxdist = -1");
3030
pw.println("dcoref.replicate.conll = true");
3131
pw.println("dcoref.conll.scorer = " + scorer);
32-
pw.println("dcoref.conll2011 = /u/scr/nlp/data/conll-2011/v2/data/dev/data/english/annotations");
32+
pw.println(String.format("dcoref.conll2011 = %s/conll-2011/v2/data/dev/data/english/annotations", TestPaths.testHome()));
3333
pw.println("dcoref.logFile = "+workDir + File.separator + "log.txt");
3434
pw.close();
3535
}
3636

3737
public void testDcorefCoNLLResultV4() throws Exception {
38-
double finalScore = runDcoref("/u/scr/nlp/data/conll-2011/scorer/v4/scorer.pl");
38+
double finalScore = runDcoref(String.format("%s/conll-2011/scorer/v4/scorer.pl", TestPaths.testHome()));
3939
System.out.printf("Final Score (CoNLL 2011, scorer v4): (MUC+B^3+ceafe)/3 = %.2f%n", finalScore);
4040
assertEquals(59.3, finalScore, 0.3); // 2016-07: 59.45
4141
}
4242

4343
public void testDcorefCoNLLResultV801() throws Exception {
44-
double finalScore = runDcoref("/u/scr/nlp/data/conll-2012/scorer/v8.01/scorer.pl");
44+
double finalScore = runDcoref(String.format("%s/conll-2012/scorer/v8.01/scorer.pl", TestPaths.testHome()));
4545
System.out.printf("Final Score (CoNLL 2011, scorer v8): (MUC+B^3+ceafe)/3 = %.2f%n", finalScore);
4646
assertEquals(54.0, finalScore, 0.3); // 2016-07: 54.13
4747
}

itest/src/edu/stanford/nlp/ie/NERBenchmarkSlowITest.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter;
3434
import edu.stanford.nlp.sequences.SeqClassifierFlags;
3535
import edu.stanford.nlp.util.logging.Redwood;
36+
import edu.stanford.nlp.util.TestPaths;
3637

3738

3839
/**
@@ -45,7 +46,7 @@
4546
public class NERBenchmarkSlowITest {
4647
private static Redwood.RedwoodChannels log = Redwood.channels(NERBenchmarkSlowITest.class);
4748
// Conll paths
48-
private static final String CONLL_BASE_DIR = "/u/nlp/data/ner/conll/";
49+
private static final String CONLL_BASE_DIR = String.format("%s/ner/conll/", TestPaths.testHome());
4950
private static final String CONLL_TRAIN = CONLL_BASE_DIR + "eng.train";
5051
private static final String CONLL_DEV = CONLL_BASE_DIR + "eng.testa";
5152
private static final String CONLL_TEST = CONLL_BASE_DIR + "eng.testb";
@@ -54,13 +55,13 @@ public class NERBenchmarkSlowITest {
5455
private static final String CONLL_OUTPUT_TEST = "conll_output_test.txt";
5556

5657
// Onto paths
57-
private static final String ONTO_BASE_DIR = "/u/nlp/data/ner/ontonotes/";
58+
private static final String ONTO_BASE_DIR = String.format("%s/ner/ontonotes/", TestPaths.testHome());
5859
private static final String ONTO_DEV = ONTO_BASE_DIR + "onto-3class-dev.tsv";
5960
private static final String ONTO_TEST = ONTO_BASE_DIR + "onto-3class-test.tsv";
6061

6162
// TODO: use the model directly to run the test
6263
/** official CoNLL NER evaluation script **/
63-
private static final String CONLL_EVAL = "/u/nlp/data/ner/benchmark/eval_conll.sh";
64+
private static final String CONLL_EVAL = String.format("%s/ner/benchmark/eval_conll.sh", TestPaths.testHome());
6465
// private static final String CONLL_EVAL = (new File("projects/core/scripts/ner/eval_conll_cmd.sh").exists() ?
6566
// "projects/core/scripts/ner/eval_conll_cmd.sh" :
6667
// "../../scripts/ner/eval_conll_cmd.sh");

itest/src/edu/stanford/nlp/ie/NERServerITest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import edu.stanford.nlp.ie.crf.CRFClassifier;
1818
import edu.stanford.nlp.io.IOUtils;
1919
import edu.stanford.nlp.net.Ports;
20+
import edu.stanford.nlp.util.TestPaths;
2021

2122
/**
2223
* Tests several operations on the NERServer.
@@ -39,10 +40,9 @@
3940
public class NERServerITest {
4041
private static CRFClassifier crf = null;
4142

42-
private static final String englishCRFPath =
43-
"/u/nlp/data/ner/goodClassifiers/english.all.3class.nodistsim.crf.ser.gz";
43+
private static final String englishCRFPath = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
4444
private static final String englishTestFile =
45-
"/u/nlp/data/ner/column_data/conll.testa";
45+
String.format("%s/ner/column_data/conll.testa", TestPaths.testHome());
4646
private static String loadedQueryFile = null;
4747

4848
private static final String CHARSET = "UTF-8";

itest/src/edu/stanford/nlp/ie/NumberSequenceClassifierITest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ public void testOrdinal() {
224224
{ "DATE" , "DATE", "DATE", "DATE" },
225225
{ "DATE" , "DATE" },
226226
{ "DATE" , "DATE", "DATE" },
227-
{ "DATE" , "DATE", "DATE", "DATE" },
227+
{ "DATE" , "DATE", "DATE" },
228228
{ "DATE" , "DATE" },
229229
{ "DATE" },
230230
{ "DATE" },
@@ -242,7 +242,7 @@ public void testOrdinal() {
242242
{ "1923-02-05" , "1923-02-05", "1923-02-05", "1923-02-05" },
243243
{ "XXXX-03-03" , "XXXX-03-03" },
244244
{ "2005-07-18" , "2005-07-18", "2005-07-18" },
245-
{ "XX05-09-18" , "XX05-09-18", "XX05-09-18", "XX05-09-18" },
245+
{ "XX05-09-18" , "XX05-09-18", "XX05-09-18" },
246246
{ "XXXX-01-13" , "XXXX-01-13" },
247247
{ "2009-07-19" },
248248
{ "2007-06-16" },

itest/src/edu/stanford/nlp/ie/crf/CRFClassifierITest.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import edu.stanford.nlp.stats.Counters;
2323
import edu.stanford.nlp.util.BenchmarkingHelper;
2424
import edu.stanford.nlp.util.Pair;
25+
import edu.stanford.nlp.util.TestPaths;
2526
import edu.stanford.nlp.util.Triple;
2627

2728

@@ -34,8 +35,8 @@ public class CRFClassifierITest {
3435
private static final String nerPath = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
3536

3637
private static final String caselessPath = "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz";
37-
// private static final String caselessPath = "/u/nlp/data/ner/goodClassifiers/english.all.3class.caseless.distsim.crf.ser.gz"; // now works!
38-
// private static final String caselessPath = "/u/nlp/data/ner/classifiers-2014-08-31/english.all.3class.caseless.distsim.crf.ser.gz";
38+
// private static final String caselessPath = String.format("%s/ner/goodClassifiers/english.all.3class.caseless.distsim.crf.ser.gz", TestPaths.testHome()); // now works!
39+
// private static final String caselessPath = String.format("%s/ner/classifiers-2014-08-31/english.all.3class.caseless.distsim.crf.ser.gz", TestPaths.testHome());
3940

4041
/* The extra spaces and tab (after fate) are there to test space preservation.
4142
* Each item of the top level array is an array of 7 Strings:
@@ -330,7 +331,7 @@ public void testCRF() {
330331
System.getProperty("ner.caseless.model", caselessPath));
331332

332333
try {
333-
Triple<Double, Double, Double> prf = crfCaseless.classifyAndWriteAnswers("/u/nlp/data/ner/column_data/ritter.3class.test", true);
334+
Triple<Double, Double, Double> prf = crfCaseless.classifyAndWriteAnswers(String.format("%s/ner/column_data/ritter.3class.test", TestPaths.testHome()), true);
334335
Counter<String> results = new ClassicCounter<>();
335336
results.setCount("NER F1", prf.third());
336337
Counter<String> lowResults = new ClassicCounter<>();

itest/src/edu/stanford/nlp/ie/crf/ThreadedCRFClassifierITest.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import org.junit.Before;
66
import org.junit.Test;
77

8+
import edu.stanford.nlp.util.TestPaths;
9+
810
/**
911
* Test that the CRFClassifier works when multiple classifiers are run
1012
* in multiple threads.
@@ -19,15 +21,13 @@ public class ThreadedCRFClassifierITest {
1921
"edu/stanford/nlp/models/ner/german.distsim.crf.ser.gz";
2022
/** -- We're no longer supporting this one
2123
private String german2 =
22-
"/u/nlp/data/ner/goodClassifiers/german.dewac_175m_600.crf.ser.gz";
24+
String.format("%s/ner/goodClassifiers/german.dewac_175m_600.crf.ser.gz", TestPaths.testHome());
2325
*/
24-
private static final String germanTestFile = "/u/nlp/data/german/ner/2016/deu.io.f15.utf8.testa";
26+
private static final String germanTestFile = String.format("%s/german/ner/2016/deu.io.f15.utf8.testa", TestPaths.testHome());
2527

26-
private static final String english1 =
27-
"/u/nlp/data/ner/goodClassifiers/english.all.3class.nodistsim.crf.ser.gz";
28-
private static final String english2 =
29-
"/u/nlp/data/ner/goodClassifiers/english.conll.4class.distsim.crf.ser.gz";
30-
private static final String englishTestFile = "/u/nlp/data/ner/column_data/conll.4class.testa";
28+
private static final String english1 = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
29+
private static final String english2 = "edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz";
30+
private static final String englishTestFile = String.format("%s/ner/column_data/conll.4class.testa", TestPaths.testHome());
3131

3232
private static final String germanEncoding = "utf-8";
3333
private static final String englishEncoding = "utf-8";

itest/src/edu/stanford/nlp/ie/crf/ThreadedSegmenterITest.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import org.junit.Before;
66
import org.junit.Test;
77

8+
import edu.stanford.nlp.util.TestPaths;
9+
810
/**
911
* Test that the CRFClassifier works when multiple classifiers are run
1012
* in multiple threads, in Chinese Segmentation mode.
@@ -21,21 +23,21 @@ public class ThreadedSegmenterITest {
2123
Properties props;
2224

2325
static final String crf1 =
24-
"/u/nlp/data/gale/segtool/stanford-seg/classifiers-2010/pk-chris6.lex.gz";
26+
String.format("%s/gale/segtool/stanford-seg/classifiers-2010/pk-chris6.lex.gz", TestPaths.testHome());
2527

26-
static final String crf2 = "/u/nlp/data/gale/segtool/stanford-seg/classifiers-2010/05202008-ctb6.processed-chris6.lex.gz";
28+
static final String crf2 = String.format("%s/gale/segtool/stanford-seg/classifiers-2010/05202008-ctb6.processed-chris6.lex.gz", TestPaths.testHome());
2729

2830
@Before
2931
public void setUp() {
3032
props = new Properties();
3133
props.setProperty("sighanCorporaDict",
32-
"/u/nlp/data/gale/segtool/stanford-seg/data");
34+
String.format("%s/gale/segtool/stanford-seg/data", TestPaths.testHome()));
3335
props.setProperty("testFile",
34-
"/u/nlp/segtool/stanford-seg/data/Sighan2006/CTB_train_test/test/CTB.utf8.simp");
36+
String.format("%s/gale/segtool/stanford-seg/data/Sighan2006/CTB_train_test/test/CTB.utf8.simp", TestPaths.testHome()));
3537
props.setProperty("inputEncoding", "utf-8");
3638
props.setProperty("sighanPostProcessing", "true");
3739
props.setProperty("serDictionary",
38-
"/u/nlp/data/gale/segtool/stanford-seg/classifiers/dict-chris6.ser.gz");
40+
String.format("%s/gale/segtool/stanford-seg/classifiers/dict-chris6.ser.gz", TestPaths.testHome()));
3941
props.setProperty("keepAllWhitespaces", "false");
4042
}
4143

itest/src/edu/stanford/nlp/international/german/process/GermanTokenizerPostProcessorITest.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,20 @@ public void testPostProcessor() {
4949
testExample(abbreviationExample,abbreviationExampleGoldTokens);
5050
}
5151

52+
/**
53+
* You probably can't tell in your editor, but the input has 4 characters for
54+
* <pre>für</pre>
55+
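* and the output has 3 (presumably the input is the decomposed form, u + combining diaeresis U+0308, and the output is the precomposed ü, U+00FC -- TODO confirm)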
56+
*/
57+
public void testUmlauts() {
58+
String fur = "für";
59+
assertEquals(4, fur.length());
60+
61+
String furry = "für";
62+
assertEquals(3, furry.length());
63+
64+
String umlautExample = "Welcher der Befunde ist " + fur + " eine Gehirnerkrankung typisch?";
65+
List<String> umlautGoldTokens = Arrays.asList("Welcher", "der", "Befunde", "ist", furry, "eine", "Gehirnerkrankung", "typisch", "?");
66+
testExample(umlautExample, umlautGoldTokens);
67+
}
5268
}

0 commit comments

Comments
 (0)