From cc30488270cd60c075c09f2718d737f88c537181 Mon Sep 17 00:00:00 2001 From: Arne Neumann Date: Thu, 19 Feb 2015 15:02:52 +0100 Subject: [PATCH 1/3] added basic setup.py to install corenlp system-wide --- setup.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..711fbe8 --- /dev/null +++ b/setup.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +import sys +import os +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + +here = os.path.abspath(os.path.dirname(__file__)) +README = open(os.path.join(here, 'README.md')).read() + +setup(name='corenlp', +version='3.4.1', +description='Python wrapper for Stanford CoreNLP tools v3.4.1', +long_description=README, +author='Dustin Smith', +author_email='dustin@media.mit.edu', +url='https://github.com/dasmith/stanford-corenlp-python', +py_modules=['client', 'corenlp', 'jsonrpc', 'progressbar'], +license='GPL v2+', +install_requires=['pexpect', 'unidecode'], +data_files=[ + ('.', ['default.properties']), + ('stanford-corenlp-full-2014-08-27', + ['stanford-corenlp-full-2014-08-27/stanford-corenlp-3.4.1.jar', + 'stanford-corenlp-full-2014-08-27/stanford-corenlp-3.4.1.jar', + 'stanford-corenlp-full-2014-08-27/stanford-corenlp-3.4.1-models.jar', + 'stanford-corenlp-full-2014-08-27/joda-time.jar', + 'stanford-corenlp-full-2014-08-27/xom.jar', + 'stanford-corenlp-full-2014-08-27/jollyday.jar' + ]) +], +) From 509411fd96f8f9f5379e63cbf41a8eec5a35bcef Mon Sep 17 00:00:00 2001 From: Arne Neumann Date: Thu, 19 Feb 2015 15:05:54 +0100 Subject: [PATCH 2/3] made *.jar paths relative to installation directory. with this change, corenlp can now be used/imported regardless of which directory the user / python script is currently in. --- corenlp.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/corenlp.py b/corenlp.py index 753e51c..0e20379 100644 --- a/corenlp.py +++ b/corenlp.py @@ -132,25 +132,26 @@ def __init__(self, corenlp_path=None): Checks the location of the jar files. Spawns the server as a process. """ + here = os.path.abspath(os.path.dirname(__file__)) jars = ["stanford-corenlp-3.4.1.jar", "stanford-corenlp-3.4.1-models.jar", "joda-time.jar", "xom.jar", "jollyday.jar"] - + # if CoreNLP libraries are in a different directory, # change the corenlp_path variable to point to them if not corenlp_path: - corenlp_path = "./stanford-corenlp-full-2014-08-27/" - + corenlp_path = os.path.join(here, "stanford-corenlp-full-2014-08-27") + java_path = "java" classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP" # include the properties file, so you can change defaults # but any changes in output format will break parse_parser_results() - props = "-props default.properties" - + props = "-props {}".format(os.path.join(here, 'default.properties')) + # add and check classpaths - jars = [corenlp_path + jar for jar in jars] + jars = [os.path.join(corenlp_path, jar) for jar in jars] for jar in jars: if not os.path.exists(jar): logger.error("Error! Cannot locate %s" % jar) From 8ebd591d5750e419f2f57f43e043a6514995a6d3 Mon Sep 17 00:00:00 2001 From: Arne Neumann Date: Fri, 20 Feb 2015 11:56:26 +0100 Subject: [PATCH 3/3] fix #19: changed encoding of CoreNLP result string --- corenlp.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/corenlp.py b/corenlp.py index 0e20379..3c6479a 100644 --- a/corenlp.py +++ b/corenlp.py @@ -69,10 +69,15 @@ def parse_parser_results(text): interface of the CoreNLP tools. Takes a string of the parser results and then returns a Python list of dictionaries, one for each parsed sentence. + + Parameters + ---------- + text : str + UTF-8 encoded string of CoreNLP parser results """ results = {"sentences": []} state = STATE_START - for line in text.encode('utf-8').split("\n"): + for line in text.split("\n"): line = line.strip() if line.startswith("Sentence #"):