diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 3799529..0000000 --- a/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -FROM ubuntu:14.04 - -RUN apt-get update && apt-get install -y python3-pip python3-nose ack-grep vim python3-lxml python-lxml python-nose python-pip - -RUN locale-gen en_GB.UTF-8 - -RUN mkdir -p /home/nobody && \ - chown nobody /home/nobody -USER nobody -ENV HOME=/home/nobody \ - PATH=/home/nobody/.local/bin:$PATH \ - LANG=en_GB.UTF-8 -WORKDIR /home/nobody - -RUN mkdir -p /home/nobody/.local/bin -RUN echo python3 $* > /home/nobody/.local/bin/python -RUN chmod +x /home/nobody/.local/bin/python -RUN pip3 install --user requests sqlalchemy alembic -RUN pip install --user requests sqlalchemy alembic -COPY . /home/nobody/ -RUN python3 tests.py -RUN python2 tests.py diff --git a/benchmark.py b/benchmark.py index ab6b3a9..49468bb 100755 --- a/benchmark.py +++ b/benchmark.py @@ -1,13 +1,12 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 import scraperwiki import os -from six.moves import range rows = [{'id': i, 'test': i * 2, 's': "abc"} for i in range(1000)] try: os.remove('scraperwiki.sqlite') -except OSError: +except FileNotFoundError: pass scraperwiki.sql.save(['id'], rows) diff --git a/pyproject.toml b/pyproject.toml index 59b8bbd..58e37e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ authors = [ { name = "ScraperWiki Developers", email = "hello@scraperwiki.com" } ] license = { text = "GPLv3+" } -readme = "README.md" # Assumes you have a README file +readme = "README.rst" urls = { Repository = "https://github.com/cantabular/scraperwiki-python" } classifiers = [ "Intended Audience :: Developers", @@ -25,10 +25,8 @@ classifiers = [ "Topic :: Database :: Front-Ends", ] dependencies = [ - "requests==2.32.5", - "six==1.17.0", - "sqlalchemy==2.0.45", - "alembic==1.17.2", + "sqlalchemy>=2,<3", + "alembic", ] [tool.setuptools.packages.find] diff --git a/save_speedtest.py b/save_speedtest.py index d3c1407..c26b09a 100755 --- a/save_speedtest.py +++ b/save_speedtest.py @@ -1,6 +1,5 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 import scraperwiki -from six.moves import range rows = [{'id': i, 'test': i * 2, 's': "xx"*i} for i in range(10)] diff --git a/scraperwiki/__init__.py b/scraperwiki/__init__.py index 4f9aaf1..a854c2c 100644 --- a/scraperwiki/__init__.py +++ b/scraperwiki/__init__.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# Thomas Levine, ScraperWiki Limited - ''' Local version of ScraperWiki Utils, documentation here: https://scraperwiki.com/docs/python/python_help_documentation/ diff --git a/scraperwiki/sql.py b/scraperwiki/sql.py index 367ca5d..3138e68 100644 --- a/scraperwiki/sql.py +++ b/scraperwiki/sql.py @@ -10,25 +10,23 @@ import alembic.ddl import sqlalchemy -import six DATABASE_NAME = os.environ.get("SCRAPERWIKI_DATABASE_NAME", "sqlite:///scraperwiki.sqlite") DATABASE_TIMEOUT = float(os.environ.get("SCRAPERWIKI_DATABASE_TIMEOUT", 300)) SECONDS_BETWEEN_COMMIT = 2 -unicode = str # The scraperwiki.sqlite.SqliteError exception SqliteError = sqlalchemy.exc.SQLAlchemyError class Blob(bytes): - """ Represents a blob as a string. """ + pass + PYTHON_SQLITE_TYPE_MAP = { - str: sqlalchemy.types.Text, str: sqlalchemy.types.Text, int: sqlalchemy.types.BigInteger, bool: sqlalchemy.types.Boolean, @@ -37,19 +35,10 @@ class Blob(bytes): datetime.date: sqlalchemy.types.Date, datetime.datetime: sqlalchemy.types.DateTime, + bytes: sqlalchemy.types.LargeBinary, Blob: sqlalchemy.types.LargeBinary, } -if bytes is not str: - # On 2.7, bytes *is* str, so we don't want to overwrite that. - PYTHON_SQLITE_TYPE_MAP[bytes] = sqlalchemy.types.LargeBinary - -try: - PYTHON_SQLITE_TYPE_MAP[long] = sqlalchemy.types.BigInteger -except NameError: - pass - - class _State: """ @@ -183,7 +172,7 @@ def select(query, data=None): rows = [] for row in result: - rows.append(dict(list(row._mapping.items()))) + rows.append(dict(row._mapping)) return rows @@ -268,7 +257,7 @@ def save_var(name, value): if column_type == sqlalchemy.types.LargeBinary: value_blob = value else: - value_blob = unicode(value).encode('utf-8') + value_blob = str(value).encode('utf-8') values = dict(name=name, value_blob=value_blob, diff --git a/scraperwiki/utils.py b/scraperwiki/utils.py index 2eed864..3dea8b6 100644 --- a/scraperwiki/utils.py +++ b/scraperwiki/utils.py @@ -1,25 +1,20 @@ -#!/usr/bin/env python -# utils.py -# David Jones, ScraperWiki Limited -# Thomas Levine, ScraperWiki Limited - ''' Local version of ScraperWiki Utils, documentation here: https://scraperwiki.com/docs/python/python_help_documentation/ ''' import os +import shutil import sys import warnings import tempfile -import six.moves.urllib.parse -import six.moves.urllib.request -import requests +import urllib.parse +import urllib.request def scrape(url, params=None, user_agent=None): ''' Scrape a URL optionally with parameters. - This is effectively a wrapper around urllib2.urlopen. + This is effectively a wrapper around urllib.request.urlopen. ''' headers = {} @@ -27,23 +22,32 @@ def scrape(url, params=None, user_agent=None): if user_agent: headers['User-Agent'] = user_agent - data = params and six.moves.urllib.parse.urlencode(params) or None - req = six.moves.urllib.request.Request(url, data=data, headers=headers) - f = six.moves.urllib.request.urlopen(req) + data = None + if params: + data = urllib.parse.urlencode(params).encode('utf-8') + + req = urllib.request.Request(url, data=data, headers=headers) - text = f.read() - f.close() + with urllib.request.urlopen(req) as f: + text = f.read() return text def pdftoxml(pdfdata, options=""): """converts pdf file to xml file""" + if not shutil.which('pdftohtml'): + warnings.warn( + 'scraperwiki.pdftoxml requires pdftohtml, but pdftohtml was not found ' + 'in the PATH. If you wish to use this function, you probably need to ' + 'install pdftohtml.' + ) + return None pdffout = tempfile.NamedTemporaryFile(suffix='.pdf') pdffout.write(pdfdata) pdffout.flush() - xmlin = tempfile.NamedTemporaryFile(mode='r', suffix='.xml') + xmlin = tempfile.NamedTemporaryFile(mode='r', suffix='.xml', encoding="utf-8") tmpxml = xmlin.name # "temph.xml" cmd = 'pdftohtml -xml -nodrm -zoom 1.5 -enc UTF-8 -noframes {} "{}" "{}"'.format( options, pdffout.name, os.path.splitext(tmpxml)[0]) @@ -55,29 +59,14 @@ def pdftoxml(pdfdata, options=""): #xmlfin = open(tmpxml) xmldata = xmlin.read() xmlin.close() - return xmldata.decode('utf-8') - - -def _in_box(): - return os.environ.get('HOME', None) == '/home' + return xmldata def status(type, message=None): - assert type in ['ok', 'error'] - - # if not running in a ScraperWiki platform box, silently do nothing - if not _in_box(): - return "Not in box" - - url = os.environ.get("SW_STATUS_URL", "https://app.quickcode.io/api/status") - if url == "OFF": - # For development mode - return + """Retained for backwards compatibility.""" + warnings.warn("status() is no longer in use following ScraperWiki/Quickcode application shutdown", DeprecationWarning, stacklevel=2) + return - # send status update to the box - r = requests.post(url, data={'type': type, 'message': message}) - r.raise_for_status() - return r.content def swimport(scrapername): return __import__(scrapername) diff --git a/tests.py b/tests.py index a5f7e22..9951bcc 100755 --- a/tests.py +++ b/tests.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import datetime import json import os @@ -13,7 +12,6 @@ from unittest import TestCase, main import scraperwiki -import six import sys # scraperwiki.sql._State.echo = True @@ -64,9 +62,9 @@ def test_date(self): date1 = datetime.datetime.now() date2 = datetime.date.today() scraperwiki.sql.save_var(u"weird\u1234", date1) - self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), six.text_type(date1)) + self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), str(date1)) scraperwiki.sql.save_var(u"weird\u1234", date2) - self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), six.text_type(date2)) + self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), str(date2)) def test_save_multiple_values(self): scraperwiki.sql.save_var(u'foo\xc3', u'hello') @@ -94,7 +92,7 @@ def test_insert(self): """) ((colname, value, _type),) = self.cursor.fetchall() expected = [(u"birthday\xfe", u"\u1234November 30, 1888", "text",)] - observed = [(colname, type(b'')(value).decode('utf-8'), _type)] + observed = [(colname, value.decode('utf-8'), _type)] self.assertEqual(observed, expected) class SaveAndCheck(TestCase): @@ -269,7 +267,7 @@ def test_lxml_string(self): self.save_and_check( {"text": s}, "lxml", - [(six.text_type(s),)] + [(str(s),)] ) def test_save_and_drop(self): @@ -320,7 +318,7 @@ def test_save_date(self): scraperwiki.sql.select("* FROM swdata")) self.assertEqual( - {u'keys': [u'birthday\xaa'], u'data': [(six.text_type(d),)]}, + {u'keys': [u'birthday\xaa'], u'data': [(str(d),)]}, scraperwiki.sql.execute("SELECT * FROM swdata")) self.assertEqual(str(d), self.rawdate(column=u"birthday\xaa")) @@ -331,7 +329,7 @@ def test_save_datetime(self): scraperwiki.sql.save([], {"birthday": d}, table_name="datetimetest") - exemplar = six.text_type(d) + exemplar = str(d) # SQLAlchemy appears to convert with extended precision. exemplar += ".000000" @@ -347,13 +345,11 @@ def test_save_datetime(self): class TestStatus(TestCase): 'Test that the status endpoint works.' - def test_does_nothing_if_called_outside_box(self): - scraperwiki.status('ok') - - def test_raises_exception_with_invalid_type_field(self): - self.assertRaises(AssertionError, scraperwiki.status, 'hello') + def test_status(self): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) - # XXX neeed some mocking tests for case of run inside a box + self.assertEqual(scraperwiki.status('ok'), None) class TestUnicodeColumns(TestCase): maxDiff = None @@ -385,6 +381,3 @@ def test_import_scraperwiki_utils(self): def test_import_scraperwiki_special_utils(self): self.sw.pdftoxml - -if __name__ == '__main__': - main()