Skip to content

Commit f2389a8

Browse files
authored
Merge pull request #128 from cantabular/modernise-code
Modernise the codebase
2 parents bb581b0 + 5d1e79f commit f2389a8

8 files changed

Lines changed: 44 additions & 102 deletions

File tree

Dockerfile

Lines changed: 0 additions & 22 deletions
This file was deleted.

benchmark.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
#! /usr/bin/env python
1+
#! /usr/bin/env python3
22
import scraperwiki
33
import os
4-
from six.moves import range
54

65
rows = [{'id': i, 'test': i * 2, 's': "abc"} for i in range(1000)]
76

87
try:
98
os.remove('scraperwiki.sqlite')
10-
except OSError:
9+
except FileNotFoundError:
1110
pass
1211

1312
scraperwiki.sql.save(['id'], rows)

pyproject.toml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ authors = [
1010
{ name = "ScraperWiki Developers", email = "hello@scraperwiki.com" }
1111
]
1212
license = { text = "GPLv3+" }
13-
readme = "README.md" # Assumes you have a README file
13+
readme = "README.rst"
1414
urls = { Repository = "https://github.com/cantabular/scraperwiki-python" }
1515
classifiers = [
1616
"Intended Audience :: Developers",
@@ -25,10 +25,8 @@ classifiers = [
2525
"Topic :: Database :: Front-Ends",
2626
]
2727
dependencies = [
28-
"requests==2.32.5",
29-
"six==1.17.0",
30-
"sqlalchemy==2.0.45",
31-
"alembic==1.17.2",
28+
"sqlalchemy>=2,<3",
29+
"alembic",
3230
]
3331

3432
[tool.setuptools.packages.find]

save_speedtest.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
#! /usr/bin/env python
1+
#! /usr/bin/env python3
22
import scraperwiki
3-
from six.moves import range
43

54
rows = [{'id': i, 'test': i * 2, 's': "xx"*i} for i in range(10)]
65

scraperwiki/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
#!/usr/bin/env python
2-
# Thomas Levine, ScraperWiki Limited
3-
41
'''
52
Local version of ScraperWiki Utils, documentation here:
63
https://scraperwiki.com/docs/python/python_help_documentation/

scraperwiki/sql.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,23 @@
1010

1111
import alembic.ddl
1212
import sqlalchemy
13-
import six
1413

1514
DATABASE_NAME = os.environ.get("SCRAPERWIKI_DATABASE_NAME",
1615
"sqlite:///scraperwiki.sqlite")
1716

1817
DATABASE_TIMEOUT = float(os.environ.get("SCRAPERWIKI_DATABASE_TIMEOUT", 300))
1918
SECONDS_BETWEEN_COMMIT = 2
20-
unicode = str
2119

2220
# The scraperwiki.sqlite.SqliteError exception
2321
SqliteError = sqlalchemy.exc.SQLAlchemyError
2422

2523
class Blob(bytes):
26-
2724
"""
2825
Represents a blob as a string.
2926
"""
27+
pass
28+
3029
PYTHON_SQLITE_TYPE_MAP = {
31-
str: sqlalchemy.types.Text,
3230
str: sqlalchemy.types.Text,
3331
int: sqlalchemy.types.BigInteger,
3432
bool: sqlalchemy.types.Boolean,
@@ -37,19 +35,10 @@ class Blob(bytes):
3735
datetime.date: sqlalchemy.types.Date,
3836
datetime.datetime: sqlalchemy.types.DateTime,
3937

38+
bytes: sqlalchemy.types.LargeBinary,
4039
Blob: sqlalchemy.types.LargeBinary,
4140
}
4241

43-
if bytes is not str:
44-
# On 2.7, bytes *is* str, so we don't want to overwrite that.
45-
PYTHON_SQLITE_TYPE_MAP[bytes] = sqlalchemy.types.LargeBinary
46-
47-
try:
48-
PYTHON_SQLITE_TYPE_MAP[long] = sqlalchemy.types.BigInteger
49-
except NameError:
50-
pass
51-
52-
5342
class _State:
5443

5544
"""
@@ -183,7 +172,7 @@ def select(query, data=None):
183172

184173
rows = []
185174
for row in result:
186-
rows.append(dict(list(row._mapping.items())))
175+
rows.append(dict(row._mapping))
187176

188177
return rows
189178

@@ -268,7 +257,7 @@ def save_var(name, value):
268257
if column_type == sqlalchemy.types.LargeBinary:
269258
value_blob = value
270259
else:
271-
value_blob = unicode(value).encode('utf-8')
260+
value_blob = str(value).encode('utf-8')
272261

273262
values = dict(name=name,
274263
value_blob=value_blob,

scraperwiki/utils.py

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,53 @@
1-
#!/usr/bin/env python
2-
# utils.py
3-
# David Jones, ScraperWiki Limited
4-
# Thomas Levine, ScraperWiki Limited
5-
61
'''
72
Local version of ScraperWiki Utils, documentation here:
83
https://scraperwiki.com/docs/python/python_help_documentation/
94
'''
105
import os
6+
import shutil
117
import sys
128
import warnings
139
import tempfile
14-
import six.moves.urllib.parse
15-
import six.moves.urllib.request
16-
import requests
10+
import urllib.parse
11+
import urllib.request
1712

1813

1914
def scrape(url, params=None, user_agent=None):
2015
'''
2116
Scrape a URL optionally with parameters.
22-
This is effectively a wrapper around urllib2.urlopen.
17+
This is effectively a wrapper around urllib.request.urlopen.
2318
'''
2419

2520
headers = {}
2621

2722
if user_agent:
2823
headers['User-Agent'] = user_agent
2924

30-
data = params and six.moves.urllib.parse.urlencode(params) or None
31-
req = six.moves.urllib.request.Request(url, data=data, headers=headers)
32-
f = six.moves.urllib.request.urlopen(req)
25+
data = None
26+
if params:
27+
data = urllib.parse.urlencode(params).encode('utf-8')
28+
29+
req = urllib.request.Request(url, data=data, headers=headers)
3330

34-
text = f.read()
35-
f.close()
31+
with urllib.request.urlopen(req) as f:
32+
text = f.read()
3633

3734
return text
3835

3936

4037
def pdftoxml(pdfdata, options=""):
4138
"""converts pdf file to xml file"""
39+
if not shutil.which('pdftohtml'):
40+
warnings.warn(
41+
'scraperwiki.pdftoxml requires pdftohtml, but pdftohtml was not found '
42+
'in the PATH. If you wish to use this function, you probably need to '
43+
'install pdftohtml.'
44+
)
45+
return None
4246
pdffout = tempfile.NamedTemporaryFile(suffix='.pdf')
4347
pdffout.write(pdfdata)
4448
pdffout.flush()
4549

46-
xmlin = tempfile.NamedTemporaryFile(mode='r', suffix='.xml')
50+
xmlin = tempfile.NamedTemporaryFile(mode='r', suffix='.xml', encoding="utf-8")
4751
tmpxml = xmlin.name # "temph.xml"
4852
cmd = 'pdftohtml -xml -nodrm -zoom 1.5 -enc UTF-8 -noframes {} "{}" "{}"'.format(
4953
options, pdffout.name, os.path.splitext(tmpxml)[0])
@@ -55,29 +59,14 @@ def pdftoxml(pdfdata, options=""):
5559
#xmlfin = open(tmpxml)
5660
xmldata = xmlin.read()
5761
xmlin.close()
58-
return xmldata.decode('utf-8')
59-
60-
61-
def _in_box():
62-
return os.environ.get('HOME', None) == '/home'
62+
return xmldata
6363

6464

6565
def status(type, message=None):
66-
assert type in ['ok', 'error']
67-
68-
# if not running in a ScraperWiki platform box, silently do nothing
69-
if not _in_box():
70-
return "Not in box"
71-
72-
url = os.environ.get("SW_STATUS_URL", "https://app.quickcode.io/api/status")
73-
if url == "OFF":
74-
# For development mode
75-
return
66+
"""Retained for backwards compatibility."""
67+
warnings.warn("status() is no longer in use following ScraperWiki/Quickcode application shutdown", DeprecationWarning, stacklevel=2)
68+
return
7669

77-
# send status update to the box
78-
r = requests.post(url, data={'type': type, 'message': message})
79-
r.raise_for_status()
80-
return r.content
8170

8271
def swimport(scrapername):
8372
return __import__(scrapername)

tests.py

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#!/usr/bin/env python
21
import datetime
32
import json
43
import os
@@ -13,7 +12,6 @@
1312
from unittest import TestCase, main
1413

1514
import scraperwiki
16-
import six
1715

1816
import sys
1917
# scraperwiki.sql._State.echo = True
@@ -64,9 +62,9 @@ def test_date(self):
6462
date1 = datetime.datetime.now()
6563
date2 = datetime.date.today()
6664
scraperwiki.sql.save_var(u"weird\u1234", date1)
67-
self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), six.text_type(date1))
65+
self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), str(date1))
6866
scraperwiki.sql.save_var(u"weird\u1234", date2)
69-
self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), six.text_type(date2))
67+
self.assertEqual(scraperwiki.sql.get_var(u"weird\u1234"), str(date2))
7068

7169
def test_save_multiple_values(self):
7270
scraperwiki.sql.save_var(u'foo\xc3', u'hello')
@@ -94,7 +92,7 @@ def test_insert(self):
9492
""")
9593
((colname, value, _type),) = self.cursor.fetchall()
9694
expected = [(u"birthday\xfe", u"\u1234November 30, 1888", "text",)]
97-
observed = [(colname, type(b'')(value).decode('utf-8'), _type)]
95+
observed = [(colname, value.decode('utf-8'), _type)]
9896
self.assertEqual(observed, expected)
9997

10098
class SaveAndCheck(TestCase):
@@ -269,7 +267,7 @@ def test_lxml_string(self):
269267
self.save_and_check(
270268
{"text": s},
271269
"lxml",
272-
[(six.text_type(s),)]
270+
[(str(s),)]
273271
)
274272

275273
def test_save_and_drop(self):
@@ -320,7 +318,7 @@ def test_save_date(self):
320318
scraperwiki.sql.select("* FROM swdata"))
321319

322320
self.assertEqual(
323-
{u'keys': [u'birthday\xaa'], u'data': [(six.text_type(d),)]},
321+
{u'keys': [u'birthday\xaa'], u'data': [(str(d),)]},
324322
scraperwiki.sql.execute("SELECT * FROM swdata"))
325323

326324
self.assertEqual(str(d), self.rawdate(column=u"birthday\xaa"))
@@ -331,7 +329,7 @@ def test_save_datetime(self):
331329
scraperwiki.sql.save([], {"birthday": d},
332330
table_name="datetimetest")
333331

334-
exemplar = six.text_type(d)
332+
exemplar = str(d)
335333
# SQLAlchemy appears to convert with extended precision.
336334
exemplar += ".000000"
337335

@@ -347,13 +345,11 @@ def test_save_datetime(self):
347345
class TestStatus(TestCase):
348346
'Test that the status endpoint works.'
349347

350-
def test_does_nothing_if_called_outside_box(self):
351-
scraperwiki.status('ok')
352-
353-
def test_raises_exception_with_invalid_type_field(self):
354-
self.assertRaises(AssertionError, scraperwiki.status, 'hello')
348+
def test_status(self):
349+
with warnings.catch_warnings():
350+
warnings.filterwarnings("ignore", category=DeprecationWarning)
355351

356-
# XXX need some mocking tests for the case of running inside a box
352+
self.assertEqual(scraperwiki.status('ok'), None)
357353

358354
class TestUnicodeColumns(TestCase):
359355
maxDiff = None
@@ -385,6 +381,3 @@ def test_import_scraperwiki_utils(self):
385381

386382
def test_import_scraperwiki_special_utils(self):
387383
self.sw.pdftoxml
388-
389-
if __name__ == '__main__':
390-
main()

0 commit comments

Comments
 (0)