-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathgenerate_zipcode_data.py
More file actions
75 lines (63 loc) · 2.21 KB
/
Copy pathgenerate_zipcode_data.py
File metadata and controls
75 lines (63 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json
import os
import urllib.request
from io import BytesIO
from zipfile import ZipFile
# We download our US Zipcode data from Geonames.org and then store that data
# as an array of dictionaries in memory
# Remote zip archive published by Geonames.org holding the US postal codes.
US_ZIPCODE_DATA_DOWNLOAD_URL = "https://download.geonames.org/export/zip/US.zip"

# Name of the tab-separated text file stored inside that archive.
US_ZIPCODE_TXT_FILE_NAME = "US.txt"

# Directory containing this script; the output path is resolved against it so
# the result does not depend on the current working directory.
basepath = os.path.dirname(os.path.abspath(__file__))

# Destination of the generated JSON file.
US_ZIPCODE_DATA_PATH = os.path.join(
    basepath,
    "tztrout/data/us_zipcode_data.json",
)
def _get_latest_us_zipcode_data():
    """
    Download the latest Geonames.org US Zipcode data zip file
    and extract the US zipcode information from a txt file located
    inside the downloaded zip.

    Returns:
        A list of ``bytes`` objects, one per line of the txt file found
        inside the archive (line endings are kept, as with ``readlines``).

    Raises:
        urllib.error.URLError: if the archive cannot be downloaded.
        zipfile.BadZipFile: if the downloaded payload is not a zip archive.
        KeyError: if the archive does not contain the expected txt file.
    """
    # Read the whole archive into memory and release the HTTP connection
    # right away instead of leaving it to the garbage collector.
    with urllib.request.urlopen(US_ZIPCODE_DATA_DOWNLOAD_URL) as response:
        zip_payload = BytesIO(response.read())

    # Both the archive and the member file handle are closed on exit; the
    # lines are fully materialized by readlines() before that happens.
    with ZipFile(zip_payload) as us_zipcode_zip_file:
        with us_zipcode_zip_file.open(US_ZIPCODE_TXT_FILE_NAME) as txt_file:
            return txt_file.readlines()
def generate_us_zipcode_data(skip_download=False):
    """
    Generate a list of US zipcode data from the txt file in the latest
    Geonames.org database and save it as a JSON file.

    Pass skip_download=True to use the already-committed
    us_zipcode_data.json instead of fetching fresh data from Geonames.org.

    Args:
        skip_download: when true, nothing is downloaded and the existing
            JSON file is left untouched.

    Returns:
        None. The generated list is written to US_ZIPCODE_DATA_PATH.

    Raises:
        IndexError: if a non-blank line has fewer tab-separated fields
            than expected.
        ValueError: if a latitude/longitude field is not a valid float.
    """
    if skip_download:
        print(
            "Skipping Geonames download, using existing us_zipcode_data.json"
        )
        return

    print("Downloading Geonames data...")
    us_zipcode_data = []
    for raw_line in _get_latest_us_zipcode_data():
        line = raw_line.decode("utf-8")
        # A blank (e.g. trailing) line carries no record; skip it rather
        # than fail on the field lookups below.
        if not line.strip():
            continue

        # The order of fields is documented in the readme file of the
        # geonames download.
        fields = line.split("\t")
        us_zipcode_data.append(
            {
                "zip": fields[1],
                "city": fields[2],
                "state": fields[4],
                "latitude": float(fields[9]),
                "longitude": float(fields[10]),
            }
        )

    print(f"There are {len(us_zipcode_data)} zipcodes in the US")

    # Explicit encoding keeps the output independent of the platform's
    # default locale.
    with open(US_ZIPCODE_DATA_PATH, "w", encoding="utf-8") as f:
        json.dump(
            us_zipcode_data,
            f,
            indent=2,
            sort_keys=True,
            separators=(",", ": "),
        )
# Script entry point: running this file directly always fetches fresh data
# and rewrites the JSON file.
if __name__ == "__main__":
    generate_us_zipcode_data(skip_download=False)