From a3fa0f9bd95fdf1f53ea76ae2b576ab7f4f4a7ea Mon Sep 17 00:00:00 2001 From: Finn-Thorben Sell Date: Tue, 26 May 2020 14:48:18 +0200 Subject: [PATCH 1/5] restructure to allow for sql import script --- Dockerfile | 4 +- Pipfile | 3 + Pipfile.lock | 131 +++++++++++++++++- main.py | 44 ++++++ osm_tile_data_extract/__init__.py | 0 osm_tile_data_extract/api_client.py | 0 .../generate_extracts.py | 45 +----- osm_tile_data_extract/import_extracts.py | 89 ++++++++++++ osm_tile_data_extract/util.py | 38 +++++ 9 files changed, 309 insertions(+), 45 deletions(-) create mode 100755 main.py create mode 100644 osm_tile_data_extract/__init__.py create mode 100644 osm_tile_data_extract/api_client.py rename generate_extracts.py => osm_tile_data_extract/generate_extracts.py (77%) mode change 100755 => 100644 create mode 100644 osm_tile_data_extract/import_extracts.py create mode 100644 osm_tile_data_extract/util.py diff --git a/Dockerfile b/Dockerfile index dc7e7bc..0ddfe05 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,9 +8,9 @@ RUN apt install -y --no-install-recommends pipenv osmctools rsync ADD Pipfile Pipfile.lock /app/src/ WORKDIR /app/src RUN pipenv install --system --deploy --ignore-pipfile -ADD generate_extracts.py /app/src/ +ADD osm_tile_data_extract /app/ # add image metadata VOLUME /app/tmp VOLUME /app/out -ENTRYPOINT ["/app/src/generate_extracts.py", "-w", "/app/tmp", "-o", "/app/out"] +ENTRYPOINT ["/app/src/main.py", "-w", "/app/tmp", "-o", "/app/out"] diff --git a/Pipfile b/Pipfile index 043801f..11faeab 100644 --- a/Pipfile +++ b/Pipfile @@ -7,3 +7,6 @@ verify_ssl = true [packages] mercantile = "*" +swaggerpy = "*" +pyswagger = "*" +psycopg2-binary = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 91729fa..4106955 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "7cffd18d7d3f70b619ce481dae52c7120f2d6c1d26694d2d2cfb74dc8587e05e" + "sha256": "dd3b7b42f8fa88a7fd158d61950ec30f50876797dc9f072a9591f34e9afd47b4" }, "pipfile-spec": 6, "requires": {}, @@ -14,6 +14,20 @@ ] }, "default": { + "certifi": { + "hashes": [ + "sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304", + "sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519" + ], + "version": "==2020.4.5.1" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, "click": { "hashes": [ "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", @@ -21,6 +35,13 @@ ], "version": "==7.1.2" }, + "idna": { + "hashes": [ + "sha256:7588d1c14ae4c77d74036e8c22ff447b26d0fde8f007354fd48a7814db15b7cb", + "sha256:a068a21ceac8a4d63dbfd964670474107f541babbd2250d61922f029858365fa" + ], + "version": "==2.9" + }, "mercantile": { "hashes": [ "sha256:9d773dd96d68350c8e3871099a39c0df7d0d9938158478aa1c1a9bcce773122d", @@ -28,6 +49,114 @@ ], "index": "pypi", "version": "==1.1.4" + }, + "psycopg2-binary": { + "hashes": [ + "sha256:008da3ab51adc70a5f1cfbbe5db3a22607ab030eb44bcecf517ad11a0c2b3cac", + "sha256:07cf82c870ec2d2ce94d18e70c13323c89f2f2a2628cbf1feee700630be2519a", + "sha256:08507efbe532029adee21b8d4c999170a83760d38249936038bd0602327029b5", + "sha256:107d9be3b614e52a192719c6bf32e8813030020ea1d1215daa86ded9a24d8b04", + "sha256:17a0ea0b0eabf07035e5e0d520dabc7950aeb15a17c6d36128ba99b2721b25b1", + "sha256:3286541b9d85a340ee4ed42732d15fc1bb441dc500c97243a768154ab8505bb5", + "sha256:3939cf75fc89c5e9ed836e228c4a63604dff95ad19aed2bbf71d5d04c15ed5ce", + "sha256:40abc319f7f26c042a11658bf3dd3b0b3bceccf883ec1c565d5c909a90204434", + "sha256:51f7823f1b087d2020d8e8c9e6687473d3d239ba9afc162d9b2ab6e80b53f9f9", + "sha256:6bb2dd006a46a4a4ce95201f836194eb6a1e863f69ee5bab506673e0ca767057", + "sha256:702f09d8f77dc4794651f650828791af82f7c2efd8c91ae79e3d9fe4bb7d4c98", + "sha256:7036ccf715925251fac969f4da9ad37e4b7e211b1e920860148a10c0de963522", + "sha256:7b832d76cc65c092abd9505cc670c4e3421fd136fb6ea5b94efbe4c146572505", + "sha256:8f74e631b67482d504d7e9cf364071fc5d54c28e79a093ff402d5f8f81e23bfa", + "sha256:930315ac53dc65cbf52ab6b6d27422611f5fb461d763c531db229c7e1af6c0b3", + "sha256:96d3038f5bd061401996614f65d27a4ecb62d843eb4f48e212e6d129171a721f", + "sha256:a20299ee0ea2f9cca494396ac472d6e636745652a64a418b39522c120fd0a0a4", + "sha256:a34826d6465c2e2bbe9d0605f944f19d2480589f89863ed5f091943be27c9de4", + "sha256:a69970ee896e21db4c57e398646af9edc71c003bc52a3cc77fb150240fefd266", + "sha256:b9a8b391c2b0321e0cd7ec6b4cfcc3dd6349347bd1207d48bcb752aa6c553a66", + "sha256:ba13346ff6d3eb2dca0b6fa0d8a9d999eff3dcd9b55f3a890f12b0b6362b2b38", + "sha256:bb0608694a91db1e230b4a314e8ed00ad07ed0c518f9a69b83af2717e31291a3", + "sha256:c8830b7d5f16fd79d39b21e3d94f247219036b29b30c8270314c46bf8b732389", + "sha256:cac918cd7c4c498a60f5d2a61d4f0a6091c2c9490d81bc805c963444032d0dab", + "sha256:cc30cb900f42c8a246e2cb76539d9726f407330bc244ca7729c41a44e8d807fb", + "sha256:ccdc6a87f32b491129ada4b87a43b1895cf2c20fdb7f98ad979647506ffc41b6", + "sha256:d1a8b01f6a964fec702d6b6dac1f91f2b9f9fe41b310cbb16c7ef1fac82df06d", + "sha256:e004db88e5a75e5fdab1620fb9f90c9598c2a195a594225ac4ed2a6f1c23e162", + "sha256:eb2f43ae3037f1ef5e19339c41cf56947021ac892f668765cd65f8ab9814192e", + "sha256:fa466306fcf6b39b8a61d003123d442b23707d635a5cb05ac4e1b62cc79105cd" + ], + "index": "pypi", + "version": "==2.8.5" + }, + "pyaml": { + "hashes": [ + "sha256:29a5c2a68660a799103d6949167bd6c7953d031449d08802386372de1db6ad71", + "sha256:67081749a82b72c45e5f7f812ee3a14a03b3f5c25ff36ec3b290514f8c4c4b99" + ], + "version": "==20.4.0" + }, + "pyswagger": { + "hashes": [ + "sha256:1c0c64f20d8d0baa735cd2b36cf8f62abdf856af1d6b214a236c994f0d325017", + "sha256:6bde5b9aeebf0b032556aecd898e47264e84eefecd6249a0dc0dd826fd0640dd" + ], + "index": "pypi", + "version": "==0.8.39" + }, + "pyyaml": { + "hashes": [ + "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97", + "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76", + "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2", + "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648", + "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf", + "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f", + "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2", + "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee", + "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d", + "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c", + "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a" + ], + "version": "==5.3.1" + }, + "requests": { + "hashes": [ + "sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee", + "sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6" + ], + "version": "==2.23.0" + }, + "six": { + "hashes": [ + "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", + "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" + ], + "version": "==1.15.0" + }, + "swaggerpy": { + "hashes": [ + "sha256:fa54bc2b17eb9ed181bc0b5321b180839f14ce6c86792450d92e9fcdb1acad1f" + ], + "index": "pypi", + "version": "==0.2.1" + }, + "urllib3": { + "hashes": [ + "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527", + "sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115" + ], + "version": "==1.25.9" + }, + "validate-email": { + "hashes": [ + "sha256:784719dc5f780be319cdd185dc85dd93afebdb6ebb943811bc4c7c5f9c72aeaf" + ], + "version": "==1.3" + }, + "websocket-client": { + "hashes": [ + "sha256:0fc45c961324d79c781bab301359d5a1b00b13ad1b10415a4780229ef71a5549", + "sha256:d735b91d6d1692a6a181f2a8c9e0238e5f6373356f561bb9dc4c7af36f452010" + ], + "version": "==0.57.0" } }, "develop": {} diff --git a/main.py b/main.py new file mode 100755 index 0000000..e024163 --- /dev/null +++ b/main.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +import argparse +import os + +from pathlib import Path +from osm_tile_data_extract.generate_extracts import Program as GenerateExtracts +from osm_tile_data_extract.import_extracts import Program as ImportExtracts + + +def parse_args() -> argparse.Namespace: + def directory_type(raw: str): + p = Path(raw) + p.mkdir(exist_ok=True) + if not p.is_dir(): + raise argparse.ArgumentTypeError(f'Path {raw} is not a directory') + return p + + parser = argparse.ArgumentParser('osm-tile-data-extract') + parser.add_argument('-w', '--working-dir', dest='working_dir', type=directory_type, + default=os.path.join(os.path.dirname(__file__), 'tmp'), + help='Working directory in which intermediate and temporary files are stored') + parser.add_argument('-o', '--output-dir', dest='output_dir', type=directory_type, + default=os.path.join(os.path.dirname(__file__), 'out')) + + sub_parsers = parser.add_subparsers(dest='command') + GenerateExtracts.add_args(sub_parsers.add_parser('generate-extracts', + description='Extract similarly sized files from the latest' + 'OpenStreetMap planet dump')) + ImportExtracts.add_args(sub_parsers.add_parser('import-extracts', + description='Import one pbf extract into postgresql and afterwards' + 'dump the database')) + + return parser.parse_args() + + +if __name__ == '__main__': + args = parse_args() + if args.command == 'generate-extracts': + program = GenerateExtracts(args) + elif args.command == 'import-extracts': + program = ImportExtracts(args) + + program.run() diff --git a/osm_tile_data_extract/__init__.py b/osm_tile_data_extract/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/osm_tile_data_extract/api_client.py b/osm_tile_data_extract/api_client.py new file mode 100644 index 0000000..e69de29 diff --git a/generate_extracts.py b/osm_tile_data_extract/generate_extracts.py old mode 100755 new mode 100644 similarity index 77% rename from generate_extracts.py rename to osm_tile_data_extract/generate_extracts.py index 2d52d20..72e8873 --- a/generate_extracts.py +++ b/osm_tile_data_extract/generate_extracts.py @@ -1,6 +1,4 @@ -#! /usr/bin/env python3 import os -import sys import argparse import subprocess import time @@ -9,55 +7,23 @@ from multiprocessing import Lock from concurrent.futures import ThreadPoolExecutor, Future from pathlib import Path - - -class Colors: - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' - - -def print_error(m: str): - print(f'{Colors.FAIL}{m}{Colors.ENDC}', file=sys.stderr) +from .util import * class Program: @staticmethod - def parse_args() -> argparse.Namespace: - def directory_type(raw: str): - p = Path(raw) - p.mkdir(exist_ok=True) - if not p.is_dir(): - raise argparse.ArgumentTypeError(f'Path {raw} is not a directory') - return p - - parser = argparse.ArgumentParser('generate_extracts', - description='Extract similarly sized files from the latest OpenStreetMap ' - 'Planet dump.') + def add_args(parser: argparse.ArgumentParser): parser.add_argument('-p', '--planet-dump', dest='planet_dump', default='https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf', help='Url of the source pbf file') - parser.add_argument('-w', '--working-dir', dest='working_dir', type=directory_type, - default=os.path.join(os.path.dirname(__file__), 'tmp'), - help='Working directory to which the planet dump gets downloaded and in which intermediate ' - 'split files are stored.') - parser.add_argument('-o', '--output-dir', dest='output_dir', type=directory_type, - default=os.path.join(os.path.dirname(__file__), 'out')) parser.add_argument('-s', '--target-size', dest='target_size', default=1.5 * 10 ** 9, type=int, help='Target files will not be larger than this size in bytes') parser.add_argument('-z', '--max-zoom', dest='max_zoom', default=9, type=int, help='Maximum zoom level above which no further splitting will be performed') parser.add_argument('--processes', default=(max(1, os.cpu_count() - 2)), type=int, help='How many concurrent processes to use') - return parser.parse_args() - def __init__(self): - args = self.parse_args() + def __init__(self, args: argparse.Namespace): self.working_dir = args.working_dir self.out_dir = args.output_dir self.target_size = args.target_size @@ -161,8 +127,3 @@ def extract(self, source: mercantile.Tile): with self.lock_running_futures: self.running_futures += 1 future.add_done_callback(lambda result: self._on_future_done(result)) - - -if __name__ == '__main__': - p = Program() - p.run() diff --git a/osm_tile_data_extract/import_extracts.py b/osm_tile_data_extract/import_extracts.py new file mode 100644 index 0000000..a493d24 --- /dev/null +++ b/osm_tile_data_extract/import_extracts.py @@ -0,0 +1,89 @@ +import argparse +import os +import subprocess +import time +from typing import Union + +import psycopg2 +from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT +import mercantile +from osm_tile_data_extract.util import * + + +class Program: + @staticmethod + def add_args(parser: argparse.ArgumentParser): + def auth_type(raw: str) -> list: + if ':' not in raw or len(raw.split(':')) != 2: + raise argparse.ArgumentTypeError('Authentication has invalid format') + return raw.split(':') + + parser.add_argument('--mapping-url', dest='mapping_url', type=str, required=True, + help='Base URL under which a tileserver-mapping server is reachable') + parser.add_argument('--mapping-auth', dest='mapping_auth', type=auth_type, required=True, + help=': combination used to authenticate at the tileserver-mapping') + parser.add_argument('-x', type=int, required=True, + help='X coordinate of tile to import') + parser.add_argument('-y', type=int, required=True, + help='y coordinate of tile to import') + parser.add_argument('-z', type=int, required=True, + help='z coordinate of tile to import') + + db_group = parser.add_argument_group(title='PostgreSQL connection parameters', + description='The configured user must be a superuser to create the needed' + 'PostgreSQL extensions (postgis and hstore).') + db_group.add_argument('--db-host', type=str, default='localhost') + db_group.add_argument('--db-port', type=int, default=5432) + db_group.add_argument('--db-user', type=str, default='osm_tile_data_extract') + db_group.add_argument('--db-password', type=str, default='osm_tile_data_extract') + + def __init__(self, args: argparse.Namespace): + self.api = ApiClient(args.mapping_url, args.mapping_auth) + self.tile = mercantile.Tile(args.x, args.y, args.z) + + self.working_dir = args.working_dir + self.out_dir = args.output_dir + + self.db_host = args.db_host + self.db_port = args.db_port + self.db_user = args.db_user + self.db_password = args.db_password + self.db_conn = None + + def run(self): + print('Connecting to PostgreSQL server') + self.db_conn = psycopg2.connect(f'host={self.db_host} port={self.db_port} dbname=postgres ' + f'user={self.db_user} password={self.db_password}') + self.db_conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + + self._download_pbf() + self._create_postgres_db() + self._import_into_postgres() + + def _download_pbf(self): + print(f'Downloading pbf of {self.tile}') + dump = self.api.get_planet_dump(self.tile) + subprocess.run(['wget', '-N', '-nv', '--show-progress', dump['file']], check=True, cwd=self.working_dir) + + def _create_postgres_db(self): + database = f'osm_tile_data_extract_{self.tile.z}_{self.tile.x}_{self.tile.y}' + # create new database only if necessary + with self.db_conn.cursor() as cursor: + cursor.execute('SELECT datname FROM pg_database;') + if (database,) not in cursor.fetchall(): + print(f'Creating PostgreSQL database {database}') + cursor.execute(f'CREATE DATABASE {database} OWNER %s', [self.db_user]) + + # reconnect to the correct database + self.db_conn.close() + self.db_conn = psycopg2.connect(f'host={self.db_host} port={self.db_port} dbname={database} ' + f'user={self.db_user} password={self.db_password}') + + # enable needed postgres extensions + print('Enabling needed PostgreSQL extensions') + with self.db_conn.cursor() as cursor: + cursor.execute('CREATE EXTENSION postgis;') + cursor.execute('CREATE EXTENSION hstore;') + + def _import_into_postgres(self): + pass diff --git a/osm_tile_data_extract/util.py b/osm_tile_data_extract/util.py new file mode 100644 index 0000000..3bf3f00 --- /dev/null +++ b/osm_tile_data_extract/util.py @@ -0,0 +1,38 @@ +import sys +import mercantile +from urllib.parse import urlparse + +from pyswagger import App, Security +from pyswagger.contrib.client.requests import Client + + +class Colors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + + +def print_error(m: str): + print(f'{Colors.FAIL}{m}{Colors.ENDC}', file=sys.stderr) + + +class ApiClient: + def __init__(self, mapping_url: str, auth: list): + self.app = App._create_(f'{mapping_url}/schema.json') + auth = Security(self.app) + self.client = Client(auth) + + def get_planet_dump(self, tile: mercantile.Tile) -> dict: + operation = self.app.op['api_v1_planet_dumps_list'] + response = self.client.request(operation()).data + matching_dumps = [i for i in response if i['x'] == tile.x and i['y'] == tile.y and i['z'] == tile.z] + + if len(matching_dumps) == 0: + raise LookupError(f'Planet dump for {tile} does not exist') + else: + return matching_dumps[0] From 0ee5732deba2434decccbd607547a03b42e8e3c6 Mon Sep 17 00:00:00 2001 From: Finn-Thorben Sell Date: Thu, 28 May 2020 13:38:27 +0200 Subject: [PATCH 2/5] do import in docker database --- Dockerfile | 14 ++- Pipfile | 11 +- Pipfile.lock | 149 +++++++++++++++++------ osm_tile_data_extract/import_extracts.py | 74 ++++++----- 4 files changed, 167 insertions(+), 81 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0ddfe05..9522ffb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,24 @@ FROM docker.io/debian:buster-slim # install dependencies +ARG PG_VERSION=11 +ENV PG_VERSION=${PG_VERSION} RUN apt update -RUN apt install -y --no-install-recommends pipenv osmctools rsync +RUN apt install -y --no-install-recommends pipenv python3-setuptools rsync postgresql-${PG_VERSION} \ + osmctools osm2pgsql git libpq-dev gcc python3-dev +RUN apt install -y postgresql-${PG_VERSION}-postgis + +RUN pg_dropcluster --stop ${PG_VERSION} main # add sources ADD Pipfile Pipfile.lock /app/src/ WORKDIR /app/src RUN pipenv install --system --deploy --ignore-pipfile -ADD osm_tile_data_extract /app/ +RUN apt purge -y libpq-dev gcc python3-dev +RUN apt autoremove -y + +ADD main.py /app/src/ +ADD osm_tile_data_extract /app/src/osm_tile_data_extract # add image metadata VOLUME /app/tmp diff --git a/Pipfile b/Pipfile index 11faeab..7f02dcd 100644 --- a/Pipfile +++ b/Pipfile @@ -4,9 +4,18 @@ url = "https://pypi.org/simple" verify_ssl = true [dev-packages] +ipython = "*" [packages] mercantile = "*" swaggerpy = "*" pyswagger = "*" -psycopg2-binary = "*" +#psycopg2-binary = "*" + +[packages.tileserver] +git = "https://github.com/tilezen/tileserver.git" +ref = "v2.2.0" + +[packages.vector-datasource] +git = "https://github.com/mapzen/vector-datasource.git" +ref = "v1.8.0" diff --git a/Pipfile.lock b/Pipfile.lock index 4106955..c6e6e38 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "dd3b7b42f8fa88a7fd158d61950ec30f50876797dc9f072a9591f34e9afd47b4" + "sha256": "c5b2df15a2404ad773e5040c1463106962e9a25ffb37cae9b1d9f8570aa0721e" }, "pipfile-spec": 6, "requires": {}, @@ -50,42 +50,6 @@ "index": "pypi", "version": "==1.1.4" }, - "psycopg2-binary": { - "hashes": [ - "sha256:008da3ab51adc70a5f1cfbbe5db3a22607ab030eb44bcecf517ad11a0c2b3cac", - "sha256:07cf82c870ec2d2ce94d18e70c13323c89f2f2a2628cbf1feee700630be2519a", - "sha256:08507efbe532029adee21b8d4c999170a83760d38249936038bd0602327029b5", - "sha256:107d9be3b614e52a192719c6bf32e8813030020ea1d1215daa86ded9a24d8b04", - "sha256:17a0ea0b0eabf07035e5e0d520dabc7950aeb15a17c6d36128ba99b2721b25b1", - "sha256:3286541b9d85a340ee4ed42732d15fc1bb441dc500c97243a768154ab8505bb5", - "sha256:3939cf75fc89c5e9ed836e228c4a63604dff95ad19aed2bbf71d5d04c15ed5ce", - "sha256:40abc319f7f26c042a11658bf3dd3b0b3bceccf883ec1c565d5c909a90204434", - "sha256:51f7823f1b087d2020d8e8c9e6687473d3d239ba9afc162d9b2ab6e80b53f9f9", - "sha256:6bb2dd006a46a4a4ce95201f836194eb6a1e863f69ee5bab506673e0ca767057", - "sha256:702f09d8f77dc4794651f650828791af82f7c2efd8c91ae79e3d9fe4bb7d4c98", - "sha256:7036ccf715925251fac969f4da9ad37e4b7e211b1e920860148a10c0de963522", - "sha256:7b832d76cc65c092abd9505cc670c4e3421fd136fb6ea5b94efbe4c146572505", - "sha256:8f74e631b67482d504d7e9cf364071fc5d54c28e79a093ff402d5f8f81e23bfa", - "sha256:930315ac53dc65cbf52ab6b6d27422611f5fb461d763c531db229c7e1af6c0b3", - "sha256:96d3038f5bd061401996614f65d27a4ecb62d843eb4f48e212e6d129171a721f", - "sha256:a20299ee0ea2f9cca494396ac472d6e636745652a64a418b39522c120fd0a0a4", - "sha256:a34826d6465c2e2bbe9d0605f944f19d2480589f89863ed5f091943be27c9de4", - "sha256:a69970ee896e21db4c57e398646af9edc71c003bc52a3cc77fb150240fefd266", - "sha256:b9a8b391c2b0321e0cd7ec6b4cfcc3dd6349347bd1207d48bcb752aa6c553a66", - "sha256:ba13346ff6d3eb2dca0b6fa0d8a9d999eff3dcd9b55f3a890f12b0b6362b2b38", - "sha256:bb0608694a91db1e230b4a314e8ed00ad07ed0c518f9a69b83af2717e31291a3", - "sha256:c8830b7d5f16fd79d39b21e3d94f247219036b29b30c8270314c46bf8b732389", - "sha256:cac918cd7c4c498a60f5d2a61d4f0a6091c2c9490d81bc805c963444032d0dab", - "sha256:cc30cb900f42c8a246e2cb76539d9726f407330bc244ca7729c41a44e8d807fb", - "sha256:ccdc6a87f32b491129ada4b87a43b1895cf2c20fdb7f98ad979647506ffc41b6", - "sha256:d1a8b01f6a964fec702d6b6dac1f91f2b9f9fe41b310cbb16c7ef1fac82df06d", - "sha256:e004db88e5a75e5fdab1620fb9f90c9598c2a195a594225ac4ed2a6f1c23e162", - "sha256:eb2f43ae3037f1ef5e19339c41cf56947021ac892f668765cd65f8ab9814192e", - "sha256:fa466306fcf6b39b8a61d003123d442b23707d635a5cb05ac4e1b62cc79105cd" - ], - "index": "pypi", - "version": "==2.8.5" - }, "pyaml": { "hashes": [ "sha256:29a5c2a68660a799103d6949167bd6c7953d031449d08802386372de1db6ad71", @@ -138,6 +102,10 @@ "index": "pypi", "version": "==0.2.1" }, + "tileserver": { + "git": "https://github.com/tilezen/tileserver.git", + "ref": "v2.2.0" + }, "urllib3": { "hashes": [ "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527", @@ -151,6 +119,10 @@ ], "version": "==1.3" }, + "vector-datasource": { + "git": "https://github.com/mapzen/vector-datasource.git", + "ref": "v1.8.0" + }, "websocket-client": { "hashes": [ "sha256:0fc45c961324d79c781bab301359d5a1b00b13ad1b10415a4780229ef71a5549", @@ -159,5 +131,106 @@ "version": "==0.57.0" } }, - "develop": {} + "develop": { + "backcall": { + "hashes": [ + "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4", + "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2" + ], + "version": "==0.1.0" + }, + "decorator": { + "hashes": [ + "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760", + "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7" + ], + "version": "==4.4.2" + }, + "ipython": { + "hashes": [ + "sha256:5b241b84bbf0eb085d43ae9d46adf38a13b45929ca7774a740990c2c242534bb", + "sha256:f0126781d0f959da852fb3089e170ed807388e986a8dd4e6ac44855845b0fb1c" + ], + "index": "pypi", + "version": "==7.14.0" + }, + "ipython-genutils": { + "hashes": [ + "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8", + "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8" + ], + "version": "==0.2.0" + }, + "jedi": { + "hashes": [ + "sha256:cd60c93b71944d628ccac47df9a60fec53150de53d42dc10a7fc4b5ba6aae798", + "sha256:df40c97641cb943661d2db4c33c2e1ff75d491189423249e989bcea4464f3030" + ], + "version": "==0.17.0" + }, + "parso": { + "hashes": [ + "sha256:158c140fc04112dc45bca311633ae5033c2c2a7b732fa33d0955bad8152a8dd0", + "sha256:908e9fae2144a076d72ae4e25539143d40b8e3eafbaeae03c1bfe226f4cdf12c" + ], + "version": "==0.7.0" + }, + "pexpect": { + "hashes": [ + "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937", + "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c" + ], + "markers": "sys_platform != 'win32'", + "version": "==4.8.0" + }, + "pickleshare": { + "hashes": [ + "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", + "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56" + ], + "version": "==0.7.5" + }, + "prompt-toolkit": { + "hashes": [ + "sha256:563d1a4140b63ff9dd587bda9557cffb2fe73650205ab6f4383092fb882e7dc8", + "sha256:df7e9e63aea609b1da3a65641ceaf5bc7d05e0a04de5bd45d05dbeffbabf9e04" + ], + "version": "==3.0.5" + }, + "ptyprocess": { + "hashes": [ + "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0", + "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f" + ], + "version": "==0.6.0" + }, + "pygments": { + "hashes": [ + "sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44", + "sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324" + ], + "version": "==2.6.1" + }, + "six": { + "hashes": [ + "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", + "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" + ], + "version": "==1.15.0" + }, + "traitlets": { + "hashes": [ + "sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44", + "sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7" + ], + "version": "==4.3.3" + }, + "wcwidth": { + "hashes": [ + "sha256:cafe2186b3c009a04067022ce1dcd79cb38d8d65ee4f4791b8888d6599d1bbe1", + "sha256:ee73862862a156bf77ff92b09034fc4825dd3af9cf81bc5b360668d425f3c5f1" + ], + "version": "==0.1.9" + } + } } diff --git a/osm_tile_data_extract/import_extracts.py b/osm_tile_data_extract/import_extracts.py index a493d24..58d5f0c 100644 --- a/osm_tile_data_extract/import_extracts.py +++ b/osm_tile_data_extract/import_extracts.py @@ -10,6 +10,9 @@ from osm_tile_data_extract.util import * +PG_VERSION = os.environ.get('PG_VERSION') + + class Program: @staticmethod def add_args(parser: argparse.ArgumentParser): @@ -29,61 +32,52 @@ def auth_type(raw: str) -> list: parser.add_argument('-z', type=int, required=True, help='z coordinate of tile to import') - db_group = parser.add_argument_group(title='PostgreSQL connection parameters', - description='The configured user must be a superuser to create the needed' - 'PostgreSQL extensions (postgis and hstore).') - db_group.add_argument('--db-host', type=str, default='localhost') - db_group.add_argument('--db-port', type=int, default=5432) - db_group.add_argument('--db-user', type=str, default='osm_tile_data_extract') - db_group.add_argument('--db-password', type=str, default='osm_tile_data_extract') - def __init__(self, args: argparse.Namespace): self.api = ApiClient(args.mapping_url, args.mapping_auth) self.tile = mercantile.Tile(args.x, args.y, args.z) - self.working_dir = args.working_dir - self.out_dir = args.output_dir - - self.db_host = args.db_host - self.db_port = args.db_port - self.db_user = args.db_user - self.db_password = args.db_password - self.db_conn = None + self.working_dir = os.path.abspath(args.working_dir) + self.out_dir = os.path.abspath(args.output_dir) + self.pbf_file_name = None # type: Union[None, str] + self.db_process = None # type: Union[None, subprocess.Popen] def run(self): - print('Connecting to PostgreSQL server') - self.db_conn = psycopg2.connect(f'host={self.db_host} port={self.db_port} dbname=postgres ' - f'user={self.db_user} password={self.db_password}') - self.db_conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) - self._download_pbf() self._create_postgres_db() - self._import_into_postgres() + #self._import_into_postgres() def _download_pbf(self): print(f'Downloading pbf of {self.tile}') dump = self.api.get_planet_dump(self.tile) subprocess.run(['wget', '-N', '-nv', '--show-progress', dump['file']], check=True, cwd=self.working_dir) + self.pbf_file_name = str(dump['file']).rsplit('/')[-1] def _create_postgres_db(self): - database = f'osm_tile_data_extract_{self.tile.z}_{self.tile.x}_{self.tile.y}' - # create new database only if necessary - with self.db_conn.cursor() as cursor: - cursor.execute('SELECT datname FROM pg_database;') - if (database,) not in cursor.fetchall(): - print(f'Creating PostgreSQL database {database}') - cursor.execute(f'CREATE DATABASE {database} OWNER %s', [self.db_user]) - - # reconnect to the correct database - self.db_conn.close() - self.db_conn = psycopg2.connect(f'host={self.db_host} port={self.db_port} dbname={database} ' - f'user={self.db_user} password={self.db_password}') + # create new postgresql cluster + db_dir = os.path.join(self.working_dir, 'pg_data') + subprocess.run(['rm', '-rf', db_dir]) + os.makedirs(db_dir) + subprocess.run(['pg_createcluster', PG_VERSION, 'main', '--start', '--datadir', db_dir], check=True) + db_port = subprocess.run(['pg_lsclusters', '--no-header'], check=True, text=True, stdout=subprocess.PIPE)\ + .stdout.split(' ')[2] - # enable needed postgres extensions - print('Enabling needed PostgreSQL extensions') - with self.db_conn.cursor() as cursor: - cursor.execute('CREATE EXTENSION postgis;') - cursor.execute('CREATE EXTENSION hstore;') + # create database + print('Creating PostgreSQL database with extensions') + subprocess.run(['su', 'postgres', '-c', 'echo "CREATE USER tile_data PASSWORD \'tile_data\';" | psql'], check=True, + stdout=subprocess.DEVNULL) + subprocess.run(['su', 'postgres', '-c', 'echo "CREATE DATABASE tile_data with OWNER tile_data;" | psql'], + check=True, stdout=subprocess.DEVNULL) + subprocess.run(['su', 'postgres', '-c', 'echo "CREATE EXTENSION postgis;" | psql -d tile_data'], + check=True, stdout=subprocess.DEVNULL) + subprocess.run(['su', 'postgres', '-c', 'echo "CREATE EXTENSION hstore;" | psql -d tile_data'], + check=True, stdout=subprocess.DEVNULL) def _import_into_postgres(self): - pass + subprocess.run([ + 'osm2pgsql', '--slim', '--drop', '--hstore-all', '-C 3000', '-S osm2pgsql.style', + '-d', self.db_dbname, + '-P', self.db_port, + '-U', self.db_user, + '--number-processes', max(1, os.cpu_count() - 2), + os.path.join(self.working_dir, self.pbf_file_name) + ], check=True, cwd='') From 9bd9afacc1306bc96b73aea58a48f7449222490d Mon Sep 17 00:00:00 2001 From: Finn-Thorben Sell Date: Thu, 28 May 2020 14:40:13 +0200 Subject: [PATCH 3/5] implement importing to postgresql --- Dockerfile | 22 ++++-- Pipfile | 12 +-- Pipfile.lock | 14 +--- osm_tile_data_extract/import_extracts.py | 99 +++++++++++++++++------- osm_tile_data_extract/util.py | 4 + 5 files changed, 98 insertions(+), 53 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9522ffb..0a63c96 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,23 +4,29 @@ FROM docker.io/debian:buster-slim ARG PG_VERSION=11 ENV PG_VERSION=${PG_VERSION} RUN apt update -RUN apt install -y --no-install-recommends pipenv python3-setuptools rsync postgresql-${PG_VERSION} \ - osmctools osm2pgsql git libpq-dev gcc python3-dev +RUN apt install -y --no-install-recommends python3 python pipenv python3-setuptools python3-dev python-wheel \ + python-setuptools python-pip rsync postgresql-${PG_VERSION} osmctools osm2pgsql git libpq-dev gcc make unzip postgis RUN apt install -y postgresql-${PG_VERSION}-postgis RUN pg_dropcluster --stop ${PG_VERSION} main +# add dependency sources +RUN git clone -b v1.8.0 https://github.com/mapzen/vector-datasource.git /app/src/vector-datasource +RUN pip install -r /app/src/vector-datasource/requirements.txt +ENV PYTHONPATH=${PYTHONPATH}:/app/src/vector-datasource +RUN apt purge -y python-setuptools python-pip + # add sources -ADD Pipfile Pipfile.lock /app/src/ -WORKDIR /app/src -RUN pipenv install --system --deploy --ignore-pipfile +ADD Pipfile Pipfile.lock /app/src/osm_tile_data_extract/ +WORKDIR /app/src/osm_tile_data_extract +RUN pipenv install --system --deploy RUN apt purge -y libpq-dev gcc python3-dev RUN apt autoremove -y -ADD main.py /app/src/ -ADD osm_tile_data_extract /app/src/osm_tile_data_extract +ADD main.py /app/src/osm_tile_data_extract +ADD osm_tile_data_extract /app/src/osm_tile_data_extract/osm_tile_data_extract # add image metadata VOLUME /app/tmp VOLUME /app/out -ENTRYPOINT ["/app/src/main.py", "-w", "/app/tmp", "-o", "/app/out"] +ENTRYPOINT ["/app/src/osm_tile_data_extract/main.py", "-w", "/app/tmp", "-o", "/app/out"] diff --git a/Pipfile b/Pipfile index 7f02dcd..b48875a 100644 --- a/Pipfile +++ b/Pipfile @@ -1,3 +1,6 @@ +[requires] +python_version = "3" + [[source]] name = "pypi" url = "https://pypi.org/simple" @@ -10,12 +13,3 @@ ipython = "*" mercantile = "*" swaggerpy = "*" pyswagger = "*" -#psycopg2-binary = "*" - -[packages.tileserver] -git = "https://github.com/tilezen/tileserver.git" -ref = "v2.2.0" - -[packages.vector-datasource] -git = "https://github.com/mapzen/vector-datasource.git" -ref = "v1.8.0" diff --git a/Pipfile.lock b/Pipfile.lock index c6e6e38..bd54615 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,10 +1,12 @@ { "_meta": { "hash": { - "sha256": "c5b2df15a2404ad773e5040c1463106962e9a25ffb37cae9b1d9f8570aa0721e" + "sha256": "2a608c5ef27ce533242f3f379b6e0eed5feb5fbf5e5f17aadb396744c0ca7e0d" }, "pipfile-spec": 6, - "requires": {}, + "requires": { + "python_version": "3" + }, "sources": [ { "name": "pypi", @@ -102,10 +104,6 @@ "index": "pypi", "version": "==0.2.1" }, - "tileserver": { - "git": "https://github.com/tilezen/tileserver.git", - "ref": "v2.2.0" - }, "urllib3": { "hashes": [ "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527", @@ -119,10 +117,6 @@ ], "version": "==1.3" }, - "vector-datasource": { - "git": "https://github.com/mapzen/vector-datasource.git", - "ref": "v1.8.0" - }, "websocket-client": { "hashes": [ "sha256:0fc45c961324d79c781bab301359d5a1b00b13ad1b10415a4780229ef71a5549", diff --git a/osm_tile_data_extract/import_extracts.py b/osm_tile_data_extract/import_extracts.py index 58d5f0c..f204465 100644 --- a/osm_tile_data_extract/import_extracts.py +++ b/osm_tile_data_extract/import_extracts.py @@ -1,15 +1,12 @@ import argparse import os import subprocess -import time +import importlib from typing import Union -import psycopg2 -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT import mercantile from osm_tile_data_extract.util import * - PG_VERSION = os.environ.get('PG_VERSION') @@ -38,46 +35,96 @@ def __init__(self, args: argparse.Namespace): self.working_dir = os.path.abspath(args.working_dir) self.out_dir = os.path.abspath(args.output_dir) - self.pbf_file_name = None # type: Union[None, str] - self.db_process = None # type: Union[None, subprocess.Popen] + self.pbf_file_name = None # type: Union[None, str] + + @property + def db_dbname(self) -> str: + return f'tile_data_{self.tile.x}_{self.tile.y}_{self.tile.z}' + + @property + def db_port(self) -> str: + return subprocess.run(['pg_lsclusters', '--no-header'], check=True, text=True, stdout=subprocess.PIPE) \ + .stdout.split(' ')[2] def run(self): self._download_pbf() self._create_postgres_db() - #self._import_into_postgres() + self._download_shapefiles() + self._import_into_postgres() + self._post_import() + self._dump_postgres_db() + subprocess.run(['pg_ctlcluster', PG_VERSION, 'main', 'stop'], check=True) + self._upload_dump() def _download_pbf(self): - print(f'Downloading pbf of {self.tile}') + print_stage(f'Downloading pbf of {self.tile}') dump = self.api.get_planet_dump(self.tile) - subprocess.run(['wget', '-N', '-nv', '--show-progress', dump['file']], check=True, cwd=self.working_dir) + subprocess.run(['wget', '-N', dump['file']], check=True, cwd=self.working_dir) self.pbf_file_name = str(dump['file']).rsplit('/')[-1] def _create_postgres_db(self): # create new postgresql cluster + print_stage('Creating PostgreSQL cluster') db_dir = os.path.join(self.working_dir, 'pg_data') - subprocess.run(['rm', '-rf', db_dir]) + subprocess.run(['rm', '-rf', db_dir], check=True) os.makedirs(db_dir) - subprocess.run(['pg_createcluster', PG_VERSION, 'main', '--start', '--datadir', db_dir], check=True) - db_port = subprocess.run(['pg_lsclusters', '--no-header'], check=True, text=True, stdout=subprocess.PIPE)\ - .stdout.split(' ')[2] + subprocess.run(['pg_createcluster', PG_VERSION, 'main', '--start', '--datadir', db_dir], check=True, + stdout=subprocess.DEVNULL) # create database - print('Creating PostgreSQL database with extensions') - subprocess.run(['su', 'postgres', '-c', 'echo "CREATE USER tile_data PASSWORD \'tile_data\';" | psql'], check=True, - stdout=subprocess.DEVNULL) - subprocess.run(['su', 'postgres', '-c', 'echo "CREATE DATABASE tile_data with OWNER tile_data;" | psql'], + print_stage('Creating PostgreSQL database with extensions') + subprocess.run(['su', 'postgres', '-c', 'echo "CREATE USER tile_data PASSWORD \'tile_data\';" | psql'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['su', 'postgres', '-c', 'echo "CREATE EXTENSION postgis;" | psql -d tile_data'], + subprocess.run( + ['su', 'postgres', '-c', f'echo "CREATE DATABASE {self.db_dbname} OWNER tile_data;" | psql'], + check=True, stdout=subprocess.DEVNULL) + subprocess.run(['su', 'postgres', '-c', f'echo "CREATE EXTENSION postgis;" | psql -d {self.db_dbname}'], check=True, stdout=subprocess.DEVNULL) - subprocess.run(['su', 'postgres', '-c', 'echo "CREATE EXTENSION hstore;" | psql -d tile_data'], + subprocess.run(['su', 'postgres', '-c', f'echo "CREATE EXTENSION hstore;" | psql -d {self.db_dbname}'], check=True, stdout=subprocess.DEVNULL) + def _download_shapefiles(self): + print_stage('Downloading shapefiles') + bootstrap_dir = os.path.join(self.working_dir, 'vector-bootstrap') + os.makedirs(bootstrap_dir, exist_ok=True) + subprocess.run(['python', './bootstrap.py'], cwd='/app/src/vector-datasource/data', check=True) + subprocess.run(['make', '-f', '/app/src/vector-datasource/data/Makefile-import-data'], + check=True, cwd=bootstrap_dir) + def _import_into_postgres(self): + print_stage("Importing data with osm2pgsql") subprocess.run([ - 'osm2pgsql', '--slim', '--drop', '--hstore-all', '-C 3000', '-S osm2pgsql.style', - '-d', self.db_dbname, - '-P', self.db_port, - '-U', self.db_user, - '--number-processes', max(1, os.cpu_count() - 2), - os.path.join(self.working_dir, self.pbf_file_name) - ], check=True, cwd='') + 'su', 'postgres', '-c', + f'osm2pgsql --slim --hstore-all -C 3000 ' + f'-S /app/src/vector-datasource/osm2pgsql.style ' + f'-d {self.db_dbname} ' + f'-P {self.db_port} ' + f'-U postgres ' + f'--number-processes {max(1, os.cpu_count() - 2)} ' + f'{os.path.join(self.working_dir, self.pbf_file_name)}' + ], check=True) + + def _post_import(self): + print_stage('Importing shapefiles') + subprocess.run(['su', 'postgres', '-c', f'./import-shapefiles.sh | psql -d {self.db_dbname}'], + check=True, cwd=os.path.join(self.working_dir, 'vector-bootstrap')) + + print_stage('Processing imported data') + subprocess.run(['su', 'postgres', '-c', f'./perform-sql-updates.sh -d {self.db_dbname}'], + check=True, cwd='/app/src/vector-datasource/data') + + def _dump_postgres_db(self): + print_stage('Dumping PostgreSQL database') + file_path = os.path.join(self.working_dir, 'db.pg_dump') + with open(file_path, 'wb') as f: + subprocess.run(['su', 'postgres', '-c', + f'pg_dump -p {self.db_port} -d {self.db_dbname} --format custom'], + check=True, cwd=self.out_dir, stdout=f) + subprocess.run(['chown', '0:0', file_path]) + + def _upload_dump(self): + print_stage('Uploading PostgreSQL dump to tileserver-mapping') + file_path = os.path.join(self.working_dir, 'db.pg_dump') + + +# /app/src/osm_tile_data_extract/main.py -w /app/tmp -o /app/out import-extracts --mapping-url http://localhost:8000 --mapping-auth ftsell:foobar123 -z 2 -x 0 -y 3 diff --git a/osm_tile_data_extract/util.py b/osm_tile_data_extract/util.py index 3bf3f00..773536b 100644 --- a/osm_tile_data_extract/util.py +++ b/osm_tile_data_extract/util.py @@ -17,6 +17,10 @@ class Colors: UNDERLINE = '\033[4m' +def print_stage(m: str): + print(f'{Colors.OKBLUE}-----> {m}{Colors.ENDC}') + + def print_error(m: str): print(f'{Colors.FAIL}{m}{Colors.ENDC}', file=sys.stderr) From fd7fe776012bb87aabd660d982510d2ea45e9484 Mon Sep 17 00:00:00 2001 From: Finn-Thorben Sell Date: Thu, 28 May 2020 23:10:58 +0200 Subject: [PATCH 4/5] implement pg_dump upload --- Dockerfile | 3 ++- osm_tile_data_extract/import_extracts.py | 7 +++-- osm_tile_data_extract/util.py | 34 +++++++++++++++++++++++- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0a63c96..385214c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,8 @@ ARG PG_VERSION=11 ENV PG_VERSION=${PG_VERSION} RUN apt update RUN apt install -y --no-install-recommends python3 python pipenv python3-setuptools python3-dev python-wheel \ - python-setuptools python-pip rsync postgresql-${PG_VERSION} osmctools osm2pgsql git libpq-dev gcc make unzip postgis + python-setuptools python-pip rsync postgresql-${PG_VERSION} osmctools osm2pgsql git libpq-dev gcc make unzip \ + postgis curl RUN apt install -y postgresql-${PG_VERSION}-postgis RUN pg_dropcluster --stop ${PG_VERSION} main diff --git a/osm_tile_data_extract/import_extracts.py b/osm_tile_data_extract/import_extracts.py index f204465..b1370d8 100644 --- a/osm_tile_data_extract/import_extracts.py +++ b/osm_tile_data_extract/import_extracts.py @@ -30,7 +30,7 @@ def auth_type(raw: str) -> list: help='z coordinate of tile to import') def __init__(self, args: argparse.Namespace): - self.api = ApiClient(args.mapping_url, args.mapping_auth) + self.api = ApiClient(args.mapping_url, args.mapping_auth[0], args.mapping_auth[1]) self.tile = mercantile.Tile(args.x, args.y, args.z) self.working_dir = os.path.abspath(args.working_dir) @@ -125,6 +125,5 @@ def _dump_postgres_db(self): def _upload_dump(self): print_stage('Uploading PostgreSQL dump to tileserver-mapping') file_path = os.path.join(self.working_dir, 'db.pg_dump') - - -# /app/src/osm_tile_data_extract/main.py -w /app/tmp -o /app/out import-extracts --mapping-url http://localhost:8000 --mapping-auth ftsell:foobar123 -z 2 -x 0 -y 3 + self.api.upload_sql_dump(self.tile, file_path) + subprocess.run(['rsync', file_path, self.out_dir], check=True) diff --git a/osm_tile_data_extract/util.py b/osm_tile_data_extract/util.py index 773536b..f3b90d7 100644 --- a/osm_tile_data_extract/util.py +++ b/osm_tile_data_extract/util.py @@ -1,5 +1,6 @@ import sys import mercantile +import subprocess from urllib.parse import urlparse from pyswagger import App, Security @@ -26,9 +27,13 @@ def print_error(m: str): class ApiClient: - def __init__(self, mapping_url: str, auth: list): + def __init__(self, mapping_url: str, username: str, password: str): + self.username = username + self.password = password + self.app = App._create_(f'{mapping_url}/schema.json') auth = Security(self.app) + auth.update_with('Basic', (username, password)) self.client = Client(auth) def get_planet_dump(self, tile: mercantile.Tile) -> dict: @@ -40,3 +45,30 @@ def get_planet_dump(self, tile: mercantile.Tile) -> dict: raise LookupError(f'Planet dump for {tile} does not exist') else: return matching_dumps[0] + + def upload_sql_dump(self, tile: mercantile.Tile, dump_path: str): + # check if an sql dump object already exists + operation = self.app.op['api_v1_postgresql_dumps_list'] + response = self.client.request(operation()).data + matching_dumps = [i for i in response if i['x'] == tile.x and i['y'] == tile.y and i['z'] == tile.z] + + if len(matching_dumps) == 0: + operation = self.app.op['api_v1_postgresql_dumps_create'] + subprocess.run(['curl', '-u', f'{self.username}:{self.password}', + '-F', f'file=@{dump_path}', '-F', f'x={tile.x}', '-F', f'y={tile.y}', '-F', f'z={tile.z}', + '--request', str(operation.method).upper(), + f'{self.app.schemes[0]}://{self.app.root.host}{operation.path}'], + check=True, stdout=subprocess.DEVNULL) + + else: + # get dump id + operation = self.app.op['api_v1_postgresql_dumps_list'] + response = self.client.request(operation()).data + matching_dumps = [i for i in response if i['x'] == tile.x and i['y'] == tile.y and i['z'] == tile.z] + id = matching_dumps[0]['id'] + + operation = self.app.op['api_v1_postgresql_dumps_partial_update'] + subprocess.run(['curl', '-u', f'{self.username}:{self.password}', '-F', f'file=@{dump_path}', + '--request', str(operation.method).upper(), + str(f'{self.app.schemes[0]}://{self.app.root.host}{operation.path}').format(id=id)], + check=True, stdout=subprocess.DEVNULL) From 4d797e3ba20926d619f48d3de75c24443500612e Mon Sep 17 00:00:00 2001 From: Finn-Thorben Sell Date: Fri, 29 May 2020 13:50:52 +0200 Subject: [PATCH 5/5] implement uploading of finished files --- main.py | 9 ++++++++ osm_tile_data_extract/generate_extracts.py | 2 ++ osm_tile_data_extract/import_extracts.py | 9 -------- osm_tile_data_extract/util.py | 27 ++++++++++++---------- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/main.py b/main.py index e024163..8463ab8 100755 --- a/main.py +++ b/main.py @@ -16,12 +16,21 @@ def directory_type(raw: str): raise argparse.ArgumentTypeError(f'Path {raw} is not a directory') return p + def auth_type(raw: str) -> list: + if ':' not in raw or len(raw.split(':')) != 2: + raise argparse.ArgumentTypeError('Authentication has invalid format') + return raw.split(':') + parser = argparse.ArgumentParser('osm-tile-data-extract') parser.add_argument('-w', '--working-dir', dest='working_dir', type=directory_type, default=os.path.join(os.path.dirname(__file__), 'tmp'), help='Working directory in which intermediate and temporary files are stored') parser.add_argument('-o', '--output-dir', dest='output_dir', type=directory_type, default=os.path.join(os.path.dirname(__file__), 'out')) + parser.add_argument('--mapping-url', dest='mapping_url', type=str, required=True, + help='Base URL under which a tileserver-mapping server is reachable') + parser.add_argument('--mapping-auth', dest='mapping_auth', type=auth_type, required=True, + help=': combination used to authenticate at the tileserver-mapping') sub_parsers = parser.add_subparsers(dest='command') GenerateExtracts.add_args(sub_parsers.add_parser('generate-extracts', diff --git a/osm_tile_data_extract/generate_extracts.py b/osm_tile_data_extract/generate_extracts.py index 72e8873..7b1a4a6 100644 --- a/osm_tile_data_extract/generate_extracts.py +++ b/osm_tile_data_extract/generate_extracts.py @@ -34,6 +34,7 @@ def __init__(self, args: argparse.Namespace): self.lock_running_futures = Lock() self.executor = ThreadPoolExecutor(max_workers=args.processes) + self.api = ApiClient(args.mapping_url, args.mapping_auth[0], args.mapping_auth[1]) def run(self): self.download_planet_dump() @@ -98,6 +99,7 @@ def _generate_tile(self, tile: mercantile.Tile): if target_file.stat().st_size < self.target_size: print(f'{Colors.OKGREEN}{tile} has reached target size{Colors.ENDC}') + self.api.upload_planet_dump(tile, str(target_file.absolute())) subprocess.run(['rsync', str(target_file.absolute()), str(self.out_dir)], check=True) else: self.extract(tile) diff --git a/osm_tile_data_extract/import_extracts.py b/osm_tile_data_extract/import_extracts.py index b1370d8..173c9b5 100644 --- a/osm_tile_data_extract/import_extracts.py +++ b/osm_tile_data_extract/import_extracts.py @@ -13,15 +13,6 @@ class Program: @staticmethod def add_args(parser: argparse.ArgumentParser): - def auth_type(raw: str) -> list: - if ':' not in raw or len(raw.split(':')) != 2: - raise argparse.ArgumentTypeError('Authentication has invalid format') - return raw.split(':') - - parser.add_argument('--mapping-url', dest='mapping_url', type=str, required=True, - help='Base URL under which a tileserver-mapping server is reachable') - parser.add_argument('--mapping-auth', dest='mapping_auth', type=auth_type, required=True, - help=': combination used to authenticate at the tileserver-mapping') parser.add_argument('-x', type=int, required=True, help='X coordinate of tile to import') parser.add_argument('-y', type=int, required=True, diff --git a/osm_tile_data_extract/util.py b/osm_tile_data_extract/util.py index f3b90d7..e7e244f 100644 --- a/osm_tile_data_extract/util.py +++ b/osm_tile_data_extract/util.py @@ -46,14 +46,15 @@ def get_planet_dump(self, tile: mercantile.Tile) -> dict: else: return matching_dumps[0] - def upload_sql_dump(self, tile: mercantile.Tile, dump_path: str): - # check if an sql dump object already exists - operation = self.app.op['api_v1_postgresql_dumps_list'] + def _upload_dump(self, dump_type: str, tile: mercantile.Tile, dump_path: str): + # check if a dump object already exists + operation = self.app.op[f'api_v1_{dump_type}_list'] response = self.client.request(operation()).data matching_dumps = [i for i in response if i['x'] == tile.x and i['y'] == tile.y and i['z'] == tile.z] if len(matching_dumps) == 0: - operation = self.app.op['api_v1_postgresql_dumps_create'] + # create new object + operation = self.app.op[f'api_v1_{dump_type}_create'] subprocess.run(['curl', '-u', f'{self.username}:{self.password}', '-F', f'file=@{dump_path}', '-F', f'x={tile.x}', '-F', f'y={tile.y}', '-F', f'z={tile.z}', '--request', str(operation.method).upper(), @@ -61,14 +62,16 @@ def upload_sql_dump(self, tile: mercantile.Tile, dump_path: str): check=True, stdout=subprocess.DEVNULL) else: - # get dump id - operation = self.app.op['api_v1_postgresql_dumps_list'] - response = self.client.request(operation()).data - matching_dumps = [i for i in response if i['x'] == tile.x and i['y'] == tile.y and i['z'] == tile.z] - id = matching_dumps[0]['id'] - - operation = self.app.op['api_v1_postgresql_dumps_partial_update'] + # patch existing object + dump_id = matching_dumps[0]['id'] + operation = self.app.op[f'api_v1_{dump_type}_partial_update'] subprocess.run(['curl', '-u', f'{self.username}:{self.password}', '-F', f'file=@{dump_path}', '--request', str(operation.method).upper(), - str(f'{self.app.schemes[0]}://{self.app.root.host}{operation.path}').format(id=id)], + str(f'{self.app.schemes[0]}://{self.app.root.host}{operation.path}').format(id=dump_id)], check=True, stdout=subprocess.DEVNULL) + + def upload_planet_dump(self, tile: mercantile.Tile, dump_path: str): + self._upload_dump('planet_dumps', tile, dump_path) + + def upload_sql_dump(self, tile: mercantile.Tile, dump_path: str): + self._upload_dump('postgresql_dumps', tile, dump_path)