From cf652f79451cc06f0970b32ad6e41c0382202320 Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Tue, 19 Aug 2025 16:24:27 +0200 Subject: [PATCH] WIP Add osm2pgsql-expire command **work in progress** New osm2pgsql-expire command that helps with debugging/visualizing expire tiles issues. See man/osm2pgsql-expire.md for details. --- CMakeLists.txt | 3 + man/CMakeLists.txt | 8 + man/osm2pgsql-expire.md | 111 ++++++++++ src/expire-output.cpp | 2 +- src/osm2pgsql-expire.cpp | 437 +++++++++++++++++++++++++++++++++++++++ src/tile.cpp | 37 ++++ src/tile.hpp | 8 + tests/test-tile.cpp | 17 ++ 8 files changed, 622 insertions(+), 1 deletion(-) create mode 100644 man/osm2pgsql-expire.md create mode 100644 src/osm2pgsql-expire.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a5fbadd5..d203ee565 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -271,6 +271,9 @@ else() target_link_libraries(osm2pgsql-gen osm2pgsql_lib ${LIBS} ${POTRACE_LIBRARY} ${OpenCV_LIBS}) endif() +add_executable(osm2pgsql-expire src/osm2pgsql-expire.cpp) +target_link_libraries(osm2pgsql-expire osm2pgsql_lib ${LIBS}) + ############################################################# # Optional "clang-tidy" target ############################################################# diff --git a/man/CMakeLists.txt b/man/CMakeLists.txt index 94171469d..8f2356e49 100644 --- a/man/CMakeLists.txt +++ b/man/CMakeLists.txt @@ -26,6 +26,14 @@ if(PANDOC) list(APPEND MANPAGE_TARGETS osm2pgsql.1) + add_custom_command(OUTPUT osm2pgsql-expire.1 + COMMAND ${PANDOC} ${PANDOC_MAN_OPTIONS} -o osm2pgsql-expire.1 + ${CMAKE_CURRENT_SOURCE_DIR}/osm2pgsql-expire.md + DEPENDS osm2pgsql-expire.md manpage.template + COMMENT "Building manpage osm2pgsql-expire.1" + VERBATIM) + list(APPEND MANPAGE_TARGETS osm2pgsql-expire.1) + if(BUILD_GEN) add_custom_command(OUTPUT osm2pgsql-gen.1 COMMAND ${PANDOC} ${PANDOC_MAN_OPTIONS} -o osm2pgsql-gen.1 diff --git a/man/osm2pgsql-expire.md b/man/osm2pgsql-expire.md new file mode 100644 index 
000000000..5425ab75a --- /dev/null +++ b/man/osm2pgsql-expire.md @@ -0,0 +1,111 @@ +# NAME + +osm2pgsql-expire - Visualize expire output + +# SYNOPSIS + +**osm2pgsql-expire** \[*OPTIONS*\] *OSM-FILE* (1) + +**osm2pgsql-expire** *TILES-FILE* (2) + +# DESCRIPTION + +The expire command can be used for two things: + +1. **To check what tiles some OSM data is in.** If an *OSM-FILE* is specified, + that file is read and the tiles containing the objects in that file are + calculated. Note that the file must be a regular OSM data file, not a + change file! +2. **Visualize tile list.** If a *TILES-FILE* (presumably generated by osm2pgsql) + is specified, a GeoJSON file is generated showing all mentioned tiles. In + this mode all command line options are ignored. + +Read the *Expire* chapter of the osm2pgsql manual +(https://osm2pgsql.org/doc/manual.html#expire) for details on how to +interpret the `-m, \--mode` and `\--full-area-limit` options. + +# OPTIONS + +This program follows the usual GNU command line syntax, with long options +starting with two dashes (`--`). Mandatory arguments to long options are +mandatory for short options too. + +# MAIN OPTIONS + +-b, \--buffer=VALUE +: Set buffer size around geometry relative to tile size. + +-f, \--format=FORMAT +: Output format. Options are 'tiles' (default) or 'geojson'. The GeoJSON output + uses the Web Mercator projection (EPSG:3857) which is supported by many + programs although, strictly speaking, it is not allowed by the GeoJSON spec. + +\--full-area-limit=VALUE +: Set full area limit. + +-m, \--mode=MODE +: Set expire mode. One of `boundary_only`, `full_area` (default), and `hybrid`. + +-z, \--zoom=ZOOM +: Zoom level on which to calculate tiles. + +# HELP/VERSION OPTIONS + +-h, \--help +: Print help. + +-V, \--version +: Print osm2pgsql version. + +# LOGGING OPTIONS + +\--log-level=LEVEL +: Set log level ('debug', 'info' (default), 'warn', or 'error'). 
+ +\--log-progress=VALUE +: Enable (`true`) or disable (`false`) progress logging. Setting this to + `auto` will enable progress logging on the console and disable it + if the output is redirected to a file. Default: true. + +\--log-sql +: Enable logging of SQL commands for debugging. + +\--log-sql-data +: Enable logging of all data added to the database. This will write out + a huge amount of data! For debugging. + +-v, \--verbose +: Same as `--log-level=debug`. + +# DATABASE OPTIONS + +-d, \--database=NAME +: The name of the PostgreSQL database to connect to. If this parameter + contains an `=` sign or starts with a valid URI prefix (`postgresql://` or + `postgres://`), it is treated as a conninfo string. See the PostgreSQL + manual for details. + +-U, \--username=NAME, \--user=NAME +: PostgreSQL user name. + +-W, \--password +: Force password prompt. + +-H, \--host=HOSTNAME +: Database server hostname or Unix domain socket location. + +-P, \--port=PORT +: Database server port. + +\--schema=SCHEMA +: Default for various schema settings throughout osm2pgsql (default: `public`). + The schema must exist in the database and be writable by the database user. 
+ +# SEE ALSO + +* [osm2pgsql website](https://osm2pgsql.org) +* [osm2pgsql manual](https://osm2pgsql.org/doc/manual.html) +* **osm2pgsql**(1) +* **postgres**(1) +* **osmcoastline**(1) + diff --git a/src/expire-output.cpp b/src/expire-output.cpp index 2d203e9b9..bd2f822d4 100644 --- a/src/expire-output.cpp +++ b/src/expire-output.cpp @@ -45,7 +45,7 @@ std::size_t expire_output_t::output_tiles_to_file( auto const count = for_each_tile( tiles_at_maxzoom, m_minzoom, m_maxzoom, [&](tile_t const &tile) { - fmt::print(outfile, "{}/{}/{}\n", tile.zoom(), tile.x(), tile.y()); + fmt::print(outfile, "{}\n", tile.to_zxy()); }); (void)std::fclose(outfile); diff --git a/src/osm2pgsql-expire.cpp b/src/osm2pgsql-expire.cpp new file mode 100644 index 000000000..619f86ffa --- /dev/null +++ b/src/osm2pgsql-expire.cpp @@ -0,0 +1,437 @@ +/** + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This file is part of osm2pgsql (https://osm2pgsql.org/). + * + * Copyright (C) 2006-2025 by the osm2pgsql developer community. + * For a full list of authors see the git log. 
+ */ + +#include "command-line-app.hpp" +#include "expire-config.hpp" +#include "expire-output.hpp" +#include "expire-tiles.hpp" +#include "format.hpp" +#include "geom-from-osm.hpp" +#include "geom-functions.hpp" +#include "geom.hpp" +#include "input.hpp" +#include "logging.hpp" +#include "middle-ram.hpp" +#include "middle.hpp" +#include "osmdata.hpp" +#include "output.hpp" +#include "reprojection.hpp" +#include "tile.hpp" +#include "version.hpp" + +#include + +#include +#include +#include + +namespace { + +struct config_t +{ + expire_config_t expire_config; + std::string input_file; + std::string mode{"full_area"}; + std::string format{"tiles"}; + std::shared_ptr projection; + command_t command = command_t::process; + uint32_t zoom = 0; +}; + +class output_expire_t : public output_t +{ +public: + output_expire_t(std::shared_ptr const &mid, + std::shared_ptr thread_pool, + options_t const &options, config_t const &cfg); + + output_expire_t(output_expire_t const &) = default; + output_expire_t &operator=(output_expire_t const &) = default; + + output_expire_t(output_expire_t &&) = default; + output_expire_t &operator=(output_expire_t &&) = default; + + ~output_expire_t() override; + + std::shared_ptr + clone(std::shared_ptr const &mid, + std::shared_ptr const ©_thread) const override; + + void start() override {} + void stop() override {} + void sync() override {} + void wait() override {} + + void pending_way(osmid_t /*id*/) override {} + void pending_relation(osmid_t /*id*/) override {} + + void node_add(osmium::Node const &node) override; + void way_add(osmium::Way *way) override; + void relation_add(osmium::Relation const &rel) override; + + void node_modify(osmium::Node const & /*node*/) override {} + void way_modify(osmium::Way * /*way*/) override {} + void relation_modify(osmium::Relation const & /*rel*/) override {} + + void node_delete(osmium::Node const & /*node*/) override {} + void way_delete(osmium::Way * /*way*/) override {} + void 
relation_delete(osmium::Relation const & /*rel*/) override {} + + void merge_expire_trees(output_t * /*other*/) override {} + + void print(std::string const &format); + +private: + config_t m_config; + expire_tiles m_expire_tiles; + expire_output_t m_expire_output; +}; // class output_expire_t + +std::shared_ptr output_expire_t::clone( + std::shared_ptr const & /*mid*/, + std::shared_ptr const & /*copy_thread*/) const +{ + return std::make_shared(*this); +} + +output_expire_t::output_expire_t(std::shared_ptr const &mid, + std::shared_ptr thread_pool, + options_t const &options, config_t const &cfg) +: output_t(mid, std::move(thread_pool), options), m_config(cfg), + m_expire_tiles(cfg.zoom, cfg.projection) +{ +} + +output_expire_t::~output_expire_t() = default; + +void output_expire_t::node_add(osmium::Node const &node) +{ + if (node.tags().empty()) { + return; + } + + auto const geom_merc = + geom::transform(geom::create_point(node), *m_config.projection); + + m_expire_tiles.from_geometry(geom_merc, m_config.expire_config); +} + +void output_expire_t::way_add(osmium::Way *way) +{ + if (way->tags().empty()) { + return; + } + + auto const counts = middle().nodes_get_list(&way->nodes()); + if (counts != way->nodes().size()) { + log_error("Missing nodes in way {}.", way->id()); + } + + osmium::memory::Buffer buffer{1024, osmium::memory::Buffer::auto_grow::yes}; + + geom::geometry_t geom; + + if (way->is_closed()) { + log_debug("Creating polygon from closed way {}...", way->id()); + geom::create_polygon(&geom, *way, &buffer); + } + + if (geom.is_null()) { + log_debug("Creating linestring from way {}...", way->id()); + geom::create_linestring(&geom, *way); + } + + if (geom.is_null()) { + log_warn("Creating geometry from way {} failed.", way->id()); + return; + } + + auto const geom_merc = geom::transform(geom, *m_config.projection); + + m_expire_tiles.from_geometry(geom_merc, m_config.expire_config); +} + +void output_expire_t::relation_add(osmium::Relation const 
&relation) +{ + if (relation.tags().empty()) { + return; + } + + osmium::memory::Buffer buffer{1024, osmium::memory::Buffer::auto_grow::yes}; + + auto const num_members = middle().rel_members_get( + relation, &buffer, + osmium::osm_entity_bits::node | osmium::osm_entity_bits::way); + + if (num_members == 0) { + log_warn("No node/way members found for relation {}.", relation.id()); + return; + } + + for (auto &node : buffer.select()) { + if (!node.location().valid()) { + node.set_location(middle().get_node_location(node.id())); + } + } + + for (auto &way : buffer.select()) { + middle().nodes_get_list(&way.nodes()); + } + // A relation may lack a "type" tag; use "" rather than a null pointer. + std::string const type = relation.tags().get_value_by_key("type", ""); + + osmium::memory::Buffer tmp_buffer{1024, + osmium::memory::Buffer::auto_grow::yes}; + geom::geometry_t geom; + if (type == "multipolygon") { + log_debug("Creating multipolygon from relation {}...", relation.id()); + geom::create_multipolygon(&geom, relation, buffer, &tmp_buffer); + } else if (type == "route" || type == "multilinestring") { + log_debug("Creating multilinestring from relation {}...", + relation.id()); + geom::create_multilinestring(&geom, buffer, false); + } else { + log_debug("Creating geometry collection from relation {}.", + relation.id()); + geom::create_collection(&geom, buffer); + } + + if (geom.is_null()) { + log_warn("Creating geometry from relation {} failed.", relation.id()); + return; + } + + auto const geom_merc = geom::transform(geom, *m_config.projection); + + m_expire_tiles.from_geometry(geom_merc, m_config.expire_config); +} + +std::string tile_to_json(tile_t const &tile) +{ + auto const box = tile.box(0); + + nlohmann::json const feature_json = {{"type", "Feature"}, + {"geometry", + {{"type", "Polygon"}, + {"coordinates", + {{{box.min_x(), box.min_y()}, + {box.min_x(), box.max_y()}, + {box.max_x(), box.max_y()}, + {box.max_x(), box.min_y()}, + {box.min_x(), box.min_y()}}}}}}, + {"properties", + {{"z", tile.zoom()}, + {"x", tile.x()}, + {"y", tile.y()}, + {"label", 
tile.to_zxy()}}}}; + + return feature_json.dump(); +} + +std::string geojson_start() +{ + // The GeoJSON Specification (RFC 7946) only allows lon/lat coordinates, + // but other CRSes are widely supported through this syntax from an earlier + // draft of the GeoJSON spec. + nlohmann::json const type_json = { + {"type", "name"}, + {"properties", {{"name", "urn:ogc:def:crs:EPSG::3857"}}}}; + + return fmt::format("{}{}{}\n", R"({"type": "FeatureCollection", "crs":)", + type_json.dump(), R"(, "features": [)"); +} + +std::string geojson_end() { return "]}\n"; } + +void print_tiles(std::vector const &tiles) +{ + fmt::print("{}\n", geojson_start()); + bool first = true; + for (auto const &tile : tiles) { + fmt::print("{}{}\n", (first ? "" : ","), tile_to_json(tile)); + first = false; + } + fmt::print("{}", geojson_end()); +} + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) +config_t parse_command_line(int argc, char *argv[]) +{ + config_t cfg; + + command_line_app_t app{"osm2pgsql-expire -- Visualize expire output\n"}; + app.get_formatter()->column_width(38); + + app.add_option("OSMFILE", cfg.input_file) + ->description("Input file") + ->type_name("FILE"); + + app.add_option("-b,--buffer", cfg.expire_config.buffer) + ->description("Set buffer size around geometry relative to tile size") + ->type_name("VALUE"); + + app.add_option("-f,--format", cfg.format) + ->description("Output format ('tiles', 'geojson')") + ->type_name("FORMAT"); + + app.add_option("--full-area-limit", cfg.expire_config.full_area_limit) + ->description("Set full area limit") + ->type_name("VALUE"); + + app.add_option("-m,--mode", cfg.mode) + ->description( + "Set expire mode ('boundary_only', 'full_area', 'hybrid')") + ->type_name("MODE"); + + app.add_option("-z,--zoom", cfg.zoom) + ->description("Set zoom level") + ->type_name("ZOOM"); + + try { + app.parse(argc, argv); + } catch (...) 
{ + log_info("osm2pgsql-expire version {}", get_osm2pgsql_version()); + throw; + } + + if (app.want_help()) { + std::cout << app.help(); + cfg.command = command_t::help; + return cfg; + } + + if (app.want_version()) { + cfg.command = command_t::version; + return cfg; + } + + if (cfg.format != "tiles" && cfg.format != "geojson") { + throw std::runtime_error{ + "Value for --format must be 'tiles' or 'geojson'."}; + } + + if (cfg.mode == "boundary_only") { + cfg.expire_config.mode = expire_mode::boundary_only; + } else if (cfg.mode == "full_area") { + cfg.expire_config.mode = expire_mode::full_area; + } else if (cfg.mode == "hybrid") { + cfg.expire_config.mode = expire_mode::hybrid; + } else { + throw std::runtime_error{"Value for --mode must be 'boundary_only', " + "'full_area', or 'hybrid'."}; + } + + return cfg; +} + +void output_expire_t::print(std::string const &format) +{ + auto const tiles = m_expire_tiles.get_tiles(); + if (format == "tiles") { + for (auto const &qk : tiles) { + auto const tile = tile_t::from_quadkey(qk, m_config.zoom); + fmt::print(stdout, "{}\n", tile.to_zxy()); + } + return; + } + + assert(format == "geojson"); + + fmt::print("{}\n", geojson_start()); + bool first = true; + for (auto const &qk : tiles) { + fmt::print("{}{}\n", (first ? 
"" : ","), + tile_to_json(tile_t::from_quadkey(qk, m_config.zoom))); + first = false; + } + fmt::print("{}", geojson_end()); +} + +} // anonymous namespace + +// NOLINTNEXTLINE(bugprone-exception-escape) +int main(int argc, char *argv[]) +{ + try { + auto cfg = parse_command_line(argc, argv); + + cfg.projection = reprojection::create_projection(PROJ_SPHERE_MERC); + + if (cfg.command == command_t::help) { + // Already handled inside parse_command_line() + return 0; + } + + if (cfg.command == command_t::version) { + print_version("osm2pgsql-expire"); + return 0; + } + + log_info("osm2pgsql-expire version {}", get_osm2pgsql_version()); + log_warn("This is an EXPERIMENTAL extension to osm2pgsql."); + + double const distance = tile_t::EARTH_CIRCUMFERENCE / + static_cast(1UL << cfg.zoom) * + cfg.expire_config.buffer; + + log_info("Settings:"); + log_info(" input_file={}", cfg.input_file); + log_info(" buffer={}", cfg.expire_config.buffer); + log_info(" distance={:.2f} web mercator units", distance); + log_info(" full_area_limit={}", cfg.expire_config.full_area_limit); + log_info(" mode={}", cfg.mode); + log_info(" zoom={}", cfg.zoom); + + auto const input = osmium::split_string(cfg.input_file, '.'); + if (input.empty()) { + throw std::runtime_error{"Missing input file"}; + } + + auto const &suffix = input.back(); + if (suffix == "osm" || suffix == "pbf" || suffix == "opl") { + // input is an OSM file + auto thread_pool = std::make_shared(1U); + log_debug("Started pool with {} threads.", + thread_pool->num_threads()); + + options_t options; + options.projection = cfg.projection; + auto middle = create_middle(thread_pool, options); + middle->start(); + + auto output = std::make_shared( + middle->get_query_instance(), thread_pool, options, cfg); + + osmdata_t osmdata{middle, output, options}; + + std::vector files; + files.emplace_back(cfg.input_file); + process_files(files, &osmdata, false, false); + + output->print(cfg.format); + } else { + // input is a tiles file + 
std::ifstream file{cfg.input_file}; + std::string str; + std::vector tiles; + while (std::getline(file, str)) { + tiles.push_back(tile_t::from_zxy(str)); + } + print_tiles(tiles); + } + } catch (std::exception const &e) { + log_error("{}", e.what()); + return 1; + } catch (...) { + log_error("Unknown exception."); + return 1; + } + + return 0; +} diff --git a/src/tile.cpp b/src/tile.cpp index e47f91e7e..3f33a9a91 100644 --- a/src/tile.cpp +++ b/src/tile.cpp @@ -9,6 +9,17 @@ #include "tile.hpp" +#include "format.hpp" + +#include + +#include + +std::string tile_t::to_zxy() const +{ + return fmt::format("{}/{}/{}", zoom(), x(), y()); +} + geom::point_t tile_t::to_tile_coords(geom::point_t p, unsigned int pixel_extent) const noexcept { @@ -55,6 +66,19 @@ uint32_t deinterleave_lowuint32(uint64_t word) noexcept return static_cast(word); } +uint32_t parse_num_with_max(std::string const &str, uint32_t max) +{ + std::size_t pos = 0; + auto const value = std::stoul(str, &pos); + if (pos != str.size()) { + throw std::invalid_argument{"extra characters"}; + } + if (value >= max) { + throw std::invalid_argument{"value too large"}; + } + return static_cast(value); +} + } // anonymous namespace quadkey_t tile_t::quadkey() const noexcept @@ -68,3 +92,16 @@ tile_t tile_t::from_quadkey(quadkey_t quadkey, uint32_t zoom) noexcept return {zoom, deinterleave_lowuint32(quadkey.value()), deinterleave_lowuint32(quadkey.value() >> 1U)}; } + +tile_t tile_t::from_zxy(std::string const &zxy) +{ + auto const p = osmium::split_string(zxy, '/'); + if (p.size() != 3) { + throw fmt_error("Invalid tile '{}'.", zxy); + } + + auto const zoom = parse_num_with_max(p[0], MAX_ZOOM); + uint32_t const max = 1UL << zoom; + + return {zoom, parse_num_with_max(p[1], max), parse_num_with_max(p[2], max)}; +} diff --git a/src/tile.hpp b/src/tile.hpp index 08e6fdf69..3c52e4cf3 100644 --- a/src/tile.hpp +++ b/src/tile.hpp @@ -208,6 +208,9 @@ class tile_t geom::point_t to_world_coords(geom::point_t p, unsigned int 
pixel_extent) const noexcept; + /// Convert to string in format "ZOOM/X/Y". + std::string to_zxy() const; + /// The center of this tile in web mercator (EPSG:3857) units. geom::point_t center() const noexcept; @@ -256,6 +259,11 @@ class tile_t */ static tile_t from_quadkey(quadkey_t quadkey, uint32_t zoom) noexcept; + /** + * Construct tile from string in format "ZOOM/X/Y" + */ + static tile_t from_zxy(std::string const &zxy); + private: static constexpr uint32_t INVALID_ZOOM = std::numeric_limits::max(); diff --git a/tests/test-tile.cpp b/tests/test-tile.cpp index e75d29e37..f5d6489ff 100644 --- a/tests/test-tile.cpp +++ b/tests/test-tile.cpp @@ -30,14 +30,17 @@ TEST_CASE("tile access and comparison", "[NoDB]") REQUIRE(a.zoom() == 3); REQUIRE(a.x() == 2); REQUIRE(a.y() == 1); + REQUIRE(a.to_zxy() == "3/2/1"); REQUIRE(b.zoom() == 3); REQUIRE(b.x() == 2); REQUIRE(b.y() == 1); + REQUIRE(b.to_zxy() == "3/2/1"); REQUIRE(c.zoom() == 3); REQUIRE(c.x() == 1); REQUIRE(c.y() == 2); + REQUIRE(c.to_zxy() == "3/1/2"); REQUIRE(a == b); REQUIRE_FALSE(a != b); @@ -51,6 +54,20 @@ TEST_CASE("tile access and comparison", "[NoDB]") REQUIRE(c < a); } +TEST_CASE("tile_t from zxy string", "[NoDB]") +{ + REQUIRE(tile_t::from_zxy("0/0/0") == tile_t(0, 0, 0)); + REQUIRE(tile_t::from_zxy("2/3/3") == tile_t(2, 3, 3)); + REQUIRE(tile_t::from_zxy("31/3/27") == tile_t(31, 3, 27)); + + REQUIRE_THROWS(tile_t::from_zxy("")); + REQUIRE_THROWS(tile_t::from_zxy("a/c/c")); + REQUIRE_THROWS(tile_t::from_zxy("1a/0/0")); + REQUIRE_THROWS(tile_t::from_zxy("32/0/0")); + REQUIRE_THROWS(tile_t::from_zxy("2/4/0")); + REQUIRE_THROWS(tile_t::from_zxy("2/0/4")); +} + TEST_CASE("tile_t coordinates zoom=0", "[NoDB]") { tile_t const tile{0, 0, 0};