Skip to content

Commit 3dd5381

Browse files
committed
add priority and skip lists
1 parent f78d652 commit 3dd5381

File tree

4 files changed

+79
-6
lines changed

4 files changed

+79
-6
lines changed

importer/src/hierarchyitem.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "hierarchyitem.h"
22
#include "utils.h"
33

4+
#include <fstream>
45
#include <iostream>
56
#include <stdexcept>
67

@@ -36,6 +37,60 @@ HierarchyItem::HierarchyItem(const pqxx::row &row)
3637
m_name_extra = get_with_def(m_data_extra, "brand");
3738
}
3839

40+
// Remove leading whitespace from `s`, modifying it in place.
//
// "Whitespace" is whatever std::isspace reports. Each character is handed to
// the predicate as unsigned char: calling std::isspace with a negative value
// (which a plain, signed char holding a non-ASCII byte would convert to) is
// undefined behaviour.
static inline void ltrim(std::string &s)
{
  const auto first_kept =
      std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); });
  s.erase(s.begin(), first_kept);
}
45+
46+
// Remove trailing whitespace from `s`, modifying it in place.
//
// "Whitespace" is whatever std::isspace reports. Each character is handed to
// the predicate as unsigned char: calling std::isspace with a negative value
// (which a plain, signed char holding a non-ASCII byte would convert to) is
// undefined behaviour.
static inline void rtrim(std::string &s)
{
  // Search backwards for the last character to keep; base() converts the
  // reverse iterator into the forward position just past that character.
  const auto last_kept =
      std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); });
  s.erase(last_kept.base(), s.end());
}
52+
53+
// trim from both ends (in place)
54+
static inline void trim(std::string &s)
55+
{
56+
ltrim(s);
57+
rtrim(s);
58+
}
59+
60+
static std::set<std::string> load_list(const std::string &fname)
61+
{
62+
std::set<std::string> d;
63+
if (fname.empty())
64+
return d;
65+
66+
std::ifstream f(fname);
67+
std::string line;
68+
if (!f)
69+
{
70+
std::cerr << "Failed to open a file: " << fname << std::endl;
71+
throw std::runtime_error("File cannot be opened");
72+
}
73+
74+
while (std::getline(f, line))
75+
{
76+
trim(line);
77+
if (!line.empty())
78+
d.insert(line);
79+
}
80+
81+
return d;
82+
}
83+
84+
void HierarchyItem::load_priority_list(const std::string &fname)
85+
{
86+
s_priority_types = load_list(fname);
87+
}
88+
89+
void HierarchyItem::load_skip_list(const std::string &fname)
90+
{
91+
s_skip_types = load_list(fname);
92+
}
93+
3994
bool HierarchyItem::keep() const
4095
{
4196
if (m_type.find_first_not_of(allowed_type_chars) != std::string::npos)

importer/src/hierarchyitem.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ class HierarchyItem
3333

3434
void print_branch(unsigned int offset) const;
3535

36+
public:
37+
static void load_priority_list(const std::string &fname);
38+
static void load_skip_list(const std::string &fname);
39+
3640
private:
3741
hindex m_id;
3842
hindex m_linked_id{ 0 };

importer/src/main.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,29 @@ int main(int argc, char *argv[])
3535
std::string database_path;
3636
std::string postal_country_parser;
3737
std::string postal_address_parser_dir;
38+
std::string type_priority_list;
39+
std::string type_skip_list;
3840
bool verbose_address_expansion = false;
3941

4042
{
4143
po::options_description generic("Geocoder NLP importer options");
42-
generic.add_options()("help", "Help message")("version,v", "Data format version")(
43-
"poly,p", po::value<std::string>(&polyjson),
44-
"Boundary of the imported region in GeoJSON format")(
45-
"postal-country", po::value<std::string>(&postal_country_parser),
46-
"libpostal country preference for this database")(
44+
generic.add_options()("help", "Help message")("version,v", "Data format version");
45+
generic.add_options()("poly,p", po::value<std::string>(&polyjson),
46+
"Boundary of the imported region in GeoJSON format");
47+
generic.add_options()("postal-country", po::value<std::string>(&postal_country_parser),
48+
"libpostal country preference for this database");
49+
generic.add_options()(
4750
"postal-address", po::value<std::string>(&postal_address_parser_dir),
4851
"libpostal address parser directory. If not specified, global libpostal parser directory "
49-
"preference is used.")("verbose", "Verbose address expansion");
52+
"preference is used.");
53+
generic.add_options()(
54+
"priority", po::value<std::string>(&type_priority_list),
55+
"File with OSM tags that are kept even if there is no name associated with the location");
56+
generic.add_options()(
57+
"skip", po::value<std::string>(&type_skip_list),
58+
"File with OSM tags for locations that should be dropped even if there is a name "
59+
"associated with the location");
60+
generic.add_options()("verbose", "Verbose address expansion");
5061

5162
po::options_description hidden("Hidden options");
5263
hidden.add_options()("output-directory", po::value<std::string>(&database_path),
@@ -103,6 +114,9 @@ int main(int argc, char *argv[])
103114
std::cout << "Loaded border GeoJSON. Geometry string length: " << border.size() << "\n";
104115
}
105116

117+
HierarchyItem::load_priority_list(type_priority_list);
118+
HierarchyItem::load_skip_list(type_skip_list);
119+
106120
Hierarchy hierarchy;
107121

108122
std::string postgres_dblink;

0 commit comments

Comments
 (0)