diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..875b1fb --- /dev/null +++ b/.clang-format @@ -0,0 +1,13 @@ +--- +# Defaults come from the GNU style, with the column limit raised to 100. +BasedOnStyle: GNU +ColumnLimit: 100 +--- +Language: Cpp +AllowShortFunctionsOnASingleLine: Inline +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +SpaceBeforeParens: ControlStatements + +AlignConsecutiveAssignments: Consecutive +AlignConsecutiveDeclarations: Consecutive \ No newline at end of file diff --git a/.gitignore b/.gitignore index 8cf1bee..1567605 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,7 @@ *~ /*.pro.user +build/ +.vscode +*.code-workspace +CMakeLists.txt.user diff --git a/CMakeLists.txt b/CMakeLists.txt index ad247d2..49511e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ project(geocoder-nlp DESCRIPTION "Geocoder NLP") set(CMAKE_INCLUDE_CURRENT_DIR ON) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED True) include(FindPkgConfig) @@ -13,11 +13,14 @@ include(FeatureSummary) include(GNUInstallDirs) find_package(PkgConfig REQUIRED) +find_package(nlohmann_json 3.2.0 REQUIRED) +find_package(Boost 1.30 COMPONENTS program_options REQUIRED) pkg_check_modules(MARISA marisa IMPORTED_TARGET) pkg_check_modules(KYOTOCABINET kyotocabinet IMPORTED_TARGET) -pkg_check_modules(POSTAL postal IMPORTED_TARGET) +pkg_check_modules(POSTAL REQUIRED IMPORTED_TARGET libpostal) pkg_check_modules(SQLITE3 sqlite3 IMPORTED_TARGET) +pkg_check_modules(LIBPQXX REQUIRED IMPORTED_TARGET libpqxx) set(SRC src/geocoder.cpp @@ -31,6 +34,32 @@ set(HEAD include_directories(thirdparty/sqlite3pp/headeronly_src) include_directories(src) +# boost (FindBoost publishes its header locations in Boost_INCLUDE_DIRS) +include_directories(${Boost_INCLUDE_DIRS}) + +# importer +set(IMPSRC + importer/src/config.h + importer/src/main.cpp + importer/src/hierarchy.cpp + importer/src/hierarchy.h + importer/src/hierarchyitem.cpp +
importer/src/hierarchyitem.h + importer/src/normalization.cpp + importer/src/normalization.h + importer/src/utils.cpp + importer/src/utils.h +) +add_executable(geocoder-importer ${SRC} ${HEAD} ${IMPSRC}) +target_link_libraries(geocoder-importer + PRIVATE PkgConfig::MARISA + PkgConfig::KYOTOCABINET + PkgConfig::POSTAL + PkgConfig::SQLITE3 + PkgConfig::LIBPQXX + nlohmann_json::nlohmann_json + Boost::program_options) + # demo codes add_executable(geocoder-nlp demo/geocoder-nlp.cpp @@ -38,7 +67,10 @@ add_executable(geocoder-nlp ${HEAD}) target_link_libraries(geocoder-nlp - -lmarisa -lkyotocabinet -lpostal -lsqlite3) + PRIVATE PkgConfig::MARISA + PkgConfig::KYOTOCABINET + PkgConfig::POSTAL + PkgConfig::SQLITE3) add_executable(nearby-line demo/nearby-line.cpp @@ -46,7 +78,10 @@ add_executable(nearby-line ${HEAD}) target_link_libraries(nearby-line - -lmarisa -lkyotocabinet -lpostal -lsqlite3) + PRIVATE PkgConfig::MARISA + PkgConfig::KYOTOCABINET + PkgConfig::POSTAL + PkgConfig::SQLITE3) add_executable(nearby-point demo/nearby-point.cpp @@ -54,8 +89,15 @@ add_executable(nearby-point ${HEAD}) target_link_libraries(nearby-point - -lmarisa -lkyotocabinet -lpostal -lsqlite3) + PRIVATE PkgConfig::MARISA + PkgConfig::KYOTOCABINET + PkgConfig::POSTAL + PkgConfig::SQLITE3) +# install +install(TARGETS geocoder-importer + DESTINATION ${CMAKE_INSTALL_BINDIR}) +# summary feature_summary(WHAT ALL FATAL_ON_MISSING_REQUIRED_PACKAGES) diff --git a/Database.md b/Database.md new file mode 100644 index 0000000..e0e1a40 --- /dev/null +++ b/Database.md @@ -0,0 +1,49 @@ +# Geocoder NLP database format + +The geocoder database consists of several files which are expected to be in the +same directory. All locations are described using a single coordinate to keep the +files as small as possible. + +The files composing a database are: + +1. geonlp-primary.sqlite: SQLite database with location description and coordinate +2. geonlp-normalized.trie: MARISA database with normalized strings +3.
geonlp-normalized-id.kch: Kyoto Cabinet database for linking MARISA and primary IDs + +## geonlp-primary.sqlite + +SQLite database contains location description, their organization into hierarchy +of objects. + +Table `object_primary` keeps location description. In this table, objects are +stored sequentially (in terms of their `id`) according to the positioning in the +object hierarchy with the children stored after parents. Table `hierarchy` has a +record for each item (`id` from `object_primary`) with the children consisting +of parent ID (`prim_id`) and the ID of the last child (`last_subobject`). + +Object types are stored separately in `type` table with the type ID used in +`object_primary`. + +Spatial queries are indexed using R-Tree with `box_id` used as a reference in +`object_primary`. Namely, as all objects are stored as points, for storage +efficiency, objects next to each other are set to have the same `box_id` and are +found through `-rtree` tables. + +Table `meta` keeps database format version and is used to check version +compatibility. + +## geonlp-normalized.trie + +All normalized strings are stored in MARISA database +(https://github.com/s-yata/marisa-trie). Normalized strings are formed from +`name` and other similar fields of `object_primary` table in +`geonlp-primary.sqlite`. All strings are pushed into MARISA database that +assigns its internal ID for each of the strings. + +## geonlp-normalized-id.kch + +Kyoto Cabinet (https://dbmx.net/kyotocabinet/) database for linking MARISA and +primary IDs. Hash database variant is used where `key` is an ID provided by +MARISA for a search string and value is an array of bytes consisting of +`object_primary` IDs stored as `uint32_t` one after another. The array is stored +using `std::string`. 
\ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index c5eea0a..0000000 --- a/Makefile +++ /dev/null @@ -1,100 +0,0 @@ - -###################################################### -# Compiler and libraries -CXX := g++ - -LIBPOSTAL_INCLUDE=-I/usr/local/include -#LIBPOSTAL_INCLUDE=-I../libpostal-install/include - -LIBPOSTAL_LIB=-lpostal -#LIBPOSTAL_LIB=-l:libpostal.a -l:libsnappy.a -#LIBPOSTAL_LIB=-L../libpostal-install/lib -l:libpostal.a - -SQLITE_LIB=-lsqlite3 -#SQLITE_LIB=-l:libsqlite3.a - -LD_EXTRA_OPTIONS += -pthread -lmarisa -lkyotocabinet -#LD_EXTRA_OPTIONS += -ldl -static-libgcc -static-libstdc++ - -CXX_EXTRA_OPTIONS += -DGEONLP_PRINT_DEBUG_QUERIES -CXX_EXTRA_OPTIONS += -DGEONLP_PRINT_DEBUG -CXX_EXTRA_OPTIONS += -DGEONLP_PRINT_SQL - -###################################################### - -SRCSUBDIR := src -OBJSUBDIR := obj - -INCLUDE = $(LIBPOSTAL_INCLUDE) -Ithirdparty/sqlite3pp/headeronly_src -I$(SRCSUBDIR) -LIBRARIES += $(LIBPOSTAL_LIB) $(SQLITE_LIB) - -OBJS = $(patsubst $(SRCSUBDIR)/%.cpp,$(OBJSUBDIR)/%.o,$(wildcard $(SRCSUBDIR)/*.cpp)) - -CXX_EXTRA_OPTIONS += -std=c++11 -CXXFLAGS := -Wall -O2 -g $(EXTRA_OPTIONS) $(CXX_EXTRA_OPTIONS) $(INCLUDE) - -AR = ar -LD = g++ - -all: $(OBJSUBDIR) geocoder-nlp nearby-line nearby-point - -clean: - rm -rf core* $(APPNAME) $(OBJSUBDIR) - -geocoder-nlp: $(OBJS) $(OBJSUBDIR)/demo_geocoder-nlp.o - @echo - @echo "--------- LINKING --- $@ " - rm -f $(APPNAME) - $(LD) -o $@ $^ $(LIBRARIES) $(LD_EXTRA_OPTIONS) - @echo - @echo '--------- Make done ' - @echo - -nearby-line: $(OBJS) $(OBJSUBDIR)/demo_nearby-line.o - @echo - @echo "--------- LINKING --- $@ " - rm -f $(APPNAME) - $(LD) -o $@ $^ $(LIBRARIES) $(LD_EXTRA_OPTIONS) - @echo - @echo '--------- Make done ' - @echo - -nearby-point: $(OBJS) $(OBJSUBDIR)/demo_nearby-point.o - @echo - @echo "--------- LINKING --- $@ " - rm -f $(APPNAME) - $(LD) -o $@ $^ $(LIBRARIES) $(LD_EXTRA_OPTIONS) - @echo - @echo '--------- Make done ' - @echo - 
-$(OBJSUBDIR): - @echo - @echo "--------- Making dir: $@ " - mkdir -p $(OBJSUBDIR) - @echo - -$(OBJSUBDIR)/%.o: $(SRCSUBDIR)/%.cpp - @echo - @echo "------------ $< " - $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $< - @echo - -$(OBJSUBDIR)/demo_geocoder-nlp.o: demo/geocoder-nlp.cpp - @echo - @echo "------------ $< " - $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $< - @echo - -$(OBJSUBDIR)/demo_nearby-line.o: demo/nearby-line.cpp - @echo - @echo "------------ $< " - $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $< - @echo - -$(OBJSUBDIR)/demo_nearby-point.o: demo/nearby-point.cpp - @echo - @echo "------------ $< " - $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $< - @echo - diff --git a/README.md b/README.md index 095bda0..9beac08 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,11 @@ -# geocoder-nlp +# Geocoder NLP -This is a geocoder C++ library that uses libpostal to parse the user +This is a geocoder C++ library that targets offline use by mobile +applications. It is able to perform forward and reverse geocoding. +For forward geocoding, it uses libpostal to parse the user request, normalize the parsed result, and search for the match in -geocoder database. +geocoder database. In addition to traditional reverse geocoding, it is +able to find points of interest close to the reference point or line. The library includes demo program showing how to use it. Its also used as one of the geocoders in OSM Scout Server @@ -29,7 +32,7 @@ libraries mentioned above. ## Databases At present, the datasets required for the geocoder to function are distributed -as a part of OSM Scout Server datasets . +as a part of OSM Scout Server datasets. If you use the geocoder with the full libpostal installation, you don't need to get the libpostal datasets from that location, but can use the datasets @@ -43,8 +46,10 @@ To use country-specific datasets, you would have to get: In addition, the prepared geocoder databases are available at geocoder/SELECT THE NEEDED ONES. 
+Database format is described in [separate document](Database.md). + ## Acknowledgments -libpostal: https://github.com/openvenues/libpostal +libpostal: Used for input parsing; https://github.com/openvenues/libpostal -libosmscout: http://libosmscout.sourceforge.net +Nominatim: Used for data import; https://nominatim.org/ diff --git a/importer/Makefile b/importer/Makefile deleted file mode 100644 index f11e4ab..0000000 --- a/importer/Makefile +++ /dev/null @@ -1,74 +0,0 @@ - - -###################################################### -# Compiler and libraries -CXX := g++ - -LIBPOSTAL_INCLUDE=-I/usr/local/include -#LIBPOSTAL_INCLUDE=-I../../libpostal-install/include - -LIBPOSTAL_LIB=-lpostal -#LIBPOSTAL_LIB=-l:libpostal.a -l:libsnappy.a -#LIBPOSTAL_LIB=-L../../libpostal-install/lib -l:libpostal.a - -SQLITE_LIB=-lsqlite3 -#SQLITE_LIB=-l:libsqlite3.a - -OSMSCOUT=$(shell pwd)/libosmscout/install - -#LD_EXTRA_OPTIONS += -pthread -ldl -static-libgcc -static-libstdc++ - -#CXX_EXTRA_OPTIONS += -DGEONLP_PRINT_DEBUG_QUERIES -#CXX_EXTRA_OPTIONS += -DGEONLP_PRINT_DEBUG - -###################################################### - -APPNAME := importer - -SRCSUBDIR := src -OBJSUBDIR := obj - -INCLUDE = -I$(OSMSCOUT)/include $(LIBPOSTAL_INCLUDE) -I../src -I../thirdparty/sqlite3pp/headeronly_src -I$(SRCSUBDIR) -LIBRARIES += -L$(OSMSCOUT)/lib $(LIBPOSTAL_LIB) $(SQLITE_LIB) -losmscout -lmarisa -lkyotocabinet -Wl,-rpath=$(OSMSCOUT)/lib - -OBJS = $(patsubst $(SRCSUBDIR)/%.cpp,$(OBJSUBDIR)/%.o,$(wildcard $(SRCSUBDIR)/*.cpp)) \ - $(patsubst ../src/%.cpp,$(OBJSUBDIR)/main_%.o,$(wildcard ../src/*.cpp)) - -CXX_EXTRA_OPTIONS += -std=c++11 -CXXFLAGS := -O2 -g $(EXTRA_OPTIONS) $(CXX_EXTRA_OPTIONS) $(INCLUDE) - -AR = ar -LD = g++ - -all: $(OBJSUBDIR) $(APPNAME) - -clean: - rm -rf core* $(APPNAME) $(OBJSUBDIR) - -$(APPNAME): $(OBJS) - @echo - @echo "--------- LINKING --- $@ " - rm -f $(APPNAME) - $(LD) -o $@ $^ $(LIBRARIES) $(LD_EXTRA_OPTIONS) - @echo - @echo '--------- Make done ' - @echo - 
-$(OBJSUBDIR): - @echo - @echo "--------- Making dir: $@ " - mkdir -p $(OBJSUBDIR) - @echo - -$(OBJSUBDIR)/%.o: $(SRCSUBDIR)/%.cpp - @echo - @echo "------------ $< " - $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $< - @echo - -$(OBJSUBDIR)/main_%.o: ../src/%.cpp - @echo - @echo "------------ $< " - $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $< - @echo - diff --git a/importer/README.md b/importer/README.md index c1ea754..57248a1 100644 --- a/importer/README.md +++ b/importer/README.md @@ -1,9 +1,25 @@ -Import program and scripts. +# Import program and scripts. -Import program is using moodified libosmscout library that can be installed -by `install_deps.sh` script. +Import program is using Nominatim database and filters the data using boundary +given in GeoJSON file. -To generate larger amount of missing tags for the input map stylesheet -and whitelist, the scripts under `scripts/tags` (from main directory -of the project) can be used. +The boundary file can be generated from POLY files as provided by Geofabrik or +given in hierarchy folder of OSM Scout Server (see +https://github.com/rinigus/osmscout-server/tree/master/scripts/import/hierarchy). +As a converter, [poly2geojson](https://github.com/frafra/poly2geojson/) can be +used. +Data can be filtered using data/priority.list and data/skip.list files. Those +list OSM tags in the form where tag type and its value are merged using `_`. Out +of the lists, the priority one gives locations that would be kept in the +database even without names associated with them for reverse geocoding. The +"skip" list allows to specify locations that would be dropped by location type. + +To generate larger amount of tags for the priority list, the scripts +under `scripts/tags` (from main directory of the project) can be used. 
+ +## Used + +- Nominatim Docker https://github.com/mediagis/nominatim-docker/ +- For testing: Nominatim-UI https://github.com/osm-search/nominatim-ui +- poly2geojson: https://github.com/frafra/poly2geojson \ No newline at end of file diff --git a/importer/stylesheet/whitelist b/importer/data/priority.list similarity index 100% rename from importer/stylesheet/whitelist rename to importer/data/priority.list diff --git a/importer/duplicates.sql b/importer/duplicates.sql new file mode 100644 index 0000000..51973be --- /dev/null +++ b/importer/duplicates.sql @@ -0,0 +1,7 @@ +select f.*, o.name from +(select o.name, parent, postal_code, t.name as type_name, count(*) as cnt from object_primary o +join "type" t on t.id = o.type_id +group by o.name, parent, postal_code, t.name +order by cnt desc +limit 25) f +join object_primary o on f.parent=o.id diff --git a/importer/import_pbf.sh b/importer/import_pbf.sh deleted file mode 100755 index 7755a2c..0000000 --- a/importer/import_pbf.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash - -set -e - -if [ "$#" -eq 1 ]; then - D=${1%-latest.osm.pbf} - D=${D%.osm.pbf} - D=${D%.pbf} - D=${D%.osm} - if [ "$1" == "$D" ] - then - D=$1-imported - fi - -elif [ "$#" -eq 2 ]; then - D=${2} -elif [ "$#" -eq 3 ]; then - D=${2} - EXTRA_POST=${3} -elif [ "$#" -eq 4 ]; then - D=${2} - EXTRA_POST=${3} - POSTAL=${4} -else - echo "Usage: ./import_pbf.sh openstreetmapfilename [imported-dir-name] [extra_postcodes] [libpostal_country]" - exit 0 -fi - -PROGPATH=$(dirname `realpath "$0"`) - -echo Make directory: $D -mkdir -p "$D"/tmp - -export LD_LIBRARY_PATH="$PROGPATH"/libosmscout/install/lib:$LD_LIBRARY_PATH - -####################################################### -# list the modules required to generate location index -# -# modules were found by logging the full import and checking -# which files provide and require the needed files -for module in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 17 22; do - "$PROGPATH"/libosmscout/install/bin/Import --altLangOrder en 
-s $module -e $module --typefile "$PROGPATH"/stylesheet/map.ost --destinationDirectory "$D"/tmp "$1" -done -#"$PROGPATH"/libosmscout/install/bin/Import --altLangOrder en --typefile "$PROGPATH"/stylesheet/map.ost --destinationDirectory "$D"/tmp "$1" - -if [ "$#" -eq 4 ]; then - "$PROGPATH"/importer "$D"/tmp "$D" "$PROGPATH"/stylesheet/whitelist "$EXTRA_POST" "$POSTAL" -elif [ "$#" -eq 3 ]; then - "$PROGPATH"/importer "$D"/tmp "$D" "$PROGPATH"/stylesheet/whitelist "$EXTRA_POST" -else - "$PROGPATH"/importer "$D"/tmp "$D" "$PROGPATH"/stylesheet/whitelist -fi - -echo Removing temporary files -rm -rf "$D"/tmp diff --git a/importer/install_deps.sh b/importer/install_deps.sh deleted file mode 100755 index 777f5e2..0000000 --- a/importer/install_deps.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -set -e - -INSTALLDIR=`pwd`/libosmscout/install - -git clone https://github.com/rinigus/libosmscout.git || true -(cd libosmscout && git checkout geocoder-nlp) - -mkdir -p libosmscout/build || true -(cd libosmscout/build && cmake -DCMAKE_INSTALL_PREFIX:PATH=$INSTALLDIR -DBUILD_WITH_OPENMP=OFF -DOSMSCOUT_BUILD_DOC_API=OFF -DOSMSCOUT_BUILD_BINDING_JAVA=OFF -DOSMSCOUT_BUILD_MAP_OPENGL=OFF -DOSMSCOUT_BUILD_MAP_AGG=OFF -DOSMSCOUT_BUILD_MAP_CAIRO=OFF -DOSMSCOUT_BUILD_MAP_SVG=OFF -DOSMSCOUT_BUILD_MAP_QT=OFF -DOSMSCOUT_BUILD_MAP_IOSX=OFF -DOSMSCOUT_BUILD_TESTS=OFF -DOSMSCOUT_BUILD_DEMOS=OFF -DOSMSCOUT_BUILD_BINDING_CSHARP=OFF -DOSMSCOUT_BUILD_CLIENT_QT=OFF -DOSMSCOUT_BUILD_TOOL_OSMSCOUT2=OFF -DOSMSCOUT_BUILD_TOOL_STYLEEDITOR=OFF -DGPERFTOOLS_USAGE=OFF -DOSMSCOUT_BUILD_TOOL_IMPORT=ON -DBUILD_IMPORT_TOOL_FOR_DISTRIBUTION=ON -DBUILD_SHARED_LIBS=OFF .. 
&& make -j8 && make install) diff --git a/importer/src/config.h b/importer/src/config.h new file mode 100644 index 0000000..07e9549 --- /dev/null +++ b/importer/src/config.h @@ -0,0 +1,22 @@ +#ifndef GEOCODER_CONFIG_H +#define GEOCODER_CONFIG_H + +#include + +#define TEMPORARY "TEMPORARY" // set to empty if need to debug import + +/// if there are more expansions than specified, this object will be dropped from normalization +/// table +#define MAX_NUMBER_OF_EXPANSIONS 85 + +/// starting from this length, check whether the string is suspicious +#define LENGTH_STARTING_SUSP_CHECK 200 + +#define MAX_COMMAS 10 /// maximal number of commas allowed in a name + +#define GEOCODER_IMPORTER_POSTGRES "GEOCODER_IMPORTER_POSTGRES" + +typedef uint64_t hindex; +typedef long long int sqlid; /// type used by IDs in SQLite + +#endif diff --git a/importer/src/hierarchy.cpp b/importer/src/hierarchy.cpp new file mode 100644 index 0000000..d0999d4 --- /dev/null +++ b/importer/src/hierarchy.cpp @@ -0,0 +1,195 @@ +#include "hierarchy.h" + +#include +#include + +Hierarchy::Hierarchy() {} + +Hierarchy::~Hierarchy() {} + +void Hierarchy::add_item(std::shared_ptr &item) +{ + hindex id = item->id(); + hindex parent_id = item->parent_id(); + if (m_items.count(id)) + throw std::runtime_error("Trying to insert item that has been inserted earlier"); + + m_items[id] = item; + auto p = m_items.find(parent_id); + if (p != m_items.end()) + p->second->add_child(item); + else + { + auto root_leaf = m_root.find(parent_id); + if (root_leaf == m_root.end()) + m_root[parent_id] = std::set >({ item }); + else + root_leaf->second.insert(item); + } + + // check if added item was a parent for someone and adjust root accordingly + auto root_leaf = m_root.find(id); + if (root_leaf != m_root.end()) + { + for (auto root_iter : root_leaf->second) + { + if (root_iter->parent_id() != id) + throw std::runtime_error("Mismatch between expected parent and root location"); + item->add_child(root_iter); + } +
m_root.erase(root_leaf); + } +} + +bool Hierarchy::add_linked_item(std::shared_ptr &item) +{ + hindex linked = item->linked_id(); + auto tolink = m_items.find(linked); + if (tolink == m_items.end()) + { + std::cout << "Failed to find linked object " << linked << " required by " << item->id() + << ". Skipping linkage.\n"; + return false; + } + + tolink->second->add_linked(item); + return true; +} + +void Hierarchy::cleanup() +{ + for (auto root_iter = m_root.begin(); root_iter != m_root.end(); ++root_iter) + { + std::set > keep; + for (auto item : root_iter->second) + { + item->cleanup_children(); + if (item->keep()) + keep.insert(item); + else + keep.insert(item->children().begin(), item->children().end()); + } + root_iter->second = keep; + + // ensure that the parent is set correctly + for (auto item : root_iter->second) + item->set_parent(root_iter->first, true); + } +} + +void Hierarchy::set_country(const std::string &country, hindex id) +{ + if (!m_items.count(id)) + { + std::cout << "Missing country in the database: " << country << " / " << id << "\n"; + for (auto item : root_items()) + if (item->country() == country) + item->print_branch(0); + return; // nothing to attach to: m_items[id] would insert and dereference a null parent + } + auto parent = m_items[id]; + for (auto root_iter = m_root.begin(); root_iter != m_root.end(); ++root_iter) + { + std::set > remove; + for (auto item : root_iter->second) + if (item->country() == country && item->id() != id) + { + parent->add_child(item); + remove.insert(item); + } + std::cout << "Relocated to country: " << country << " - " << remove.size() << "\n"; + for (auto item : remove) + root_iter->second.erase(item); + } +} + +void Hierarchy::finalize() +{ + m_root_finalized = root_items(); + sqlid index = 1; // next free index; the number of indexed items is index - 1 + for (auto item : m_root_finalized) + { + index = item->index(index, 0); + item->set_parent(0); + } + + std::cout << "Hierarchy: active items: " << (index - 1) + << " / cleared items: " << m_items.size() - (index - 1) << "\n"; +} + +void Hierarchy::write(sqlite3pp::database &db) const +{ + for (auto item :
m_root_finalized) + item->write(db); +} + +std::deque > Hierarchy::root_items() const +{ + std::deque > q; + for (auto root_iter = m_root.begin(); root_iter != m_root.end(); ++root_iter) + for (auto item : root_iter->second) + q.push_back(item); + return q; +} + +size_t Hierarchy::get_root_count() const +{ + size_t count{ 0 }; + for (auto i : m_root) + count += i.second.size(); + return count; +} + +hindex Hierarchy::get_next_nonzero_root_parent() const +{ + for (auto root_iter = m_root.begin(); root_iter != m_root.end(); ++root_iter) + if (root_iter->first) + return root_iter->first; + return 0; +} + +std::set Hierarchy::get_root_countries() const +{ + std::set missing; + for (auto item : root_items()) + missing.insert(item->country()); + return missing; +} + +bool Hierarchy::has_item(hindex id) const +{ + return m_items.count(id); +} + +void Hierarchy::print(bool full) const +{ + std::set root_ids; + for (auto item : root_items()) + { + if (full) + item->print_branch(0); + else + item->print_item(0); + root_ids.insert(item->id()); + } + + std::cout << (full ? 
"\n\n" : "") << "Root items:\n"; + for (auto id : root_ids) + std::cout << id << " "; + std::cout << "\n"; + std::cout << "Root items count: " << get_root_count() << "\n"; + + std::cout << "Countries: "; + for (auto c : get_root_countries()) + std::cout << c << " "; + std::cout << "\n"; +} + +void Hierarchy::print_root_with_parent_id(hindex parent_id) const +{ + for (auto item : root_items()) + { + if (item->parent_id() == parent_id) + item->print_item(0); + } +} diff --git a/importer/src/hierarchy.h b/importer/src/hierarchy.h new file mode 100644 index 0000000..8ceeba1 --- /dev/null +++ b/importer/src/hierarchy.h @@ -0,0 +1,47 @@ +#ifndef HIERARCHY_H +#define HIERARCHY_H + +#pragma once + +#include "hierarchyitem.h" + +#include +#include +#include +#include +#include +#include + +class Hierarchy +{ +public: + Hierarchy(); + ~Hierarchy(); + + void add_item(std::shared_ptr &item); + bool add_linked_item(std::shared_ptr &item); + void set_country(const std::string &country, hindex id); + void cleanup(); + void finalize(); + void write(sqlite3pp::database &db) const; + + size_t get_missing_count() const { return m_root.size(); } + size_t get_root_count() const; + bool has_item(hindex id) const; + + hindex get_next_nonzero_root_parent() const; + std::set get_root_countries() const; + + void print(bool full = true) const; + void print_root_with_parent_id(hindex parent_id) const; + +private: + std::deque > root_items() const; + +private: + std::map > m_items; + std::map > > m_root; + std::deque > m_root_finalized; +}; + +#endif diff --git a/importer/src/hierarchyitem.cpp b/importer/src/hierarchyitem.cpp new file mode 100644 index 0000000..ffb3e2a --- /dev/null +++ b/importer/src/hierarchyitem.cpp @@ -0,0 +1,288 @@ +#include "hierarchyitem.h" +#include "utils.h" + +#include +#include +#include + +std::set HierarchyItem::s_priority_types; +std::set HierarchyItem::s_skip_types; + +static std::string allowed_type_chars = "abcdefghijklmnopqrstuvwxyz_-"; + 
+HierarchyItem::HierarchyItem(const pqxx::row &row) +{ + m_id = row["place_id"].as(0); + m_linked_id = row["linked_place_id"].as(0); + m_parent_id = row["parent_place_id"].as(0); + m_country = row["country_code"].as(""); + m_type = geocoder_type(row["class"].as(""), row["type"].as("")); + m_housenumber = row["housenumber"].as(""); + m_postcode = row["postcode"].as(""); + m_latitude = row["latitude"].as(0); + m_longitude = row["longitude"].as(0); + m_osm_id = row["osm_id"].as(0); + + m_data_name = parse_to_map(row["name"].as("")); + m_data_extra = parse_to_map(row["extra"].as("")); + + set_names(); +} + +// trim from start (in place) +static inline void ltrim(std::string &s) +{ + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !std::isspace(ch); })); +} + +// trim from end (in place) +static inline void rtrim(std::string &s) +{ + s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), + s.end()); +} + +// trim from both ends (in place) +static inline void trim(std::string &s) +{ + ltrim(s); + rtrim(s); +} + +static std::set load_list(const std::string &fname) +{ + std::set d; + if (fname.empty()) + return d; + + std::ifstream f(fname); + std::string line; + if (!f) + { + std::cerr << "Failed to open a file: " << fname << std::endl; + throw std::runtime_error("File cannot be opened"); + } + + while (std::getline(f, line)) + { + trim(line); + if (!line.empty()) + d.insert(line); + } + + return d; +} + +void HierarchyItem::load_priority_list(const std::string &fname) +{ + s_priority_types = load_list(fname); +} + +void HierarchyItem::load_skip_list(const std::string &fname) +{ + s_skip_types = load_list(fname); +} + +bool HierarchyItem::keep() const +{ + if (m_type.find_first_not_of(allowed_type_chars) != std::string::npos) + { + std::cout << "Dropping " << m_type << "\n"; + return false; + } + if (s_skip_types.count(m_type) > 0) + return false; + return !m_name.empty() || s_priority_types.count(m_type) > 0; 
+} + +bool HierarchyItem::is_duplicate(std::shared_ptr item) const +{ + if (s_priority_types.count(m_type) > 0) + return false; + + if (m_name != item->m_name || m_postcode != item->m_postcode) + return false; + + if (m_type == item->m_type || same_starts_with("building", m_type, item->m_type) + || same_starts_with("highway", m_type, item->m_type)) + return true; + + return false; +} + +void HierarchyItem::add_child(std::shared_ptr child) +{ + m_children.push_back(child); + child->set_parent(m_id); +} + +void HierarchyItem::add_linked(std::shared_ptr linked) +{ + m_data_name.insert(linked->m_data_name.begin(), linked->m_data_name.end()); + m_data_extra.insert(linked->m_data_extra.begin(), linked->m_data_extra.end()); + set_names(); +} + +void HierarchyItem::set_names() +{ + m_name = get_with_def(m_data_name, "name"); + m_name_extra.clear(); + if (!m_housenumber.empty()) + { + m_name_extra = m_name; + m_name = m_housenumber; + } + + if (m_name_extra.empty()) + m_name_extra = get_with_def(m_data_extra, "brand"); +} + +void HierarchyItem::set_parent(hindex parent, bool force) +{ + if (!force && m_parent_id != parent && m_parent_id != 0 && parent != 0) + { + std::cout << "New parent (" << parent << ") for " << m_id << " does not match old one (" + << m_parent_id << ")\n"; + throw std::runtime_error("Mismatch between new and old parent"); + } + m_parent_id = parent; + // for (auto c : m_children) + // c->set_parent(m_id, force); +} + +void HierarchyItem::cleanup_children(bool duplicate_only) +{ + // as a result of this run, children that are supposed to be kept are staying in children + // property. 
all disposed ones are still pointed to via Hierarchy map, but should not be accessed + // while moving along hierarchy for indexing or writing it + { + std::deque > children; + for (auto item : m_children) + { + item->cleanup_children(); + if (item->keep()) + children.push_back(item); + else + children.insert(children.end(), item->m_children.begin(), item->m_children.end()); + } + m_children = children; + } + + // check for duplicates + bool had_duplicates = false; + for (size_t child_index = 0; child_index < m_children.size(); ++child_index) + { + std::shared_ptr item = m_children[child_index]; + std::deque > children; + std::deque > duplicates; + + children.insert(children.end(), m_children.begin(), m_children.begin() + child_index + 1); + + for (size_t i = child_index + 1; i < m_children.size(); ++i) + if (m_children[i]->is_duplicate(item)) + duplicates.push_back(m_children[i]); + else + children.push_back(m_children[i]); + + // merge duplicates + for (auto &i : duplicates) + { + had_duplicates = true; + item->add_linked(i); + item->m_children.insert(item->m_children.end(), i->m_children.begin(), + i->m_children.end()); + for (auto &i_children : i->m_children) + i_children->set_parent(item->m_id, true); + } + + if (had_duplicates) + item->cleanup_children(true); + + m_children = children; + } + + // set parent, forced + for (auto item : m_children) + item->set_parent(m_id, true); +} + +sqlid HierarchyItem::index(sqlid idx, sqlid parent) +{ + if (!keep()) + throw std::runtime_error("Trying to index a location that was not supposed to be kept"); + m_my_index = idx; + m_parent_index = parent; + ++idx; + for (auto item : m_children) + idx = item->index(idx, m_my_index); + m_last_child_index = idx - 1; + return idx; +} + +void HierarchyItem::write(sqlite3pp::database &db) const +{ + if (!keep()) + throw std::runtime_error("Trying to write a location that was not supposed to be kept"); + + // primary data + std::string name_en = get_with_def(m_data_name, "name:en"); + 
std::string phone = get_with_def(m_data_extra, "phone"); + std::string website = get_with_def(m_data_extra, "website"); + + { // m_id is a 64-bit hindex: bind it as sqlid, a cast to int would truncate large ids + sqlite3pp::command cmd(db, "INSERT INTO object_primary_tmp (id, postgres_id, name, name_extra, " + "name_en, phone, postal_code, website, parent, longitude, " + "latitude) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); + cmd.binder() << m_my_index << (sqlid)m_id << m_name << m_name_extra << name_en << phone + << m_postcode << website << m_parent_index << m_longitude << m_latitude; + if (cmd.execute() != SQLITE_OK) + std::cerr << "WriteSQL : error inserting primary data for " << m_id << ", " << m_my_index + << "\n"; + } + + // type + { + std::string command + //= "INSERT INTO object_type_tmp (prim_id, type) VALUES (?, \"" + type + "\")"; + = "INSERT INTO object_type_tmp (prim_id, type) VALUES (?, ?)"; + sqlite3pp::command cmd(db, command.c_str()); + cmd.binder() << m_my_index << m_type; + if (cmd.execute() != SQLITE_OK) + std::cerr << "WriteSQL: error inserting type for " << m_id << ", " << m_my_index << "\n"; + } + + // hierarchy + if (m_last_child_index > m_my_index) + { + sqlite3pp::command cmd(db, "INSERT INTO hierarchy (prim_id, last_subobject) VALUES (?, ?)"); + cmd.binder() << m_my_index << m_last_child_index; + if (cmd.execute() != SQLITE_OK) + std::cerr << "WriteSQL: error inserting hierarchy for " << m_id << ", " << m_my_index + << " - " << m_last_child_index << "\n"; + } + + // children + for (const auto &c : m_children) + c->write(db); +} + +void HierarchyItem::print_item(unsigned int offset) const +{ + std::cout << std::string(offset, ' ') << "- " << m_id << " "; + if (!m_housenumber.empty()) + std::cout << "house " << m_housenumber << " "; + for (const auto &i : m_data_name) + std::cout << i.first << ": " << i.second << " "; + std::cout << "(" << m_my_index << " " << m_last_child_index << ": " + << m_last_child_index - m_my_index << ": " << m_parent_id << ", " << m_country + << ", osmid=" << m_osm_id << ")\n"; + if
(m_children.size()) + std::cout << std::string(offset + 2, ' ') << "|\n"; +} + +void HierarchyItem::print_branch(unsigned int offset) const +{ + print_item(offset); + for (auto c : m_children) + c->print_branch(offset + 3); +} diff --git a/importer/src/hierarchyitem.h b/importer/src/hierarchyitem.h new file mode 100644 index 0000000..89657e6 --- /dev/null +++ b/importer/src/hierarchyitem.h @@ -0,0 +1,72 @@ +#ifndef HIERARCHYITEM_H +#define HIERARCHYITEM_H + +#include "config.h" + +#include +#include +#include +#include +#include +#include + +class HierarchyItem +{ +public: + HierarchyItem(const pqxx::row &row); + ~HierarchyItem(){}; + + hindex id() const { return m_id; } + hindex linked_id() const { return m_linked_id; } + hindex parent_id() const { return m_parent_id; } + const std::string &country() const { return m_country; } + bool keep() const; + + const std::deque > &children() { return m_children; } + + void add_child(std::shared_ptr child); + void add_linked(std::shared_ptr linked); + void set_parent(hindex parent, bool force = false); + void cleanup_children(bool duplicate_only = false); + sqlid index(sqlid idx, sqlid parent); + void write(sqlite3pp::database &db) const; + + void print_item(unsigned int offset) const; + void print_branch(unsigned int offset) const; + +public: + static void load_priority_list(const std::string &fname); + static void load_skip_list(const std::string &fname); + +protected: + void set_names(); + bool is_duplicate(std::shared_ptr item) const; + +private: + hindex m_id; + hindex m_linked_id{ 0 }; + hindex m_parent_id; + sqlid m_my_index; + sqlid m_parent_index; + sqlid m_last_child_index; + + std::string m_type; + float m_latitude; + float m_longitude; + uint64_t m_osm_id; + std::string m_country; + std::string m_postcode; + std::string m_housenumber; + std::string m_name; + std::string m_name_extra; + + std::map m_data_name; + std::map m_data_extra; + + std::deque > m_children; + + static std::set s_priority_types; + static 
std::set s_skip_types; +}; + +#endif diff --git a/importer/src/main.cpp b/importer/src/main.cpp index 99748f6..4c0da69 100644 --- a/importer/src/main.cpp +++ b/importer/src/main.cpp @@ -3,1101 +3,334 @@ /// num_languages in geocoder.c ///////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include - +#include "config.h" #include "geocoder.h" +#include "hierarchy.h" +#include "normalization.h" -#include -#include - -#include -#include - -#include -#include -#include +#include +#include #include +#include +#include #include -#include +#include +#include #include +#include +#include +#include +#include +#include -#include - -#define MAX_NUMBER_OF_EXPANSIONS 85 /// if there are more expansions - /// that specified, this object - /// will be dropped from - /// normalization table - -#define LENGTH_STARTING_SUSP_CHECK 200 /// starting from this length, - /// check wheher the string is - /// suspicious - -#define MAX_COMMAS 10 /// maximal number of commas allowed in a name - -#define TEMPORARY "TEMPORARY" // set to empty if need to debug import - -typedef long long int sqlid; /// type used by IDs in SQLite - - -//////////////////////////////////////////////////////////////// -/// White list of POI types to be tracked even without name -std::set< std::string > m_poi_types_whitelist; - -//////////////////////////////////////////////////////////////// -/// Track locations and POIs to avoid adding them in duplicate -/// via location and POI visitors -std::set< std::string > m_address_poi_inserted; - -/////////////////////////////////////////////////////////// -/// Track relationship between objects and fills hierarchy -/// table in the resulting database -class IDTracker -{ -public: - sqlid next() { NextID++; return NextID-1; } - - sqlid get_id(osmscout::FileOffset offset) - { - auto indx = m_ids.find(offset); - if (indx == m_ids.end()) return 0; - return indx->second; - } - - void set_id(osmscout::FileOffset fo, sqlid 
id) { m_ids[fo] = id; } - - void set_parent(sqlid id, sqlid parent, bool track_children = false) - { - if ( track_children ) - { - m_last_child[id] = 0; - m_parent[id] = parent; - } - - m_last_child[parent] = id; - if ( m_parent.find(parent) != m_parent.end() ) - set_parent(id, m_parent[parent], false); - } - - void set_parent(sqlid id, osmscout::FileOffset foparent, bool track_children = false) - { - set_parent(id, get_id(foparent), track_children); - } - - void write_hierarchy(sqlite3pp::database &db) - { - for (auto a: m_last_child) - { - sqlid id = a.first; - sqlid last_id = a.second; - if (id >= last_id) continue; - - sqlite3pp::command cmd(db, "INSERT INTO hierarchy (prim_id, last_subobject) VALUES (?, ?)"); - cmd.binder() << id << last_id; - cmd.execute(); - } - } - -protected: - sqlid NextID = 1; - - std::map m_ids; - std::map m_parent; - std::map m_last_child; -}; - -//////////////////////////////////////////////////////////////// -/// Global variable tracking IDs and administrative relationship -IDTracker IDs; - -////////////////////////////////////////////////////////////// -/// Additional postal codes - -struct PostalCode { - std::string code; - double latitude, longitude; -}; - -std::map< osmscout::FileOffset, std::vector > additional_postal_codes; - -////////////////////////////////////////////////////////////// -/// libosmscout helper functions - -void GetObjectTypeCoor( const osmscout::DatabaseRef& database, - const osmscout::ObjectFileRef& object, - std::string &type, - osmscout::GeoCoord& coordinates ) -{ - if (object.GetType()==osmscout::RefType::refNode) - { - osmscout::NodeRef node; - - if (database->GetNodeByOffset(object.GetFileOffset(), - node)) { - type=node->GetType()->GetName(); - coordinates = node->GetCoords(); - } - } - else if (object.GetType()==osmscout::RefType::refArea) { - osmscout::AreaRef area; - - if (database->GetAreaByOffset(object.GetFileOffset(), - area)) { - type=area->GetType()->GetName(); - area->GetCenter(coordinates); 
- } - } - else if (object.GetType()==osmscout::RefType::refWay) { - osmscout::WayRef way; - - if (database->GetWayByOffset(object.GetFileOffset(), - way)) { - type=way->GetType()->GetName(); - coordinates = way->GetCoord(way->nodes.size()/2); - } - } -} - - -///////////////////////////////////////////////////////////////////// -/// WARNING: When adding new languages, increase the language count -/// num_languages in geocoder.c -///////////////////////////////////////////////////////////////////// - -typedef osmscout::FeatureValueReader NameAltReader; -typedef osmscout::FeatureValueReader NameReader; -typedef osmscout::FeatureValueReader PhoneReader; -typedef osmscout::FeatureValueReader PostalCodeReader; -typedef osmscout::FeatureValueReader WebsiteReader; -NameAltReader *nameAltReader{NULL}; -NameReader *nameReader{NULL}; -PhoneReader *phoneReader{NULL}; -PostalCodeReader *postalCodeReader{NULL}; -WebsiteReader *websiteReader{NULL}; -void GetFeatures(const osmscout::FeatureValueBuffer &features, std::string &name, std::string &name_en, std::string &phone, std::string &postal_code, std::string &website) -{ - osmscout::NameFeatureValue *nameValue=nameReader->GetValue(features); - if (nameValue != NULL) - name = nameValue->GetName(); - - osmscout::NameAltFeatureValue *nameAltValue=nameAltReader->GetValue(features); - if (nameAltValue != NULL) - name_en = nameAltValue->GetNameAlt(); - - osmscout::PhoneFeatureValue *phoneValue=phoneReader->GetValue(features); - if (phoneValue != NULL) - phone = phoneValue->GetPhone(); - - osmscout::PostalCodeFeatureValue *postalCodeValue=postalCodeReader->GetValue(features); - if (postalCodeValue != NULL) - postal_code = GeoNLP::Postal::normalize_postalcode(postalCodeValue->GetPostalCode()); - - osmscout::WebsiteFeatureValue *websiteValue=websiteReader->GetValue(features); - if (websiteValue != NULL) - website = websiteValue->GetWebsite(); -} - -void GetObjectFeatures( const osmscout::DatabaseRef& database, - const 
osmscout::ObjectFileRef& object, - std::string &name, - std::string &name_en, - std::string &phone, - std::string &postal_code, - std::string &website, - bool reset = false ) -{ - if (reset) - { - name = std::string(); - name_en = std::string(); - phone = std::string(); - postal_code = std::string(); - website = std::string(); - } - - if (object.GetType()==osmscout::RefType::refNode) - { - osmscout::NodeRef node; - - if (database->GetNodeByOffset(object.GetFileOffset(), - node)) - GetFeatures(node->GetFeatureValueBuffer(), name, name_en, phone, postal_code, website); - } - else if (object.GetType()==osmscout::RefType::refArea) - { - osmscout::AreaRef area; - - if (database->GetAreaByOffset(object.GetFileOffset(), - area)) - GetFeatures(area->GetFeatureValueBuffer(), name, name_en, phone, postal_code, website); - } - else if (object.GetType()==osmscout::RefType::refWay) - { - osmscout::WayRef way; - - if (database->GetWayByOffset(object.GetFileOffset(), - way)) - GetFeatures(way->GetFeatureValueBuffer(), name, name_en, phone, postal_code, website); - } -} - - -//////////////////////////////////////////////////////////////////////////// -/// SQLite helper functions -void write_type(sqlite3pp::database &db, sqlid id, std::string type) -{ - std::string command = "INSERT INTO object_type_tmp (prim_id, type) VALUES (?, \"" + type + "\")"; - sqlite3pp::command cmd(db, command.c_str()); - cmd.binder() << id; - if (cmd.execute() != SQLITE_OK) - std::cerr << "WriteSQLType: error inserting " << id << " " << type << "\n"; -} - +using json = nlohmann::json; +namespace po = boost::program_options; //////////////////////////////////////////////////////////////////////////// -/// Visitors used while going through libosmscout database +// MAIN -////////////////////////////////////////////////// -class AddrVisitor: public osmscout::AddressVisitor +int main(int argc, char *argv[]) { -public: - AddrVisitor(osmscout::DatabaseRef &database, sqlite3pp::database &db, sqlid parent): - 
m_database(database), - m_db(db), - m_parent(parent) - {} - - virtual bool Visit(const osmscout::AdminRegion &adminRegion, const osmscout::PostalArea &postalArea, const osmscout::Location &location, const osmscout::Address &address) - { - std::string type; - std::string name; - std::string name_en; - std::string phone; - std::string postal_code = GeoNLP::Postal::normalize_postalcode(postalArea.name); - std::string website; - osmscout::GeoCoord coordinates; - std::string scoutid = address.object.GetName(); - sqlid id; - - // check if we have this object inserted already - if (m_address_poi_inserted.count(scoutid) > 0) - { - // GetObjectFeatures(m_database, address.object, name, name_en, phone, website); - // std::cout << "AddrVisitor: " << address.name << " " << name << " " << scoutid << " inserted already\n"; - return true; - } - - // new object, insert into set - m_address_poi_inserted.insert(scoutid); - - GetObjectTypeCoor(m_database, address.object, type, coordinates); - id = IDs.next(); - - GetObjectFeatures(m_database, address.object, name, name_en, phone, postal_code, website); - - sqlite3pp::command cmd(m_db, "INSERT INTO object_primary_tmp (id, scoutid, name, name_extra, name_en, phone, postal_code, website, parent, longitude, latitude) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); - cmd.binder() << id - << scoutid - << address.name - << name - << name_en - << phone - << postal_code - << website - << m_parent - << coordinates.GetLon() - << coordinates.GetLat(); - - if (cmd.execute() != SQLITE_OK) - std::cerr << "Error inserting addr " << address.name << "\n"; - - IDs.set_parent(id, m_parent); - write_type(m_db, id, type); - - return true; - } - -protected: - osmscout::DatabaseRef &m_database; - sqlite3pp::database &m_db; - sqlid m_parent; -}; + const int printout_step = 100000; -////////////////////////////////////////////////// -class PoiVisitor: public osmscout::POIVisitor -{ -public: - PoiVisitor(osmscout::DatabaseRef &database, sqlite3pp::database &db, 
sqlid parent): - m_database(database), - m_db(db), - m_parent(parent) - {} + std::string polyjson; + std::string database_path; + std::string postal_country_parser; + std::string postal_address_parser_dir; + std::string type_priority_list; + std::string type_skip_list; + bool verbose_address_expansion = false; - virtual bool Visit(const osmscout::AdminRegion &adminRegion, const osmscout::POI &poi) { - std::string type; - std::string name; - std::string name_en; - std::string phone; - std::string postal_code; - std::string website; - osmscout::GeoCoord coordinates; - std::string scoutid = poi.object.GetName(); - sqlid id; - - // check if we have this object inserted already - if (m_address_poi_inserted.count(scoutid) > 0) + po::options_description generic("Geocoder NLP importer options"); + generic.add_options()("help,h", "Help message")("version,v", "Data format version"); + generic.add_options()("poly,p", po::value(&polyjson), + "Boundary of the imported region in GeoJSON format"); + generic.add_options()("postal-country", po::value(&postal_country_parser), + "libpostal country preference for this database"); + generic.add_options()( + "postal-address", po::value(&postal_address_parser_dir), + "libpostal address parser directory. 
If not specified, global libpostal parser directory " + "preference is used."); + generic.add_options()( + "priority", po::value(&type_priority_list), + "File with OSM tags that are kept even if there is no name associated with the location"); + generic.add_options()( + "skip", po::value(&type_skip_list), + "File with OSM tags for locations that should be dropped even if there is a name " + "associated with the location"); + generic.add_options()("verbose", "Verbose address expansion"); + + po::options_description hidden("Hidden options"); + hidden.add_options()("output-directory", po::value(&database_path), + "Output directory for imported database"); + + po::positional_options_description p; + p.add("output-directory", 1); + + po::options_description cmdline_options; + cmdline_options.add(generic).add(hidden); + + po::variables_map vm; + try { - // GetObjectFeatures(m_database, poi.object, name, name_en, phone, website); - // std::cout << "POIVisitor: " << poi.name << " " << name << " " << scoutid << " inserted already\n"; - return true; + po::store(po::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), + vm); + po::notify(vm); } - - // allow POIs without name only if they are of white-listed types - GetObjectTypeCoor(m_database, poi.object, type, coordinates); - GetObjectFeatures(m_database, poi.object, name, name_en, phone, postal_code, website); - - if (name.empty() && m_poi_types_whitelist.count(type) == 0) - return true; - - // new object, insert into set - m_address_poi_inserted.insert(scoutid); - - id = IDs.next(); - - sqlite3pp::command cmd(m_db, "INSERT INTO object_primary_tmp (id, scoutid, name, name_extra, name_en, phone, postal_code, website, parent, longitude, latitude) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); - cmd.binder() << id - << scoutid - << poi.name - << name - << name_en - << phone - << postal_code - << website - << m_parent - << coordinates.GetLon() - << coordinates.GetLat(); - - if (cmd.execute() != SQLITE_OK) 
- std::cerr << "Error inserting POI " << poi.name << "\n"; - - IDs.set_parent(id, m_parent); - write_type(m_db, id, type); - - return true; - } - -protected: - osmscout::DatabaseRef &m_database; - sqlite3pp::database &m_db; - sqlid m_parent; -}; - -/////////////////////////////////////////////////////// -class LocVisitor: public osmscout::LocationVisitor -{ -public: - LocVisitor(osmscout::DatabaseRef &database, sqlite3pp::database &db, sqlid parent): - m_database(database), - m_db(db), - m_parent(parent) - {} - - virtual bool Visit(const osmscout::AdminRegion &adminRegion, const osmscout::PostalArea &postalArea, const osmscout::Location &location) - { - std::string type; - std::string name; - std::string name_en; - std::string phone; - std::string postal_code = GeoNLP::Postal::normalize_postalcode(postalArea.name); - std::string website; - osmscout::GeoCoord coordinates; - sqlid id; - sqlid locID; - - if ( location.objects.size() < 1 ) + catch (std::exception &e) { - std::cout << "Location " << location.name << " has no objects, skipping\n"; - return true; + std::cerr << "Error while parsing options: " << e.what() << "\n\n"; + std::cerr << generic << "\n"; } - GetObjectTypeCoor(m_database, location.objects[ location.objects.size()/2 ], type, coordinates); - locID = id = IDs.next(); - IDs.set_id(location.locationOffset, id); - - GetObjectFeatures(m_database, location.objects[ location.objects.size()/2 ], name, name_en, phone, postal_code, website); - - sqlite3pp::command cmd(m_db, "INSERT INTO object_primary_tmp (id, name, name_extra, name_en, phone, postal_code, website, parent, longitude, latitude) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); - cmd.binder() << id - << location.name - << name - << name_en - << phone - << postal_code - << website - << m_parent - << coordinates.GetLon() - << coordinates.GetLat(); - - if (cmd.execute() != SQLITE_OK) - std::cerr << "Error inserting location " << location.name << "\n"; - - IDs.set_parent(id, m_parent, true); - 
write_type(m_db, id, type); - - AddrVisitor addr(m_database, m_db, locID); - m_database->GetLocationIndex()->VisitAddresses(adminRegion, postalArea, location, addr); - - return true; - } - -protected: - osmscout::DatabaseRef &m_database; - sqlite3pp::database &m_db; - sqlid m_parent; -}; - - -/////////////////////////////////////////////////////// -class AdminVisitor: public osmscout::AdminRegionVisitor -{ -public: - - AdminVisitor(osmscout::DatabaseRef &database, sqlite3pp::database &db): - m_database(database), - m_db(db) - {} - - virtual osmscout::AdminRegionVisitor::Action Visit(const osmscout::AdminRegion ®ion) - { - // insert region first - std::string type; - std::string name; - std::string name_en; - std::string phone; - std::string postal_code; - std::string website; - osmscout::GeoCoord coordinates; - sqlid id; - sqlid regionID; - - GetObjectTypeCoor(m_database, region.object, type, coordinates); - regionID = id = IDs.next(); - IDs.set_id(region.regionOffset, id); - - GetObjectFeatures(m_database, region.object, name, name_en, phone, postal_code, website); - - sqlite3pp::command cmd(m_db, "INSERT INTO object_primary_tmp (id, name, name_extra, name_en, phone, postal_code, website, parent, longitude, latitude) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); - cmd.binder() << id - << region.name - << name - << name_en - << phone - << postal_code - << website - << IDs.get_id(region.parentRegionOffset) - << coordinates.GetLon() - << coordinates.GetLat(); - IDs.set_parent(id, region.parentRegionOffset, true); - - if (cmd.execute() != SQLITE_OK) - std::cerr << "Error inserting region " << region.name << "\n"; - - write_type(m_db, id, type); - - // record region aliases - std::deque saved_names; saved_names.push_back(region.name); - if (region.aliasName.length() > 0 && - find(saved_names.begin(), - saved_names.end(), - region.aliasName) != saved_names.end() ) + if (vm.count("help")) { - sqlite3pp::command cmd(m_db, "INSERT INTO object_primary_tmp (id, name, name_extra, 
name_en, phone, postal_code, website, parent, longitude, latitude) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); - - GetObjectTypeCoor(m_database, region.aliasObject, type, coordinates); - id = IDs.next(); - - GetObjectFeatures(m_database, region.aliasObject, name, name_en, phone, postal_code, website, true); - - cmd.binder() << id - << region.aliasName - << name - << name_en - << phone - << postal_code - << website - << IDs.get_id(region.parentRegionOffset) - << coordinates.GetLon() - << coordinates.GetLat(); - if (cmd.execute() != SQLITE_OK) - std::cerr << "Error inserting region alias " << region.aliasName << "\n"; - - write_type(m_db, id, type); - IDs.set_parent(id, region.parentRegionOffset); + std::cout << "Geocoder NLP importer:\n\n" + << "Call as\n\n " << argv[0] << " output-directory\n" + << "\nwhere output-directory is a directory for imported database.\n\n" + << generic << "\n"; + return 0; } - for (auto alias: region.aliases) + if (vm.count(("version"))) { - sqlite3pp::command cmd(m_db, "INSERT INTO object_primary_tmp (id, name, name_extra, name_en, phone, postal_code, website, parent, longitude, latitude) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); - - osmscout::ObjectFileRef object(alias.objectOffset, osmscout::refNode); - GetObjectTypeCoor(m_database, object, type, coordinates); - id = IDs.next(); - - GetObjectFeatures(m_database, object, name, name_en, phone, postal_code, website, true); - - cmd.binder() << id - << alias.name - << name - << name_en - << phone - << postal_code - << website - << regionID - << coordinates.GetLon() - << coordinates.GetLat(); - if (cmd.execute() != SQLITE_OK) - std::cerr << "Error inserting region alias 2 " << alias.name << "\n"; - - write_type(m_db, id, type); - IDs.set_parent(id, regionID); - } - - LocVisitor loc(m_database, m_db, regionID); - for (const osmscout::PostalArea &parea: region.postalAreas) - { - m_database->GetLocationIndex()->VisitLocations(region, parea, loc, false); + std::cout << GeoNLP::Geocoder::version << 
"\n"; + return 0; } - PoiVisitor poi(m_database, m_db, regionID); - m_database->GetLocationIndex()->VisitPOIs(region, poi, false); + if (vm.count("verbose")) + verbose_address_expansion = true; - // Add extra postal codes - osmscout::FileOffset rid = region.object.GetFileOffset(); - if (additional_postal_codes.count(rid) > 0) + if (!vm.count("poly")) { - const auto &v = additional_postal_codes.at(rid); - for (auto c: v) - { - sqlite3pp::command cmd(m_db, "INSERT INTO object_primary_tmp (id, postal_code, parent, longitude, latitude) VALUES (?, ?, ?, ?, ?)"); - id = IDs.next(); - cmd.binder() << id - << c.code - << regionID - << c.longitude - << c.latitude; - if (cmd.execute() != SQLITE_OK) - std::cerr << "Error inserting additional postal code " << c.code << "\n"; - - write_type(m_db, id, "postal code"); - IDs.set_parent(id, regionID); - } + std::cerr << "Boundary of the imported region in GeoJSON format is missing\n"; + return -1; } + } - return osmscout::AdminRegionVisitor::visitChildren; - }; - -protected: - osmscout::DatabaseRef &m_database; - sqlite3pp::database &m_db; -}; - -//////////////////////////////////////////////////////////////////////////// -/// Libpostal normalization with search string expansion -void normalize_libpostal(sqlite3pp::database& db, std::string address_expansion_dir, bool verbose) -{ - struct tonorm + // load GeoJSON for surrounding (multi)polygon from poly.json + std::string border; { - std::string name; - sqlid id; - }; + std::ifstream fin(polyjson); + std::istreambuf_iterator begin(fin), end; + std::string b(begin, end); + border = b; + } - std::deque data; - sqlite3pp::query qry(db, "SELECT id, name, name_extra, name_en FROM object_primary_tmp"); - for (auto v : qry) + if (border.size()) { - tonorm d; - sqlid id; - char const *name, *name_extra, *name_en; - v.getter() >> id >> name >> name_extra >> name_en; - - if (name == nullptr) - continue; // no need to add empty name into search index - - d.id = id; - - d.name = name; 
data.push_back(d); - if (name_extra) { d.name = name_extra; data.push_back(d); } - if (name_en) { d.name = name_en; data.push_back(d); } + json j = json::parse(border); + border = j["geometry"].dump(); + std::cout << "Loaded border GeoJSON. Geometry string length: " << border.size() << "\n"; } - // make a new table for normalized names - db.execute("DROP TABLE IF EXISTS normalized_name"); - db.execute("CREATE " TEMPORARY " TABLE normalized_name (prim_id INTEGER, name TEXT NOT NULL, PRIMARY KEY (name, prim_id))"); + HierarchyItem::load_priority_list(type_priority_list); + HierarchyItem::load_skip_list(type_skip_list); - // load libpostal - if (!libpostal_setup() || !libpostal_setup_language_classifier()) - { - std::cerr << "Failure to load libpostal" << std::endl; - return; - } + Hierarchy hierarchy; - std::vector aed(address_expansion_dir.begin(), address_expansion_dir.end()); - aed.push_back(0); - if ( (address_expansion_dir.empty() && !libpostal_setup_parser() ) || - (!address_expansion_dir.empty() && !libpostal_setup_parser_datadir(aed.data())) ) + std::string postgres_dblink; + const char *env = std::getenv(GEOCODER_IMPORTER_POSTGRES); + if (env) + postgres_dblink = env; + else { - std::cerr << "Failure to load libpostal parser" << std::endl; - return; - } - - // normalize all names - size_t num_expansions; - size_t num_doubles_dropped = 0; - libpostal_normalize_options_t options = libpostal_get_default_options(); - std::vector charbuff; - for (tonorm &d: data) - { - charbuff.resize(d.name.length() + 1); - std::copy(d.name.c_str(), d.name.c_str() + d.name.length() + 1, charbuff.begin()); - - if (verbose) std::cout << d.name << ": " << std::flush; - - // check for sanity before we proceed with expansion - if ( d.name.length() > LENGTH_STARTING_SUSP_CHECK ) - { - size_t digits_space = 0; - for (size_t i=0; i < d.name.length(); ++i) - if ( std::isdigit( charbuff[i] ) || std::isspace( charbuff[i] ) ) - digits_space++; - - if ( (digits_space*1.0) / d.name.length() > 
0.5 ) - { - std::cout << "Warning: dropping suspicious name: " << d.name << "\n"; - continue; - } - } - - // check if there are too many commas - if ( std::count(d.name.begin(), d.name.end(), ',') > MAX_COMMAS ) - { - std::cout << "Warning: dropping suspicious name - too many commas: " << d.name << "\n"; - continue; - } - - // insert normalized, but not expanded string - { - char *normalized = libpostal_normalize_string(charbuff.data(), LIBPOSTAL_NORMALIZE_DEFAULT_STRING_OPTIONS); - if (normalized != NULL) - { - sqlite3pp::command cmd(db, "INSERT INTO normalized_name (prim_id, name) VALUES (?,?)"); - std::string s = normalized; - cmd.binder() << d.id - << s; - if (cmd.execute() != SQLITE_OK) - { - // std::cerr << "Error inserting: " << d.id << " " << s << std::endl; - num_doubles_dropped++; - } - - free(normalized); - } - } - - char **expansions = libpostal_expand_address(charbuff.data(), options, &num_expansions); - - if ( num_expansions > MAX_NUMBER_OF_EXPANSIONS ) - { - std::cout << "Warning: large number [" << num_expansions << "] of normalization expansions of " << d.name - << " - dropping it from the table [" << d.id << "]\n"; - // for (size_t i=0; i < 10 && i < num_expansions; i++) - // std::cout << " example expansion: " << expansions[i] << "\n"; - // std::cout << "\n"; - - continue; // don't insert it, its probably wrong anyway - } - - for (size_t i = 0; i < num_expansions; i++) - { - sqlite3pp::command cmd(db, "INSERT INTO normalized_name (prim_id, name) VALUES (?,?)"); - std::string s = expansions[i]; - cmd.binder() << d.id - << s; - if (cmd.execute() != SQLITE_OK) - { - // std::cerr << "Error inserting: " << d.id << " " << s << std::endl; - num_doubles_dropped++; - } - - // to cover the street names that have Dr. 
or the firstname - // in the front of the mainly used name, add substrings into - // the normalized table as well - const size_t max_substrings = 2; - size_t pos = 1; - for (size_t sbs=0; sbs < max_substrings && pos < s.length(); ++sbs) - { - bool spacefound = false; - for (; pos'housenumber',housenumber) AS housenumber, postcode, ST_X(centroid) as " + "longitude, ST_Y(centroid) as latitude, osm_id " + "from placex pl left join lateral " + "(with recursive prec as (select place_id, linked_place_id " + "from placex where pl.parent_place_id=placex.place_id union select p.place_id, " + "p.linked_place_id from placex p join prec on p.place_id=prec.linked_place_id) select " + "place_id as parent_place_resolved from prec where linked_place_id is null limit 1) as " + "pres on true "; + + // load primary hierarchy { - sqlite3pp::query qry(db, "SELECT name FROM normalized_name"); - for (auto v : qry) + pqxx::result r = txn.exec_params( + base_query + + "where linked_place_id IS NULL and ST_Intersects(ST_GeomFromGeoJSON($1), " + "geometry) order by admin_level", + border); + size_t count = 0; + for (const pqxx::row &row : r) { - std::string name; - v.getter() >> name; - keyset.push_back(name.c_str()); + ++count; + std::shared_ptr item = std::make_shared(row); + hierarchy.add_item(item); + if (count % printout_step == 0) + std::cout << "Imported records: " << count + << "; Root elements: " << hierarchy.get_root_count() + << "; Missing parents: " << hierarchy.get_missing_count() << std::endl; } } - marisa::Trie trie; - trie.build(keyset); - trie.save(GeoNLP::Geocoder::name_normalized_trie(path).c_str()); - - struct norm - { - std::string name; - sqlid prim_id; - }; - - std::deque data; + // load all linked places and merge with the primary ones { - sqlite3pp::query qry(db, "SELECT name, prim_id FROM normalized_name"); - for (auto v : qry) + pqxx::result r = txn.exec_params( + base_query + + "where linked_place_id IS NOT NULL and ST_Intersects(ST_GeomFromGeoJSON($1), " + 
"geometry) order by admin_level", + border); + size_t count = 0; + size_t failed = 0; + for (const pqxx::row &row : r) { - norm d; - v.getter() >> d.name >> d.prim_id; - data.push_back(d); + ++count; + std::shared_ptr item = std::make_shared(row); + if (!hierarchy.add_linked_item(item)) + failed++; + if (count % printout_step == 0) + std::cout << "Imported linked records: " << count + << "; Root elements: " << hierarchy.get_root_count() + << "; Missing parents: " << hierarchy.get_missing_count() << std::endl; } + std::cout << "Imported linked records: " << count << " / failed to import: " << failed + << "; Root elements: " << hierarchy.get_root_count() + << "; Missing parents: " << hierarchy.get_missing_count() << std::endl; } - std::map< GeoNLP::Geocoder::index_id_key, std::vector > bdata; - for (auto d: data) + // find missing parents for root nodes + std::cout << "Fill missing hierarchies. Root size: " << hierarchy.get_root_count() << "\n"; + for (hindex parent = hierarchy.get_next_nonzero_root_parent(); parent;) { - marisa::Agent agent; - agent.set_query(d.name.c_str()); - if (trie.lookup(agent)) - { - GeoNLP::Geocoder::index_id_key k = agent.key().id(); - if ( bdata.count(k) == 0 ) bdata[k] = std::vector(); - bdata[k].push_back( d.prim_id ); - } - else + pqxx::result r = txn.exec_params(base_query + "where place_id=$1", parent); + bool found = false; + for (auto row : r) { - std::cerr << "Error: cannot find in MARISA trie: " << d.name << std::endl; + std::shared_ptr item = std::make_shared(row); + hierarchy.add_item(item); + found = true; } - } - - { - // create the database object - kyotocabinet::HashDB db; - - db.tune_options(kyotocabinet::HashDB::TSMALL | kyotocabinet::HashDB::TLINEAR); - db.tune_alignment(0); - db.tune_defrag(8); - - // open the database - if (!db.open(GeoNLP::Geocoder::name_normalized_id(path).c_str(), - kyotocabinet::HashDB::OWRITER | kyotocabinet::HashDB::OCREATE)) - { - std::cerr << "open error: " << db.error().name() << std::endl; - 
return; - } - - std::vector keys; - for (auto a: bdata) - keys.push_back( GeoNLP::Geocoder::make_id_key(a.first) ); - - std::sort( keys.begin(), keys.end() ); - - for (auto key: keys) - { - std::vector &d = bdata[GeoNLP::Geocoder::get_id_key(key)]; - std::sort(d.begin(), d.end()); - std::string value = GeoNLP::Geocoder::make_id_value( d ); - if (!db.set(key, value)) - { - std::cerr << "set error: " << db.error().name() << std::endl; - return; - } - } - - std::cout << "Number of records in normalized id database: " << db.count() << "\n"; - db.close(); - } - - db.execute("DROP TABLE IF EXISTS normalized_name"); -} - - -// trim from start (in place) -static inline void ltrim(std::string &s) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { - return !std::isspace(ch); - })); -} - -// trim from end (in place) -static inline void rtrim(std::string &s) { - s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { - return !std::isspace(ch); - }).base(), s.end()); -} - -// trim from both ends (in place) -static inline void trim(std::string &s) { - ltrim(s); - rtrim(s); -} - - -//////////////////////////////////////////////////////////////////////////// -// MAIN - -int main(int argc, char* argv[]) -{ - if (argc == 2) - { - std::string option = argv[1]; - if ( option == "--version" ) - std::cout << GeoNLP::Geocoder::version << "\n"; - return 0; - } - - if (argc<4) - { - std::cerr << "importer [postal_codes.csv] [postal_country_parser_code] [address_parser_directory] [verbose]\n"; - std::cerr << "When using optional parameters, you have to specify all of the perceiving ones\n"; - return 1; - } - - std::string map = argv[1]; - std::string database_path = argv[2]; - std::string whitelist_file = argv[3]; - std::string postcodes_fname; - std::string postal_country_parser; - std::string postal_address_parser_dir; - bool verbose_address_expansion = false; - - if (argc > 4) postcodes_fname = argv[4]; - if (argc > 5) postal_country_parser = argv[5]; - if (argc > 6) 
postal_address_parser_dir = argv[6]; - if (argc > 7 && strcmp("verbose", argv[7])==0 ) verbose_address_expansion = true; - - std::cout << "Starting import: " << map << " -> " << database_path << "\n"; - - // fill white list - { - std::ifstream f(whitelist_file); - std::string line; - if (!f) - { - std::cerr << "Failed to open whitelist file: " << whitelist_file << std::endl; - return -1; - } - - while (std::getline(f, line)) - { - trim(line); - if (!line.empty()) - m_poi_types_whitelist.insert(line); - std::cout << "Whitelisted: " << line << "\n"; - } - } + if (!found) + { + std::cerr << "Missing parent with ID " << parent << " . Stopping import\n"; + hierarchy.print_root_with_parent_id(parent); + std::cerr << "\nSQL:\n" << base_query + "where place_id=" << parent << "\n"; - // load the database and proceed with import - osmscout::DatabaseParameter databaseParameter; - osmscout::DatabaseRef database(new osmscout::Database(databaseParameter)); + return -1; + } - if (!database->Open(map.c_str())) - { - std::cerr << "Cannot open libosmscout database: " << map << std::endl; - return 1; + parent = hierarchy.get_next_nonzero_root_parent(); } - nameReader = new NameReader(*database->GetTypeConfig()); - nameAltReader = new NameAltReader(*database->GetTypeConfig()); - phoneReader = new PhoneReader(*database->GetTypeConfig()); - postalCodeReader = new PostalCodeReader(*database->GetTypeConfig()); - websiteReader = new WebsiteReader(*database->GetTypeConfig()); + // remove all items from hierarchy that are not supposed to be there + std::cout << "Cleanup hierarchy\n"; + hierarchy.cleanup(); - // reverse geocode all postal codes submitted as a separate file - if (!postcodes_fname.empty()) + // find missing countries and move root nodes under them if possible + std::cout << "Try to fill missing parents through countries. 
Root size: " + << hierarchy.get_root_count() << "\n"; + for (std::string country : hierarchy.get_root_countries()) { - std::ifstream fin(postcodes_fname.c_str()); - osmscout::LocationDescriptionService locationService(database); - std::string line; - size_t postal_counter=0; - while (getline(fin,line)) + for (auto row : txn.exec_params( + base_query + "where rank_address = 4 and country_code = $1 limit 1", country)) { - boost::tokenizer< boost::escaped_list_separator > tok(line); - std::vector cells; - cells.assign(tok.begin(),tok.end()); - if (cells.size() != 4 || cells[0] == "id") - continue; - std::string code = cells[1]; - double latitude, longitude; - try { - latitude = std::stod(cells[2]); - longitude = std::stod(cells[3]); - } - catch (const std::invalid_argument &e) { - continue; // skip this line, it probably doesn't have coordinates - } - - osmscout::GeoCoord coordinates(latitude, longitude); - std::list results; - { - static int o = 0; - if (o % 10000 == 0) { - std::cout << "Postal codes parsed: " << postal_counter << std::endl; - o = 0; + hindex id = row["place_id"].as(0); + if (!hierarchy.has_item(id)) + { + std::shared_ptr item = std::make_shared(row); + hierarchy.add_item(item); } - ++o; - ++postal_counter; - } - if (locationService.ReverseLookupRegion(coordinates, results)) - if (results.size() > 0) - { - const auto r = results.back(); - if (r.adminRegion) - { - osmscout::FileOffset admin = r.adminRegion->object.GetFileOffset(); - PostalCode c; - c.code = GeoNLP::Postal::normalize_postalcode(code); - c.latitude = latitude; - c.longitude = longitude; - additional_postal_codes[admin].push_back(c); - // std::cout << code << " " << r.adminRegion->name << " " - // << r.adminRegion->object.GetFileOffset() << " " << r.adminRegion->parentRegionOffset << " " - // << results.size() << "\n"; - } - } + hierarchy.set_country(country, id); } } - //return -1; - // reverse geocoding: done + hierarchy.finalize(); + txn.commit(); // finalize postgres transactions - 
sqlite3pp::database db(GeoNLP::Geocoder::name_primary(database_path).c_str()); + // hierarchy.print(false); - db.execute( "PRAGMA journal_mode = OFF" ); - db.execute( "PRAGMA synchronous = OFF" ); - db.execute( "PRAGMA cache_size = 2000000" ); - db.execute( "PRAGMA temp_store = 2" ); - db.execute( "BEGIN TRANSACTION" ); - db.execute( "DROP TABLE IF EXISTS type" ); - db.execute( "DROP TABLE IF EXISTS object_primary" ); - db.execute( "DROP TABLE IF EXISTS object_primary_tmp" ); - db.execute( "DROP TABLE IF EXISTS object_primary_tmp2" ); - db.execute( "DROP TABLE IF EXISTS boxids" ); - db.execute( "DROP TABLE IF EXISTS object_type" ); - db.execute( "DROP TABLE IF EXISTS object_type_tmp" ); - db.execute( "DROP TABLE IF EXISTS hierarchy" ); - db.execute( "DROP TABLE IF EXISTS object_primary_rtree" ); + // Saving data into SQLite + sqlite3pp::database db(GeoNLP::Geocoder::name_primary(database_path).c_str()); - db.execute( "CREATE " TEMPORARY " TABLE object_primary_tmp (id INTEGER PRIMARY KEY AUTOINCREMENT, scoutid TEXT, name TEXT, name_extra TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website TEXT, parent INTEGER, latitude REAL, longitude REAL)"); - db.execute( "CREATE " TEMPORARY " TABLE object_type_tmp (prim_id INTEGER, type TEXT NOT NULL, FOREIGN KEY (prim_id) REFERENCES objects_primary_tmp(id))" ); - db.execute( "CREATE TABLE hierarchy (prim_id INTEGER PRIMARY KEY, last_subobject INTEGER, " - "FOREIGN KEY (prim_id) REFERENCES objects_primary(id), FOREIGN KEY (last_subobject) REFERENCES objects_primary(id))" ); + db.execute("PRAGMA journal_mode = OFF"); + db.execute("PRAGMA synchronous = OFF"); + db.execute("PRAGMA cache_size = 2000000"); + db.execute("PRAGMA temp_store = 2"); + db.execute("BEGIN TRANSACTION"); + db.execute("DROP TABLE IF EXISTS type"); + db.execute("DROP TABLE IF EXISTS object_primary"); + db.execute("DROP TABLE IF EXISTS object_primary_tmp"); + db.execute("DROP TABLE IF EXISTS object_primary_tmp2"); + db.execute("DROP TABLE IF EXISTS 
boxids"); + db.execute("DROP TABLE IF EXISTS object_type"); + db.execute("DROP TABLE IF EXISTS object_type_tmp"); + db.execute("DROP TABLE IF EXISTS hierarchy"); + db.execute("DROP TABLE IF EXISTS object_primary_rtree"); + + db.execute("CREATE " TEMPORARY " TABLE object_primary_tmp (" + "id INTEGER PRIMARY KEY AUTOINCREMENT, postgres_id INTEGER, name TEXT, name_extra " + "TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website TEXT, parent INTEGER, " + "latitude REAL, longitude REAL)"); + db.execute("CREATE " TEMPORARY " TABLE object_type_tmp (prim_id INTEGER, type TEXT NOT NULL, " + "FOREIGN KEY (prim_id) REFERENCES objects_primary_tmp(id))"); + db.execute("CREATE TABLE hierarchy (prim_id INTEGER PRIMARY KEY, last_subobject INTEGER, " + "FOREIGN KEY (prim_id) REFERENCES objects_primary(id), FOREIGN KEY (last_subobject) " + "REFERENCES objects_primary(id))"); std::cout << "Preliminary filling of the database" << std::endl; - - osmscout::LocationIndexRef locationIndex = database->GetLocationIndex(); - AdminVisitor vis_admin(database, db); - locationIndex->VisitAdminRegions(vis_admin); - IDs.write_hierarchy(db); + hierarchy.write(db); // cleanup from duplicated names - // db.execute( "UPDATE object_primary_tmp SET name_extra=NULL WHERE name_extra='' OR name=name_extra" ); - // db.execute( "UPDATE object_primary_tmp SET name_en=NULL WHERE name_en='' OR name=name_en" ); - db.execute( "UPDATE object_primary_tmp SET name_extra='' WHERE name=name_extra" ); - db.execute( "UPDATE object_primary_tmp SET name_en='' WHERE name=name_en" ); + db.execute("UPDATE object_primary_tmp SET name_extra='' WHERE name=name_extra"); + db.execute("UPDATE object_primary_tmp SET name_en='' WHERE name=name_en"); std::cout << "Reorganizing database tables" << std::endl; - db.execute( "CREATE TABLE type (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT)" ); - db.execute( "INSERT INTO type (name) SELECT DISTINCT type FROM object_type_tmp" ); - db.execute( "CREATE " TEMPORARY " TABLE 
object_primary_tmp2 (id INTEGER PRIMARY KEY AUTOINCREMENT, " - "name TEXT, name_extra TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website TEXT, parent INTEGER, type_id INTEGER, latitude REAL, longitude REAL, boxstr TEXT, " - "FOREIGN KEY (type_id) REFERENCES type(id))"); - - db.execute( "INSERT INTO object_primary_tmp2 (id, name, name_extra, name_en, phone, postal_code, website, parent, type_id, latitude, longitude, boxstr) " - "SELECT p.id, p.name, p.name_extra, p.name_en, p.phone, p.postal_code, p.website, p.parent, type.id, p.latitude, p.longitude, " - // LINE BELOW DETERMINES ROUNDING USED FOR BOXES - "CAST(CAST(p.latitude*100 AS INTEGER) AS TEXT) || ',' || CAST(CAST(p.longitude*100 AS INTEGER) AS TEXT) " - "FROM object_primary_tmp p JOIN object_type_tmp tt ON p.id=tt.prim_id " - "JOIN type ON tt.type=type.name" ); - - db.execute( "CREATE " TEMPORARY " TABLE boxids (id INTEGER PRIMARY KEY AUTOINCREMENT, boxstr TEXT, CONSTRAINT struni UNIQUE (boxstr))" ); - db.execute( "INSERT INTO boxids (boxstr) SELECT DISTINCT boxstr FROM object_primary_tmp2" ); - - db.execute( "CREATE TABLE object_primary (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT, name_extra TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website TEXT, " - "parent INTEGER, type_id INTEGER, latitude REAL, longitude REAL, box_id INTEGER, " - "FOREIGN KEY (type_id) REFERENCES type(id))" ); - db.execute( "INSERT INTO object_primary (id, name, name_extra, name_en, phone, postal_code, website, parent, type_id, latitude, longitude, box_id) " - "SELECT o.id, name, name_extra, name_en, phone, postal_code, website, parent, type_id, latitude, longitude, b.id FROM object_primary_tmp2 o JOIN boxids b ON o.boxstr=b.boxstr" ); - - db.execute( "DROP INDEX IF EXISTS idx_object_primary_box" ); - db.execute( "CREATE INDEX idx_object_primary_box ON object_primary (box_id)" ); - - db.execute( "DROP INDEX IF EXISTS idx_object_primary_postal_code" ); - db.execute( "CREATE INDEX idx_object_primary_postal_code ON 
object_primary (postal_code)" ); + db.execute("CREATE TABLE type (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT)"); + db.execute("INSERT INTO type (name) SELECT DISTINCT type FROM object_type_tmp"); + db.execute("CREATE " TEMPORARY + " TABLE object_primary_tmp2 (id INTEGER PRIMARY KEY AUTOINCREMENT, " + "name TEXT, name_extra TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website " + "TEXT, parent INTEGER, type_id INTEGER, latitude REAL, longitude REAL, boxstr TEXT, " + "FOREIGN KEY (type_id) REFERENCES type(id))"); + + db.execute("INSERT INTO object_primary_tmp2 (id, name, name_extra, name_en, phone, postal_code, " + "website, parent, type_id, latitude, longitude, boxstr) " + "SELECT p.id, p.name, p.name_extra, p.name_en, p.phone, p.postal_code, p.website, " + "p.parent, type.id, p.latitude, p.longitude, " + // LINE BELOW DETERMINES ROUNDING USED FOR BOXES + "CAST(CAST(p.latitude*100 AS INTEGER) AS TEXT) || ',' || CAST(CAST(p.longitude*100 AS " + "INTEGER) AS TEXT) " + "FROM object_primary_tmp p JOIN object_type_tmp tt ON p.id=tt.prim_id " + "JOIN type ON tt.type=type.name"); + + db.execute("CREATE " TEMPORARY " TABLE boxids (id INTEGER PRIMARY KEY AUTOINCREMENT, boxstr " + "TEXT, CONSTRAINT struni UNIQUE (boxstr))"); + db.execute("INSERT INTO boxids (boxstr) SELECT DISTINCT boxstr FROM object_primary_tmp2"); + + db.execute("CREATE TABLE object_primary (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT, " + "name_extra TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website TEXT, " + "parent INTEGER, type_id INTEGER, latitude REAL, longitude REAL, box_id INTEGER, " + "FOREIGN KEY (type_id) REFERENCES type(id))"); + db.execute( + "INSERT INTO object_primary (id, name, name_extra, name_en, phone, postal_code, website, " + "parent, type_id, latitude, longitude, box_id) " + "SELECT o.id, name, name_extra, name_en, phone, postal_code, website, parent, type_id, " + "latitude, longitude, b.id FROM object_primary_tmp2 o JOIN boxids b ON o.boxstr=b.boxstr"); + + 
db.execute("DROP INDEX IF EXISTS idx_object_primary_box"); + db.execute("CREATE INDEX idx_object_primary_box ON object_primary (box_id)"); + + db.execute("DROP INDEX IF EXISTS idx_object_primary_postal_code"); + db.execute("CREATE INDEX idx_object_primary_postal_code ON object_primary (postal_code)"); std::cout << "Normalize using libpostal" << std::endl; @@ -1106,32 +339,52 @@ int main(int argc, char* argv[]) // Create R*Tree for nearest neighbor search std::cout << "Populating R*Tree" << std::endl; - db.execute( "CREATE VIRTUAL TABLE object_primary_rtree USING rtree(id, minLat, maxLat, minLon, maxLon)" ); - db.execute( "INSERT INTO object_primary_rtree (id, minLat, maxLat, minLon, maxLon) " - "SELECT box_id, min(latitude), max(latitude), min(longitude), max(longitude) from object_primary group by box_id" ); - + db.execute( + "CREATE VIRTUAL TABLE object_primary_rtree USING rtree(id, minLat, maxLat, minLon, maxLon)"); + db.execute("INSERT INTO object_primary_rtree (id, minLat, maxLat, minLon, maxLon) " + "SELECT box_id, min(latitude), max(latitude), min(longitude), max(longitude) from " + "object_primary group by box_id"); + + // Stats view + db.execute("DROP VIEW IF EXISTS type_stats"); + db.execute( + "CREATE VIEW type_stats AS SELECT t.name as type_name, COUNT(*) AS cnt FROM object_primary o " + "JOIN \"type\" t ON t.id = o.type_id GROUP BY t.name ORDER BY cnt desc"); + { + std::cout << "List of most popular imported types\n"; + sqlite3pp::query qry(db, "SELECT type_name, cnt FROM type_stats ORDER BY cnt DESC LIMIT 25"); + for (auto v : qry) + { + std::string name; + int cnt; + v.getter() >> name >> cnt; + std::cout << " " << name << "\t" << cnt << "\n"; + } + } // Recording version - db.execute( "DROP TABLE IF EXISTS meta" ); - db.execute( "CREATE TABLE meta (key TEXT, value TEXT)" ); + db.execute("DROP TABLE IF EXISTS meta"); + db.execute("CREATE TABLE meta (key TEXT, value TEXT)"); { - sqlite3pp::command cmd(db,"INSERT INTO meta (key, value) VALUES (?, ?)"); 
- std::ostringstream ss; ss << GeoNLP::Geocoder::version; - cmd.binder() << "version" - << ss.str().c_str(); + sqlite3pp::command cmd(db, "INSERT INTO meta (key, value) VALUES (?, ?)"); + std::ostringstream ss; + ss << GeoNLP::Geocoder::version; + cmd.binder() << "version" << ss.str().c_str(); if (cmd.execute() != SQLITE_OK) std::cerr << "WriteSQL: error inserting version information\n"; } - if ( !postal_country_parser.empty() ) + if (!postal_country_parser.empty()) { - std::cout << "Recording postal parser country preference: " << postal_country_parser << "\n"; - std::string cmd = "INSERT INTO meta (key, value) VALUES (\"postal:country:parser\", \"" + postal_country_parser + "\")"; - db.execute( cmd.c_str() ); + std::cout << "Recording postal parser country preference: " << postal_country_parser << "\n"; + std::string cmd = "INSERT INTO meta (key, value) VALUES (\"postal:country:parser\", \"" + + postal_country_parser + "\")"; + db.execute(cmd.c_str()); } - db.execute( "END TRANSACTION" ); - db.execute( "VACUUM" ); - db.execute( "ANALYZE" ); + // finalize + db.execute("END TRANSACTION"); + db.execute("VACUUM"); + db.execute("ANALYZE"); std::cout << "Done\n"; diff --git a/importer/src/normalization.cpp b/importer/src/normalization.cpp new file mode 100644 index 0000000..c2b2651 --- /dev/null +++ b/importer/src/normalization.cpp @@ -0,0 +1,299 @@ +#include "normalization.h" +#include "config.h" +#include "geocoder.h" + +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////// +/// Libpostal normalization with search string expansion +void normalize_libpostal(sqlite3pp::database &db, std::string address_expansion_dir, bool verbose) +{ + struct tonorm + { + std::string name; + sqlid id; + }; + + std::deque data; + sqlite3pp::query qry(db, "SELECT id, name, name_extra, name_en FROM object_primary_tmp"); + for (auto v : qry) + { + tonorm d; + sqlid id; + char const *name, *name_extra, *name_en; + v.getter() >> id >> 
name >> name_extra >> name_en; + + if (name == nullptr) + continue; // no need to add empty name into search index + + d.id = id; + + d.name = name; + data.push_back(d); + if (name_extra) + { + d.name = name_extra; + data.push_back(d); + } + if (name_en) + { + d.name = name_en; + data.push_back(d); + } + } + + // make a new table for normalized names + db.execute("DROP TABLE IF EXISTS normalized_name"); + db.execute( + "CREATE " TEMPORARY + " TABLE normalized_name (prim_id INTEGER, name TEXT NOT NULL, PRIMARY KEY (name, prim_id))"); + + // load libpostal + if (!libpostal_setup() || !libpostal_setup_language_classifier()) + { + std::cerr << "Failure to load libpostal" << std::endl; + return; + } + + std::vector aed(address_expansion_dir.begin(), address_expansion_dir.end()); + aed.push_back(0); + if ((address_expansion_dir.empty() && !libpostal_setup_parser()) + || (!address_expansion_dir.empty() && !libpostal_setup_parser_datadir(aed.data()))) + { + std::cerr << "Failure to load libpostal parser" << std::endl; + return; + } + + // normalize all names + size_t num_expansions; + size_t num_doubles_dropped = 0; + libpostal_normalize_options_t options = libpostal_get_default_options(); + std::vector charbuff; + for (tonorm &d : data) + { + charbuff.resize(d.name.length() + 1); + std::copy(d.name.c_str(), d.name.c_str() + d.name.length() + 1, charbuff.begin()); + + if (verbose) + std::cout << d.name << ": " << std::flush; + + // check for sanity before we proceed with expansion + if (d.name.length() > LENGTH_STARTING_SUSP_CHECK) + { + size_t digits_space = 0; + for (size_t i = 0; i < d.name.length(); ++i) + if (std::isdigit(charbuff[i]) || std::isspace(charbuff[i])) + digits_space++; + + if ((digits_space * 1.0) / d.name.length() > 0.5) + { + std::cout << "Warning: dropping suspicious name: " << d.name << "\n"; + continue; + } + } + + // check if there are too many commas + if (std::count(d.name.begin(), d.name.end(), ',') > MAX_COMMAS) + { + std::cout << "Warning: 
dropping suspicious name - too many commas: " << d.name << "\n"; + continue; + } + + // insert normalized, but not expanded string + { + char *normalized = libpostal_normalize_string(charbuff.data(), + LIBPOSTAL_NORMALIZE_DEFAULT_STRING_OPTIONS); + if (normalized != NULL) + { + sqlite3pp::command cmd(db, "INSERT INTO normalized_name (prim_id, name) VALUES (?,?)"); + std::string s = normalized; + cmd.binder() << d.id << s; + if (cmd.execute() != SQLITE_OK) + { + // std::cerr << "Error inserting: " << d.id << " " << s << std::endl; + num_doubles_dropped++; + } + + free(normalized); + } + } + + char **expansions = libpostal_expand_address(charbuff.data(), options, &num_expansions); + + if (num_expansions > MAX_NUMBER_OF_EXPANSIONS) + { + std::cout << "Warning: large number [" << num_expansions + << "] of normalization expansions of " << d.name + << " - dropping it from the table [" << d.id << "]\n"; + // for (size_t i=0; i < 10 && i < num_expansions; i++) + // std::cout << " example expansion: " << expansions[i] << "\n"; + // std::cout << "\n"; + + continue; // don't insert it, its probably wrong anyway + } + + for (size_t i = 0; i < num_expansions; i++) + { + sqlite3pp::command cmd(db, "INSERT INTO normalized_name (prim_id, name) VALUES (?,?)"); + std::string s = expansions[i]; + cmd.binder() << d.id << s; + if (cmd.execute() != SQLITE_OK) + { + // std::cerr << "Error inserting: " << d.id << " " << s << std::endl; + num_doubles_dropped++; + } + + // to cover the street names that have Dr. 
or the firstname + // in the front of the mainly used name, add substrings into + // the normalized table as well + const size_t max_substrings = 2; + size_t pos = 1; + for (size_t sbs = 0; sbs < max_substrings && pos < s.length(); ++sbs) + { + bool spacefound = false; + for (; pos < s.length(); ++pos) + { + char c = s[pos]; + if (c == ' ') + spacefound = true; + if (spacefound && c != ' ') + break; + } + + if (pos < s.length()) + { + try + { + sqlite3pp::command cmd( + db, "INSERT INTO normalized_name (prim_id, name) VALUES (?,?)"); + std::string s = expansions[i]; + cmd.binder() << d.id << s.substr(pos); + if (cmd.execute() != SQLITE_OK) + { + // std::cerr << "Error inserting: " << d.id << " " << s << std::endl; + num_doubles_dropped++; + } + } + catch (sqlite3pp::database_error e) + { + num_doubles_dropped++; + } + } + } + } + + // Free expansions + libpostal_expansion_array_destroy(expansions, num_expansions); + + if (verbose) + std::cout << "done" << std::endl; + } + + std::cout << "Redundant records skipped: " << num_doubles_dropped << "\n"; + + // Teardown libpostal + libpostal_teardown_parser(); + libpostal_teardown(); + libpostal_teardown_language_classifier(); +} + +//////////////////////////////////////////////////////////////////////////// +/// Libpostal normalization with search string expansion +void normalized_to_final(sqlite3pp::database &db, std::string path) +{ + std::cout << "Inserting normalized data into MARISA trie" << std::endl; + + marisa::Keyset keyset; + + { + sqlite3pp::query qry(db, "SELECT name FROM normalized_name"); + for (auto v : qry) + { + std::string name; + v.getter() >> name; + keyset.push_back(name.c_str()); + } + } + + marisa::Trie trie; + trie.build(keyset); + trie.save(GeoNLP::Geocoder::name_normalized_trie(path).c_str()); + + struct norm + { + std::string name; + sqlid prim_id; + }; + + std::deque data; + { + sqlite3pp::query qry(db, "SELECT name, prim_id FROM normalized_name"); + for (auto v : qry) + { + norm d; + 
v.getter() >> d.name >> d.prim_id; + data.push_back(d); + } + } + + std::map > bdata; + for (auto d : data) + { + marisa::Agent agent; + agent.set_query(d.name.c_str()); + if (trie.lookup(agent)) + { + GeoNLP::Geocoder::index_id_key k = agent.key().id(); + if (bdata.count(k) == 0) + bdata[k] = std::vector(); + bdata[k].push_back(d.prim_id); + } + else + { + std::cerr << "Error: cannot find in MARISA trie: " << d.name << std::endl; + } + } + + { + // create the database object + kyotocabinet::HashDB db; + + db.tune_options(kyotocabinet::HashDB::TSMALL | kyotocabinet::HashDB::TLINEAR); + db.tune_alignment(0); + db.tune_defrag(8); + + // open the database + if (!db.open(GeoNLP::Geocoder::name_normalized_id(path).c_str(), + kyotocabinet::HashDB::OWRITER | kyotocabinet::HashDB::OCREATE)) + { + std::cerr << "open error: " << db.error().name() << std::endl; + return; + } + + std::vector keys; + for (auto a : bdata) + keys.push_back(GeoNLP::Geocoder::make_id_key(a.first)); + + std::sort(keys.begin(), keys.end()); + + for (auto key : keys) + { + std::vector &d = bdata[GeoNLP::Geocoder::get_id_key(key)]; + std::sort(d.begin(), d.end()); + std::string value = GeoNLP::Geocoder::make_id_value(d); + if (!db.set(key, value)) + { + std::cerr << "set error: " << db.error().name() << std::endl; + return; + } + } + + std::cout << "Number of records in normalized id database: " << db.count() << "\n"; + + db.close(); + } + + db.execute("DROP TABLE IF EXISTS normalized_name"); +} diff --git a/importer/src/normalization.h b/importer/src/normalization.h new file mode 100644 index 0000000..a8458bc --- /dev/null +++ b/importer/src/normalization.h @@ -0,0 +1,11 @@ +#ifndef GEOCODER_NORMALIZATION_H +#define GEOCODER_NORMALIZATION_H + +#include +#include + +void normalize_libpostal(sqlite3pp::database &db, std::string address_expansion_dir, bool verbose); + +void normalized_to_final(sqlite3pp::database &db, std::string path); + +#endif diff --git a/importer/src/utils.cpp 
b/importer/src/utils.cpp new file mode 100644 index 0000000..d3aa553 --- /dev/null +++ b/importer/src/utils.cpp @@ -0,0 +1,38 @@ +#include "utils.h" + +#include + +using json = nlohmann::json; + +std::string get_with_def(const std::map &m, const std::string &key, + const std::string &defval) +{ + auto it = m.find(key); + if (it == m.end()) + return defval; + return it->second; +} + +std::map parse_to_map(const std::string &js) +{ + std::map m; + if (js.size()) + { + json j = json::parse(js); + for (auto v : j.items()) + m[v.key()] = v.value(); + } + return m; +} + +bool same_starts_with(const std::string &start, const std::string &s1, const std::string &s2) +{ + return s1.rfind(start, 0) == 0 && s2.rfind(start, 0) == 0; +} + +std::string geocoder_type(const std::string &t_class, const std::string &t_value) +{ + if (t_value == "yes" || t_value.empty()) + return t_class; + return t_class + "_" + t_value; +} \ No newline at end of file diff --git a/importer/src/utils.h b/importer/src/utils.h new file mode 100644 index 0000000..74a242e --- /dev/null +++ b/importer/src/utils.h @@ -0,0 +1,16 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include + +std::string get_with_def(const std::map &m, const std::string &key, + const std::string &defval = std::string()); + +std::string geocoder_type(const std::string &t_class, const std::string &t_value); + +std::map parse_to_map(const std::string &js); + +bool same_starts_with(const std::string &start, const std::string &s1, const std::string &s2); + +#endif diff --git a/importer/stylesheet/map.ost b/importer/stylesheet/map.ost deleted file mode 100644 index 6412a99..0000000 --- a/importer/stylesheet/map.ost +++ /dev/null @@ -1,2564 +0,0 @@ -OST - -/* - Types: - A type can be support for any of the following types: - * NODE - * WAY: Ways automatically get assigned the following features: Layer - * AREA - * RELATION - - Meaning of the different OPTIONS: - PATH[[FOOT] [BICYLCE] [CAR]]: - This way or area is a traversable path 
and possibly routable for the given vehicles. - - See: http://wiki.openstreetmap.org/wiki/OSM_tags_for_routing/Access-Restrictions - - If something is a path, the following features are automatically assigned: - * Width - * Grade - * Bridge - * Tunnel - * Roundabout - If something is routable, the following features are automatically assigned: - * Access - * MaxSpeed - OPTIMIZE_LOW_ZOOM: - Optimize this area or way for idplaying in low zoom by - reducing visible complexity - PIN_WAY: - This is a way, even if the path is closed - MULTIPOLYGON: - Type should be handled as multipolygon even if type is not set to multipolygon. - ADMIN_REGION: - The given area or node describes an administrate region - that should be part of the region tree for the city/street - index - POI: - The given area, way or node is a POI and should be indexed - by its name in relation to the enclosing region. If something is a POI and has the - feature "Name" - It automatically get the following features assigned: - * Location - * Address - * Website - * Phone - ADDRESS: - Objects should be indexed as address - It automatically get the following features assigned: - * Location - * Address - LOCATION: - The given way or area should be indexed as location. - MERGE_AREAS: - Areas of this type that "touch" each other and the same attribute values - will get merged. - IGNORESEALAND: - Ignore this type for calculation of land masses (because objects of this type can - occur on sea, too, and thus have no distinguishing character). - IGNORE: - Ignore objects of this type -*/ - -/* - List of max speeds aliases. Normally value for the tag "maxspeed" should always - have a numeric value and the source of the speed limit should be placed into - the value of the tag "source:maxspeed" containing an alias name. - In some country this is not (only) the case, the alias is also sometimes - placed into "maxspeed" itself. This table helps to resolve these aliases - to numeric values again. 
- - https://wiki.openstreetmap.org/wiki/OSM_tags_for_routing/Maxspeed - https://wiki.openstreetmap.org/wiki/Speed_limits -*/ -MAX SPEEDS - SPEED "CZ:urban" = 50 km/h - SPEED "CZ:rural" = 90 km/h - SPEED "CZ:motorway" = 130 km/h - - SPEED "DE:living_street" = 10 km/h - SPEED "DE:urban" = 50 km/h - SPEED "DE:rural" = 100 km/h - - SPEED "FR:walk" = 6 km/h - SPEED "FR:urban" = 50 km/h - SPEED "FR:rural" = 90 km/h - SPEED "FR:motorway" = 130 km/h - - SPEED "SK:urban" = 50 km/h - SPEED "SK:rural" = 90 km/h - SPEED "SK:motorway" = 130 km/h - - SPEED "IT:urban" = 50 km/h - SPEED "IT:rural" = 90 km/h - SPEED "IT:trunk" = 110 km/h - SPEED "IT:motorway" = 130 km/h - - SPEED "RU:living_street" = 20 km/h - SPEED "RU:urban" = 60 km/h - SPEED "RU:rural" = 90 km/h - SPEED "RU:motorway" = 110 km/h - -/* - Allows you to map surface descriptions to a grade value ([1-5]). - Grade 1 ist very well, grade 5 is very bad. -*/ -GRADES - SURFACE GRADE 1 { - "paved" - "asphalt" - "cobblestone" - "cobblestone:flattened" - "concrete" - "concrete:lanes" - "concrete:plates" - "paving_stones" - "paving_stones:20" - "paving_stones:30" - "sett" - "tarred" - "tartan" - } - - SURFACE GRADE 2 { - "ash" - "clay" - "compacted" - "compacted_gravel" - "fine_gravel" - "gravel" - "gravel;grass" - "grass_paver" - "metal" - "pebblestone" - "stone" - "wood" - } - - SURFACE GRADE 3 { - "unpaved" - "dirt" - "dirt/sand" - "earth" - "grass" - "grass;earth" - "ground" - "mud" - "sand" - "soil" - } - - SURFACE GRADE 4 { - "artificial_turf" - "bark_mulch" - } - -FEATURES - FEATURE Address - DESC - en: "Address" - de: "Adresse" - - FEATURE AdminLevel - DESC - en: "administrative level" - de: "Administrative Hierachie" - - FEATURE Destination - DESC - en: "destination" - de: "Ziel" - - FEATURE Name - DESC - en: "object name" - de: "Objektname" - - FEATURE NameAlt - DESC - en: "object name" - de: "Objektname" - - FEATURE Width - DESC - en: "width" - de: "Breite" - -TYPES - - // - // contour lines - // - - TYPE 
elevation_contour_major - = WAY ("contour"=="elevation" AND "contour_ext"=="elevation_major") - {Ele} - IGNORESEALAND PIN_WAY - - TYPE elevation_contour_medium - = WAY ("contour"=="elevation" AND "contour_ext"=="elevation_medium") - {Ele} - IGNORESEALAND PIN_WAY - - TYPE elevation_contour_minor - = WAY ("contour"=="elevation" AND "contour_ext"=="elevation_minor") - {Ele} - IGNORESEALAND PIN_WAY - - // - // Motorways and motorways-like (only for cars) - // - - TYPE highway_motorway - = WAY ("highway"=="motorway") - {Name, NameAlt, Ref, Destination} - PATH[CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - DESC - en: "motorway" - de: "Autobahn" - - TYPE highway_motorway_trunk - = WAY ("highway"=="trunk" AND EXISTS "motorroad" AND !("motorroad" IN ["no","false","0"])) - {Name, NameAlt, Ref, Destination} - PATH[CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - DESC - en: "trunk" - de: "Schnellstraße" - - TYPE highway_motorway_primary - = WAY ("highway"=="primary" AND EXISTS "motorroad" AND !("motorroad" IN ["no","false","0"])) - {Name, NameAlt, Ref, Destination} - PATH[CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - - TYPE highway_motorway_link - = WAY ("highway"=="motorway_link") - {Name, NameAlt, Ref, Destination} - PATH[CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - - // Other roads mainly for cars - - TYPE highway_trunk - = WAY ("highway"=="trunk") - {Name, NameAlt, Ref, Destination} - PATH[FOOT BICYCLE CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - DESC - en: "trunk" - de: "Schnellstraße" - - TYPE highway_trunk_link - = WAY ("highway"=="trunk_link") - {Name, NameAlt, Ref, Destination} - PATH[FOOT BICYCLE CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - - TYPE highway_primary - = WAY ("highway"=="primary") - {Name, NameAlt, Ref} - PATH[FOOT BICYCLE CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - - TYPE highway_primary_link - = WAY ("highway"=="primary_link") - {Name, NameAlt, Ref} - PATH[FOOT BICYCLE CAR] LOCATION PIN_WAY - - TYPE highway_secondary - = WAY ("highway"=="secondary") - {Name, NameAlt, Ref} - 
PATH[FOOT BICYCLE CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - - TYPE highway_secondary_link - = WAY ("highway"=="secondary_link") - {Name, NameAlt, Ref} - PATH[FOOT BICYCLE CAR] LOCATION PIN_WAY - - TYPE highway_tertiary - = WAY ("highway"=="tertiary") - {Name, NameAlt, Ref} - PATH[FOOT BICYCLE CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - - TYPE highway_tertiary_link - = WAY ("highway"=="tertiary_link") - {Name, NameAlt, Ref} - PATH[FOOT BICYCLE CAR] LOCATION PIN_WAY - - TYPE highway_unclassified - = WAY AREA ("highway"=="unclassified") - {Name, NameAlt, Ref} - PATH[FOOT BICYCLE CAR] LOCATION PIN_WAY - - TYPE highway_road - = WAY ("highway"=="road") - {Name, NameAlt, Ref} - PATH[FOOT BICYCLE CAR] LOCATION OPTIMIZE_LOW_ZOOM PIN_WAY - - TYPE highway_residential - = WAY AREA ("highway"=="residential") - {Name, NameAlt, Ref} - PATH[FOOT BICYCLE CAR] LOCATION - - TYPE highway_area__residential IGNORE - = WAY AREA ("area:highway"=="residential") - {Name, NameAlt, Ref} - PATH[FOOT BICYCLE CAR] LOCATION - - TYPE highway_living_street - = WAY AREA ("highway"=="living_street") - {Name, NameAlt} - PATH[FOOT BICYCLE CAR] LOCATION PIN_WAY - DESC - en: "living street" - de: "Spielstraße" - - TYPE highway_service - = WAY AREA ("highway"=="service") - {Name, NameAlt} - PATH[FOOT BICYCLE CAR] LOCATION - DESC - en: "service" - de: "Erschließungsweg" - - // Ways with main designation different to car - - TYPE highway_track - = WAY AREA ("highway"=="track") - {Name, NameAlt} - PATH[FOOT BICYCLE] LOCATION PIN_WAY - DESC - en: "track" - de: "Wirtschaftsweg" - - TYPE highway_pedestrian - = WAY AREA ("highway"=="pedestrian") - {Name, NameAlt} - PATH[FOOT] LOCATION PIN_WAY MERGE_AREAS - DESC - en: "pedestrian" - de: "Fußgängerzone" - - TYPE highway_path - = WAY AREA ("highway"=="path") - {Name, NameAlt} - PATH[FOOT BICYCLE] IGNORESEALAND PIN_WAY - DESC - en: "path" - de: "Merhzweckweg" - - TYPE highway_cycleway - = WAY ("highway"=="cycleway") - {Name, NameAlt} - PATH[BICYCLE] PIN_WAY - DESC - 
en: "cycleway" - de: "Fahradweg" - - TYPE highway_footway - = WAY AREA ("highway"=="footway") OR AREA ("area:highway"=="footway") - {Name, NameAlt} - PATH[FOOT] LOCATION PIN_WAY - DESC - en: "cycleway" - de: "Fußweg" - - TYPE highway_via_ferrata_easy - = WAY (("highway"=="via_ferrata") AND (("via_ferrata_scale"=="0") OR ("via_ferrata_scale"=="1"))) - {Name, NameAlt} - - TYPE highway_via_ferrata_moderate - = WAY (("highway"=="via_ferrata") AND ("via_ferrata_scale"=="2")) - {Name, NameAlt} - - TYPE highway_via_ferrata_difficult - = WAY (("highway"=="via_ferrata") AND (("via_ferrata_scale"=="3") OR ("via_ferrata_scale"=="4"))) - {Name, NameAlt} - - TYPE highway_via_ferrata_extreme - = WAY (("highway"=="via_ferrata") AND (("via_ferrata_scale"=="5") OR ("via_ferrata_scale"=="6"))) - {Name, NameAlt} - - TYPE highway_bridleway - = WAY AREA ("highway"=="bridleway") - {Name, NameAlt} - PATH PIN_WAY - - TYPE highway_steps - = WAY ("highway"=="steps") - {Name, NameAlt} - PATH[FOOT] - - TYPE highway_services - = NODE AREA ("highway"=="services") - {Name, NameAlt} - ADDRESS - - TYPE highway_bus_guideway - = WAY ("highway"=="bus_guideway") - {Name, NameAlt, Ref} - PATH - - TYPE highway_mini_roundabout - = NODE ("highway"=="mini_roundabout") - {Name, NameAlt} - - TYPE highway_bus_stop - = NODE ("highway"=="bus_stop" OR (!("bus" IN ["no", "false", "0"]) AND "public_transport"=="platform")) - {Name, NameAlt} - POI - - TYPE highway_turning_cycle - = NODE ("highway"=="turning_cycle") - {Name, NameAlt} - - TYPE highway_construction - = WAY ("highway"=="construction") - {Name, NameAlt, Width} - - TYPE highway_roundabout - = WAY AREA ("highway"=="roundabout") - {Name, NameAlt} - PATH[FOOT BICYCLE CAR] PIN_WAY - - TYPE highway_motorway_junction - = NODE ("highway"=="motorway_junction") - {Name, NameAlt, Ref} - PATH[CAR] - - TYPE highway_street_lamp - IGNORE // Too many for mobile offline map? 
- = NODE ("highway"=="street_lamp") - - // Since rendering of speed cameras is not allowed everwhere, it is disabled by default - TYPE speed_camera - IGNORE - = NODE ("highway"=="speed_camera") - - TYPE highway_traffic_signals - IGNORE // No visualisation yet - = NODE ("highway"=="traffic_signals") - - // - // Waterways and assorted - // - - TYPE waterway_stream - = WAY ("waterway"=="stream") - {Name, NameAlt, Width} - OPTIMIZE_LOW_ZOOM IGNORESEALAND PIN_WAY - - TYPE waterway_river - = WAY ("waterway"=="river") - {Name, NameAlt, Width, Tunnel} - OPTIMIZE_LOW_ZOOM IGNORESEALAND PIN_WAY - - TYPE waterway_riverbank - = AREA ("waterway"=="riverbank" OR ("natural"=="water" AND "water"=="river")) - {Name, NameAlt, Width} - MERGE_AREAS - - TYPE waterway_canal - = WAY AREA ("waterway"=="canal") - {Name, NameAlt, Width} - OPTIMIZE_LOW_ZOOM IGNORESEALAND - - TYPE waterway_ditch - = WAY ("waterway"=="ditch") - {Name, NameAlt, Width} - OPTIMIZE_LOW_ZOOM IGNORESEALAND - - TYPE waterway_drain - = WAY ("waterway"=="drain") - {Name, NameAlt, Width} - OPTIMIZE_LOW_ZOOM IGNORESEALAND - - TYPE waterway_dock - = NODE AREA ("waterway"=="dock") - {Name, NameAlt, Width} - ADDRESS MERGE_AREAS - - TYPE waterway_lock_gate - = NODE ("waterway"=="lock_gate") - - TYPE waterway_turning_point - = NODE ("waterway"=="turning_point") - - TYPE waterway_boatyard - = NODE AREA ("waterway"=="boatyard") - {Name, NameAlt} - ADDRESS - - TYPE waterway_weir - = NODE WAY AREA ("waterway"=="weir") - {Name, NameAlt, Width} - IGNORESEALAND - - TYPE waterway_dam - = WAY AREA ("waterway"=="dam") - {Name, NameAlt, Width} - IGNORESEALAND - - // - // Railways and assorted - // - - TYPE railway_rail - = WAY ("railway"=="rail") - {Bridge, Tunnel, Width} - //OPTIMIZE_LOW_ZOOM - - TYPE railway_tram - = WAY ("railway"=="tram") - {Bridge, Tunnel, Width} - - TYPE railway_light_rail - = WAY ("railway"=="light_rail") - {Bridge, Tunnel, Width} - - TYPE railway_subway - = WAY ("railway"=="subway") - {Bridge, Tunnel, Width} - - 
TYPE railway_preserved - = WAY ("railway"=="preserved") - {Bridge, Tunnel, Width} - - TYPE railway_disused - = WAY ("railway"=="disused") - {Bridge, Tunnel, Width} - - TYPE railway_abandoned - = WAY ("railway"=="abandoned") - {Bridge, Tunnel, Width} - - TYPE railway_narrow_gauge - = WAY ("railway"=="narrow_gauge") - PATH - - TYPE railway_monorail - = WAY ("railway"=="monorail") - PATH - - TYPE railway_funicular - = WAY ("railway"=="funicular") - {Name} - - TYPE railway_station - = NODE AREA ("railway"=="station") - {Name, NameAlt} - ADDRESS POI - - TYPE railway_halt - = NODE ("railway"=="halt") - {Name, NameAlt} - - TYPE railway_tram_stop - = NODE ("railway"=="tram_stop") - {Name, NameAlt} - POI - - TYPE railway_crossing - = NODE ("railway"=="crossing") - - TYPE railway_level_crossing - = NODE ("railway"=="level_crossing") - - TYPE railway_subway_entrance - = NODE ("railway"=="subway_entrance") - {Name, NameAlt} - - TYPE railway_turntable - = NODE AREA ("railway"=="turntable") - {Bridge, Tunnel} - - TYPE public_transport_platform - = WAY AREA ("public_transport"=="platform" OR - "railway"=="platform" OR - "highway"=="platform") - {Name, NameAlt} - PATH[FOOT] - - // - // Aerialway (http://wiki.openstreetmap.org/wiki/Key:aerialway) - // - - TYPE aerialway_gondola - = WAY (("aerialway"=="gondola") OR ("aerialway"=="cable_car")) - {Name} - - TYPE aerialway_chair_lift - = WAY (("aerialway"=="chair_lift") OR ("aerialway"=="mixed_lift")) - {Name} - - TYPE aerialway_drag_lift - = WAY (("aerialway"=="drag_lift") OR ("aerialway"=="t-bar") OR ("aerialway"=="j-bar") OR ("aerialway"=="platter") OR ("aerialway"=="rope_tow") OR ("aerialway"=="magic_carpet")) - {Name} - - // - // Winter sports - // - - TYPE piste_downhill_easy - = WAY AREA ("piste:type"=="downhill" AND "piste:difficulty"=="easy") - {Name} - - TYPE piste_downhill_intermediate - = WAY AREA ("piste:type"=="downhill" AND "piste:difficulty"=="intermediate") - {Name} - - TYPE piste_downhill_advanced - = WAY AREA 
("piste:type"=="downhill" AND "piste:difficulty"=="advanced") - {Name} - - // - // Water transport - // - - TYPE route_ferry - = WAY ("route"=="ferry") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM IGNORESEALAND - - // - // air transport - // - - TYPE aeroway_aerodrome - = NODE AREA ("aeroway"=="aerodrome") - {Name, NameAlt} - ADDRESS POI - - TYPE aeroway_terminal - = NODE AREA ("aeroway"=="terminal") - {Name, NameAlt} - ADDRESS POI - - TYPE aeroway_helipad - = NODE AREA ("aeroway"=="helipad") - - TYPE aeroway_runway - = WAY AREA ("aeroway"=="runway") // Officially not AREA! - {Ref, Width} - - TYPE aeroway_taxiway - = WAY ("aeroway"=="taxiway") - {Ref, Width} PIN_WAY - - TYPE aeroway_apron - = AREA ("aeroway"=="apron") - {Ref} - - TYPE aeroway_gate - = NODE ("aeroway"=="gate") - {Name, NameAlt} - ADDRESS - - // - // Landuses - // - - TYPE landuse_allotments - = NODE AREA ("landuse"=="allotments") - {Name, NameAlt} - MERGE_AREAS - GROUP landuse - - TYPE landuse_basin - = NODE AREA ("landuse"=="basin") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_brownfield - = AREA ("landuse"=="brownfield") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_cemetery - = AREA ("landuse"=="cemetery") - {Name, NameAlt} - POI - GROUP landuse - - TYPE landuse_cemetery_sea - = AREA ("landuse"=="cemetery" AND "cemetery"=="sea") - {Name, NameAlt} - IGNORESEALAND - GROUP landuse - - TYPE landuse_commercial - = NODE AREA ("landuse"=="commercial") - {Name, NameAlt} - POI OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_construction - = NODE AREA ("landuse"=="construction") - {Name, NameAlt} MERGE_AREAS - GROUP landuse - - TYPE landuse_farmland - = AREA ("landuse"=="farmland") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_farmyard_building - = AREA ("landuse"=="farmyard"AND EXISTS "building" AND !("building" IN ["no","false","0"])) - {Name, NameAlt} - GROUP landuse - - TYPE 
landuse_farmyard - = AREA ("landuse"=="farmyard") - {Name, NameAlt} MERGE_AREAS - GROUP landuse - - TYPE wood - = NODE AREA ("landuse"=="forest" OR "natural"=="wood" OR "landcover"=="trees") - {Name, NameAlt} - POI OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_garages - = AREA ("landuse"=="garages") - {Name, NameAlt} - GROUP landuse - - TYPE landuse_grass - = NODE AREA ("landuse"=="grass") - {Name, NameAlt} MERGE_AREAS - GROUP landuse - - TYPE landuse_greenfield - = NODE AREA ("landuse"=="greenfield") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_greenhouse_horticulture - = AREA ("landuse"=="greenhouse_horticulture") - {Name, NameAlt} MERGE_AREAS - GROUP landuse - - TYPE landuse_industrial - = NODE AREA ("landuse"=="industrial") - {Name, NameAlt} - POI OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_landfill - = NODE AREA ("landuse"=="landfill") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_meadow - = NODE AREA ("landuse"=="meadow") - {Name, NameAlt} MERGE_AREAS - GROUP landuse - - TYPE landuse_military - = NODE AREA ("landuse"=="military") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_orchard - = NODE AREA ("landuse"=="orchard") - {Name, NameAlt} MERGE_AREAS - GROUP landuse - - TYPE landuse_piste - = NODE AREA ("landuse"=="piste") - {Name, NameAlt} MERGE_AREAS - GROUP landuse - - TYPE landuse_quarry - = NODE AREA ("landuse"=="quarry") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_railway - = AREA ("landuse"=="railway") - {Name, NameAlt} MERGE_AREAS - GROUP landuse - - TYPE landuse_recreation_ground - = NODE AREA ("landuse"=="recreation_ground") - {Name, NameAlt} - POI OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_reservoir - = NODE AREA ("landuse"=="reservoir") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_residential - = NODE AREA 
("landuse"=="residential") - {Name, NameAlt} - POI OPTIMIZE_LOW_ZOOM MERGE_AREAS - GROUP landuse - - TYPE landuse_retail - = NODE AREA ("landuse"=="retail") - MERGE_AREAS - GROUP landuse - - TYPE landuse_salt_pond - = AREA ("landuse"=="salt_pond") - {Name, NameAlt} - MERGE_AREAS - GROUP landuse - - TYPE landuse_village_green - = NODE AREA ("landuse"=="village_green") - {Name, NameAlt} - MERGE_AREAS - GROUP landuse - - TYPE landuse_vineyard - = AREA ("landuse"=="vineyard") - {Name, NameAlt} - POI MERGE_AREAS - GROUP landuse - - // - // Natural - // - - TYPE natural_bay - = NODE AREA ("natural"=="bay") - {Name, NameAlt} - MERGE_AREAS - - TYPE natural_beach - = NODE AREA ("natural"=="beach") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_bare_rock - = NODE AREA ("natural"=="bare_rock") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_cave_entrance - = NODE AREA ("natural"=="cave_entrance") - {Name, NameAlt} - - TYPE natural_cliff - = NODE WAY AREA ("natural"=="cliff") - {Name, NameAlt, Width} - MERGE_AREAS - - TYPE natural_fell - = NODE AREA ("natural"=="fell") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_glacier - = NODE AREA ("natural"=="glacier") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_grassland - = AREA ("natural"=="grassland") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_heath - = NODE AREA ("natural"=="heath") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_land - = NODE AREA ("natural"=="land") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_mud - = AREA ("natural"=="mud") - {Name, NameAlt} MERGE_AREAS - - TYPE natural_peak - = NODE ("natural"=="peak") - {Name, NameAlt, Ele} - - TYPE natural_sand - = AREA ("natural"=="sand") - {Name, NameAlt} MERGE_AREAS - - TYPE natural_scree - = NODE AREA ("natural"=="scree") - {Name, NameAlt} MERGE_AREAS - - TYPE natural_scrub - = NODE AREA ("natural"=="scrub") - {Name, NameAlt} - 
OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_spring - = NODE ("natural"=="spring") - {Name, NameAlt} - - TYPE natural_tree - IGNORE // Too many for mobile? - = NODE ("natural"=="tree") - - TYPE natural_volcano - = NODE ("natural"=="volcano") - {Name, NameAlt} - - TYPE natural_water - = NODE AREA ("natural"=="water") - {Name, NameAlt} - IGNORESEALAND OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_wetland_marsh - = NODE AREA ("natural"=="wetland" AND "wetland"=="marsh") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_wetland_tidalflat - = NODE AREA ("natural"=="wetland" AND "wetland"=="tidalflat") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE natural_wetland - = NODE AREA ("natural"=="wetland") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - // - // Man made - // - - TYPE man_made_bridge IGNORE - = AREA ("man_made"=="bridge") - {Name, NameAlt} - MERGE_AREAS - - TYPE man_made_pier - = WAY AREA ("man_made"=="pier") - {Name, NameAlt, Width} - MERGE_AREAS - - TYPE man_made_wastewater_plant - = NODE AREA ("man_made"=="wastewater_plant") - {Name, NameAlt} - MERGE_AREAS - - // - // Leisure - // - - TYPE leisure_playground - = NODE AREA ("leisure"=="playground") - {Name, NameAlt} - ADDRESS POI - - TYPE leisure_sports_centre - = NODE AREA ("leisure"=="sports_centre") - {Name, NameAlt} - ADDRESS POI - - TYPE leisure_stadium - = NODE AREA ("leisure"=="stadium") - {Name, NameAlt} - ADDRESS POI - - TYPE leisure_track - = NODE WAY AREA ("leisure"=="track") - {Name, NameAlt, Ref} - PATH[FOOT] PIN_WAY OPTIMIZE_LOW_ZOOM - - TYPE leisure_pitch - = NODE AREA ("leisure"=="pitch") - {Name, NameAlt} - ADDRESS OPTIMIZE_LOW_ZOOM - - TYPE leisure_golf_course - = NODE AREA ("leisure"=="golf_course") - {Name, NameAlt} - ADDRESS POI OPTIMIZE_LOW_ZOOM - - TYPE leisure_water_park - = NODE AREA ("leisure"=="water_park") - {Name, NameAlt} - ADDRESS OPTIMIZE_LOW_ZOOM POI - - TYPE leisure_swimming_pool - = NODE AREA ("leisure"=="swimming_pool") - {Name, 
NameAlt} - ADDRESS OPTIMIZE_LOW_ZOOM POI - - TYPE leisure_marina - = NODE AREA ("leisure"=="marina") - {Name, NameAlt} - ADDRESS OPTIMIZE_LOW_ZOOM POI - - TYPE leisure_slipway - = NODE ("leisure"=="slipway") - {Name, NameAlt} - - TYPE leisure_fishing - = NODE AREA ("leisure"=="fishing") - {Name, NameAlt} - ADDRESS OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE leisure_nature_reserve - = NODE AREA ("leisure"=="nature_reserve") - {Name, NameAlt} - MERGE_AREAS - - TYPE leisure_park - = NODE AREA ("leisure"=="park") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM POI MERGE_AREAS - - TYPE leisure_playground - = NODE AREA ("leisure"=="playground") - {Name, NameAlt} - - TYPE leisure_garden - = NODE AREA ("leisure"=="garden") - {Name, NameAlt} - MERGE_AREAS - - TYPE leisure_common - = NODE AREA ("leisure"=="common") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM MERGE_AREAS - - TYPE leisure_ice_rink - = NODE AREA ("leisure"=="ice_rink") - {Name, NameAlt} - OPTIMIZE_LOW_ZOOM - - - // - // Aerial way - // - - TYPE aerialway_cable_car - = NODE AREA ("aerialway"=="cable_car") - {Name, NameAlt} - POI - GROUP amenity - - // - // Amenity - // - - TYPE amenity_arts_centre - = NODE AREA ("amenity"=="arts_centre") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_atm - = NODE AREA ("amenity"=="atm") - {Name, NameAlt} - POI - GROUP amenity - - TYPE amenity_bank - = NODE AREA ("amenity"=="bank") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_bar - = NODE AREA ("amenity"=="bar") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_bicycle_parking - = NODE WAY AREA ("amenity"=="bicycle_parking") - {Name, NameAlt, Width} - POI - GROUP amenity, building - - TYPE amenity_bicycle_rental - = NODE AREA ("amenity"=="bicycle_rental") - {Name, NameAlt} - POI - GROUP amenity - - TYPE amenity_biergarten - = NODE AREA ("amenity"=="biergarten") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_bench - IGNORE // Too many for mobile? 
- = NODE ("amenity"=="bench") - GROUP amenity - - TYPE amenity_brothel - IGNORE // Too many for mobile? - = NODE AREA ("amenity"=="brothel") - GROUP amenity - - TYPE amenity_bureau_de_change - = NODE AREA ("amenity"=="bureau_de_change") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_bus_station - = NODE AREA ("amenity"=="bus_station") - {Name, NameAlt} - GROUP amenity - - TYPE amenity_cafe - = NODE AREA ("amenity"=="cafe") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_car_rental - = NODE AREA ("amenity"=="car_rental") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_car_wash - = NODE AREA ("amenity"=="car_wash") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_charging_station - = NODE ("amenity"=="charging_station") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_cinema - = NODE AREA ("amenity"=="cinema") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_clinic - = NODE AREA ("amenity"=="clinic") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_clock - IGNORE // Too many for mobile? 
- = NODE ("amenity"=="clock") - GROUP amenity - - TYPE amenity_college - = NODE AREA ("amenity"=="college") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_community_centre - = NODE AREA ("amenity"=="community_centre") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_courthouse - = NODE AREA ("amenity"=="courthouse") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_crematorium - = NODE AREA ("amenity"=="crematorium") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_dentist - = NODE AREA ("amenity"=="dentist") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_doctors - = NODE AREA ("amenity"=="doctors") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_drinking_water - IGNORE // No visualisation yet - = NODE ("amenity"=="drinking_water") - GROUP amenity - - TYPE amenity_embassy - = NODE AREA ("amenity"=="embassy") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_ev_charging - IGNORE // No visualisation yet - = NODE ("amenity"=="ev_charging") - GROUP amenity - - TYPE amenity_fast_food - = NODE AREA ("amenity"=="fast_food") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_fire_station - = NODE AREA ("amenity"=="fire_station") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_fuel - = NODE AREA ("amenity"=="fuel") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_grave_yard - = NODE AREA ("amenity"=="grave_yard") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_grit_bin - IGNORE // No visualisation yet - = NODE ("amenity"=="grit_bin") - GROUP amenity - - TYPE amenity_hospital - = NODE AREA ("amenity"=="hospital") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_ice_cream - = NODE AREA ("amenity"=="ice_cream") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_kindergarten - = NODE AREA ("amenity"=="kindergarten") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE 
amenity_library - = NODE AREA ("amenity"=="library") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_motorcycle_parking - = NODE AREA ("amenity"=="motorcycle_parking") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_nursing_home - = NODE AREA ("amenity"=="nursing_home") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_parking_entrance - IGNORE // No visualisation yet - = NODE ("amenity"=="parking_entrance") - GROUP amenity - - TYPE amenity_parking_space - IGNORE // No visualisation yet - = NODE AREA ("amenity"=="parking_space") - GROUP amenity - - TYPE amenity_parking - = NODE AREA ("amenity"=="parking") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_pharmacy - = NODE AREA ("amenity"=="pharmacy" OR "shop"=="pharmacy") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_photo_booth - IGNORE // Too many for mobile? - = NODE ("amenity"=="photo_booth") - GROUP amenity - - TYPE amenity_place_of_worship - = NODE AREA ("amenity"=="place_of_worship") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_prison - = NODE AREA ("amenity"=="prison") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_police - = NODE AREA ("amenity"=="police") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_post_office - = NODE ("amenity"=="post_office") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_post_box - = NODE ("amenity"=="post_box") - GROUP amenity - - TYPE amenity_pub - = NODE AREA ("amenity"=="pub") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_restaurant - = NODE AREA ("amenity"=="restaurant") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_recycling // there are a number of sub types, not yet defined here... 
- = NODE AREA ("amenity"=="recycling") - GROUP amenity - - TYPE amenity_sauna - = NODE AREA ("amenity"=="sauna") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_school - = NODE AREA ("amenity"=="school") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_taxi - = NODE AREA ("amenity"=="taxi") - GROUP amenity - - TYPE amenity_theatre - = NODE AREA ("amenity"=="theatre") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_toilets - = NODE ("amenity"=="toilets") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_townhall - = NODE AREA ("amenity"=="townhall") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_university - = NODE AREA ("amenity"=="university") - {Name, NameAlt} - ADDRESS POI - GROUP amenity - - TYPE amenity_vending_machine - IGNORE // Too many for mobile? - = NODE ("amenity"=="vending_machine") - {Name, NameAlt} - GROUP amenity - - TYPE amenity_waste_disposal - IGNORE // Too many for mobile? - = NODE ("amenity"=="waste_disposal") - GROUP amenity - - TYPE amenity_waste_basket - IGNORE // Too many for mobile? - = NODE ("amenity"=="waste_basket") - GROUP amenity - - TYPE amenity_watering_place IGNORE - // Too many for mobile? - = NODE ("amenity"=="watering_place") - GROUP amenity - - TYPE amenity_water_point - IGNORE // Too many for mobile? 
- = NODE ("amenity"=="water_point") - GROUP amenity - - TYPE amenity_ferry_terminal - = NODE AREA ("amenity"=="ferry_terminal") - {Name, NameAlt} - POI - GROUP amenity - - // Everything else is just an 'amenity' for now - TYPE amenity - = NODE AREA (EXISTS "amenity") - {Name, NameAlt} - ADDRESS - GROUP amenity - - // - // Emergencies - // - - TYPE emergency_defibrillator - = NODE AREA ("emergency"=="defibrillator") - {Name, NameAlt} - ADDRESS POI - GROUP emergency - - TYPE emergency_phone - = NODE AREA ("emergency"=="phone") - {Name, NameAlt} - ADDRESS POI - GROUP emergency - - // - // Shops - // - - TYPE shop_alcohol - = NODE AREA ("shop"=="alcohol") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_bakery - = NODE AREA ("shop"=="bakery") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_beauty - = NODE AREA ("shop"=="beauty") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_beverages - = NODE AREA ("shop"=="beverages") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_bicycle - = NODE AREA ("shop"=="bicycle") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_books - = NODE AREA ("shop"=="books") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_boutique - = NODE AREA ("shop"=="boutique") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_butcher - = NODE AREA ("shop"=="butcher") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_car - = NODE AREA ("shop"=="car") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_car_parts - = NODE AREA ("shop"=="car_parts") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_car_repair - = NODE AREA ("shop"=="car_repair") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_chemist - = NODE AREA ("shop"=="chemist") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_clothes - = NODE AREA ("shop"=="clothes") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_computer - = NODE AREA ("shop"=="computer") - {Name, NameAlt} - 
ADDRESS POI - GROUP shop - - TYPE shop_confectionery - = NODE AREA ("shop"=="confectionery") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_convenience - = NODE AREA ("shop"=="convenience") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_copyshop - = NODE AREA ("shop"=="copyshop") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_cosmetics - = NODE AREA ("shop"=="cosmetics") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_deli - = NODE AREA ("shop"=="deli") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_department_store - = NODE AREA ("shop"=="department_store") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_doityourself - = NODE AREA ("shop"=="doityourself") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_dry_cleaning - = NODE AREA ("shop"=="dry_cleaning") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_electronics - = NODE AREA ("shop"=="electronics") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_florist - = NODE AREA ("shop"=="florist") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_funeral_directors - = NODE AREA ("shop"=="funeral_directors") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_furniture - = NODE AREA ("shop"=="furniture") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_garden_centre - = NODE AREA ("shop"=="garden_centre") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_gift - = NODE AREA ("shop"=="gift") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_greengrocer - = NODE AREA ("shop"=="greengrocer") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_hairdresser - = NODE AREA ("shop"=="hairdresser") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_hardware - = NODE AREA ("shop"=="hardware") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_jewelry - = NODE AREA ("shop"=="jewelry") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_kiosk - = NODE AREA 
("shop"=="kiosk") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_laundry - = NODE AREA ("shop"=="laundry") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_mall - = NODE AREA ("shop"=="mall") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_mobile_phone - = NODE AREA ("shop"=="mobile_phone") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_motorcycle - = NODE AREA ("shop"=="motorcycle") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_newsagent - = NODE AREA ("shop"=="newsagent") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_optician - = NODE AREA ("shop"=="optician") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_pet - = NODE AREA ("shop"=="pet") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_shoes - = NODE AREA ("shop"=="shoes") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_sports - = NODE AREA ("shop"=="sports") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_stationery - = NODE AREA ("shop"=="stationery") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_supermarket - = NODE AREA ("shop"=="supermarket") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_toys - = NODE AREA ("shop"=="toys") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_travel_agency - = NODE AREA ("shop"=="travel_agency") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_tyres - = NODE AREA ("shop"=="tyres") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_vacant - = NODE AREA ("shop"=="vacant") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_variety_store - = NODE AREA ("shop"=="variety_store") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - TYPE shop_wine - = NODE AREA ("shop"=="wine") - {Name, NameAlt} - ADDRESS POI - GROUP shop - - // Catch the rest of the shops - TYPE shop - = NODE AREA (EXISTS "shop") - {Name, NameAlt} - ADDRESS POI - - // - // Tourism - // - - TYPE tourism_aquarium - = NODE AREA ("tourism"=="aquarium") 
- {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_attraction - = NODE AREA ("tourism"=="attraction") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_artwork - = NODE WAY AREA ("tourism"=="artwork") - {Name, NameAlt, Width, Grade, Bridge, Tunnel, Roundabout} - ADDRESS POI - GROUP tourism - - TYPE tourism_camp_site - = NODE AREA ("tourism"=="camp_site") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_caravan_site - = NODE AREA ("tourism"=="caravan_site") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_picnic_site - = NODE AREA ("tourism"=="picnic_site") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_theme_park - = NODE AREA ("tourism"=="theme_park") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_viewpoint - = NODE ("tourism"=="viewpoint") - {Name, NameAlt} - GROUP tourism - - TYPE tourism_zoo - = NODE AREA ("tourism"=="zoo") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_alpine_hut - = NODE AREA ("tourism"=="alpine_hut") - {Name, NameAlt} - ADDRESS - GROUP tourism - - TYPE tourism_chalet - = NODE AREA ("tourism"=="chalet") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_guest_house - = NODE AREA ("tourism"=="guest_house") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_hostel - = NODE AREA ("tourism"=="hostel") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_hotel - = NODE AREA ("tourism"=="hotel") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_information - = NODE AREA ("tourism"=="information") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_motel - = NODE AREA ("tourism"=="motel") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism_museum - = NODE AREA ("tourism"=="museum") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - TYPE tourism - = NODE AREA (EXISTS "tourism") - {Name, NameAlt} - ADDRESS POI - GROUP tourism - - // - // Historic - // - - 
TYPE historic_castle - = NODE AREA ("historic"=="castle") - {Name, NameAlt} - ADDRESS POI - GROUP historic - - TYPE historic_manor - = NODE AREA ("historic"=="manor") - {Name, NameAlt} - ADDRESS POI - GROUP historic - - TYPE historic_monument - = NODE AREA ("historic"=="monument") - {Name, NameAlt} - ADDRESS POI - GROUP historic - - TYPE historic_memorial_stolperstein - IGNORE // only for "theme" maps? - = NODE ("historic"=="memorial" AND "memorial:type"=="stolperstein") - {Name, NameAlt} - GROUP historic - - TYPE historic_memorial - = NODE AREA ("historic"=="memorial") - {Name, NameAlt} - ADDRESS POI - GROUP historic - - TYPE historic_ruins - = NODE WAY AREA ("historic"=="ruins") - {Name, NameAlt, Width, Grade, Bridge, Tunnel, Roundabout} - ADDRESS POI - GROUP historic - - TYPE historic_archaeological_site - = NODE WAY AREA ("historic"=="archaeological_site") - {Name, NameAlt, Width, Grade, Bridge, Tunnel, Roundabout} - ADDRESS POI - GROUP historic - - TYPE historic_battlefield - = NODE AREA ("historic"=="battlefield") - {Name, NameAlt} - ADDRESS POI MERGE_AREAS - GROUP historic - - TYPE historic_wreck - = NODE AREA ("historic"=="wreck") - {Name, NameAlt} - ADDRESS POI - GROUP historic - - TYPE historic - = NODE AREA (EXISTS "historic") - {Name, NameAlt} - ADDRESS POI - GROUP historic - - // - // Military - // - - TYPE military_airfield - = NODE AREA ("military"=="airfield") - {Name, NameAlt} - ADDRESS - - TYPE military_bunker - = NODE AREA ("military"=="bunker") - {Name, NameAlt} - ADDRESS - - TYPE military_barracks - = NODE AREA ("military"=="barracks") - {Name, NameAlt} - ADDRESS - - TYPE military_danger_area - = NODE AREA ("military"=="danger_area") - {Name, NameAlt} - MERGE_AREAS - - TYPE military_range - = NODE AREA ("military"=="range") - {Name, NameAlt} - MERGE_AREAS - - TYPE military_naval_base - = NODE AREA ("military"=="naval_base") - {Name, NameAlt} - ADDRESS - - // - // Sport - // INFO: We expect that only the "node" variant will appear, since the 
area version should be - // handled by "building", "pitch" or similar areas. - // See http://wiki.openstreetmap.org/wiki/Key:sport - // - - TYPE sport_building - = AREA (EXISTS "sport" AND EXISTS "building" AND !("building" IN ["no","false","0"])) - {Name, NameAlt} - ADDRESS - - TYPE sport_9pin - = NODE AREA ("sport"=="9pin") - {Name, NameAlt} - ADDRESS - - TYPE sport_10pin - = NODE AREA ("sport"=="10pin") - {Name, NameAlt} - ADDRESS - - TYPE sport_archery - = NODE AREA ("sport"=="archery") - {Name, NameAlt} - ADDRESS - - TYPE sport_athletics - = NODE AREA ("sport"=="athletics") - {Name, NameAlt} - ADDRESS - - TYPE sport_australian_football - = NODE AREA ("sport"=="australian_football") - {Name, NameAlt} - ADDRESS - - TYPE sport_baseball - = NODE AREA ("sport"=="baseball") - {Name, NameAlt} - ADDRESS - - TYPE sport_basketball - = NODE AREA ("sport"=="basketball") - {Name, NameAlt} - ADDRESS - - TYPE sport_beachvolleyball - = NODE AREA ("sport"=="beachvolleyball") - {Name, NameAlt} - ADDRESS - - TYPE sport_boules - = NODE AREA ("sport"=="boules") - {Name, NameAlt} - ADDRESS - - TYPE sport_bowls - = NODE AREA ("sport"=="bowls") - {Name, NameAlt} - ADDRESS - - TYPE sport_canoe - = NODE AREA ("sport"=="canoe") - {Name, NameAlt} - ADDRESS - - TYPE sport_chess - = NODE AREA ("sport"=="chess") - {Name, NameAlt} - ADDRESS - - TYPE sport_climbing - = NODE AREA ("sport"=="climbing") - {Name, NameAlt} - ADDRESS - - TYPE sport_cricket - = NODE AREA ("sport"=="cricket") - {Name, NameAlt} - ADDRESS - - TYPE sport_cricket_nets - = NODE AREA ("sport"=="cricket_nets") - {Name, NameAlt} - ADDRESS - - TYPE sport_croquet - = NODE AREA ("sport"=="croquet") - {Name, NameAlt} - ADDRESS - - TYPE sport_cycling - = NODE AREA ("sport"=="cycling") - {Name, NameAlt} - ADDRESS - - TYPE sport_dog_racing - = NODE AREA ("sport"=="dog_racing") - {Name, NameAlt} - ADDRESS - - TYPE sport_equestrian - = NODE AREA ("sport"=="equestrian") - {Name, NameAlt} - ADDRESS - - TYPE sport_football - = NODE AREA 
("sport"=="football") - {Name, NameAlt} - ADDRESS - - TYPE sport_golf - = NODE AREA ("sport"=="golf") - {Name, NameAlt} - ADDRESS - - TYPE sport_gymnastics - = NODE AREA ("sport"=="gymnastics") - {Name, NameAlt} - ADDRESS - - TYPE sport_hockey - = NODE AREA ("sport"=="hockey") - {Name, NameAlt} - ADDRESS - - TYPE sport_horse_racing - = NODE AREA ("sport"=="horse_racing") - {Name, NameAlt} - ADDRESS - - TYPE sport_motor - = NODE AREA ("sport"=="motor") - {Name, NameAlt} - ADDRESS - - TYPE sport_multi - = NODE AREA ("sport"=="multi") - {Name, NameAlt} - ADDRESS - - TYPE sport_orienteering - = NODE AREA ("sport"=="orienteering") - {Name, NameAlt} - ADDRESS - - TYPE sport_paddle_tennis - = NODE AREA ("sport"=="paddle_tennis") - {Name, NameAlt} - ADDRESS - - TYPE sport_pelota - = NODE AREA ("sport"=="pelota") - {Name, NameAlt} - ADDRESS - - TYPE sport_raquet - = NODE AREA ("sport"=="raquet") - {Name, NameAlt} - ADDRESS - - TYPE sport_rowing - = NODE AREA ("sport"=="rowing") - {Name, NameAlt} - ADDRESS - - TYPE sport_rugby - = NODE AREA ("sport"=="rugby") - {Name, NameAlt} - ADDRESS - - TYPE sport_shooting - = NODE AREA ("sport"=="shooting") - {Name, NameAlt} - ADDRESS - - TYPE sport_skating - = NODE AREA ("sport"=="skating") - {Name, NameAlt} - ADDRESS - - TYPE sport_skateboard - = NODE AREA ("sport"=="skateboard") - {Name, NameAlt} - ADDRESS - - TYPE sport_skiing - = NODE AREA ("sport"=="skiing") - {Name, NameAlt} - ADDRESS - - TYPE sport_soccer - = NODE AREA ("sport"=="soccer") - {Name, NameAlt} - ADDRESS - - TYPE sport_swimming - = NODE AREA ("sport"=="swimming") - {Name, NameAlt} - ADDRESS - - TYPE sport_table_tenis - = NODE AREA ("sport"=="table_tenis") - {Name, NameAlt} - ADDRESS - - TYPE sport_team_handball - = NODE AREA ("sport"=="team_handball") - {Name, NameAlt} - ADDRESS - - TYPE sport_tennis - = NODE AREA ("sport"=="tennis") - {Name, NameAlt} - ADDRESS - - TYPE sport_volleyball - = NODE AREA ("sport"=="volleyball") - {Name, NameAlt} - ADDRESS - - TYPE sport 
- = NODE AREA (EXISTS "sport") - {Name, NameAlt} - ADDRESS - - // - // Power - // - - TYPE power_tower - = NODE ("power"=="tower") - - TYPE power_pole - = NODE ("power"=="pole") - - TYPE power_line - = WAY ("power"=="line") IGNORESEALAND - - TYPE power_minor_line - = WAY ("power"=="minor_line") - - // See http://wiki.openstreetmap.org/wiki/Tag:power%3Dstation - TYPE power_sub_station - = NODE AREA ("power"=="station" OR "power"=="sub_station") - {Name, NameAlt} - ADDRESS - - TYPE power_generator - = NODE AREA ("power"=="generator") - ADDRESS - - // TODO: power_plant, see: http://wiki.openstreetmap.org/wiki/Tag:power%3Dplant - - // - // Regions, locations, cities stuff - // - - // Do not delete the following type, they are required by the GenLocation import step - TYPE boundary_country - = WAY AREA ("boundary"=="administrative" AND "admin_level"=="2") OR - RELATION ("type"=="boundary" AND "boundary"=="administrative" AND "admin_level"=="2") - {Name, NameAlt, AdminLevel} - MULTIPOLYGON IGNORESEALAND - - TYPE boundary_state - = WAY AREA ("boundary"=="administrative" AND "admin_level"=="4") OR - RELATION ("type"=="boundary" AND "boundary"=="administrative" AND "admin_level"=="4") - {Name, NameAlt, AdminLevel} - MULTIPOLYGON IGNORESEALAND - - TYPE boundary_county - = WAY AREA ("boundary"=="administrative" AND "admin_level"=="6") OR - RELATION ("type"=="boundary" AND "boundary"=="administrative" AND "admin_level"=="6") - {Name, NameAlt, AdminLevel} - MULTIPOLYGON IGNORESEALAND - - TYPE boundary_administrative - = WAY AREA ("boundary"=="administrative") OR - RELATION ("type"=="boundary" AND "boundary"=="administrative") - {Name, NameAlt, AdminLevel} - MULTIPOLYGON IGNORESEALAND - - TYPE place_continent - = NODE AREA ("place"=="continent") - {Name, NameAlt, IsIn} - - // Not part of the administrative boundary hierachy? 
- TYPE place_peninsula IGNORE - = NODE AREA ("place"=="peninsula") - {Name, NameAlt} - - TYPE place_country - = NODE ("place"=="country") - {Name, NameAlt, IsIn} - - TYPE place_state - = NODE AREA ("place"=="state") - {Name, NameAlt, IsIn} - - TYPE place_region - = NODE AREA ("place"=="region") - {Name, NameAlt, IsIn} - - TYPE place_county - = NODE AREA ("place"=="county") - {Name, NameAlt, IsIn} - - // Do not delete the following types, they are required by the GenCityStreet import step - TYPE place_millioncity - = NODE AREA ("place"=="city" AND EXISTS "population" AND "population">1000000) - {Name, NameAlt, IsIn} - ADMIN_REGION - - TYPE place_bigcity - = NODE AREA ("place"=="city" AND EXISTS "population" AND "population">100000) - {Name, NameAlt, IsIn} - ADMIN_REGION - - TYPE place_city - = NODE AREA ("place"=="city") - {Name, NameAlt, IsIn} - ADMIN_REGION - - TYPE place_town - = NODE AREA ("place"=="town") - {Name, NameAlt, IsIn} - ADMIN_REGION - - TYPE place_village - = NODE AREA ("place"=="village") - {Name, NameAlt, IsIn} - ADMIN_REGION - - TYPE place_hamlet - = NODE AREA ("place"=="hamlet") - {Name, NameAlt, IsIn} - ADMIN_REGION - - TYPE place_suburb - = NODE AREA ("place"=="suburb") - {Name, NameAlt, IsIn} - ADMIN_REGION - - TYPE place_locality - = NODE AREA ("place"=="locality") - {Name, NameAlt, IsIn} - POI - - TYPE place_island - = NODE AREA ("place"=="island") - {Name, NameAlt, IsIn} - - TYPE place_islet - = NODE AREA ("place"=="islet") - {Name, NameAlt, IsIn} - - // - // Buildings - // - - TYPE building_garage - = AREA ("building"=="garage") - - TYPE building - = AREA (EXISTS "building" AND !("building" IN ["no","false","0"])) - {Name, NameAlt} - ADDRESS POI - - // Addresses - TYPE address - = NODE AREA ((EXISTS "addr:street" OR EXISTS "addr:place") AND EXISTS "addr:housenumber") - ADDRESS - - // - // - // Barriers - - // Currently not supported by stylesheet - TYPE barrier_hedge - IGNORE - = WAY AREA ("barrier"=="hedge") - - TYPE barrier_fence - = WAY 
AREA ("barrier"=="fence") - IGNORESEALAND - - TYPE barrier_wall - = WAY AREA ("barrier"=="wall") - - TYPE barrier_ditch - IGNORE - = WAY AREA ("barrier"=="ditch") - - TYPE barrier_retaining_wall - IGNORE - = WAY AREA ("barrier"=="retaining_wall") - IGNORESEALAND - - TYPE barrier_city_wall - = WAY AREA ("barrier"=="city_wall") - {Name, NameAlt, Width} - - TYPE barrier_bollard - IGNORE - = NODE WAY ("barrier"=="bollard") - - TYPE barrier_cycle_barrier - IGNORE - = NODE ("barrier"=="cycle_barrier") - - TYPE barrier_block - IGNORE - = NODE ("barrier"=="block") - - TYPE barrier_cattle_grid - IGNORE - = NODE ("barrier"=="cattle_grid") - - TYPE barrier_toll_booth - IGNORE - = NODE ("barrier"=="toll_booth") - - TYPE barrier_entrance - IGNORE - = NODE ("barrier"=="entrance") - - TYPE barrier_gate - IGNORE - = NODE WAY ("barrier"=="gate") - - TYPE barrier_lift_gate - IGNORE - = NODE ("barrier"=="lift_gate") - - TYPE barrier_stile - IGNORE - = NODE ("barrier"=="stile") - - TYPE barrier_sally_port - IGNORE - = NODE ("barrier"=="sally_port") - - TYPE barrier_kent_carriage_gate - IGNORE - = NODE ("barrier"=="kent_carriage_gate") - - // Types we currently ignore - - // This does increase the size of nodes data too much - TYPE building_entrance IGNORE - = NODE ("building"=="entrance" OR EXISTS "entrance") - - TYPE street_any IGNORE - = RELATION ("type"=="street") - - TYPE tunnel_any IGNORE - = RELATION ("type"=="tunnel") - - TYPE bridge_any IGNORE - = RELATION ("type"=="bridge") - - TYPE dual_carriageway_any IGNORE - = RELATION ("type"=="dual_carriageway") - - TYPE waterway_any IGNORE - = RELATION ("type"=="waterway") - - TYPE wayparts_any IGNORE - = RELATION ("type"=="wayparts") - - TYPE associated_street_any IGNORE - = RELATION ("type"=="associatedStreet") - - TYPE enforcement_any IGNORE - = RELATION ("type"=="enforcement") - - TYPE roadAccess_any IGNORE - = RELATION ("type"=="roadAccess") - - TYPE junction_any IGNORE - = RELATION ("type"=="junction") - - TYPE label_any IGNORE - 
= RELATION ("type"=="label") - - TYPE composite_Attribute_any IGNORE - = RELATION ("type"=="composite_Attribute") - - TYPE segmented_tag_any IGNORE - = RELATION ("type"=="segmented_tag") - - TYPE building_any IGNORE - = RELATION ("type"=="building") - - TYPE related_building_any IGNORE - = RELATION ("type"=="relatedBuilding") - - TYPE site_any IGNORE - = RELATION ("type"=="site") - - TYPE station_any IGNORE - = RELATION ("type"=="station") - - TYPE railway_halt_any IGNORE - = RELATION ("type"=="railway_halt") - - TYPE line_any IGNORE - = RELATION ("type"=="line") - - TYPE public_transport_any IGNORE - = RELATION ("type"=="public_transport") - - TYPE public_transport_stop_area_group IGNORE - = RELATION ("public_transport"=="stop_area_group") - - TYPE collection_any IGNORE - = RELATION ("type"=="collection") - - TYPE network_any IGNORE - = RELATION ("type"=="network") - - TYPE route_any IGNORE - = RELATION ("type"=="route") - - TYPE routemaster_any IGNORE - = RELATION ("type"=="route_master") - - TYPE superroute_any IGNORE - = RELATION ("type"=="superroute") - - TYPE netzwolf_route_any IGNORE - = RELATION ("type"=="netzwolf-route") - - TYPE netzwolf_routemaster_any IGNORE - = RELATION ("type"=="netzwolf:route_master") - - TYPE address IGNORE - = RELATION ("type"=="address") - - TYPE border IGNORE - = AREA RELATION ("type"=="border") - - TYPE postal_code IGNORE - = AREA RELATION ("type"=="postal_code") - - TYPE organization IGNORE - = AREA RELATION ("type"=="organization") - - TYPE boundary_civil IGNORE - = AREA ("boundary"=="civil") - - TYPE boundary_national_park IGNORE - = WAY AREA RELATION ("boundary"=="national_park") - - TYPE boundary_maritime IGNORE - = AREA RELATION ("boundary"=="maritime") - - TYPE boundary_political IGNORE - = AREA ("boundary"=="political") - - TYPE boundary_postal_code IGNORE - = AREA RELATION ("boundary"=="postal_code") - - TYPE place_any IGNORE - = RELATION ("type"=="place") - - TYPE amt_any IGNORE - = RELATION ("type"=="amt") - - TYPE 
suburb_any IGNORE - = RELATION ("type"=="suburb") - - TYPE city_any IGNORE - = RELATION ("type"=="city") - - TYPE town_any IGNORE - = RELATION ("type"=="town") - - TYPE municipality_any IGNORE - = RELATION ("type"=="municipality") - - TYPE district_any IGNORE - = RELATION ("type"=="district") - - TYPE county_any IGNORE - = RELATION ("type"=="county") - - TYPE state_any IGNORE - = RELATION ("type"=="state") - - TYPE tmc_any IGNORE - = AREA (EXISTS "TMC:cid_58:tabcd_1:Class") OR RELATION ("type"=="tmc" OR "type"=="TMC") - - TYPE lez_any IGNORE - = RELATION ("type"=="LEZ") - - TYPE group_any IGNORE - = RELATION ("type"=="group") - - TYPE set_any IGNORE - = RELATION ("type"=="set") - - TYPE compound_any IGNORE - = RELATION ("type"=="compound") -END diff --git a/scripts/tags/generate_mapstyle.py b/scripts/tags/generate_mapstyle.py index b6974f7..292231f 100755 --- a/scripts/tags/generate_mapstyle.py +++ b/scripts/tags/generate_mapstyle.py @@ -5,8 +5,7 @@ db = sqlite3.connect('taginfo-db.db') c = db.cursor() -mapost = "" -whitelist = "" +prioritylist = "" keyvals = [] for r in c.execute("select key,value from tags where key='shop' order by count_all desc limit 50"): @@ -22,13 +21,8 @@ for r in keyvals: key, value = r - mapost += ' TYPE ' + key + '_' + value + '\n' - mapost += ' = NODE AREA ("%s"=="%s")\n' % (key, value) - mapost += ' {Name, NameAlt}\n ADDRESS POI\n GROUP ' + key + '\n\n' + prioritylist += key + '_' + value + '\n' - whitelist += key + '_' + value + '\n' - -print(mapost) -print(whitelist) +print(prioritylist)