From 5c4dcc1aaa2f6578ddd89c95768e0cd745dce4a0 Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Tue, 24 Oct 2023 13:59:55 +0200 Subject: [PATCH 1/2] Use tags = NULL in middle tables if object doesn't have any tags This doesn't make much of a difference for the ways and rels tables, but if we store all nodes in the database, it does make a huge difference, because most nodes don't have any tags. For a current planet, disk usage for the nodes table goes from 476 GB down to 409 GB, saving 67 GB or nearly 15%. Additionally, it makes using these tables simpler. If you want to do any queries on tags, you need an index on the tags column on the nodes/ways/rels tables like this: CREATE INDEX ON planet_osm_ways USING gin (tags); But that is wasteful, because of the empty tags. We probably want to create the index as: CREATE INDEX ON planet_osm_ways USING gin (tags) WHERE tags != '{}'::jsonb; But now all queries on those tables have to include that extra condition so that the query planner will use the index: SELECT * FROM planet_osm_ways WHERE tags ? 'highway' AND tags != '{}'::jsonb; If we use NULLs, the index can be created as: CREATE INDEX ON planet_osm_ways USING gin (tags) WHERE tags IS NOT NULL; And now the query becomes simpler, because the NOT NULL is automatically taken into account by the query planner: SELECT * FROM planet_osm_ways WHERE tags ? 'highway'; Note that this is an incompatible change to the new-format middle tables, but they are still marked as experimental, so we can do this. 
--- src/middle-pgsql.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/middle-pgsql.cpp b/src/middle-pgsql.cpp index 6daed404c..e7ca279a1 100644 --- a/src/middle-pgsql.cpp +++ b/src/middle-pgsql.cpp @@ -323,6 +323,10 @@ template void pgsql_parse_json_tags(char const *string, osmium::memory::Buffer *buffer, T *obuilder) { + if (*string == '\0') { // NULL + return; + } + auto const tags = nlohmann::json::parse(string); if (!tags.is_object()) { throw std::runtime_error{"Database format for tags invalid."}; @@ -613,6 +617,10 @@ void middle_pgsql_t::copy_attributes(osmium::OSMObject const &obj) void middle_pgsql_t::copy_tags(osmium::OSMObject const &obj) { if (m_store_options.db_format == 2) { + if (obj.tags().empty()) { + m_db_copy.add_null_column(); + return; + } json_writer_t writer; tags_to_json(obj.tags(), &writer); m_db_copy.add_column(writer.json()); @@ -1464,7 +1472,7 @@ static table_sql sql_for_nodes_format2(middle_pgsql_options const &options) " lat int4 NOT NULL," " lon int4 NOT NULL," "{attribute_columns_definition}" - " tags jsonb NOT NULL" + " tags jsonb" ") {data_tablespace}"; sql.prepare_queries = { @@ -1530,7 +1538,7 @@ static table_sql sql_for_ways_format2(middle_pgsql_options const &options) " id int8 PRIMARY KEY {using_tablespace}," "{attribute_columns_definition}" " nodes int8[] NOT NULL," - " tags jsonb NOT NULL" + " tags jsonb" ") {data_tablespace}"; sql.prepare_queries = {"PREPARE get_way(int8) AS" @@ -1601,7 +1609,7 @@ static table_sql sql_for_relations_format2() " id int8 PRIMARY KEY {using_tablespace}," "{attribute_columns_definition}" " members jsonb NOT NULL," - " tags jsonb NOT NULL" + " tags jsonb" ") {data_tablespace}"; sql.prepare_queries = {"PREPARE get_rel(int8) AS" From 5b25afede97417477c325134cb059b685f1914d1 Mon Sep 17 00:00:00 2001 From: Jochen Topf Date: Sat, 28 Oct 2023 14:53:41 +0200 Subject: [PATCH 2/2] Allow NULL values in new middle format members list This makes osm2pgsql a bit more 
future proof by allowing the list of members (which is encoded as JSON in the new middle format) to be empty, i.e. to contain NULL. We currently don't write empty member lists as NULL but as an empty JSON list, but if we change this in the future, older versions of osm2pgsql will be able to read this correctly. --- src/middle-pgsql.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/middle-pgsql.cpp b/src/middle-pgsql.cpp index e7ca279a1..a673472a5 100644 --- a/src/middle-pgsql.cpp +++ b/src/middle-pgsql.cpp @@ -443,6 +443,10 @@ template void pgsql_parse_json_members(char const *string, osmium::memory::Buffer *buffer, T *obuilder) { + if (*string == '\0') { // NULL + return; + } + osmium::builder::RelationMemberListBuilder builder{*buffer, obuilder}; member_list_json_builder parser{&builder}; nlohmann::json::sax_parse(string, &parser);