Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gpkg generator #130

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,41 @@ Options:
--help Show this message and exit.
```

### Generate geopackage from table definitions

```bash
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
```bash
```text

geopackage-validator generate-gpkg --help
Usage: geopackage-validator generate-gpkg [OPTIONS]

Generate an empty geopackage based on a geopackage validator table
definition. Provide the table definitions with the --table-definitions-path
parameter. The generated geopackage will be valid except for the fact that
it will be empty.

Options:
--gpkg-path PATH Path pointing to the geopackage.gpkg file
[env var: GPKG_PATH]
-t, --table-definitions-path FILE
Path pointing to the table-definitions JSON
or YAML file (generate this file by calling
the generate-definitions command)
[required]
--validations-path FILE Path pointing to the set of validations to
run. If validations-path and validations are
not given, validate runs all validations
[env var: VALIDATIONS_FILE]
--validations TEXT Comma-separated list of validations to run
(e.g. --validations RQ1,RQ2,RQ3). If
validations-path and validations are not
given, validate runs all validations [env
var: VALIDATIONS]
-v, --verbosity LVL Either CRITICAL, ERROR, WARNING, INFO or
DEBUG
--yaml Output yaml
--validate Validate after generation
--help Show this message and exit.
```

## Local development

We advise using docker-compose for local development. This allows live editing and testing of the code against the correct GDAL/OGR version, built with SpatiaLite 5.0.0.
Expand Down
138 changes: 138 additions & 0 deletions geopackage_validator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,144 @@ def geopackage_validator_command_generate_table_definitions(
sys.exit(1)


@cli.command(
    name="generate-gpkg",
    help=(
        "Generate an empty geopackage based on a geopackage validator table definition. Provide the table definitions "
        "with the --table-definitions-path parameter. The generated geopackage will be valid except for the fact that "
        "it will be empty."
    ),
)
@click.option(
    "--gpkg-path",
    envvar="GPKG_PATH",
    required=False,
    default=None,
    show_envvar=True,
    help="Path pointing to the geopackage.gpkg file",
    type=click.types.Path(
        file_okay=False,
        dir_okay=False,
        readable=False,
        writable=False,
        resolve_path=False,
        allow_dash=False,
    ),
)
@click.option(
    "-t",
    "--table-definitions-path",
    show_envvar=True,
    required=True,
    default=None,
    help=(
        "Path pointing to the table-definitions JSON or YAML file (generate this file by calling the "
        "generate-definitions command)"
    ),
    type=click.types.Path(
        # The definitions file is required input, so let click reject a
        # missing path up front (same as --validations-path below).
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        writable=False,
        allow_dash=False,
    ),
)
@click.option(
    "--validations-path",
    show_envvar=True,
    required=False,
    default=None,
    envvar="VALIDATIONS_FILE",
    help=(
        "Path pointing to the set of validations to run. If validations-path and validations are not given, validate "
        "runs all validations"
    ),
    type=click.types.Path(
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        writable=False,
        allow_dash=False,
    ),
)
@click.option(
    "--validations",
    show_envvar=True,
    required=False,
    default="",
    envvar="VALIDATIONS",
    help=(
        "Comma-separated list of validations to run (e.g. --validations RQ1,RQ2,RQ3). If validations-path and "
        "validations are not given, validate runs all validations"
    ),
)
@click_log.simple_verbosity_option(logger)
@click.option(
    "--yaml",
    required=False,
    is_flag=True,
    help="Output yaml",
)
@click.option(
    "--validate",
    "do_validate",
    required=False,
    is_flag=True,
    help="Validate after generation",
    default=False,
)
def geopackage_validator_command_generate_gpkg(
    gpkg_path,
    table_definitions_path,
    validations_path,
    validations,
    yaml,
    do_validate,
):
    """Generate an empty geopackage and optionally validate the result.

    The geopackage is written to ``gpkg_path`` from the table definitions in
    ``table_definitions_path``. Validation runs when ``--validate`` is given,
    or implicitly when ``--validations`` or ``--validations-path`` is set.
    The process exits with status 1 when no geopackage path is given or when
    generation fails.
    """
    if gpkg_path is None:
        logger.error("Give a valid --gpkg-path or (/vsi)s3 location")
        sys.exit(1)

    try:
        generate.generate_empty_geopackage(gpkg_path, table_definitions_path)
    except Exception:
        # Top-level CLI boundary: log the traceback and signal failure.
        logger.exception("Error while generating geopackage")
        sys.exit(1)

    validations_requested = bool(validations) or validations_path is not None
    if not (do_validate or validations_requested):
        return

    if not validations_requested:
        # Default to every known validation except RQ2.
        # NOTE(review): RQ2 is presumably the check that fails on the empty
        # layers this command produces -- confirm against the validations.
        validations = ",".join(
            code
            for code in validate.get_validation_descriptions(False)
            if code != "RQ2"
        )

    start_time = datetime.now()
    duration_start = time.monotonic()
    results, validations_executed, success = validate.validate(
        gpkg_path,
        table_definitions_path,
        validations_path,
        validations,
    )
    duration_seconds = time.monotonic() - duration_start

    output.log_output(
        filename=gpkg_path,
        results=results,
        validations_executed=validations_executed,
        start_time=start_time,
        duration_seconds=duration_seconds,
        success=success,
        as_yaml=yaml,
    )


@cli.command(
name="show-validations",
help="Show all the possible validations that can be executed in the validate command.",
Expand Down
90 changes: 89 additions & 1 deletion geopackage_validator/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Dict, List, Union
from collections import OrderedDict

from osgeo import ogr
from osgeo import ogr, osr
from osgeo.ogr import DataSource

from geopackage_validator import utils
Expand All @@ -14,6 +14,37 @@
TableDefinition = Dict[str, Union[int, Dict[str, ColumnDefinition]]]


# Geometry type names, as they appear in table definitions, mapped to the
# corresponding OGR wkb geometry type constants.
OGR_GEOMETRY_TYPES = dict(
    POINT=ogr.wkbPoint,
    LINESTRING=ogr.wkbLineString,
    POLYGON=ogr.wkbPolygon,
    MULTIPOINT=ogr.wkbMultiPoint,
    MULTILINESTRING=ogr.wkbMultiLineString,
    MULTIPOLYGON=ogr.wkbMultiPolygon,
)


# Column type names mapped to the constants handed to ogr.FieldDefn.
# NOTE(review): the geometry entries carry wkb* geometry codes rather than
# OFT* field codes -- confirm that using them as field types is intended.
OGR_FIELD_TYPES = {
    **OGR_GEOMETRY_TYPES,
    "DATE": ogr.OFTDate,
    "DATETIME": ogr.OFTDateTime,
    "TIME": ogr.OFTTime,
    "INTEGER": ogr.OFTInteger,
    "INTEGER64": ogr.OFTInteger64,
    "REAL": ogr.OFTReal,
    "STRING": ogr.OFTString,
    "BINARY": ogr.OFTBinary,
    "INTEGERLIST": ogr.OFTIntegerList,
    "INTEGER64LIST": ogr.OFTInteger64List,
    "REALLIST": ogr.OFTRealList,
    "STRINGLIST": ogr.OFTStringList,
    "WIDESTRING": ogr.OFTWideString,
    "WIDESTRINGLIST": ogr.OFTWideStringList,
}


def columns_definition(table, geometry_column) -> ColumnDefinition:
layer_definition = table.GetLayerDefn()

Expand Down Expand Up @@ -83,10 +114,67 @@ def generate_table_definitions(dataset: DataSource) -> TableDefinition:
return result


def _column_type_name(column):
    """Return the declared type name of *column*.

    Current table definitions store it under ``type``; the ``data_type`` key
    is accepted as a fallback. Raises ``KeyError`` when neither is present.
    """
    try:
        return column["type"]
    except KeyError:
        return column["data_type"]


def generate_geopackage_from_table_definition(
    dataset: DataSource, table_definition: TableDefinition
) -> None:
    """Create one empty layer in *dataset* for every table in *table_definition*.

    Each layer gets the projection (EPSG code) of the table definition, the
    geometry type of the table's geometry column, and one field per remaining
    column.

    Raises:
        ValueError: when a table definition lacks column names or types, when
            its geometry or field types are unknown, or when the layer cannot
            be created.
    """
    projection = int(table_definition["projection"])
    tables = table_definition["tables"]

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(projection)

    for table in tables:
        try:
            columns = [(c["name"], _column_type_name(c)) for c in table["columns"]]
        except KeyError as err:
            raise ValueError(
                "Table definition is incomplete or its version is too old"
            ) from err
        column_types = dict(columns)

        try:
            geometry_column = table["geometry_column"]
            geometry_type = OGR_GEOMETRY_TYPES[column_types[geometry_column]]
        except KeyError as err:
            raise ValueError(
                f"Unknown geometry type for table {table['name']}"
            ) from err

        # Resolve every field type before touching the dataset, so an invalid
        # definition does not leave a half-built layer behind.
        try:
            fields = [
                ogr.FieldDefn(name, OGR_FIELD_TYPES[type_name])
                for name, type_name in columns
                if name != geometry_column
            ]
        except KeyError as err:
            raise ValueError(f"Unknown field type for table {table['name']}") from err

        layer = dataset.CreateLayer(
            table["name"],
            srs=srs,
            geom_type=geometry_type,
            # Without this the driver falls back to its default geometry
            # column name instead of the one in the definition.
            options=[f"GEOMETRY_NAME={geometry_column}"],
        )
        if layer is None:
            # CreateLayer signals failure with None unless GDAL exceptions are on.
            raise ValueError(f"Could not create layer for table {table['name']}")

        layer.CreateFields(fields)


def generate_definitions_for_path(gpkg_path: str) -> TableDefinition:
    """Open the geopackage at *gpkg_path* and return its table definitions.

    The GDAL version is checked before the dataset is opened.
    """
    utils.check_gdal_version()
    return generate_table_definitions(utils.open_dataset(gpkg_path))


def generate_empty_geopackage(gpkg_path: str, table_definition_path: str):
    """Create an empty geopackage at *gpkg_path* from a table-definitions file.

    Args:
        gpkg_path: location of the geopackage to create.
        table_definition_path: path of the table-definitions file to load.

    Raises:
        RuntimeError: when the geopackage dataset cannot be created.
    """
    utils.check_gdal_version()

    # Load the definitions first: an unreadable or malformed definitions file
    # must not leave an empty geopackage behind.
    table_definition = load_table_definitions(table_definition_path)

    dataset = utils.create_dataset(gpkg_path)
    if dataset is None:
        raise RuntimeError(f"Could not create geopackage at {gpkg_path}")

    return generate_geopackage_from_table_definition(dataset, table_definition)


def load_table_definitions(table_definitions_path) -> TableDefinition:
    """Load the table definitions stored at *table_definitions_path*."""
    definitions = utils.load_config(table_definitions_path)
    return definitions
28 changes: 28 additions & 0 deletions geopackage_validator/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,34 @@ def silence_gdal():
return dataset


def create_dataset(filename=None, error_handler=None):
    """Create a new GeoPackage dataset at *filename*.

    Args:
        filename: location of the geopackage to create.
        error_handler: optional GDAL error handler. When given, GDAL
            exceptions are enabled, the handler is pushed onto GDAL's handler
            stack, and creation failures are reported to it instead of raised.

    Returns:
        The created dataset, with a ``silence_gdal`` context manager attached,
        or ``None`` when creation failed.

    Raises:
        RuntimeError: when the GPKG driver is not available.
    """
    if error_handler is not None:
        gdal.UseExceptions()
        gdal.PushErrorHandler(error_handler)

    @contextmanager
    def silence_gdal():
        if error_handler is None:
            warnings.warn("cannot silence gdal without error handler")
            # Still yield: a @contextmanager generator that returns without
            # yielding makes the ``with`` statement raise RuntimeError.
            yield
            return
        gdal.PopErrorHandler()
        try:
            yield
        finally:
            # Restore the handler even when the ``with`` body raises.
            gdal.PushErrorHandler(error_handler)

    driver = ogr.GetDriverByName("GPKG")
    if driver is None:
        raise RuntimeError("GDAL GPKG driver is not available")

    dataset = None
    try:
        dataset = driver.CreateDataSource(filename)
    except Exception as e:
        if error_handler is None:
            # Nobody to report to; do not mask the failure with a TypeError.
            raise
        error_handler(gdal.CE_Failure, 0, e.args[0] if e.args else str(e))

    if dataset is not None:
        dataset.silence_gdal = silence_gdal

    return dataset


def check_gdal_version():
"""This method checks if GDAL has the right version and exits with an error otherwise."""
version_num = int(gdal.VersionInfo("VERSION_NUM"))
Expand Down
6 changes: 1 addition & 5 deletions geopackage_validator/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from osgeo import gdal

from geopackage_validator.generate import TableDefinition
from geopackage_validator.generate import TableDefinition, load_table_definitions
from geopackage_validator import validations as validation
from geopackage_validator.validations.validator import (
Validator,
Expand Down Expand Up @@ -226,7 +226,3 @@ def get_validator_classes():
if issubclass(getattr(validation, validator), Validator)
]
return sorted(validator_classes, key=lambda v: (v.level, v.code))


def load_table_definitions(table_definitions_path) -> TableDefinition:
    """Read the table definitions found at *table_definitions_path*."""
    loaded = utils.load_config(table_definitions_path)
    return loaded
2 changes: 1 addition & 1 deletion tests/data/test_allcorrect_definition.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
geopackage_validator_version: 0.5.9
projection: 28992,
projection: 28992
tables:
- name: test_allcorrect
geometry_column: geom
Expand Down
Loading
Loading