@cli.command(
    name="generate-gpkg",
    help=(
        "Generate an empty geopackage based on a geopackage validator table definition. Provide the table definitions "
        "with the --table-definitions-path parameter. The generated geopackage will be valid except for the fact that "
        "it will be empty."
    ),
)
@click.option(
    "--gpkg-path",
    envvar="GPKG_PATH",
    required=False,
    default=None,
    show_envvar=True,
    help="Path pointing to the geopackage.gpkg file",
    type=click.types.Path(
        file_okay=False,
        dir_okay=False,
        readable=False,
        writable=False,
        resolve_path=False,
        allow_dash=False,
    ),
)
@click.option(
    "-t",
    "--table-definitions-path",
    show_envvar=True,
    required=True,
    default=None,
    help=(
        "Path pointing to the table-definitions JSON or YAML file (generate this file by calling the "
        "generate-definitions command)"
    ),
    type=click.types.Path(
        exists=False,
        file_okay=True,
        dir_okay=False,
        readable=True,
        writable=False,
        allow_dash=False,
    ),
)
@click.option(
    "--validations-path",
    show_envvar=True,
    required=False,
    default=None,
    envvar="VALIDATIONS_FILE",
    help=(
        "Path pointing to the set of validations to run. If validations-path and validations are not given, validate "
        "runs all validations"
    ),
    type=click.types.Path(
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        writable=False,
        allow_dash=False,
    ),
)
@click.option(
    "--validations",
    show_envvar=True,
    required=False,
    default="",
    envvar="VALIDATIONS",
    help=(
        "Comma-separated list of validations to run (e.g. --validations RQ1,RQ2,RQ3). If validations-path and "
        "validations are not given, validate runs all validations"
    ),
)
@click_log.simple_verbosity_option(logger)
@click.option(
    "--yaml",
    required=False,
    is_flag=True,
    help="Output yaml",
)
@click.option(
    "--validate",
    "do_validate",
    required=False,
    is_flag=True,
    help="Validate after generation",
    default=False,
)
def geopackage_validator_command_generate_gpkg(
    gpkg_path,
    table_definitions_path,
    validations_path,
    validations,
    yaml,
    do_validate,
):
    """Create an empty geopackage from table definitions, optionally validating it.

    The geopackage is written to ``gpkg_path`` from the definitions found at
    ``table_definitions_path``. Validation runs afterwards when ``--validate``
    is given, or implicitly when ``--validations`` or ``--validations-path``
    selects a set of validations. The validation report is emitted through
    ``output.log_output`` (as YAML when ``yaml`` is set).

    Exits with status 1 when ``gpkg_path`` is missing or generation fails.
    """
    if gpkg_path is None:
        logger.error("Give a valid --gpkg-path or (/vsi)s3 location")
        sys.exit(1)

    try:
        generate.generate_empty_geopackage(gpkg_path, table_definitions_path)
    except Exception:
        # Top-level CLI boundary: log the traceback and turn it into an exit code.
        logger.exception("Error while generating geopackage")
        sys.exit(1)

    # Selecting validations explicitly implies that validation is wanted.
    explicit_selection = bool(validations) or validations_path is not None
    if not (do_validate or explicit_selection):
        return

    if not explicit_selection:
        # Default to every known validation except RQ2.
        # NOTE(review): RQ2 is presumably skipped because the generated
        # geopackage is intentionally empty -- confirm against the RQ2 rule.
        validations = ",".join(
            code
            for code in validate.get_validation_descriptions(False).keys()
            if code != "RQ2"
        )

    start_time = datetime.now()
    duration_start = time.monotonic()
    results, validations_executed, success = validate.validate(
        gpkg_path,
        table_definitions_path,
        validations_path,
        validations,
    )
    duration_seconds = time.monotonic() - duration_start

    output.log_output(
        filename=gpkg_path,
        results=results,
        validations_executed=validations_executed,
        start_time=start_time,
        duration_seconds=duration_seconds,
        success=success,
        as_yaml=yaml,
    )
# --- geopackage_validator/generate.py -------------------------------------

# Geometry type names as they appear in table definitions, mapped to the OGR
# geometry type constant used when creating a layer.
OGR_GEOMETRY_TYPES = {
    "POINT": ogr.wkbPoint,
    "LINESTRING": ogr.wkbLineString,
    "POLYGON": ogr.wkbPolygon,
    "MULTIPOINT": ogr.wkbMultiPoint,
    "MULTILINESTRING": ogr.wkbMultiLineString,
    "MULTIPOLYGON": ogr.wkbMultiPolygon,
}


# Column type names as they appear in table definitions, mapped to the value
# passed to ogr.FieldDefn.
# NOTE(review): the geometry entries map to wkb* geometry codes rather than
# OFT* field codes, so a non-geometry column declared with a geometry type
# name would get whichever OFT type shares that integer -- confirm intended.
OGR_FIELD_TYPES = dict(
    **OGR_GEOMETRY_TYPES,
    **{
        "DATE": ogr.OFTDate,
        "DATETIME": ogr.OFTDateTime,
        "TIME": ogr.OFTTime,
        "INTEGER": ogr.OFTInteger,
        "INTEGER64": ogr.OFTInteger64,
        "REAL": ogr.OFTReal,
        "STRING": ogr.OFTString,
        "BINARY": ogr.OFTBinary,
        "INTEGERLIST": ogr.OFTIntegerList,
        "INTEGER64LIST": ogr.OFTInteger64List,
        "REALLIST": ogr.OFTRealList,
        "STRINGLIST": ogr.OFTStringList,
        "WIDESTRING": ogr.OFTWideString,
        "WIDESTRINGLIST": ogr.OFTWideStringList,
    },
)


def _column_type(column: Dict[str, str]) -> str:
    """Return the type name of a column definition.

    The type is read from the ``"type"`` key, falling back to ``"data_type"``.
    Raises ``KeyError`` when neither key is present.
    """
    if "type" in column:
        return column["type"]
    return column["data_type"]


def generate_geopackage_from_table_definition(
    dataset: DataSource, table_definition: TableDefinition
):
    """Create one empty layer in *dataset* for every table in *table_definition*.

    ``table_definition["projection"]`` is interpreted as an EPSG code and used
    as the spatial reference of every layer. Each entry of
    ``table_definition["tables"]`` must provide ``name``, ``geometry_column``
    and ``columns`` (each column having ``name`` and ``type`` or ``data_type``).
    The geometry column determines the layer's geometry type; every other
    column becomes a field on the layer.

    Raises:
        ValueError: when a table's columns are incomplete, its geometry type or
            one of its field types is unknown, or the layer cannot be created.
    """
    projection = int(table_definition["projection"])
    tables = table_definition["tables"]

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(projection)

    for table in tables:
        try:
            columns = {c["name"]: _column_type(c) for c in table["columns"]}
        except KeyError as err:
            raise ValueError(
                "Table definition is incomplete or its version is too old"
            ) from err

        try:
            geometry_column = table["geometry_column"]
            geometry_type = OGR_GEOMETRY_TYPES[columns[geometry_column]]
        except KeyError as err:
            raise ValueError(
                f"Unknown geometry type for table {table['name']}"
            ) from err

        # Resolve every field before touching the dataset, so an unknown type
        # does not leave a layer without its fields behind.
        fields = []
        for column in table["columns"]:
            if column["name"] == geometry_column:
                continue
            try:
                field_type = OGR_FIELD_TYPES[_column_type(column)]
            except KeyError as err:
                raise ValueError(
                    f"Unknown field type for table {table['name']}"
                ) from err
            fields.append(ogr.FieldDefn(column["name"], field_type))

        layer = dataset.CreateLayer(table["name"], srs=srs, geom_type=geometry_type)
        if layer is None:
            # Without GDAL exceptions enabled a failed CreateLayer returns None.
            raise ValueError(f"Could not create layer for table {table['name']}")

        layer.CreateFields(fields)


def generate_empty_geopackage(gpkg_path: str, table_definition_path: str):
    """Create a geopackage at *gpkg_path* with the empty tables defined in *table_definition_path*.

    Raises:
        RuntimeError: when the geopackage cannot be created at *gpkg_path*.
        ValueError: when the table definitions cannot be turned into layers.
    """
    utils.check_gdal_version()

    # Load the definitions first: an unreadable definitions file should not
    # leave a freshly created, empty geopackage on disk.
    table_definition = load_table_definitions(table_definition_path)

    dataset = utils.create_dataset(gpkg_path)
    if dataset is None:
        raise RuntimeError(f"Could not create geopackage at {gpkg_path}")

    return generate_geopackage_from_table_definition(dataset, table_definition)


def load_table_definitions(table_definitions_path) -> TableDefinition:
    """Load the table definitions stored at *table_definitions_path* via ``utils.load_config``."""
    return utils.load_config(table_definitions_path)


# --- geopackage_validator/utils.py ----------------------------------------


def create_dataset(filename=None, error_handler=None):
    """Create a new GPKG data source at *filename*.

    When *error_handler* is given, GDAL exceptions are enabled, the handler is
    pushed onto GDAL's error handler stack, and a creation failure is reported
    to it as ``error_handler(gdal.CE_Failure, 0, message)``. Without a handler,
    an exception raised by GDAL propagates to the caller.

    Returns:
        The created dataset, with a ``silence_gdal`` context manager attached
        that temporarily pops the error handler, or ``None`` when creation
        failed without raising.
    """
    if error_handler is not None:
        gdal.UseExceptions()
        gdal.PushErrorHandler(error_handler)

    @contextmanager
    def silence_gdal():
        if error_handler is None:
            warnings.warn("cannot silence gdal without error handler")
            # A @contextmanager generator must yield exactly once; returning
            # before the yield makes the ``with`` statement itself fail.
            yield
            return
        gdal.PopErrorHandler()
        try:
            yield
        finally:
            # Restore the handler even when the silenced body raises.
            gdal.PushErrorHandler(error_handler)

    driver = ogr.GetDriverByName("GPKG")

    dataset = None
    try:
        dataset = driver.CreateDataSource(filename)
    except Exception as e:
        if error_handler is None:
            # Nobody to report to: surface the real error instead of failing
            # with "'NoneType' object is not callable".
            raise
        error_handler(gdal.CE_Failure, 0, e.args[0] if e.args else str(e))

    if dataset is not None:
        dataset.silence_gdal = silence_gdal

    return dataset
f"{test_gpkg_path}", + "--table-definitions-path", + "tests/data/test_allcorrect_definition.yml", + ], + ) + + print(result.stdout) + + assert result.exit_code == 0 + + result = runner.invoke( + cli, ["generate-definitions", "--gpkg-path", f"{test_gpkg_path}"] + ) + + expected = json.loads( + Path("tests/data/test_allcorrect_definition.json").read_text() + ) + validation_result = json.loads(result.output) + + del expected["geopackage_validator_version"] + del validation_result["geopackage_validator_version"] + + assert result.exit_code == 0 + assert validation_result == expected + + def test_generate_definitions_with_ndimension_geometries(): runner = CliRunner() result = runner.invoke( diff --git a/tests/validations/test_table_definitions_check.py b/tests/validations/test_table_definitions_check.py index 6efbbc4..8cc2c77 100644 --- a/tests/validations/test_table_definitions_check.py +++ b/tests/validations/test_table_definitions_check.py @@ -1,5 +1,7 @@ -from geopackage_validator.generate import generate_definitions_for_path -from geopackage_validator.validate import load_table_definitions +from geopackage_validator.generate import ( + generate_definitions_for_path, + load_table_definitions, +) from geopackage_validator.validations.table_definitions_check import ( TableDefinitionValidator, TableDefinitionValidatorV0,