diff --git a/geopackage_validator/cli.py b/geopackage_validator/cli.py index e0ae4c4..c32684f 100644 --- a/geopackage_validator/cli.py +++ b/geopackage_validator/cli.py @@ -2,10 +2,10 @@ """Main CLI entry for the Geopackage validator tool.""" # Setup logging before package imports. import logging -from datetime import datetime -from pathlib import Path import sys import time +from datetime import datetime +from pathlib import Path import click import click_log @@ -302,6 +302,13 @@ def geopackage_validator_command( is_flag=True, help="Output yaml", ) +@click.option( + "--with-indexes-and-fks", + default=False, + required=False, + is_flag=True, + help="Include indexes (and unique constraints) and foreign keys in the definitions", +) @click.option( "--s3-endpoint-no-protocol", envvar="S3_ENDPOINT_NO_PROTOCOL", @@ -367,17 +374,18 @@ def geopackage_validator_command( ) @click_log.simple_verbosity_option(logger) def geopackage_validator_command_generate_table_definitions( - gpkg_path, - yaml, - s3_endpoint_no_protocol, - s3_access_key, - s3_secret_key, - s3_bucket, - s3_key, - s3_secure, - s3_virtual_hosting, - s3_signing_region, - s3_no_sign_request, + gpkg_path: Path, + yaml: bool, + with_indexes_and_fks: bool, + s3_endpoint_no_protocol: str, + s3_access_key: str, + s3_secret_key: str, + s3_bucket: str, + s3_key: str, + s3_secure: bool, + s3_virtual_hosting: bool, + s3_signing_region: str, + s3_no_sign_request: bool, ): gpkg_path_not_exists = s3_endpoint_no_protocol is None and ( gpkg_path is None @@ -399,7 +407,9 @@ def geopackage_validator_command_generate_table_definitions( s3_signing_region=s3_signing_region, s3_no_sign_request=s3_no_sign_request, ) - definitionlist = generate.generate_definitions_for_path(gpkg_path) + definitionlist = generate.generate_definitions_for_path( + gpkg_path, with_indexes_and_fks + ) else: with s3.minio_resource( s3_endpoint_no_protocol, @@ -409,7 +419,9 @@ def geopackage_validator_command_generate_table_definitions( s3_key, s3_secure, ) as localfilename: - definitionlist = generate.generate_definitions_for_path(localfilename) + definitionlist = generate.generate_definitions_for_path( + localfilename, with_indexes_and_fks + ) output.print_output(definitionlist, yaml) except Exception: logger.exception("Error while generating table definitions") diff --git a/geopackage_validator/generate.py b/geopackage_validator/generate.py index 3fbd116..2fcce9b 100644 --- a/geopackage_validator/generate.py +++ b/geopackage_validator/generate.py @@ -1,20 +1,24 @@ import logging -from typing import List +from typing import List, Optional, Dict -from osgeo.ogr import DataSource +from osgeo.ogr import DataSource, Layer from geopackage_validator import __version__ from geopackage_validator import utils from geopackage_validator.models import ( ColumnDefinition, + ColumnMapping, + ForeignKeyDefinition, + IndexDefinition, TableDefinition, TablesDefinition, ) +from geopackage_validator.utils import group_by logger = logging.getLogger(__name__) -def columns_definition(table, geometry_column) -> List[ColumnDefinition]: +def column_definitions(table, geometry_column) -> List[ColumnDefinition]: layer_definition = table.GetLayerDefn() assert layer_definition, f'Invalid Layer {"" if not table else table.GetName()}' @@ -40,8 +44,85 @@ def fid_column_definition(table) -> List[ColumnDefinition]: return [ColumnDefinition(name=name, type="INTEGER")] +def get_index_definitions( + dataset: DataSource, table_name: str +) -> List[IndexDefinition]: + index_definitions: List[IndexDefinition] = [] + 
index_list = dataset.ExecuteSQL(
+        f"select name, \"unique\", origin from pragma_index_list('{table_name}');"
+    )
+    pk_in_index_list = False
+    for index_listing in index_list:
+        pk_in_index_list = pk_in_index_list or index_listing["origin"] == "pk"
+        index_definitions.append(
+            IndexDefinition(
+                columns=tuple(get_index_column_names(dataset, index_listing["name"])),
+                unique=bool(int(index_listing["unique"])),
+            )
+        )
+    dataset.ReleaseResultSet(index_list)
+    index_definitions = sorted(index_definitions, key=lambda d: d.columns)
+
+    if not pk_in_index_list:
+        pk_index = get_pk_index(dataset, table_name)
+        if pk_index is not None:
+            index_definitions.insert(0, pk_index)
+
+    return index_definitions
+
+
+def get_pk_index(dataset: DataSource, table_name: str) -> Optional[IndexDefinition]:
+    pk_columns = dataset.ExecuteSQL(
+        f"select name from pragma_table_info('{table_name}') where pk;"
+    )
+    column_names = tuple(r["name"] for r in pk_columns)
+    if len(column_names) == 0:
+        return None
+    return IndexDefinition(columns=column_names, unique=True)
+
+
+def get_index_column_names(dataset: DataSource, index_name: str) -> List[str]:
+    index_info = dataset.ExecuteSQL(
+        f"select name from pragma_index_info('{index_name}');"
+    )
+    column_names: List[str] = [r["name"] for r in index_info]
+    dataset.ReleaseResultSet(index_info)
+    return column_names
 
-def generate_table_definitions(dataset: DataSource) -> TablesDefinition:
+
+def get_foreign_key_definitions(dataset: DataSource, table_name: str) -> List[ForeignKeyDefinition]:
+    foreign_key_list = dataset.ExecuteSQL(
+        f'select id, seq, "table", "from", "to" from pragma_foreign_key_list(\'{table_name}\');'
+    )
+    foreign_key_definitions: List[ForeignKeyDefinition] = []
+    for foreign_key_listing in group_by(foreign_key_list, lambda r: r["id"]):
+        table: str = ""
+        columns: Dict[str, str] = {}
+        for column_reference in foreign_key_listing:
+            table = column_reference["table"]
+            to = column_reference["to"]
+            if to is None:
+                pk_index = get_pk_index(dataset, column_reference["table"])
+                to = pk_index.columns[int(column_reference["seq"])]
+            columns[column_reference["from"]] = to
+        foreign_key_definitions.append(
+            ForeignKeyDefinition(
+                table=table,
+                columns=tuple(
+                    ColumnMapping(src=c[0], dst=c[1]) for c in columns.items()
+                ),
+            )
+        )
+    foreign_key_definitions = sorted(
+        foreign_key_definitions, key=lambda fk: (fk.table, tuple(c.src for c in fk.columns))
+    )
+    dataset.ReleaseResultSet(foreign_key_list)
+    return foreign_key_definitions
+
+
+def generate_table_definitions(
+    dataset: DataSource, with_indexes_and_fks: bool = False
+) -> TablesDefinition:
     projections = set()
     table_geometry_types = {
         table_name: geometry_type_name
@@ -50,6 +131,7 @@ def generate_table_definitions(dataset: DataSource) -> TablesDefinition:
     table_list: List[TableDefinition] = []
     for table in dataset:
+        table: Layer
         geo_column_name = table.GetGeometryColumn()
         if geo_column_name == "":
             continue
@@ -59,11 +141,21 @@
             "name": geo_column_name,
             "type": table_geometry_types[table_name],
         }
+        columns = tuple(column_definitions(table, geometry_column))
+
+        indexes = None
+        foreign_keys = None
+        if with_indexes_and_fks:
+            indexes = tuple(get_index_definitions(dataset, table_name))
+            foreign_keys = tuple(get_foreign_key_definitions(dataset, table_name))
+
         table_list.append(
             TableDefinition(
                 name=table_name,
                 geometry_column=geo_column_name,
-                columns=columns_definition(table, geometry_column),
+                columns=columns,
+                indexes=indexes,
+                foreign_keys=foreign_keys,
             )
         )
 
@@ -74,16 +166,21 @@
     result = TablesDefinition(
         geopackage_validator_version=__version__,
         projection=int(projections.pop()),
-        tables=table_list,
+        tables=tuple(sorted(table_list, key=lambda t: t.name)),
     )
 
     return result
 
 
-def generate_definitions_for_path(gpkg_path: str) -> TablesDefinition:
-    """Starts the geopackage validation."""
+def get_datasource_for_path(gpkg_path: str, error_handler=None) -> DataSource:
+    """Open the geopackage at gpkg_path as an OGR DataSource."""
     utils.check_gdal_version()
+    return utils.open_dataset(gpkg_path, error_handler)
 
-    dataset = utils.open_dataset(gpkg_path)
-    return generate_table_definitions(dataset)
+
+def generate_definitions_for_path(
+    gpkg_path: str, with_indexes_and_fks: bool = False
+) -> TablesDefinition:
+    return generate_table_definitions(
+        get_datasource_for_path(gpkg_path), with_indexes_and_fks
+    )
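A minimal usage sketch (not part of the diff) of the new `with_indexes_and_fks` parameter; the geopackage path is a hypothetical placeholder:

```python
# Sketch: generate definitions with index/FK extraction enabled. When the flag
# is off, table.indexes and table.foreign_keys stay None and are omitted from output.
from geopackage_validator.generate import generate_definitions_for_path

definitions = generate_definitions_for_path("my_data.gpkg", with_indexes_and_fks=True)
for table in definitions.tables:
    print(table.name, table.indexes, table.foreign_keys)
```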
diff --git a/geopackage_validator/models.py b/geopackage_validator/models.py
index f3e3cbf..07d6edd 100644
--- a/geopackage_validator/models.py
+++ b/geopackage_validator/models.py
@@ -1,7 +1,8 @@
 import copy
-from typing import List, Optional
+from typing import Optional, Tuple
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field, field_validator, ConfigDict
+from semver import Version
 
 
 class Named(BaseModel):
@@ -9,39 +10,86 @@ class Named(BaseModel):
 
 
 class ColumnDefinition(Named):
+    model_config = ConfigDict(frozen=True)
+
     type: str
 
 
+class IndexDefinition(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
+    columns: Tuple[str, ...] = Field(min_length=1)
+    unique: bool = False
+
+
+class ColumnMapping(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
+    src: str
+    dst: str
+
+
+class ForeignKeyDefinition(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
+    table: str = Field(min_length=1)
+    columns: Tuple[ColumnMapping, ...] = Field(min_length=1)
+
+    @field_validator("columns")
+    @classmethod
+    def unique_src_columns(
+        cls, v: Tuple[ColumnMapping, ...]
+    ) -> Tuple[ColumnMapping, ...]:
+        src_columns = set()
+        for c in v:
+            if c.src in src_columns:
+                raise ValueError(f"Duplicate src column detected: {c.src}")
+            src_columns.add(c.src)
+        return v
+
+
 class TableDefinition(Named):
+    model_config = ConfigDict(frozen=True)
+
     geometry_column: str = "geom"
-    columns: List[ColumnDefinition] = []
+    columns: Tuple[ColumnDefinition, ...] = tuple()
+    """Ordered as in the table (left to right), but with FID and geometry columns always first.
+    (This order is not validated.)"""
+    indexes: Optional[Tuple[IndexDefinition, ...]] = None
+    """None means: don't validate. Empty tuple means: there should be no indexes."""
+    foreign_keys: Optional[Tuple[ForeignKeyDefinition, ...]] = None
+    """None means: don't validate. Empty tuple means: there should be no foreign keys."""
 
 
 class TablesDefinition(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
     geopackage_validator_version: str = "0"
     projection: Optional[int]
-    tables: List[TableDefinition]
+    tables: Tuple[TableDefinition, ...]
+    """Ordered by table name"""
+
+    def with_indexes_and_fks(self) -> bool:
+        for table in self.tables:
+            if table.indexes is not None or table.foreign_keys is not None:
+                return True
+        return False
 
 
-def migrate_tables_definition(old: dict) -> dict:
+def migrate_tables_definition(original: dict) -> dict:
     """Migrate a possibly old tables definition to new schema/model"""
-    version = old.get("geopackage_validator_version", "0")
-    # older versions where not versioned (?), so assuming "0" if there is no version
-    version_tuple = tuple(int(v) for v in version.split("."))
-    if version_tuple == (0, 0, 0, "-dev") or version_tuple > (
-        0,
-        5,
-        8,
-    ):  # no changes after 0.5.8
-        return old
-    new = copy.deepcopy(old)
-    if version_tuple <= (
-        0,
-        5,
-        8,
-    ):  # until 0.5.8, column's "type" property was named "data_type"
-        for t in new.get("tables", []):
+    # older versions were not versioned (?), so assuming "0.0.0" if there is no version
+    version = Version.parse(original.get("geopackage_validator_version", "0.0.0"))
+    if version == Version(0, 0, 0, "dev"):
+        return original
+    # nothing changed after v0.5.8
+    if version > Version(0, 5, 8):
+        return original
+    migrated = copy.deepcopy(original)
+    # until and including 0.5.8, column's "type" property was named "data_type"
+    if version <= Version(0, 5, 8):
+        for t in migrated.get("tables", []):
             for c in t.get("columns", []):
                 c["type"] = c["data_type"]
                 del c["data_type"]
-    return new
+    return migrated
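The `frozen=True` configs above are load-bearing: frozen pydantic models are immutable and hashable, which is what lets the comparison code collect definition objects in a `set` and match them by value. A small sketch of that behavior:

```python
from geopackage_validator.models import IndexDefinition

a = IndexDefinition(columns=("x", "y"), unique=False)
b = IndexDefinition(columns=("x", "y"), unique=False)
assert a == b            # compared field by field
assert len({a, b}) == 1  # hashable thanks to ConfigDict(frozen=True)
```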
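The migration now leans on `semver.Version` ordering; note that `Version.parse` requires a full `major.minor.patch` string, which is why the fallback default changed from `"0"` to `"0.0.0"`. A sketch of the comparisons the function relies on:

```python
from semver import Version

assert Version.parse("0.0.0-dev") == Version(0, 0, 0, "dev")
assert Version.parse("0.5.8") <= Version(0, 5, 8)
assert Version.parse("0.5.9") > Version(0, 5, 8)
assert Version.parse("0.5.8-dev") < Version(0, 5, 8)  # prerelease sorts below release
```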
+ """Ordered by table name""" + + def with_indexes_and_fks(self) -> bool: + for table in self.tables: + if table.indexes is not None or table.foreign_keys is not None: + return True + return False -def migrate_tables_definition(old: dict) -> dict: +def migrate_tables_definition(original: dict) -> dict: """Migrate a possibly old tables definition to new schema/model""" - version = old.get("geopackage_validator_version", "0") - # older versions where not versioned (?), so assuming "0" if there is no version - version_tuple = tuple(int(v) for v in version.split(".")) - if version_tuple == (0, 0, 0, "-dev") or version_tuple > ( - 0, - 5, - 8, - ): # no changes after 0.5.8 - return old - new = copy.deepcopy(old) - if version_tuple <= ( - 0, - 5, - 8, - ): # until 0.5.8, column's "type" property was named "data_type" - for t in new.get("tables", []): + # older versions were not versioned (?), so assuming "0.0.0" if there is no version + version = Version.parse(original.get("geopackage_validator_version", "0.0.0")) + if version == Version(0, 0, 0, "dev"): + return original + # nothing changed after v0.5.8 + if version > Version(0, 5, 8): + return original + migrated = copy.deepcopy(original) + # until and including 0.5.8, column's "type" property was named "data_type" + if version <= Version(0, 5, 8): + for t in migrated.get("tables", []): for c in t.get("columns", []): c["type"] = c["data_type"] del c["data_type"] - return new + return migrated diff --git a/geopackage_validator/output.py b/geopackage_validator/output.py index aa2a515..79ceb1b 100644 --- a/geopackage_validator/output.py +++ b/geopackage_validator/output.py @@ -68,7 +68,7 @@ def print_output(python_object, as_yaml, yaml_indent=2): def print_output_pydantic(model: BaseModel, as_yaml: bool, yaml_indent=2): - content = model.model_dump_json(indent=4) + content = model.model_dump_json(indent=4, exclude_none=True) if as_yaml: python_object = yaml.safe_load(content) content = yaml.dump(python_object, indent=yaml_indent, sort_keys=False) diff --git a/geopackage_validator/utils.py b/geopackage_validator/utils.py index 403b392..b64132c 100644 --- a/geopackage_validator/utils.py +++ b/geopackage_validator/utils.py @@ -1,13 +1,14 @@ -import sys +import json import os +import sys import warnings from contextlib import contextmanager from functools import lru_cache - from pathlib import Path -import json +from typing import List, Tuple, Iterable, Callable import yaml +from osgeo.ogr import DataSource try: from osgeo import ogr, osr, gdal @@ -41,7 +42,7 @@ } -def open_dataset(filename=None, error_handler=None): +def open_dataset(filename: str = None, error_handler: Callable = None) -> DataSource: if error_handler is not None: gdal.UseExceptions() gdal.PushErrorHandler(error_handler) @@ -77,7 +78,7 @@ def check_gdal_version(): @lru_cache(None) -def dataset_geometry_tables(dataset): +def dataset_geometry_tables(dataset: ogr.DataSource) -> List[Tuple[str, str, str]]: """ Generate a list of geometry type names from the gpkg_geometry_columns table. 
""" @@ -110,3 +111,21 @@ def set_gdal_env(**kwargs): v = gdal_argument_mapping.get(v, v) if gdal_env_parameter not in os.environ: gdal.SetConfigOption(gdal_env_parameter, v) + + +def group_by( + iterable: Iterable[any], grouper: Callable[[any], any] +) -> Iterable[List[any]]: + first = True + current_id: any = None + group: List[any] = [] + for item in iterable: + group_id = grouper(item) + if not first and group_id != current_id: + yield group + group = [] + current_id = group_id + group.append(item) + first = False + if len(group) > 0: + yield group diff --git a/geopackage_validator/validations/table_definitions_check.py b/geopackage_validator/validations/table_definitions_check.py index 344a239..99aa0bd 100644 --- a/geopackage_validator/validations/table_definitions_check.py +++ b/geopackage_validator/validations/table_definitions_check.py @@ -1,5 +1,8 @@ from typing import Iterable, List, Dict, Set, Tuple +from osgeo.ogr import DataSource +from pydantic import BaseModel + from geopackage_validator.generate import generate_table_definitions from geopackage_validator.models import ( Named, @@ -11,7 +14,7 @@ def prepare_comparison( - new_: List[Named], old_: List[Named] + new_: Iterable[Named], old_: Iterable[Named] ) -> Tuple[Dict[str, Named], Dict[str, Named], str, str, Set[str]]: new_dict = {item.name: item for item in new_} old_dict = {item.name: item for item in old_} @@ -22,8 +25,8 @@ def prepare_comparison( def compare_column_definitions( - new_columns: List[ColumnDefinition], - old_columns: List[ColumnDefinition], + new_columns: Iterable[ColumnDefinition], + old_columns: Iterable[ColumnDefinition], table_name: str, ) -> List[str]: assert old_columns, f"table {table_name} in table definition misses columns" @@ -48,10 +51,37 @@ def compare_column_definitions( return result + wrong_types +def compare_object_lists( + current: Iterable[object], + expected: Iterable[object], + table_name: str, +) -> List[str]: + messages: List[str] = [] + added = set(current) + for o in expected: + if o in added: + added.remove(o) + else: + messages.append( + f"table {table_name} misses {o.__class__.__name__}: {o_repr_oneline(o)}" + ) + for o in added: + messages.append( + f"table {table_name} has extra {o.__class__.__name__}: {o_repr_oneline(o)}" + ) + return messages + + +def o_repr_oneline(o: object) -> str: + r = repr(o) if not isinstance(o, BaseModel) else o.model_dump_json(indent=0) + return r.replace("\n", " ") + + def compare_table_definitions( new_definition: TablesDefinition, old_definition: TablesDefinition, compare_columns=True, + compare_indexes_and_fks=True, ) -> List[str]: results = [] @@ -84,10 +114,43 @@ def compare_table_definitions( results += compare_column_definitions( new_table.columns, old_table.columns, table_name ) + if compare_indexes_and_fks: + if old_table.indexes is None: + results.append( + f"index checking is enabled but {table_name} misses the list" + ) + else: + results += compare_object_lists( + new_table.indexes, old_table.indexes, table_name + ) + if old_table.foreign_keys is None: + results.append( + f"foreign keys checking is enabled but {table_name} misses the list" + ) + else: + results += compare_object_lists( + new_table.foreign_keys, old_table.foreign_keys, table_name + ) return results +def get_foreign_key_violations(datasource: DataSource) -> List[str]: + # This used to be a per-table operation. 
diff --git a/geopackage_validator/validations/validator.py b/geopackage_validator/validations/validator.py
index 854bea5..2df7c38 100644
--- a/geopackage_validator/validations/validator.py
+++ b/geopackage_validator/validations/validator.py
@@ -2,6 +2,8 @@
 from abc import ABC, abstractmethod
 from enum import IntEnum
 
+from osgeo.ogr import DataSource
+
 
 class ValidationLevel(IntEnum):
     UNKNOWN_ERROR = 0
@@ -58,7 +60,7 @@ class Validator(ABC):
     message: str
 
     def __init__(self, dataset, **kwargs):
-        self.dataset = dataset
+        self.dataset: DataSource = dataset
 
     def validate(self) -> Dict[str, List[str]]:
         """Run validation at geopackage."""
diff --git a/pyproject.toml b/pyproject.toml
index f81d430..364bb18 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
     "minio",
    "pydantic >= 
2.9", "pyyaml", + "semver >= 3.0.2", ] requires-python = ">=3.6" diff --git a/tests/data/test_allcorrect_with_indexes_and_fks.gpkg b/tests/data/test_allcorrect_with_indexes_and_fks.gpkg new file mode 100644 index 0000000..6899d96 Binary files /dev/null and b/tests/data/test_allcorrect_with_indexes_and_fks.gpkg differ diff --git a/tests/data/test_allcorrect_with_indexes_and_fks_definition.yml b/tests/data/test_allcorrect_with_indexes_and_fks_definition.yml new file mode 100644 index 0000000..0dac533 --- /dev/null +++ b/tests/data/test_allcorrect_with_indexes_and_fks_definition.yml @@ -0,0 +1,87 @@ +geopackage_validator_version: 0.0.0-dev +projection: 28992 +tables: +- name: test_allcorrect + geometry_column: geom + columns: + - name: fid + type: INTEGER + - name: geom + type: POLYGON + - name: foreign_id + type: INTEGER64 + indexes: + - columns: + - fid + unique: true + foreign_keys: + - table: test_foreign + columns: + - src: foreign_id + dst: id +- name: test_foreign + geometry_column: geom + columns: + - name: id + type: INTEGER + - name: geom + type: POINT + - name: name + type: STRING + - name: x + type: INTEGER64 + - name: y + type: INTEGER64 + indexes: + - columns: + - id + unique: true + - columns: + - name + unique: true + - columns: + - x + - y + unique: false + foreign_keys: [] +- name: test_multi_fk + geometry_column: geom + columns: + - name: geom + type: POINT + - name: allcorrect_id + type: INTEGER64 + - name: other_id + type: INTEGER64 + - name: other_name + type: STRING + indexes: [] + foreign_keys: + - table: test_allcorrect + columns: + - src: allcorrect_id + dst: fid + - table: test_other + columns: + - src: other_id + dst: id + - src: other_name + dst: name +- name: test_other + geometry_column: geom + columns: + - name: id + type: INTEGER + - name: geom + type: POINT + - name: name + type: STRING + indexes: + - columns: + - id + unique: true + - columns: + - id + - name + unique: true + foreign_keys: [] diff --git a/tests/data/test_changed_indexes_and_fks_definition.yml b/tests/data/test_changed_indexes_and_fks_definition.yml new file mode 100644 index 0000000..ac17ee7 --- /dev/null +++ b/tests/data/test_changed_indexes_and_fks_definition.yml @@ -0,0 +1,87 @@ +geopackage_validator_version: 0.0.0-dev +projection: 28992 +tables: # purposely shuffled. order is not validated +- name: test_allcorrect + geometry_column: geom + columns: # purposely shuffled. order is not validated + - name: geom + type: POLYGON + - name: foreign_id + type: INTEGER64 + - name: fid + type: INTEGER + indexes: + - columns: + - foo # fid + unique: true + foreign_keys: + - table: unexisting # test_foreign + columns: + - src: foreign_id + dst: id +- name: test_foreign + geometry_column: geom + columns: + - name: id + type: INTEGER + - name: geom + type: POINT + - name: name + type: STRING + - name: x + type: INTEGER64 + - name: y + type: INTEGER64 + indexes: + - columns: + - id + unique: true + - columns: + - name + unique: false # true + - columns: + - x + - y + unique: true # false + foreign_keys: [] +- name: test_other + geometry_column: geom + columns: + - name: id + type: INTEGER + - name: geom + type: POINT + - name: name + type: STRING + indexes: # purposely shuffled. 
order is not validated + - columns: + - id + - name + unique: true + - columns: + - id + unique: true + foreign_keys: [] +- name: test_multi_fk + geometry_column: geom + columns: + - name: geom + type: POINT + - name: allcorrect_id + type: INTEGER64 + - name: other_id + type: INTEGER64 + - name: other_name + type: STRING + indexes: [] + foreign_keys: +# - table: test_allcorrect +# columns: +# - src: allcorrect_id +# dst: fid + - table: test_other + columns: + - src: other_id + dst: id + - src: other_name + dst: name diff --git a/tests/data/test_foreign_key_violation.gpkg b/tests/data/test_foreign_key_violation.gpkg new file mode 100644 index 0000000..f8777f2 Binary files /dev/null and b/tests/data/test_foreign_key_violation.gpkg differ diff --git a/tests/test_cli.py b/tests/test_cli.py index 97cd58a..d176548 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,6 @@ import json +import pytest from click.testing import CliRunner from geopackage_validator import __version__ @@ -58,6 +59,130 @@ def test_generate_definitions_with_gpkg(): assert json.loads(result.output) == expected +@pytest.mark.parametrize( + "gpkg_path, expected", + [ + ( + "tests/data/test_allcorrect.gpkg", + { + "geopackage_validator_version": __version__, + "projection": 28992, + "tables": [ + { + "name": "test_allcorrect", + "geometry_column": "geom", + "columns": [ + {"name": "fid", "type": "INTEGER"}, + {"name": "geom", "type": "POLYGON"}, + ], + "indexes": [{"columns": ["fid"], "unique": True}], + "foreign_keys": [], + } + ], + }, + ), + ( + "tests/data/test_allcorrect_with_indexes_and_fks.gpkg", + { + "geopackage_validator_version": "0.0.0-dev", + "projection": 28992, + "tables": [ + { + "name": "test_allcorrect", + "geometry_column": "geom", + "columns": [ + {"name": "fid", "type": "INTEGER"}, # fid + {"name": "geom", "type": "POLYGON"}, # geom + {"name": "foreign_id", "type": "INTEGER64"}, + ], + "indexes": [ + {"columns": ["fid"], "unique": True}, # PK + ], + "foreign_keys": [ + { + "table": "test_foreign", + "columns": [{"src": "foreign_id", "dst": "id"}], + } + ], + }, + { + "name": "test_foreign", + "geometry_column": "geom", + "columns": [ + {"name": "id", "type": "INTEGER"}, # fid + {"name": "geom", "type": "POINT"}, # geom + {"name": "name", "type": "STRING"}, + {"name": "x", "type": "INTEGER64"}, + {"name": "y", "type": "INTEGER64"}, + ], + "indexes": [ + {"columns": ["id"], "unique": True}, # PK + {"columns": ["name"], "unique": True}, # n comes before x + {"columns": ["x", "y"], "unique": False}, + ], + "foreign_keys": [], + }, + { + "name": "test_multi_fk", + "geometry_column": "geom", + "columns": [ + {"name": "geom", "type": "POINT"}, + {"name": "allcorrect_id", "type": "INTEGER64"}, + {"name": "other_id", "type": "INTEGER64"}, + {"name": "other_name", "type": "STRING"}, + ], + "indexes": [], + "foreign_keys": [ + { + "table": "test_allcorrect", + "columns": [{"src": "allcorrect_id", "dst": "fid"}], + }, + { + "table": "test_other", + "columns": [ + {"src": "other_id", "dst": "id"}, + {"src": "other_name", "dst": "name"}, + ], + }, + ], + }, + { + "name": "test_other", + "geometry_column": "geom", + "columns": [ + {"name": "id", "type": "INTEGER"}, # fid + {"name": "geom", "type": "POINT"}, # geom + {"name": "name", "type": "STRING"}, + ], + "foreign_keys": [], + "indexes": [ + {"columns": ["id"], "unique": True}, # PK + {"columns": ["id", "name"], "unique": True}, + ], + }, + ], + }, + ), + ], +) +def test_generate_definitions_with_indexes_and_fks(gpkg_path: str, expected: dict): + runner 
= CliRunner() + result = runner.invoke( + cli, + [ + "generate-definitions", + "--gpkg-path", + gpkg_path, + "--with-indexes-and-fks", + ], + ) + + if result.exit_code != 0: + print(result.output) + assert result.exit_code == 0 + assert json.loads(result.output) == expected + + def test_generate_definitions_with_ndimension_geometries(): runner = CliRunner() result = runner.invoke( @@ -84,7 +209,7 @@ def test_generate_definitions_with_ndimension_geometries(): ], }, { - "name": "test_dimensions4", + "name": "test_dimensions3_correct", "geometry_column": "geom", "columns": [ {"name": "fid", "type": "INTEGER"}, @@ -92,7 +217,7 @@ def test_generate_definitions_with_ndimension_geometries(): ], }, { - "name": "test_dimensions4_correct", + "name": "test_dimensions4", "geometry_column": "geom", "columns": [ {"name": "fid", "type": "INTEGER"}, @@ -100,7 +225,7 @@ def test_generate_definitions_with_ndimension_geometries(): ], }, { - "name": "test_dimensions3_correct", + "name": "test_dimensions4_correct", "geometry_column": "geom", "columns": [ {"name": "fid", "type": "INTEGER"}, diff --git a/tests/validations/test_table_definitions_check.py b/tests/validations/test_table_definitions_check.py index d5538bb..0368658 100644 --- a/tests/validations/test_table_definitions_check.py +++ b/tests/validations/test_table_definitions_check.py @@ -1,5 +1,6 @@ from geopackage_validator.generate import ( generate_definitions_for_path, + get_datasource_for_path, ) from geopackage_validator.models import TablesDefinition from geopackage_validator.validate import load_table_definitions @@ -9,19 +10,6 @@ ) -def test_table_definitions_input_is_none(): - current_definitions = generate_definitions_for_path( - "tests/data/test_allcorrect.gpkg" - ) - - diff = TableDefinitionValidator( - None, table_definitions=None - ).check_table_definitions(current_definitions) - - assert len(diff) == 1 - assert diff[0] == "Missing '--table-definitions-path' input" - - def test_table_definitions_check_correct(): current_definitions = generate_definitions_for_path( "tests/data/test_allcorrect.gpkg" @@ -35,7 +23,7 @@ def test_table_definitions_check_correct(): None, table_definitions=table_definitions ).check_table_definitions(current_definitions) - assert len(diff) == 0 + assert diff == [] def test_table_definitions_check_incorrect_geometry(): @@ -178,3 +166,51 @@ def test_legacy_table_definitions_check_table_changed(): "missing table(s): test_allcorrect", "extra table(s): test_different_table", ] + + +def gdal_error_handler_print_err(err_level, err_no, err_msg): + print(f"GDAL error: err_level: {err_level}\nerr_no: {err_no}\nerr_msg: {err_msg}") + + +def test_table_definitions_check_changed_indexes_and_fks(): + datasource = get_datasource_for_path( + "tests/data/test_allcorrect_with_indexes_and_fks.gpkg", + gdal_error_handler_print_err, + ) + table_definitions = load_table_definitions( + "tests/data/test_changed_indexes_and_fks_definition.yml" + ) + + diff = TableDefinitionValidator( + dataset=datasource, table_definitions=table_definitions + ).check() + + assert set(diff) == { + 'table test_allcorrect has extra ForeignKeyDefinition: { "table": "test_foreign", "columns": [ { "src": "foreign_id", "dst": "id" } ] }', + 'table test_allcorrect has extra IndexDefinition: { "columns": [ "fid" ], "unique": true }', + 'table test_allcorrect misses ForeignKeyDefinition: { "table": "unexisting", "columns": [ { "src": "foreign_id", "dst": "id" } ] }', + 'table test_allcorrect misses IndexDefinition: { "columns": [ "foo" ], "unique": true }', + 
'table test_foreign has extra IndexDefinition: { "columns": [ "name" ], "unique": true }', + 'table test_foreign has extra IndexDefinition: { "columns": [ "x", "y" ], "unique": false }', + 'table test_foreign misses IndexDefinition: { "columns": [ "name" ], "unique": false }', + 'table test_foreign misses IndexDefinition: { "columns": [ "x", "y" ], "unique": true }', + 'table test_multi_fk has extra ForeignKeyDefinition: { "table": "test_allcorrect", "columns": [ { "src": "allcorrect_id", "dst": "fid" } ] }', + } + + +def test_table_definitions_check_fk_violation(): + datasource = get_datasource_for_path( + "tests/data/test_foreign_key_violation.gpkg", gdal_error_handler_print_err + ) + table_definitions = load_table_definitions( + "tests/data/test_allcorrect_with_indexes_and_fks_definition.yml" + ) + + diff = TableDefinitionValidator( + dataset=datasource, table_definitions=table_definitions + ).check() + + assert set(diff) == { + "foreign key violation in test_multi_fk for fk 0 to test_other on row 2", + "foreign key violation in test_multi_fk for fk 1 to test_allcorrect on row 2", + }
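Putting the pieces together, a sketch (with a hypothetical path) of the generate-then-validate round trip the new tests exercise:

```python
from geopackage_validator.generate import (
    generate_definitions_for_path,
    get_datasource_for_path,
)
from geopackage_validator.validations.table_definitions_check import (
    TableDefinitionValidator,
)

definitions = generate_definitions_for_path("my_data.gpkg", with_indexes_and_fks=True)
datasource = get_datasource_for_path("my_data.gpkg")

# Because the definitions carry indexes/foreign_keys, with_indexes_and_fks()
# turns on index/FK comparison plus the pragma_foreign_key_check scan.
diff = TableDefinitionValidator(datasource, table_definitions=definitions).check()
assert diff == []  # an unchanged geopackage yields no findings
```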