diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index e3be2de..fba52ef 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -42,7 +42,7 @@ jobs: if [ "${{ matrix.os }}" = "ubuntu-20.04" ] then echo adding ubuntugis unstable - sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable + sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable else echo adding ubuntugis stable sudo apt-add-repository ppa:ubuntugis/ppa @@ -70,4 +70,4 @@ jobs: pytest -vv - name: Test with black run: | - black --check . + black --check --diff . diff --git a/README.md b/README.md index d111626..19d0847 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ The current checks are (see also the 'show-validations' command): | RQ21 | All layer and column names shall not be longer than 57 characters. | | RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. | | RQ23 | Geometry should be valid and simple. | -| RQ24 | Geometry should not be empty (e.g. 'POINT EMPTY', represented as 'POINT(NaN NaN)'). | +| RQ24 | Geometry should not be null or empty (e.g. 'POINT EMPTY', WKT 'POINT(NaN NaN)'). | | RC17 | It is recommended to name all GEOMETRY type columns 'geom'. | | RC18 | It is recommended to give all GEOMETRY type columns the same name. | | RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. | diff --git a/geopackage_validator/validations/geometry_empty_check.py b/geopackage_validator/validations/geometry_empty_check.py index 8457730..cb9f653 100644 --- a/geopackage_validator/validations/geometry_empty_check.py +++ b/geopackage_validator/validations/geometry_empty_check.py @@ -2,32 +2,41 @@ from geopackage_validator.validations import validator from geopackage_validator import utils -SQL_EMPTY_TEMPLATE = """SELECT count(row_id) AS count, row_id +SQL_EMPTY_TEMPLATE = """SELECT type, count(type) AS count, row_id FROM( SELECT + CASE + WHEN ST_IsEmpty("{column_name}") = 1 + THEN 'empty' + WHEN "{column_name}" IS NULL + THEN 'null' + END AS type, cast(rowid AS INTEGER) AS row_id - FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1 -);""" + FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1 OR "{column_name}" IS NULL +) +GROUP BY type;""" -def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, int, int]]: +def query_geometry_empty( + dataset, sql_template +) -> Iterable[Tuple[str, str, str, int, int]]: columns = utils.dataset_geometry_tables(dataset) for table_name, column_name, _ in columns: validations = dataset.ExecuteSQL( sql_template.format(table_name=table_name, column_name=column_name) ) - for count, row_id in validations: - yield table_name, column_name, count, row_id + for type, count, row_id in validations: + yield table_name, column_name, type, count, row_id dataset.ReleaseResultSet(validations) class EmptyGeometryValidator(validator.Validator): - """Geometries should not be empty.""" + """Geometries should not be null or empty.""" code = 24 level = validator.ValidationLevel.ERROR - message = "Found empty geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}" + message = "Found {type} geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}" def check(self) -> Iterable[str]: result = query_geometry_empty(self.dataset, SQL_EMPTY_TEMPLATE) @@ -36,10 +45,11 @@ def check(self) -> Iterable[str]: self.message.format( table_name=table_name, column_name=column_name, + type=type, count=count, count_label=("time" if count == 1 else "times"), row_id=row_id, ) - for table_name, column_name, count, row_id in result + for table_name, column_name, type, count, row_id in result if count > 0 ] diff --git a/tests/data/test_geometry_empty.gpkg b/tests/data/test_geometry_empty.gpkg index 46a35b2..7d1d22b 100755 Binary files a/tests/data/test_geometry_empty.gpkg and b/tests/data/test_geometry_empty.gpkg differ diff --git a/tests/data/test_geometry_null.gpkg b/tests/data/test_geometry_null.gpkg new file mode 100644 index 0000000..2c93c58 Binary files /dev/null and b/tests/data/test_geometry_null.gpkg differ diff --git a/tests/validations/test_geometry_empty_check.py b/tests/validations/test_geometry_empty_check.py index 8dae640..b248e95 100644 --- a/tests/validations/test_geometry_empty_check.py +++ b/tests/validations/test_geometry_empty_check.py @@ -10,7 +10,17 @@ def test_with_gpkg_empty(): assert len(result) == 1 assert ( result[0] - == "Found empty geometry in table: stations, column geom, 45 times, example id 129" + == "Found empty geometry in table: test_geometry_empty, column geom, 45 times, example id 129" + ) + + +def test_with_gpkg_null(): + dataset = open_dataset("tests/data/test_geometry_null.gpkg") + result = list(EmptyGeometryValidator(dataset).check()) + assert len(result) == 1 + assert ( + result[0] + == "Found null geometry in table: test_geometry_null, column geometry, 2 times, example id 1" ) diff --git a/tests/validations/test_geometry_valid_check.py b/tests/validations/test_geometry_valid_check.py index 30299f1..3213eb2 100644 --- a/tests/validations/test_geometry_valid_check.py +++ b/tests/validations/test_geometry_valid_check.py @@ -46,6 +46,13 @@ def test_with_gpkg_empty(): assert len(checks) == 0 +def test_with_gpkg_null(): + # geometries that are null slip through + dataset = open_dataset("tests/data/test_geometry_null.gpkg") + checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE)) + assert len(checks) == 0 + + def test_with_gpkg_allcorrect(): dataset = open_dataset("tests/data/test_allcorrect.gpkg") checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))