From 73851983ff7f30088cc33a4e59ba384b0420fa7c Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Wed, 4 Dec 2024 10:50:02 +0100 Subject: [PATCH 1/9] Clarify that empty check also covers nulls --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d111626..19d0847 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ The current checks are (see also the 'show-validations' command): | RQ21 | All layer and column names shall not be longer than 57 characters. | | RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. | | RQ23 | Geometry should be valid and simple. | -| RQ24 | Geometry should not be empty (e.g. 'POINT EMPTY', represented as 'POINT(NaN NaN)'). | +| RQ24 | Geometry should not be null or empty (e.g. 'POINT EMPTY', WKT 'POINT(NaN NaN)'). | | RC17 | It is recommended to name all GEOMETRY type columns 'geom'. | | RC18 | It is recommended to give all GEOMETRY type columns the same name. | | RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. | From c695ef95f4555abb458c6d5d7f7d62ccdb4932c7 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Wed, 4 Dec 2024 10:53:40 +0100 Subject: [PATCH 2/9] Modify query to also check for nulls --- .../validations/geometry_empty_check.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/geopackage_validator/validations/geometry_empty_check.py b/geopackage_validator/validations/geometry_empty_check.py index 8457730..442cbb0 100644 --- a/geopackage_validator/validations/geometry_empty_check.py +++ b/geopackage_validator/validations/geometry_empty_check.py @@ -2,32 +2,39 @@ from geopackage_validator.validations import validator from geopackage_validator import utils -SQL_EMPTY_TEMPLATE = """SELECT count(row_id) AS count, row_id +SQL_EMPTY_TEMPLATE = """SELECT type, count(type) AS count, row_id FROM( SELECT + CASE + WHEN ST_IsEmpty("{column_name}") = 1 + THEN 'empty' + WHEN "{column_name}" IS NULL + THEN 'null' + END AS type, cast(rowid AS INTEGER) AS row_id - FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1 -);""" + FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1 OR "{column_name}" IS NULL +) +GROUP BY type;""" -def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, int, int]]: +def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, str, int, int]]: columns = utils.dataset_geometry_tables(dataset) for table_name, column_name, _ in columns: validations = dataset.ExecuteSQL( sql_template.format(table_name=table_name, column_name=column_name) ) - for count, row_id in validations: - yield table_name, column_name, count, row_id + for type, count, row_id in validations: + yield table_name, column_name, type, count, row_id dataset.ReleaseResultSet(validations) class EmptyGeometryValidator(validator.Validator): - """Geometries should not be empty.""" + """Geometries should not be null or empty.""" code = 24 level = validator.ValidationLevel.ERROR - message = "Found empty geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}" + message = "Found {type} geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}" def check(self) -> Iterable[str]: result = query_geometry_empty(self.dataset, SQL_EMPTY_TEMPLATE) @@ -36,10 +43,11 @@ def check(self) -> Iterable[str]: self.message.format( table_name=table_name, column_name=column_name, + type=type, count=count, count_label=("time" if count == 1 else "times"), row_id=row_id, ) - for table_name, column_name, count, row_id in result + for table_name, column_name, type, count, row_id in result if count > 0 ] From 0f98cb34b286b057aa385e92ea890a5870f9ddb3 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Wed, 4 Dec 2024 10:53:59 +0100 Subject: [PATCH 3/9] Add test geopackage with null geoms --- tests/data/test_geometry_null.gpkg | Bin 0 -> 98304 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/data/test_geometry_null.gpkg diff --git a/tests/data/test_geometry_null.gpkg b/tests/data/test_geometry_null.gpkg new file mode 100644 index 0000000000000000000000000000000000000000..2c93c58828cfdfe96e061794b3a641638c8493e4 GIT binary patch literal 98304 zcmeI5Pi!06eaC0?kCbgm-t5}kM(g!6HwwjgBymXnqrGmHmc}$U5~<~%Xcfg7aLFMx zFgcWGMv=5A3dr&%XpiYFC;|jX4grDyIV9<=hxSqwD0*0+hoXn{&@S3r4vQc;ByVOo z%=L9a}WRl5C8!X z009sH0T2KI5C8!X0D+f8;OZ@!@z_;kl0R_ix6Ek&$d5<2=m`!G009sH0T2KI5C8!X z_%kH%$>hO*zvquJ*ZTf;oJrIrRjV`_s!nD|cA2E5U12SkeDdB}HuL^cZZqg15ag0V zCfOMo&r0mM2tsoci5MPW( zctgC9PUSa)WvNQVspNEoUznYXMdQ&qJ|3Tq#XOS6`DoNzhuM}6nfHaId@7UP3}!y6 z$xn79F{ewa)TpY(?JcWVAn(O1Vxdlan4;z**tZZ8xjd~% zGceuCHJ!=kA9~dq5|Ce;>KlFR1OnY6HD$F)wJLM3Y5 zWmzfGXT7Raq1B?4B%Vo%gMIk!GS}FfkK1=C6;)l*C~f;De`saiIw5VLV{UTz z#t$pQx2d~5lMimJCv#+R)=;l3>2k@i^Q-@3yOR~!awU2x^6|x4rz6=S)Q1-N*aG#R z#p&qWLNpqioAiPXALQ)Hw}Kl=)pUy>*EAJZYcOkR zT(c!-Y)>L>`WlfHQkF_`y;!5~hM#Msp%8sbB2D(%_LihJ2+833bTCE}#^&OQ4 zyJDh|XmlzvHx-NIBk?=@{GFJwzBv1hfBcVc{j(JFCHubm{kMMV=fC{FPu{yk%f*iU zA46X_KmY_l00ck)1V8`;KmY_l00ck)1TH**YyJGNd#M2P{|jHSC;$k600@8p2!H?x zfB*=900@8p2sj9M&;PB@|9{4eedZ9wDF}c72!H?xfB*=900@8p2!H?xfWU<$aGmYv zS@+}rK0N=wkktvLK>!3m00ck)1V8`;KmY_l00cnb2m;4F|NoR3`}Bxp2nGQV009sH z0T2KI5C8!X009sH0T6g434De1@i)f%{C@ZIe)|Um*D@;`$xPb*{l8x`W50eSivp!U z00ck)1V8`;KmY_l00ck)1V8`;dLi)k0Dr^dcmM2#0DS(x7l@Dn0T2KI5C8!X009sH z0T2KI5CDOTLg47n|1(!FN*SOh2!H?xfB*=900@8p2!H?xfB*=9z-b5=&;K$1KMgsg z0Ra#I0T2KI5C8!X009sH0T2Lzi$lQJ{}1#3i&J0F6a+v31V8`;KmY_l00ck)1V8`; zPD23C|4%~=X+Qu3KmY_l00ck)1V8`;KmY_l;9?ON8T}1&mHiPj`nxOtHTtX3zq-O+ z`Tp?M$iGYY@LeLLm!G~r<{STjV!U+}#*Z6~x>%FGE!D)G?J`xP8`5(jx^ieC zEUYaX{2LY0mN_F(OUKgyb*SKlGt!CEA^k3iG-19wYQY@r0p=+!vVGsCO}Aj4vpu#I z^R&hu@O^b;D3F_C8CfYyd)kwltV?2XM{k(dVz=3ee0Rv`;nEdfU}}o}PTX{+?)dKO z5!-ST_g4hc9ZM$7Ohlzvmq=dtFi+AM`d3(4;T%OxrSpQ(bhP3+-0Y~>ElI9!>9hsu zN^CY?mPdQX9hGof-lP7xCu;J?5?Rg^i~_Y(vLg4rBbPGiT%NW`={(tfEb4lltYuTH ziR=dXny}$yz&U(0%sG-(XtH`;bkH)I$5b-iokN&pg$J}fN-qgH3pZ`gbV<=Q2kLP6 z@a;>!z}y`BiIrt{y@;CRYR8SR%g5HHA>k}dXN@|`%xS77%4K)u)MZ87bHO!2_uZkz zJug`r>Xz+9S=H;>3uD<;O=jM-ct-Ekt{|(t?#j`&i@IE_iKHF+N{lVS)Yb&9=YVzrie)8nvCeqM6m-9q4!0b+^dTmCac>dpUM1 z6McVM@)9`8Ha;f#RC>d3qL$ay-9e6;roLv}%qXSiv_7ZW9PRH72Bsn%F1JHdmz-8o zNT)t1(2lA(+fr(HDDY_ESX*jUO4B05sHaU_O8HNo zmDPuI-(mRR`k*f`Gs7NiwhgmURcUQm?Ukv3GhoPVkj`jFy&Ti37Z`-{D(*zCb9%ezMyxTSvLR>?o(99aA7W zS}cl&D|O0na+x;zaz&O@E5fo~t}JOKRo>QRIvO@RT)XI@5LYW|x>%ya+A8he=$J8| zT5b1m#1@>SqvMJ$*QLqO#KzP_eQKhdpLlp@V)f2M2MlSt)Sn?11YQ8V_e>(J^ zLtp8CyRSy0ah!?3kZEye%EC;1aQBuk@aPeHkTPvn)m2Fn-Mc)R{jH>{`{H&{rR`6z za)A!p4)&TXIH@#fGuLR1BOSwP&eFgT2p`N4UtnW{J+S61&(GdS)tl7ylWJ~ceXQO_ zgC0JJ-Sh=EH`#-GJ@Qr?bV~Dll})j0C)L?Z6`VBRJXu@S5I)#_%NMwPn?0Opw`|fL zoqcGuQL#pu?aS9Yv@lz|cBHHGa}6lDmax|t($NrgPAj!i!_`$fhKyBe)g_6zhk703 zM2>XcJRE+Sf6ZDcjh22`+6#yFC!+AvGoT%ysWCp(f-lDaOx(BVT} zk(0(qfTJnH2VZ>C7vOpJz?#mu+iiEqYaKZU-2y=}NrDx*9Q0}sNTQI>q|!8xRlzx5 zBSCu_5G2i=fWuEW0+#FCJ<4^QYb&R99gjRbT*oPOZfm3+O4N5WbIG}?(uoTl%bTmt z?R7q5jApJpcO?4D)?#Vuy2W<9C$^_tE_L4f6@;K5? zn@f@B)x=wDn@XIFZ8MR>PT%OsPMciLWTzc@wAh~W!v2e7x$(Mhe0|FQEPj;FH~VI5 zo%^(g>Xk{0rfV?LTvIuf*Ix<5YHXU+e=$BcdNf)^0#g8&GC z00@8p2!H?xfB*=900@A-LxS%rEI$U#~QOFKSEIx9`l%?C$PP zQ<3dr>2a|tP1BzXn9;VS5+9Asywmz#qiCkvRp}2-O*NF-zVZA2?AX6E^o0WiKmY_l z00ck)1V8`;KmY_l00cnbf)JP)q>Bf`y{{b@&;OaL3^VpQzW;wgsu*g500@8p2!H?x zfB*=900@8p2!KHE1dIp&^e{jFe|d_bFB~8M0w4eaAOHd&00JNY0!{*dv&IfEfA=Lv zP@m)6d3sd1QwHZC00JOzt_1Ao|L3X%ZVmz<00JNY0w4eaAOHd&00JNY0wC~G35;C+ z0pn)_%w|f>8qdl)7eSi_+#FGxZd1hvia8B1yZcgZGd-F zNh~#X6kT+G-NGGWZ{(a!C6hvy1W%Y@kR%@DX}b0Xsh*ob-xrdpG->YpY5q<`qpDIZ z6}tPou92LOCmpJjdtKMJ2~XA^3R!{F8oQD@Y3_b)ZtWhXvQ}8gUAj>CE)mkpPhSuD z#vf5^?=*||+)cp6vQ(3F`cKnE`}ZwGWv5o_=GALXHmGG`MaT=r?$Ms0Xc^O_Q7cb7 zw!$`caZVRjR)|HhnZtu@W|j1^ExCnFE>ljKmE~!3S8j{SH^1x~7kU4)SkF|f9p7hP z1eRu2HWzzO*yFgg)F-#@k+iUGm4zhI%hoQ}4rZyGNi35kxxARtg!;C=Z|;QMmI;T? z3Y41kWJ@Qmt&wGP&ju$A>P=F!y9`^c&SO6d=2!5dA>x*9ad{MGg*=q)>aZr zg0a1PrYAS}dSay@`c#5rB`L;!)~+SjMA(Y_uLUyf{mjRxFVX3&*qmbH{(xe z2hkU22hnajctF~YaC~FbGH}{{yaGPgVGguuKU{gor2C!MPe_-4adi1V1>KXo+(mSj z!*_yaPyNYK&(&$|qWvOj)5^)NOdH>EJ<_Bi?Rs|Ay*G^ZZ7x!iMmOoY{pt^vp& Date: Wed, 4 Dec 2024 10:54:23 +0100 Subject: [PATCH 4/9] Add test on nulls --- tests/validations/test_geometry_valid_check.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/validations/test_geometry_valid_check.py b/tests/validations/test_geometry_valid_check.py index 30299f1..b67529d 100644 --- a/tests/validations/test_geometry_valid_check.py +++ b/tests/validations/test_geometry_valid_check.py @@ -36,7 +36,7 @@ def test_with_gpkg_valid_simple(): assert checks[0][1] == "geometry" assert checks[0][2] == "Self-intersection" assert checks[0][3] == 1 - assert checks[0][4] == 1 + # assert checks[0][4] == 1 def test_with_gpkg_empty(): @@ -46,6 +46,13 @@ def test_with_gpkg_empty(): assert len(checks) == 0 +def test_with_gpkg_null(): + # geometries that are null slip through + dataset = open_dataset("tests/data/test_geometry_null.gpkg") + checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE)) + assert len(checks) == 0 + + def test_with_gpkg_allcorrect(): dataset = open_dataset("tests/data/test_allcorrect.gpkg") checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE)) From fae499aa52e6524a0bb0a13e33eba815320b9f8a Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Wed, 4 Dec 2024 10:55:23 +0100 Subject: [PATCH 5/9] Add test on nulls --- tests/data/test_geometry_empty.gpkg | Bin 106496 -> 106496 bytes .../validations/test_geometry_empty_check.py | 13 ++++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/data/test_geometry_empty.gpkg b/tests/data/test_geometry_empty.gpkg index 46a35b252bbd7a8566adf9b3c9d56c70a9cd71ab..7d1d22bc6d307c23aae62b8c65a6440905e555e6 100755 GIT binary patch delta 2478 zcmb7`OH30{6o%)}fhk3LX|N2?C{v)kq`XFA2r&`b0R~EoP^`visdU6ffie!k#)VXr z4GV1ynXPUxk%Xv?k{B0+jd3Fj7KTLQ!URGzk*y{gZ%asg+<|tI+1&Hr|9t1%$ISFf z*7Qo&eT&IRQB*O0_n!7xEf2xzFw8XuJu zIpUW_!%F0`6i_B*JpO;^!Dc;2Z&PSHZff|1tOYtgo1ijaV?YPWa+9(x3TA4qT)V}==M&WGQ=VL8C6(l-b^Ycq5KDc)u{XV zLvtU{@q(S#dT5TD@*2T67b#>|o(Hu^30%4+5l9F_CP%-}<`p<^Tf0l(N(qYi`5%jB zdXqw%xYQ4*xDdCFw3NE7mmJjPWzgR+&MFP1vDndNQ#0%sY`FP|nxoKCE^9O$8qZlp znZ`attNtG2%)Ce&bvk%M&BbDp;dVSHC)Tk%J!GIziCs`)2iMbvNz!3v$0)&WxQyow*4%?DsvqCy#d|G5H z9ol4d>U$CD)Gu9-Ph`X$Y$XPcb;E?%RleI+ln#7TTJXUxs38J85y7th6g2SmR(__s zEQxscbUKKQX(8SawGkskcHF$MI762vkJ8=j)X+j>*Ckm=i|lJRln?>(dtitdxY+{} zJvAi^J2(i@6UkfsfDIK_Th80D9v@?ZF+Btk4uoA18_CVe8 zlnYwmR`r!<{PN8-D WDSnHJYBRBt^dq5S;o*faH2ek#y@$2{ delta 1657 zcmai#PfXKr6vz8Up@T7gD>{ZOg$@{qar|)uO$dQ*3>+C7-~z9$W1#}F<=v(4JnrSg8h9cqV8+=fl z5%_6rKmJk2l6wT&i_r^zP?0W+$;GZbB@fWiKK;ILmq5Ep-Y-;~4_OT0L|a;>K2=p8 zgSkyqRRR=aG-&k-kNsEk*o0G8;(7n5-%um!_Yes{d}m*aOyB z(e@`T^v7sJA2dA}ET&8h19FxaygtrjB7V}YJ~tO|vjNWOb+hGoIX3j~wUOK*(2i2Y zPlWVJi^(xU=SLQ54T}CIxlA|0cAizgL?F#9ffn;;^;4)?_qX7AfiCZ9Zk4t$C!Bqi zl(KGXW|f3q4IS0gQ04(fd&evap|LSukhj`^F<>%&${TH9OAbhuTyQGIa)L{~&w=#} z4zI3hX5GH_sOIc}xV*+e<&mnzvfT;B3Y=NaavcG8AM5d5arXsSzfYZlEIHw(69*n3 zq~2=E%rV-i|Db+B6nK6ZM-9i{(eNlg&Wp)m%MUxDNUeUx1!b!B$^{P9id+WP5iP2s z%r0d_1IHj{CP#xJ@4(zNhSUzMs`|biLaJ5ohB~{&x{tUwaU#N=0kP&zMUWj3Q4vZT zjQpYlO4X&$9+-4Q>B>ZB-6vu%WWX+W!Z}s7)dM#9TRSwXONkK*6^Z`PiJ*oIAdM#t zbW3H#o3Tf`%lCJ}-Ogheq3|>>h#3%Xx>9$-Mi-16Ml?yeWt6G46(r04={;(z36a^* z=wXLk;7xrsoEPeri@V_xkjj^9Xnua+qC{IL6UTvM%cwgU6!@^1Cj86?7PZ7RA2d3$ zS<2MX0uqx^DI6J9Mv!_)jYX?{uy6TfhAjUC%?6E@9CG5 GQ2!s{Br~l5 diff --git a/tests/validations/test_geometry_empty_check.py b/tests/validations/test_geometry_empty_check.py index 8dae640..9e800e1 100644 --- a/tests/validations/test_geometry_empty_check.py +++ b/tests/validations/test_geometry_empty_check.py @@ -10,7 +10,18 @@ def test_with_gpkg_empty(): assert len(result) == 1 assert ( result[0] - == "Found empty geometry in table: stations, column geom, 45 times, example id 129" + == "Found empty geometry in table: test_geometry_empty, column geom, 45 times, example id 129" + ) + + +def test_with_gpkg_null(): + # geometries that are null slip through + dataset = open_dataset("tests/data/test_geometry_null.gpkg") + result = list(EmptyGeometryValidator(dataset).check()) + assert len(result) == 1 + assert ( + result[0] + == "Found null geometry in table: test_geometry_null, column geometry, 2 times, example id 1" ) From acfbdc69263d6c2c15f63b120ef5124dbd04f5b3 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Wed, 4 Dec 2024 11:01:04 +0100 Subject: [PATCH 6/9] Add diff to black test --- .github/workflows/pytest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index e3be2de..fba52ef 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -42,7 +42,7 @@ jobs: if [ "${{ matrix.os }}" = "ubuntu-20.04" ] then echo adding ubuntugis unstable - sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable + sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable else echo adding ubuntugis stable sudo apt-add-repository ppa:ubuntugis/ppa @@ -70,4 +70,4 @@ jobs: pytest -vv - name: Test with black run: | - black --check . + black --check --diff . From 3a1aa58ea7c5a83a25fa2d1fed80f952c20520de Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Wed, 4 Dec 2024 11:05:00 +0100 Subject: [PATCH 7/9] Fix test --- tests/validations/test_geometry_valid_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/validations/test_geometry_valid_check.py b/tests/validations/test_geometry_valid_check.py index b67529d..3213eb2 100644 --- a/tests/validations/test_geometry_valid_check.py +++ b/tests/validations/test_geometry_valid_check.py @@ -36,7 +36,7 @@ def test_with_gpkg_valid_simple(): assert checks[0][1] == "geometry" assert checks[0][2] == "Self-intersection" assert checks[0][3] == 1 - # assert checks[0][4] == 1 + assert checks[0][4] == 1 def test_with_gpkg_empty(): From cc1dbb5711a1ae2127b10055946405180bb1a5d8 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Wed, 4 Dec 2024 11:05:29 +0100 Subject: [PATCH 8/9] Reformat according to black --- geopackage_validator/validations/geometry_empty_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/geopackage_validator/validations/geometry_empty_check.py b/geopackage_validator/validations/geometry_empty_check.py index 442cbb0..cb9f653 100644 --- a/geopackage_validator/validations/geometry_empty_check.py +++ b/geopackage_validator/validations/geometry_empty_check.py @@ -17,7 +17,9 @@ GROUP BY type;""" -def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, str, int, int]]: +def query_geometry_empty( + dataset, sql_template +) -> Iterable[Tuple[str, str, str, int, int]]: columns = utils.dataset_geometry_tables(dataset) for table_name, column_name, _ in columns: From 64c03b1efd97f600487c83f0edf216c78ab59813 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Wed, 4 Dec 2024 11:21:38 +0100 Subject: [PATCH 9/9] Remove obsolete comment --- tests/validations/test_geometry_empty_check.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/validations/test_geometry_empty_check.py b/tests/validations/test_geometry_empty_check.py index 9e800e1..b248e95 100644 --- a/tests/validations/test_geometry_empty_check.py +++ b/tests/validations/test_geometry_empty_check.py @@ -15,7 +15,6 @@ def test_with_gpkg_empty(): def test_with_gpkg_null(): - # geometries that are null slip through dataset = open_dataset("tests/data/test_geometry_null.gpkg") result = list(EmptyGeometryValidator(dataset).check()) assert len(result) == 1