-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix incorrect result when reading deletion vectors in Delta Lake
This issue happens when Parquet file contains several pages and predicates filter page when parquet_use_column_index is set to true.
- Loading branch information
Showing
10 changed files
with
54 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
...n/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
Data generated using Delta Lake 3.2.0: | ||
|
||
```sql | ||
CREATE TABLE test_deletion_vector_pages | ||
(id INT) | ||
USING delta | ||
LOCATION 's3://test-bucket/test_deletion_vector_pages3' | ||
TBLPROPERTIES ('delta.enableDeletionVectors' = 'true', 'delta.enableChangeDataFeed' = 'true'); | ||
|
||
-- Write 2 pages and update a row in the second page | ||
-- 20000 is the default size of DEFAULT_PAGE_ROW_COUNT_LIMIT | ||
INSERT INTO test_deletion_vector_pages SELECT explode(sequence(1, 20001)); | ||
UPDATE test_deletion_vector_pages SET id = 99999 WHERE id = 20001; | ||
``` |
Binary file added
BIN
+773 Bytes
...tor_pages/_change_data/cdc-00000-8e47b370-7c53-4609-9432-1bbd8d71463a.c000.snappy.parquet
Binary file not shown.
3 changes: 3 additions & 0 deletions
3
...e/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000000.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{"commitInfo":{"timestamp":1736468279775,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.enableChangeDataFeed\":\"true\",\"delta.enableDeletionVectors\":\"true\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"0a1b0291-0ee3-4283-a3b9-17c6ebf506cf"}} | ||
{"metaData":{"id":"ec0e9b1d-2340-402e-a5a3-9cee9f4fbfb4","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableChangeDataFeed":"true","delta.enableDeletionVectors":"true"},"createdTime":1736468279750}} | ||
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","changeDataFeed"]}} |
2 changes: 2 additions & 0 deletions
2
...e/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000001.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
{"commitInfo":{"timestamp":1736468280380,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"20001","numOutputBytes":"80540"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"72c9962f-8a24-4877-bee3-5259ac43b66b"}} | ||
{"add":{"path":"part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet","partitionValues":{},"size":80540,"modificationTime":1736468280000,"dataChange":true,"stats":"{\"numRecords\":20001,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":20001},\"nullCount\":{\"id\":0},\"tightBounds\":true}"}} |
5 changes: 5 additions & 0 deletions
5
...e/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000002.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{"commitInfo":{"timestamp":1736468282105,"operation":"UPDATE","operationParameters":{"predicate":"[\"(id#3023 = 20001)\"]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"1","numRemovedBytes":"773","numCopiedRows":"0","numDeletionVectorsAdded":"1","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"1","executionTimeMs":"1544","numDeletionVectorsUpdated":"0","scanTimeMs":"1023","numAddedFiles":"1","numUpdatedRows":"1","numAddedBytes":"669","rewriteTimeMs":"243"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"bc1f9b9d-3d04-4cf2-a8d1-b6b75e922f6c"}} | ||
{"add":{"path":"part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet","partitionValues":{},"size":80540,"modificationTime":1736468280000,"dataChange":true,"stats":"{\"numRecords\":20001,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":20001},\"nullCount\":{\"id\":0},\"tightBounds\":false}","deletionVector":{"storageType":"u","pathOrInlineDv":"hR{.8.y4^dHj2[P[Sd0J","offset":1,"sizeInBytes":34,"cardinality":1}}} | ||
{"remove":{"path":"part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet","deletionTimestamp":1736468281584,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":80540,"stats":"{\"numRecords\":20001}"}} | ||
{"add":{"path":"part-00000-2007aafe-e829-46a6-b6cf-97f6ec686718.c000.snappy.parquet","partitionValues":{},"size":669,"modificationTime":1736468282000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":99999},\"maxValues\":{\"id\":99999},\"nullCount\":{\"id\":0},\"tightBounds\":true}"}} | ||
{"cdc":{"path":"_change_data/cdc-00000-8e47b370-7c53-4609-9432-1bbd8d71463a.c000.snappy.parquet","partitionValues":{},"size":773,"dataChange":false}} |
Binary file added
BIN
+43 Bytes
.../deltalake/deletion_vector_pages/deletion_vector_36de4107-c227-4588-867c-a788f18f5e4d.bin
Binary file not shown.
Binary file added
BIN
+669 Bytes
...deletion_vector_pages/part-00000-2007aafe-e829-46a6-b6cf-97f6ec686718.c000.snappy.parquet
Binary file not shown.
Binary file added
BIN
+78.7 KB
...deletion_vector_pages/part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet
Binary file not shown.