diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java index 80d32276fb8a..a65c04d58d1d 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakePageSourceProvider.java @@ -216,7 +216,7 @@ public ConnectorPageSource createPageSource( ParquetReaderOptions options = parquetReaderOptions.withMaxReadBlockSize(getParquetMaxReadBlockSize(session)) .withMaxReadBlockRowCount(getParquetMaxReadBlockRowCount(session)) .withSmallFileThreshold(getParquetSmallFileThreshold(session)) - .withUseColumnIndex(isParquetUseColumnIndex(session)) + .withUseColumnIndex(split.getDeletionVector().isEmpty() && isParquetUseColumnIndex(session)) .withIgnoreStatistics(isParquetIgnoreStatistics(session)) .withVectorizedDecodingEnabled(isParquetVectorizedDecodingEnabled(session)); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 43134702c831..0f0b29f84340 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -1402,6 +1402,35 @@ void testDeletionVectorsRepeat() } } + @Test + void testDeletionVectorsPages() + throws Exception + { + testDeletionVectorsPages(true); + testDeletionVectorsPages(false); + } + + private void testDeletionVectorsPages(boolean parquetUseColumnIndex) + throws Exception + { + String tableName = "deletion_vectors_pages" + randomNameSuffix(); + Path tableLocation = catalogDir.resolve(tableName); + copyDirectoryContents(new File(Resources.getResource("deltalake/deletion_vector_pages").toURI()).toPath(), tableLocation); + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + + Session session = Session.builder(getSession()) + .setCatalogSessionProperty("delta", "parquet_use_column_index", Boolean.toString(parquetUseColumnIndex)) + .build(); + + assertQueryReturnsEmptyResult(session, "SELECT * FROM " + tableName + " WHERE id = 20001"); + assertThat(query(session, "SELECT * FROM " + tableName + " WHERE id = 99999")).matches("VALUES 99999"); + + assertThat(query(session, "SELECT id, _change_type, _commit_version FROM TABLE(system.table_changes('tpch', '" +tableName+ "')) WHERE id = 20001")) + .matches("VALUES (20001, VARCHAR 'insert', BIGINT '1'), (20001, VARCHAR 'update_preimage', BIGINT '2')"); + + assertUpdate("DROP TABLE " + tableName); + } + @Test public void testUnsupportedVacuumDeletionVectors() throws Exception diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/README.md new file mode 100644 index 000000000000..9a07d942d7ad --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/README.md @@ -0,0 +1,14 @@ +Data generated using Delta Lake 3.2.0: + +```sql +CREATE TABLE test_deletion_vector_pages +(id INT) +USING delta +LOCATION 's3://test-bucket/test_deletion_vector_pages3' +TBLPROPERTIES ('delta.enableDeletionVectors' = 'true', 'delta.enableChangeDataFeed' = 'true'); + +-- Write 2 pages and update a row in the second page +-- 20000 is the default size of DEFAULT_PAGE_ROW_COUNT_LIMIT +INSERT INTO test_deletion_vector_pages SELECT explode(sequence(1, 20001)); +UPDATE test_deletion_vector_pages SET id = 99999 WHERE id = 20001; +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_change_data/cdc-00000-8e47b370-7c53-4609-9432-1bbd8d71463a.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_change_data/cdc-00000-8e47b370-7c53-4609-9432-1bbd8d71463a.c000.snappy.parquet new file mode 100644 index 000000000000..8ad8d9649cf2 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_change_data/cdc-00000-8e47b370-7c53-4609-9432-1bbd8d71463a.c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..8c56796ae24b --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1736468279775,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.enableChangeDataFeed\":\"true\",\"delta.enableDeletionVectors\":\"true\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"0a1b0291-0ee3-4283-a3b9-17c6ebf506cf"}} +{"metaData":{"id":"ec0e9b1d-2340-402e-a5a3-9cee9f4fbfb4","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableChangeDataFeed":"true","delta.enableDeletionVectors":"true"},"createdTime":1736468279750}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","changeDataFeed"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..1600c6485d78 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1736468280380,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"20001","numOutputBytes":"80540"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"72c9962f-8a24-4877-bee3-5259ac43b66b"}} +{"add":{"path":"part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet","partitionValues":{},"size":80540,"modificationTime":1736468280000,"dataChange":true,"stats":"{\"numRecords\":20001,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":20001},\"nullCount\":{\"id\":0},\"tightBounds\":true}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..0e200d2fe784 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/_delta_log/00000000000000000002.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1736468282105,"operation":"UPDATE","operationParameters":{"predicate":"[\"(id#3023 = 20001)\"]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"1","numRemovedBytes":"773","numCopiedRows":"0","numDeletionVectorsAdded":"1","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"1","executionTimeMs":"1544","numDeletionVectorsUpdated":"0","scanTimeMs":"1023","numAddedFiles":"1","numUpdatedRows":"1","numAddedBytes":"669","rewriteTimeMs":"243"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.2.0","txnId":"bc1f9b9d-3d04-4cf2-a8d1-b6b75e922f6c"}} +{"add":{"path":"part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet","partitionValues":{},"size":80540,"modificationTime":1736468280000,"dataChange":true,"stats":"{\"numRecords\":20001,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":20001},\"nullCount\":{\"id\":0},\"tightBounds\":false}","deletionVector":{"storageType":"u","pathOrInlineDv":"hR{.8.y4^dHj2[P[Sd0J","offset":1,"sizeInBytes":34,"cardinality":1}}} +{"remove":{"path":"part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet","deletionTimestamp":1736468281584,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":80540,"stats":"{\"numRecords\":20001}"}} +{"add":{"path":"part-00000-2007aafe-e829-46a6-b6cf-97f6ec686718.c000.snappy.parquet","partitionValues":{},"size":669,"modificationTime":1736468282000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":99999},\"maxValues\":{\"id\":99999},\"nullCount\":{\"id\":0},\"tightBounds\":true}"}} +{"cdc":{"path":"_change_data/cdc-00000-8e47b370-7c53-4609-9432-1bbd8d71463a.c000.snappy.parquet","partitionValues":{},"size":773,"dataChange":false}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/deletion_vector_36de4107-c227-4588-867c-a788f18f5e4d.bin b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/deletion_vector_36de4107-c227-4588-867c-a788f18f5e4d.bin new file mode 100644 index 000000000000..0d5b888fa6c1 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/deletion_vector_36de4107-c227-4588-867c-a788f18f5e4d.bin differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/part-00000-2007aafe-e829-46a6-b6cf-97f6ec686718.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/part-00000-2007aafe-e829-46a6-b6cf-97f6ec686718.c000.snappy.parquet new file mode 100644 index 000000000000..7923f7026d87 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/part-00000-2007aafe-e829-46a6-b6cf-97f6ec686718.c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet new file mode 100644 index 000000000000..6212129a0577 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/deletion_vector_pages/part-00000-8e78760d-6337-4ce4-a7b8-3ce3e7c5be44-c000.snappy.parquet differ