diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java
index 5129aede8ce4..ac469fa56d29 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java
@@ -2801,7 +2801,12 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab
                         Optional.empty()), // forAnalyze does not affect stats
                 handle -> {
                     Table icebergTable = catalog.loadTable(session, handle.getSchemaTableName());
-                    return TableStatisticsReader.getTableStatistics(typeManager, session, handle, icebergTable);
+                    return TableStatisticsReader.getTableStatistics(
+                            typeManager,
+                            session,
+                            handle,
+                            icebergTable,
+                            fileSystemFactory.create(session.getIdentity(), icebergTable.io().properties()));
                 });
     }
 
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java
index 0109bc11c839..503486b8d0e4 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java
@@ -24,13 +24,10 @@
 import io.airlift.units.DataSize;
 import io.airlift.units.Duration;
 import io.trino.cache.NonEvictableCache;
-import io.trino.filesystem.Location;
-import io.trino.filesystem.TrinoInputFile;
 import io.trino.filesystem.cache.CachingHostAddressProvider;
 import io.trino.plugin.iceberg.delete.DeleteFile;
 import io.trino.plugin.iceberg.util.DataFileWithDeleteFiles;
 import io.trino.spi.SplitWeight;
-import io.trino.spi.TrinoException;
 import io.trino.spi.connector.ColumnHandle;
 import io.trino.spi.connector.ConnectorSession;
 import io.trino.spi.connector.ConnectorSplit;
@@ -82,16 +79,16 @@
 import static io.trino.cache.SafeCaches.buildNonEvictableCache;
 import static io.trino.plugin.iceberg.ExpressionConverter.isConvertableToIcebergExpression;
 import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression;
-import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnHandle;
-import static io.trino.plugin.iceberg.IcebergColumnHandle.pathColumnHandle;
-import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR;
 import static io.trino.plugin.iceberg.IcebergMetadataColumn.isMetadataColumnId;
 import static io.trino.plugin.iceberg.IcebergSessionProperties.getSplitSize;
 import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD;
 import static io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino;
 import static io.trino.plugin.iceberg.IcebergUtil.getColumnHandle;
+import static io.trino.plugin.iceberg.IcebergUtil.getFileModifiedTimePathDomain;
+import static io.trino.plugin.iceberg.IcebergUtil.getModificationTime;
 import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys;
 import static io.trino.plugin.iceberg.IcebergUtil.getPartitionValues;
+import static io.trino.plugin.iceberg.IcebergUtil.getPathDomain;
 import static io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes;
 import static io.trino.plugin.iceberg.TypeConverter.toIcebergType;
 import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone;
@@ -298,7 +295,7 @@ private boolean pruneFileScanTask(FileScanTask fileScanTask, boolean fileHasNoDe
             return true;
         }
         if (!fileModifiedTimeDomain.isAll()) {
-            long fileModifiedTime = getModificationTime(fileScanTask.file().path().toString());
+            long fileModifiedTime = getModificationTime(fileScanTask.file().path().toString(), fileSystemFactory.create(session.getIdentity(), fileIoProperties));
             if (!fileModifiedTimeDomain.includesNullableValue(packDateTimeWithZone(fileModifiedTime, UTC_KEY))) {
                 return true;
             }
@@ -337,17 +334,6 @@ private boolean noDataColumnsProjected(FileScanTask fileScanTask)
                 .containsAll(projectedBaseColumns);
     }
 
-    private long getModificationTime(String path)
-    {
-        try {
-            TrinoInputFile inputFile = fileSystemFactory.create(session.getIdentity(), fileIoProperties).newInputFile(Location.of(path));
-            return inputFile.lastModified().toEpochMilli();
-        }
-        catch (IOException e) {
-            throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed to get file modification time: " + path, e);
-        }
-    }
-
     private void finish()
     {
         close();
@@ -533,26 +519,4 @@ private IcebergSplit toIcebergSplit(FileScanTask task)
     }
-
-    private static Domain getPathDomain(TupleDomain<IcebergColumnHandle> effectivePredicate)
-    {
-        IcebergColumnHandle pathColumn = pathColumnHandle();
-        Domain domain = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Unexpected NONE tuple domain"))
-                .get(pathColumn);
-        if (domain == null) {
-            return Domain.all(pathColumn.getType());
-        }
-        return domain;
-    }
-
-    private static Domain getFileModifiedTimePathDomain(TupleDomain<IcebergColumnHandle> effectivePredicate)
-    {
-        IcebergColumnHandle fileModifiedTimeColumn = fileModifiedTimeColumnHandle();
-        Domain domain = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Unexpected NONE tuple domain"))
-                .get(fileModifiedTimeColumn);
-        if (domain == null) {
-            return Domain.all(fileModifiedTimeColumn.getType());
-        }
-        return domain;
-    }
 }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java
index a8cfb404ba33..ab806b9b7b90 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java
@@ -26,6 +26,7 @@
 import io.trino.filesystem.FileIterator;
 import io.trino.filesystem.Location;
 import io.trino.filesystem.TrinoFileSystem;
+import io.trino.filesystem.TrinoInputFile;
 import io.trino.plugin.iceberg.PartitionTransforms.ColumnTransform;
 import io.trino.plugin.iceberg.catalog.IcebergTableOperations;
 import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider;
@@ -42,6 +43,7 @@
 import io.trino.spi.predicate.Domain;
 import io.trino.spi.predicate.NullableValue;
 import io.trino.spi.predicate.Range;
+import io.trino.spi.predicate.TupleDomain;
 import io.trino.spi.predicate.ValueSet;
 import io.trino.spi.type.DecimalType;
 import io.trino.spi.type.Int128;
@@ -102,7 +104,9 @@
 import static io.trino.plugin.base.io.ByteBuffers.getWrappedBytes;
 import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT;
 import static io.trino.plugin.iceberg.ColumnIdentity.createColumnIdentity;
+import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnHandle;
 import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnMetadata;
+import static io.trino.plugin.iceberg.IcebergColumnHandle.pathColumnHandle;
 import static io.trino.plugin.iceberg.IcebergColumnHandle.pathColumnMetadata;
 import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA;
 import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR;
@@ -883,4 +887,37 @@ else if (versionNumber == latestMetadataVersion) {
         }
         return getOnlyElement(latestMetadataLocations).toString();
     }
+
+    public static Domain getPathDomain(TupleDomain<IcebergColumnHandle> effectivePredicate)
+    {
+        IcebergColumnHandle pathColumn = pathColumnHandle();
+        Domain domain = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Unexpected NONE tuple domain"))
+                .get(pathColumn);
+        if (domain == null) {
+            return Domain.all(pathColumn.getType());
+        }
+        return domain;
+    }
+
+    public static Domain getFileModifiedTimePathDomain(TupleDomain<IcebergColumnHandle> effectivePredicate)
+    {
+        IcebergColumnHandle fileModifiedTimeColumn = fileModifiedTimeColumnHandle();
+        Domain domain = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Unexpected NONE tuple domain"))
+                .get(fileModifiedTimeColumn);
+        if (domain == null) {
+            return Domain.all(fileModifiedTimeColumn.getType());
+        }
+        return domain;
+    }
+
+    public static long getModificationTime(String path, TrinoFileSystem fileSystem)
+    {
+        try {
+            TrinoInputFile inputFile = fileSystem.newInputFile(Location.of(path));
+            return inputFile.lastModified().toEpochMilli();
+        }
+        catch (IOException e) {
+            throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed to get file modification time: " + path, e);
+        }
+    }
 }
diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TableStatisticsReader.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TableStatisticsReader.java
index 0b4947959e0e..7b390d83b570 100644
--- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TableStatisticsReader.java
+++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TableStatisticsReader.java
@@ -18,9 +18,11 @@
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Maps;
 import io.airlift.log.Logger;
+import io.trino.filesystem.TrinoFileSystem;
 import io.trino.spi.TrinoException;
 import io.trino.spi.connector.ColumnHandle;
 import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.predicate.Domain;
 import io.trino.spi.predicate.TupleDomain;
 import io.trino.spi.statistics.ColumnStatistics;
 import io.trino.spi.statistics.DoubleRange;
@@ -56,11 +58,17 @@
 import static com.google.common.collect.ImmutableMap.toImmutableMap;
 import static com.google.common.collect.Iterables.getOnlyElement;
 import static com.google.common.collect.Streams.stream;
+import static io.airlift.slice.Slices.utf8Slice;
 import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression;
 import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA;
 import static io.trino.plugin.iceberg.IcebergMetadataColumn.isMetadataColumnId;
 import static io.trino.plugin.iceberg.IcebergSessionProperties.isExtendedStatisticsEnabled;
 import static io.trino.plugin.iceberg.IcebergUtil.getColumns;
+import static io.trino.plugin.iceberg.IcebergUtil.getFileModifiedTimePathDomain;
+import static io.trino.plugin.iceberg.IcebergUtil.getModificationTime;
+import static io.trino.plugin.iceberg.IcebergUtil.getPathDomain;
+import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone;
+import static io.trino.spi.type.TimeZoneKey.UTC_KEY;
 import static io.trino.spi.type.VarbinaryType.VARBINARY;
 import static io.trino.spi.type.VarcharType.VARCHAR;
 import static java.lang.Long.parseLong;
@@ -77,7 +85,7 @@ private TableStatisticsReader() {}
 
     public static final String APACHE_DATASKETCHES_THETA_V1_NDV_PROPERTY = "ndv";
 
-    public static TableStatistics getTableStatistics(TypeManager typeManager, ConnectorSession session, IcebergTableHandle tableHandle, Table icebergTable)
+    public static TableStatistics getTableStatistics(TypeManager typeManager, ConnectorSession session, IcebergTableHandle tableHandle, Table icebergTable, TrinoFileSystem fileSystem)
     {
         return makeTableStatistics(
                 typeManager,
@@ -85,7 +93,8 @@ public static TableStatistics getTableStatistics(TypeManager typeManager, Connec
                 tableHandle.getSnapshotId(),
                 tableHandle.getEnforcedPredicate(),
                 tableHandle.getUnenforcedPredicate(),
-                isExtendedStatisticsEnabled(session));
+                isExtendedStatisticsEnabled(session),
+                fileSystem);
     }
 
     @VisibleForTesting
@@ -95,7 +104,8 @@ public static TableStatistics makeTableStatistics(
             Optional<Long> snapshot,
             TupleDomain<IcebergColumnHandle> enforcedConstraint,
             TupleDomain<IcebergColumnHandle> unenforcedConstraint,
-            boolean extendedStatisticsEnabled)
+            boolean extendedStatisticsEnabled,
+            TrinoFileSystem fileSystem)
     {
         if (snapshot.isEmpty()) {
             // No snapshot, so no data.
@@ -125,15 +135,28 @@ public static TableStatistics makeTableStatistics(
                 .map(column -> Maps.immutableEntry(column.fieldId(), column.type()))
                 .collect(toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue));
 
+        Domain pathDomain = getPathDomain(effectivePredicate);
+        Domain fileModifiedTimeDomain = getFileModifiedTimePathDomain(effectivePredicate);
         TableScan tableScan = icebergTable.newScan()
-                // Table enforced constraint may include eg $path column predicate which is not handled by Iceberg library
-                // TODO apply $path and $file_modified_time filters here
                 .filter(toIcebergExpression(effectivePredicate.filter((column, domain) -> !isMetadataColumnId(column.getId()))))
                 .useSnapshot(snapshotId)
                 .includeColumnStats();
         IcebergStatistics.Builder icebergStatisticsBuilder = new IcebergStatistics.Builder(columns, typeManager);
         try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
-            fileScanTasks.forEach(fileScanTask -> icebergStatisticsBuilder.acceptDataFile(fileScanTask.file(), fileScanTask.spec()));
+            fileScanTasks.forEach(fileScanTask -> {
+                if (!pathDomain.isAll() && !pathDomain.includesNullableValue(utf8Slice(fileScanTask.file().path().toString()))) {
+                    return;
+                }
+                if (!fileModifiedTimeDomain.isAll()) {
+                    long fileModifiedTime = getModificationTime(fileScanTask.file().path().toString(), fileSystem);
+                    if (!fileModifiedTimeDomain.includesNullableValue(packDateTimeWithZone(fileModifiedTime, UTC_KEY))) {
+                        return;
+                    }
+                }
+
+                icebergStatisticsBuilder.acceptDataFile(fileScanTask.file(), fileScanTask.spec());
+            });
         }
         catch (IOException e) {
             throw new UncheckedIOException(e);
diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java
index e3dae3aaf299..7c22b067929e 100644
--- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java
+++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java
@@ -5516,6 +5516,208 @@ public void testOptimizeWithPathColumn()
         assertUpdate("DROP TABLE " + tableName);
     }
 
+    @Test
+    public void testCollectingStatisticsWithPathColumnPredicate()
+    {
+        assertQuerySucceeds("EXPLAIN SELECT * FROM region WHERE \"$path\" = ''");
+
+        Session collectingStatisticsSession = Session.builder(getSession())
+                .setSystemProperty("collect_plan_statistics_for_all_queries", "true")
+                .build();
+        String tableName = "test_collect_statistics_with_path_" + randomNameSuffix();
+        assertUpdate("CREATE TABLE " + tableName + "(id integer, value integer)");
+
+        assertUpdate("INSERT INTO " + tableName + " VALUES (1, 1)", 1);
+        assertUpdate("INSERT INTO " + tableName + " VALUES (2, 2)", 1);
+        assertUpdate("INSERT INTO " + tableName + " VALUES (3, null)", 1);
+        assertUpdate("INSERT INTO " + tableName + " VALUES (4, 4)", 1);
+
+        // Make sure the whole table has stats
+        MaterializedResult tableStatistics = computeActual(collectingStatisticsSession, "SHOW STATS FOR (SELECT * FROM %s WHERE \"$path\" IS NOT NULL)".formatted(tableName));
+        MaterializedResult expectedTableStatistics =
+                resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                        .row("id", null, 4.0, 0.0, null, "1", "4")
+                        .row("value", null, 3.0, 0.25, null, "1", "4")
+                        .row(null, null, null, null, 4.0, null, null)
+                        .build();
+        if (format == AVRO) {
+            expectedTableStatistics =
+                    resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                            .row("id", null, 4.0, 0.0, null, null, null)
+                            .row("value", null, 3.0, 0.1, null, null, null)
+                            .row(null, null, null, null, 4.0, null, null)
+                            .build();
+        }
+        assertThat(tableStatistics).containsExactlyElementsOf(expectedTableStatistics);
+
+        String firstPath = (String) computeScalar(collectingStatisticsSession, "SELECT \"$path\" FROM " + tableName + " WHERE id = 1");
+        String secondPath = (String) computeScalar(collectingStatisticsSession, "SELECT \"$path\" FROM " + tableName + " WHERE id = 2");
+        String thirdPath = (String) computeScalar(collectingStatisticsSession, "SELECT \"$path\" FROM " + tableName + " WHERE id = 3");
+        String fourthPath = (String) computeScalar(collectingStatisticsSession, "SELECT \"$path\" FROM " + tableName + " WHERE id = 4");
+
+        String pathPredicateSql = "SELECT * FROM " + tableName + " WHERE \"$path\" = '%s'";
+        // Check the predicate with path
+        assertQuery(collectingStatisticsSession, pathPredicateSql.formatted(firstPath), "VALUES (1, 1)");
+        assertQuery(collectingStatisticsSession, pathPredicateSql.formatted(secondPath), "VALUES (2, 2)");
+        assertQuery(collectingStatisticsSession, "SELECT COUNT(*) FROM %s WHERE \"$path\" = '%s' OR \"$path\" = '%s'".formatted(tableName, thirdPath, fourthPath), "VALUES 2");
+
+        MaterializedResult firstPathStatistics = computeActual(collectingStatisticsSession, "SHOW STATS FOR (" + pathPredicateSql.formatted(firstPath) + ")");
+        MaterializedResult expectedFirstPathStatistics =
+                resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                        .row("id", null, 1.0, 0.0, null, "1", "1")
+                        .row("value", null, 1.0, 0.0, null, "1", "1")
+                        .row(null, null, null, null, 1.0, null, null)
+                        .build();
+        if (format == AVRO) {
+            expectedFirstPathStatistics =
+                    resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                            .row("id", null, 1.0, 0.0, null, null, null)
+                            .row("value", null, 1.0, 0.0, null, null, null)
+                            .row(null, null, null, null, 1.0, null, null)
+                            .build();
+        }
+        assertThat(firstPathStatistics).containsExactlyElementsOf(expectedFirstPathStatistics);
+
+        MaterializedResult secondThirdPathStatistics = computeActual(collectingStatisticsSession, "SHOW STATS FOR (SELECT * FROM %s WHERE \"$path\" IN ('%s', '%s'))".formatted(tableName, secondPath, thirdPath));
+        MaterializedResult expectedSecondThirdPathStatistics =
+                resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                        .row("id", null, 2.0, 0.0, null, "2", "3")
+                        .row("value", null, 1.0, 0.5, null, "2", "2")
+                        .row(null, null, null, null, 2.0, null, null)
+                        .build();
+        if (format == AVRO) {
+            expectedSecondThirdPathStatistics =
+                    resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                            .row("id", null, 2.0, 0.0, null, null, null)
+                            .row("value", null, 2.0, 0.0, null, null, null)
+                            .row(null, null, null, null, 2.0, null, null)
+                            .build();
+        }
+        assertThat(secondThirdPathStatistics).containsExactlyElementsOf(expectedSecondThirdPathStatistics);
+
+        MaterializedResult fourthPathStatistics = computeActual(collectingStatisticsSession, "SHOW STATS FOR (" + pathPredicateSql.formatted(fourthPath) + ")");
+        MaterializedResult expectedFourthPathStatistics =
+                resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                        .row("id", null, 1.0, 0.0, null, "4", "4")
+                        .row("value", null, 1.0, 0.0, null, "4", "4")
+                        .row(null, null, null, null, 1.0, null, null)
+                        .build();
+        if (format == AVRO) {
+            expectedFourthPathStatistics =
+                    resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                            .row("id", null, 1.0, 0.0, null, null, null)
+                            .row("value", null, 1.0, 0.0, null, null, null)
+                            .row(null, null, null, null, 1.0, null, null)
+                            .build();
+        }
+        assertThat(fourthPathStatistics).containsExactlyElementsOf(expectedFourthPathStatistics);
+
+        assertUpdate("DROP TABLE " + tableName);
+    }
+
+    @Test
+    public void testCollectingStatisticsWithFileModifiedTimeColumnPredicate()
+            throws InterruptedException
+    {
+        assertQuerySucceeds("EXPLAIN SELECT * FROM region WHERE \"$file_modified_time\" = TIMESTAMP '2001-08-22 03:04:05.321 UTC'");
+
+        Session collectingStatisticsSession = Session.builder(getSession())
+                .setSystemProperty("collect_plan_statistics_for_all_queries", "true")
+                .build();
+        String tableName = "test_collect_statistics_with_file_modified_time_" + randomNameSuffix();
+        assertUpdate("CREATE TABLE " + tableName + "(id integer, value integer)");
+
+        assertUpdate("INSERT INTO " + tableName + " VALUES (1, 1)", 1);
+        storageTimePrecision.sleep(1);
+        assertUpdate("INSERT INTO " + tableName + " VALUES (2, 2)", 1);
+        storageTimePrecision.sleep(1);
+        assertUpdate("INSERT INTO " + tableName + " VALUES (3, null)", 1);
+        storageTimePrecision.sleep(1);
+        assertUpdate("INSERT INTO " + tableName + " VALUES (4, 4)", 1);
+
+        // Make sure the whole table has stats
+        MaterializedResult tableStatistics = computeActual(collectingStatisticsSession, "SHOW STATS FOR (SELECT * FROM %s WHERE \"$file_modified_time\" IS NOT NULL)".formatted(tableName));
+        MaterializedResult expectedTableStatistics =
+                resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                        .row("id", null, 4.0, 0.0, null, "1", "4")
+                        .row("value", null, 3.0, 0.25, null, "1", "4")
+                        .row(null, null, null, null, 4.0, null, null)
+                        .build();
+        if (format == AVRO) {
+            expectedTableStatistics =
+                    resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                            .row("id", null, 4.0, 0.0, null, null, null)
+                            .row("value", null, 3.0, 0.1, null, null, null)
+                            .row(null, null, null, null, 4.0, null, null)
+                            .build();
+        }
+        assertThat(tableStatistics).containsExactlyElementsOf(expectedTableStatistics);
+
+        ZonedDateTime firstFileModifiedTime = (ZonedDateTime) computeScalar(collectingStatisticsSession, "SELECT \"$file_modified_time\" FROM " + tableName + " WHERE id = 1");
+        ZonedDateTime secondFileModifiedTime = (ZonedDateTime) computeScalar(collectingStatisticsSession, "SELECT \"$file_modified_time\" FROM " + tableName + " WHERE id = 2");
+        ZonedDateTime thirdFileModifiedTime = (ZonedDateTime) computeScalar(collectingStatisticsSession, "SELECT \"$file_modified_time\" FROM " + tableName + " WHERE id = 3");
+        ZonedDateTime fourthFileModifiedTime = (ZonedDateTime) computeScalar(collectingStatisticsSession, "SELECT \"$file_modified_time\" FROM " + tableName + " WHERE id = 4");
+
+        String fileModifiedTimePredicateSql = "SELECT * FROM " + tableName + " WHERE \"$file_modified_time\" = from_iso8601_timestamp('%s')";
+        // Check the predicate with fileModifiedTime
+        assertQuery(collectingStatisticsSession, fileModifiedTimePredicateSql.formatted(firstFileModifiedTime.format(ISO_OFFSET_DATE_TIME)), "SELECT 1, 1");
+        assertQuery(collectingStatisticsSession, fileModifiedTimePredicateSql.formatted(secondFileModifiedTime.format(ISO_OFFSET_DATE_TIME)), "SELECT 2, 2");
+        assertQuery(collectingStatisticsSession, "SELECT COUNT(*) FROM %s WHERE \"$file_modified_time\" = from_iso8601_timestamp('%s') OR \"$file_modified_time\" = from_iso8601_timestamp('%s')".formatted(tableName, thirdFileModifiedTime.format(ISO_OFFSET_DATE_TIME), fourthFileModifiedTime.format(ISO_OFFSET_DATE_TIME)), "VALUES 2");
+
+        MaterializedResult firstFileModifiedTimeStatistics = computeActual(collectingStatisticsSession, "SHOW STATS FOR (" + fileModifiedTimePredicateSql.formatted(firstFileModifiedTime.format(ISO_OFFSET_DATE_TIME)) + ")");
+        MaterializedResult expectedFirstFileModifiedTimeStatistics =
+                resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                        .row("id", null, 1.0, 0.0, null, "1", "1")
+                        .row("value", null, 1.0, 0.0, null, "1", "1")
+                        .row(null, null, null, null, 1.0, null, null)
+                        .build();
+        if (format == AVRO) {
+            expectedFirstFileModifiedTimeStatistics =
+                    resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                            .row("id", null, 1.0, 0.0, null, null, null)
+                            .row("value", null, 1.0, 0.0, null, null, null)
+                            .row(null, null, null, null, 1.0, null, null)
+                            .build();
+        }
+        assertThat(firstFileModifiedTimeStatistics).containsExactlyElementsOf(expectedFirstFileModifiedTimeStatistics);
+
+        MaterializedResult secondThirdFileModifiedTimeStatistics = computeActual(collectingStatisticsSession, "SHOW STATS FOR (SELECT * FROM %s WHERE \"$file_modified_time\" IN (from_iso8601_timestamp('%s'), from_iso8601_timestamp('%s')))".formatted(tableName, secondFileModifiedTime.format(ISO_OFFSET_DATE_TIME), thirdFileModifiedTime.format(ISO_OFFSET_DATE_TIME)));
+        MaterializedResult expectedSecondThirdFileModifiedTimeStatistics =
+                resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                        .row("id", null, 2.0, 0.0, null, "2", "3")
+                        .row("value", null, 1.0, 0.5, null, "2", "2")
+                        .row(null, null, null, null, 2.0, null, null)
+                        .build();
+        if (format == AVRO) {
+            expectedSecondThirdFileModifiedTimeStatistics =
+                    resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                            .row("id", null, 2.0, 0.0, null, null, null)
+                            .row("value", null, 2.0, 0.0, null, null, null)
+                            .row(null, null, null, null, 2.0, null, null)
+                            .build();
+        }
+        assertThat(secondThirdFileModifiedTimeStatistics).containsExactlyElementsOf(expectedSecondThirdFileModifiedTimeStatistics);
+
+        MaterializedResult fourthFileModifiedTimeStatistics = computeActual(collectingStatisticsSession, "SHOW STATS FOR (" + fileModifiedTimePredicateSql.formatted(fourthFileModifiedTime.format(ISO_OFFSET_DATE_TIME)) + ")");
+        MaterializedResult expectedFourthFileModifiedTimeStatistics =
+                resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                        .row("id", null, 1.0, 0.0, null, "4", "4")
+                        .row("value", null, 1.0, 0.0, null, "4", "4")
+                        .row(null, null, null, null, 1.0, null, null)
+                        .build();
+        if (format == AVRO) {
+            expectedFourthFileModifiedTimeStatistics =
+                    resultBuilder(collectingStatisticsSession, VARCHAR, DOUBLE, DOUBLE, DOUBLE, DOUBLE, VARCHAR, VARCHAR)
+                            .row("id", null, 1.0, 0.0, null, null, null)
+                            .row("value", null, 1.0, 0.0, null, null, null)
+                            .row(null, null, null, null, 1.0, null, null)
+                            .build();
+        }
+        assertThat(fourthFileModifiedTimeStatistics).containsExactlyElementsOf(expectedFourthFileModifiedTimeStatistics);
+
+        assertUpdate("DROP TABLE " + tableName);
+    }
+
     @Test
     public void testDeleteWithPathColumn()
     {
diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java
index e4b42ae46ec6..b77be164b939 100644
--- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java
+++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java
@@ -819,7 +819,7 @@ public void testStatsFilePruning()
             Optional<Long> snapshotId = Optional.of((long) computeScalar("SELECT snapshot_id FROM \"" + testTable.getName() + "$snapshots\" ORDER BY committed_at DESC FETCH FIRST 1 ROW WITH TIES"));
             TypeManager typeManager = new TestingTypeManager();
             Table table = loadTable(testTable.getName());
-            TableStatistics withNoFilter = TableStatisticsReader.makeTableStatistics(typeManager, table, snapshotId, TupleDomain.all(), TupleDomain.all(), true);
+            TableStatistics withNoFilter = TableStatisticsReader.makeTableStatistics(typeManager, table, snapshotId, TupleDomain.all(), TupleDomain.all(), true, fileSystemFactory.create(SESSION));
             assertThat(withNoFilter.getRowCount().getValue()).isEqualTo(4.0);
 
             TableStatistics withPartitionFilter = TableStatisticsReader.makeTableStatistics(
@@ -830,7 +830,8 @@ public void testStatsFilePruning()
                            new IcebergColumnHandle(ColumnIdentity.primitiveColumnIdentity(1, "b"), INTEGER, ImmutableList.of(), INTEGER, true, Optional.empty()),
                            Domain.singleValue(INTEGER, 10L))),
                    TupleDomain.all(),
-                   true);
+                   true,
+                   fileSystemFactory.create(SESSION));
             assertThat(withPartitionFilter.getRowCount().getValue()).isEqualTo(3.0);
 
             TableStatistics withUnenforcedFilter = TableStatisticsReader.makeTableStatistics(
@@ -841,7 +842,8 @@ public void testStatsFilePruning()
                    TupleDomain.withColumnDomains(ImmutableMap.of(
                            new IcebergColumnHandle(ColumnIdentity.primitiveColumnIdentity(0, "a"), INTEGER, ImmutableList.of(), INTEGER, true, Optional.empty()),
                            Domain.create(ValueSet.ofRanges(Range.greaterThan(INTEGER, 100L)), true))),
-                   true);
+                   true,
+                   fileSystemFactory.create(SESSION));
             assertThat(withUnenforcedFilter.getRowCount().getValue()).isEqualTo(2.0);
         }
     }
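
Reviewer note: the sketch below is not part of the patch. It is a minimal, hedged illustration of how the helpers moved into IcebergUtil compose to prune one data file by the $path and $file_modified_time predicates, mirroring the logic the patch adds to TableStatisticsReader.makeTableStatistics and the existing check in IcebergSplitSource.pruneFileScanTask. The class MetadataColumnPruningSketch, the method shouldSkipFile, and its parameters are hypothetical names introduced only for illustration.

// Illustrative sketch only; not part of this patch.
// Mirrors the pruning pattern used in TableStatisticsReader.makeTableStatistics
// and IcebergSplitSource.pruneFileScanTask after this change.
package io.trino.plugin.iceberg;

import io.trino.filesystem.TrinoFileSystem;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.plugin.iceberg.IcebergUtil.getFileModifiedTimePathDomain;
import static io.trino.plugin.iceberg.IcebergUtil.getModificationTime;
import static io.trino.plugin.iceberg.IcebergUtil.getPathDomain;
import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone;
import static io.trino.spi.type.TimeZoneKey.UTC_KEY;

final class MetadataColumnPruningSketch
{
    private MetadataColumnPruningSketch() {}

    // Returns true when the $path or $file_modified_time predicate excludes the given data file path.
    static boolean shouldSkipFile(TupleDomain<IcebergColumnHandle> effectivePredicate, TrinoFileSystem fileSystem, String dataFilePath)
    {
        Domain pathDomain = getPathDomain(effectivePredicate);
        if (!pathDomain.isAll() && !pathDomain.includesNullableValue(utf8Slice(dataFilePath))) {
            return true;
        }
        Domain fileModifiedTimeDomain = getFileModifiedTimePathDomain(effectivePredicate);
        if (!fileModifiedTimeDomain.isAll()) {
            // The file system is only consulted when a $file_modified_time predicate is present
            long fileModifiedTime = getModificationTime(dataFilePath, fileSystem);
            return !fileModifiedTimeDomain.includesNullableValue(packDateTimeWithZone(fileModifiedTime, UTC_KEY));
        }
        return false;
    }
}

Checking the $path domain before touching the file system keeps the common case cheap; only files that survive the path check and face a $file_modified_time predicate pay for a lastModified lookup.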