From 1ba014338085388c6585bdb73427cab5a491684c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zieli=C5=84ski?= Date: Mon, 10 Jul 2023 18:06:39 +0100 Subject: [PATCH] Add support for TIMESTAMP WITH LOCAL TIME ZONE in Hive connector * Map Hive TIMESTAMP WITH LOCAL TIME ZONE type to TIMESTAMP WITH TIME ZONE. * Hive: update docs about TIMESTAMP WITH LOCAL TIME ZONE --- docs/src/main/sphinx/connector/hive.rst | 6 +-- .../java/io/trino/plugin/hive/HiveType.java | 6 ++- .../metastore/thrift/ThriftMetastoreUtil.java | 3 +- .../plugin/hive/util/HiveTypeTranslator.java | 3 ++ .../plugin/hive/BaseHiveConnectorTest.java | 47 +++++++++++++++++++ 5 files changed, 59 insertions(+), 6 deletions(-) diff --git a/docs/src/main/sphinx/connector/hive.rst b/docs/src/main/sphinx/connector/hive.rst index 18a3c1e09ca1..e8be87462bae 100644 --- a/docs/src/main/sphinx/connector/hive.rst +++ b/docs/src/main/sphinx/connector/hive.rst @@ -1706,9 +1706,9 @@ Hive 3-related limitations * For security reasons, the ``sys`` system catalog is not accessible. -* Hive's ``timestamp with local zone`` data type is not supported. - It is possible to read from a table with a column of this type, but the column - data is not accessible. Writing to such a table is not supported. +* Hive's ``timestamp with local time zone`` data type is mapped to + ``timestamp with time zone`` in the UTC time zone. Only reading values is + supported; writing to tables with columns of this type is not supported.
* Due to Hive issues `HIVE-21002 <https://issues.apache.org/jira/browse/HIVE-21002>`_ and `HIVE-22167 <https://issues.apache.org/jira/browse/HIVE-22167>`_, Trino does diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveType.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveType.java index 0733174148db..907b21a54d55 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveType.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveType.java @@ -55,6 +55,7 @@ import static io.trino.plugin.hive.util.SerdeConstants.INT_TYPE_NAME; import static io.trino.plugin.hive.util.SerdeConstants.SMALLINT_TYPE_NAME; import static io.trino.plugin.hive.util.SerdeConstants.STRING_TYPE_NAME; +import static io.trino.plugin.hive.util.SerdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME; import static io.trino.plugin.hive.util.SerdeConstants.TIMESTAMP_TYPE_NAME; import static io.trino.plugin.hive.util.SerdeConstants.TINYINT_TYPE_NAME; import static java.util.Objects.requireNonNull; @@ -72,6 +73,7 @@ public final class HiveType public static final HiveType HIVE_DOUBLE = new HiveType(getPrimitiveTypeInfo(DOUBLE_TYPE_NAME)); public static final HiveType HIVE_STRING = new HiveType(getPrimitiveTypeInfo(STRING_TYPE_NAME)); public static final HiveType HIVE_TIMESTAMP = new HiveType(getPrimitiveTypeInfo(TIMESTAMP_TYPE_NAME)); + public static final HiveType HIVE_TIMESTAMPLOCALTZ = new HiveType(getPrimitiveTypeInfo(TIMESTAMPLOCALTZ_TYPE_NAME)); public static final HiveType HIVE_DATE = new HiveType(getPrimitiveTypeInfo(DATE_TYPE_NAME)); public static final HiveType HIVE_BINARY = new HiveType(getPrimitiveTypeInfo(BINARY_TYPE_NAME)); @@ -197,10 +199,10 @@ private static boolean isSupported(PrimitiveTypeInfo typeInfo) CHAR, DATE, TIMESTAMP, + TIMESTAMPLOCALTZ, BINARY, DECIMAL -> true; - case TIMESTAMPLOCALTZ, - INTERVAL_YEAR_MONTH, + case INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME, VOID, UNKNOWN -> false; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java index 5b02a1adb863..fb499e213b50 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java @@ -64,6 +64,7 @@ import io.trino.spi.type.MapType; import io.trino.spi.type.RowType; import io.trino.spi.type.TimestampType; +import io.trino.spi.type.TimestampWithTimeZoneType; import io.trino.spi.type.Type; import io.trino.spi.type.VarcharType; import jakarta.annotation.Nullable; @@ -942,7 +943,7 @@ public static Set getSupportedColumnStatistics(Type typ if (isNumericType(type) || type.equals(DATE)) { return ImmutableSet.of(MIN_VALUE, MAX_VALUE, NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES); } - if (type instanceof TimestampType) { + if (type instanceof TimestampType || type instanceof TimestampWithTimeZoneType) { // TODO (https://github.com/trinodb/trino/issues/5859) Add support for timestamp MIN_VALUE, MAX_VALUE return ImmutableSet.of(NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveTypeTranslator.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveTypeTranslator.java index 236cf0260202..574b26a76be3 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveTypeTranslator.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveTypeTranslator.java @@ -75,6 +75,7 @@ import static io.trino.spi.type.RealType.REAL; import static io.trino.spi.type.SmallintType.SMALLINT; import static io.trino.spi.type.TimestampType.createTimestampType; +import static io.trino.spi.type.TimestampWithTimeZoneType.createTimestampWithTimeZoneType; import static io.trino.spi.type.TinyintType.TINYINT; import static io.trino.spi.type.TypeSignature.arrayType; import static io.trino.spi.type.TypeSignature.mapType; @@ 
-252,6 +253,8 @@ private static Type fromPrimitiveType(PrimitiveTypeInfo typeInfo, HiveTimestampP return DATE; case TIMESTAMP: return createTimestampType(timestampPrecision.getPrecision()); + case TIMESTAMPLOCALTZ: + return createTimestampWithTimeZoneType(timestampPrecision.getPrecision()); case BINARY: return VARBINARY; case DECIMAL: diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java index 860d83b6b0fc..1ea71bca8e7b 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java @@ -29,6 +29,12 @@ import io.trino.metadata.QualifiedObjectName; import io.trino.metadata.TableHandle; import io.trino.metadata.TableMetadata; +import io.trino.plugin.hive.metastore.Column; +import io.trino.plugin.hive.metastore.PrincipalPrivileges; +import io.trino.plugin.hive.metastore.Storage; +import io.trino.plugin.hive.metastore.StorageFormat; +import io.trino.plugin.hive.metastore.Table; +import io.trino.plugin.hive.metastore.file.FileHiveMetastore; import io.trino.spi.connector.CatalogSchemaTableName; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.connector.ColumnMetadata; @@ -83,6 +89,7 @@ import java.util.Map; import java.util.Optional; import java.util.OptionalInt; +import java.util.OptionalLong; import java.util.Set; import java.util.StringJoiner; import java.util.function.BiConsumer; @@ -136,6 +143,7 @@ import static io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY; import static io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY; import static io.trino.plugin.hive.HiveType.toHiveType; +import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; import static io.trino.plugin.hive.util.HiveUtil.columnExtraInfo; import static 
io.trino.spi.security.Identity.ofUser; import static io.trino.spi.security.SelectedRole.Type.ROLE; @@ -8362,6 +8370,45 @@ public void testSelectFromPrestoViewReferencingHiveTableWithTimestamps() assertThat(query(nanosSessions, "SELECT ts FROM hive_timestamp_nanos.tpch." + prestoViewNameNanos)).matches("VALUES TIMESTAMP '1990-01-02 12:13:14.123000000'"); } + @Test + public void testTimestampWithTimeZone() + { + assertUpdate("CREATE TABLE test_timestamptz_base (t timestamp) WITH (format = 'PARQUET')"); + assertUpdate("INSERT INTO test_timestamptz_base (t) VALUES" + + "(timestamp '2022-07-26 12:13')", 1); + + // Writing TIMESTAMP WITH LOCAL TIME ZONE is not supported, so we first create a Parquet file by writing an unzoned + // timestamp (which is converted to UTC using the default time zone) and then create another table that reads from the same file. + String tableLocation = getTableLocation("test_timestamptz_base"); + + // Trino DDL cannot declare a TIMESTAMP WITH LOCAL TIME ZONE column (the type is only mapped when reading), so we create the metastore entry manually + FileHiveMetastore metastore = createTestingFileHiveMetastore(new File(getDistributedQueryRunner().getCoordinator().getBaseDataDir().toFile(), "hive_data")); + metastore.createTable( + new Table( + "tpch", + "test_timestamptz", + Optional.of("hive"), + "EXTERNAL_TABLE", + new Storage( + StorageFormat.fromHiveStorageFormat(HiveStorageFormat.PARQUET), + Optional.of(tableLocation), + Optional.empty(), + false, + Collections.emptyMap()), + List.of(new Column("t", HiveType.HIVE_TIMESTAMPLOCALTZ, Optional.empty())), + List.of(), + Collections.emptyMap(), + Optional.empty(), + Optional.empty(), + OptionalLong.empty()), + PrincipalPrivileges.fromHivePrivilegeInfos(Collections.emptySet())); + + assertThat(query("SELECT * FROM test_timestamptz")) + .matches("VALUES TIMESTAMP '2022-07-26 17:13:00.000 UTC'"); + + assertUpdate("DROP TABLE test_timestamptz"); + } + @Test(dataProvider = "legalUseColumnNamesProvider") public void
testUseColumnNames(HiveStorageFormat format, boolean formatUseColumnNames) {