Skip to content

Commit

Permalink
Add date to varchar coercion for hive
Browse files Browse the repository at this point in the history
  • Loading branch information
findinpath committed Dec 4, 2023
1 parent e38ebbf commit 1979724
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/src/main/sphinx/connector/hive.md
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,8 @@ type conversions.
* - `DECIMAL`
- `DOUBLE`, `REAL`, `VARCHAR`, `TINYINT`, `SMALLINT`, `INTEGER`, `BIGINT`, as
well as narrowing and widening conversions for `DECIMAL`
* - `DATE`
- `VARCHAR`
* - `TIMESTAMP`
- `VARCHAR`
:::
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import io.trino.plugin.hive.HiveTimestampPrecision;
import io.trino.plugin.hive.HiveType;
import io.trino.plugin.hive.coercions.BooleanCoercer.BooleanToVarcharCoercer;
import io.trino.plugin.hive.coercions.DateCoercer.DateToVarcharCoercer;
import io.trino.plugin.hive.coercions.DateCoercer.VarcharToDateCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToDateCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer;
Expand Down Expand Up @@ -199,6 +200,9 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH
}
return Optional.empty();
}
if (fromType instanceof DateType && toType instanceof VarcharType toVarcharType) {
return Optional.of(new DateToVarcharCoercer(toVarcharType));
}
if (fromType == DOUBLE && toType instanceof VarcharType toVarcharType) {
return Optional.of(new DoubleToVarcharCoercer(toVarcharType, coercionContext.treatNaNAsNull()));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,23 @@
*/
package io.trino.plugin.hive.coercions;

import io.airlift.slice.Slice;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.DateType;
import io.trino.spi.type.VarcharType;

import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.format.DateTimeParseException;

import static io.airlift.slice.SliceUtf8.countCodePoints;
import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_TIMESTAMP_COERCION;
import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS;
import static io.trino.spi.type.DateType.DATE;
import static java.lang.String.format;
import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;

public final class DateCoercer
Expand Down Expand Up @@ -55,4 +62,32 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos
}
}
}

/**
 * Coerces a {@code DATE} column value into its {@code VARCHAR} representation,
 * formatted with {@link java.time.format.DateTimeFormatter#ISO_LOCAL_DATE}.
 */
public static class DateToVarcharCoercer
        extends TypeCoercer<DateType, VarcharType>
{
    /**
     * @param toType target varchar type; when bounded, values whose text does not fit are rejected
     */
    public DateToVarcharCoercer(VarcharType toType)
    {
        super(DATE, toType);
    }

    /**
     * Reads the date at {@code position}, interprets it as a day count since the epoch,
     * and appends its ISO local date text to {@code blockBuilder}.
     *
     * @throws TrinoException if the value is below {@code START_OF_MODERN_ERA_DAYS}
     *         (a constant declared outside this block), or if the text is longer than
     *         the bounded target varchar allows
     * @throws IllegalArgumentException if the value cannot be converted to a date
     */
    @Override
    protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position)
    {
        int value = fromType.getInt(block, position);
        // Range guard kept outside the try: it throws TrinoException, which the catch below never handled.
        if (value < START_OF_MODERN_ERA_DAYS) {
            throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported");
        }

        Slice converted;
        try {
            converted = utf8Slice(ISO_LOCAL_DATE.format(LocalDate.ofEpochDay(value)));
        }
        catch (DateTimeException e) {
            // Keep the original exception as the cause so the underlying failure is not lost.
            throw new IllegalArgumentException("Invalid date value: " + value + " is exceeding supported date range", e);
        }

        // Length is measured in code points, not bytes, before comparing against the bound.
        if (!toType.isUnbounded() && countCodePoints(converted) > toType.getBoundedLength()) {
            throw new TrinoException(INVALID_ARGUMENTS, format("Varchar representation of '%s' exceeds %s bounds", converted.toStringUtf8(), toType));
        }
        toType.writeSlice(blockBuilder, converted);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import io.trino.orc.metadata.OrcType.OrcTypeKind;
import io.trino.plugin.hive.coercions.BooleanCoercer.BooleanToVarcharCoercer;
import io.trino.plugin.hive.coercions.DateCoercer.DateToVarcharCoercer;
import io.trino.plugin.hive.coercions.DateCoercer.VarcharToDateCoercer;
import io.trino.plugin.hive.coercions.DoubleToVarcharCoercer;
import io.trino.plugin.hive.coercions.IntegerNumberToDoubleCoercer;
Expand All @@ -34,6 +35,7 @@

import static io.trino.orc.metadata.OrcType.OrcTypeKind.BOOLEAN;
import static io.trino.orc.metadata.OrcType.OrcTypeKind.BYTE;
import static io.trino.orc.metadata.OrcType.OrcTypeKind.DATE;
import static io.trino.orc.metadata.OrcType.OrcTypeKind.DOUBLE;
import static io.trino.orc.metadata.OrcType.OrcTypeKind.INT;
import static io.trino.orc.metadata.OrcType.OrcTypeKind.LONG;
Expand Down Expand Up @@ -63,6 +65,9 @@ private OrcTypeTranslator() {}
}
return Optional.empty();
}
if (fromOrcType == DATE && toTrinoType instanceof VarcharType varcharType) {
return Optional.of(new DateToVarcharCoercer(varcharType));
}
if (isVarcharType(fromOrcType)) {
if (toTrinoType instanceof TimestampType timestampType) {
if (timestampType.isShort()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType, HiveTimest
fromHiveType.equals(HIVE_LONG) ||
fromHiveType.equals(HIVE_TIMESTAMP) ||
fromHiveType.equals(HIVE_DOUBLE) ||
fromHiveType.equals(HIVE_DATE) ||
fromType instanceof DecimalType;
}
if (toHiveType.equals(HIVE_DATE)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
package io.trino.plugin.hive.coercions;

import io.trino.plugin.hive.coercions.CoercionUtils.CoercionContext;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.type.DateType;
import io.trino.spi.type.Type;
import org.junit.jupiter.api.Test;

Expand All @@ -27,7 +27,10 @@
import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer;
import static io.trino.spi.predicate.Utils.blockToNativeValue;
import static io.trino.spi.predicate.Utils.nativeValueToBlock;
import static io.trino.spi.type.DateType.DATE;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;
import static io.trino.spi.type.VarcharType.createVarcharType;
import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
Expand Down Expand Up @@ -66,19 +69,49 @@ public void testThrowsExceptionWhenDateIsTooOld()
.hasMessageMatching(".*Coercion on historical dates is not supported.*");
}

/**
 * Date-to-varchar coercion into an unbounded varchar yields the ISO local date text,
 * including the signed form used for a five-digit year.
 */
@Test
public void testDateToVarchar()
{
    LocalDate fourDigitYear = LocalDate.parse("2023-01-10");
    assertDateToVarcharCoercion(createUnboundedVarcharType(), fourDigitYear, "2023-01-10");

    LocalDate fiveDigitYear = LocalDate.parse("+10000-04-25");
    assertDateToVarcharCoercion(createUnboundedVarcharType(), fiveDigitYear, "+10000-04-25");
}

/**
 * Coercing a date whose text is 10 characters long into varchar(8) must fail
 * with a {@link TrinoException} describing the exceeded bound.
 */
@Test
public void testDateToLowerBoundedVarchar()
{
    Type tooNarrow = createVarcharType(8);
    LocalDate date = LocalDate.parse("2023-10-23");

    assertThatThrownBy(() -> assertDateToVarcharCoercion(tooNarrow, date, "2023-10-23"))
            .isInstanceOf(TrinoException.class)
            .hasMessageContaining("Varchar representation of '2023-10-23' exceeds varchar(8) bounds");
}

/**
 * Coercing 1899-12-31 to varchar is expected to be rejected with the
 * "historical dates" error message.
 */
@Test
public void testHistoricalDateToVarchar()
{
    LocalDate historicalDate = LocalDate.parse("1899-12-31");

    assertThatThrownBy(() -> assertDateToVarcharCoercion(createUnboundedVarcharType(), historicalDate, null))
            .hasMessageMatching(".*Coercion on historical dates is not supported.*");
}

/**
 * Asserts that coercing {@code date} from {@code fromType} to DATE produces the
 * value that {@code fromDateToEpochDate} computes for the same string.
 */
private void assertVarcharToDateCoercion(Type fromType, String date)
{
    long expectedEpochDay = fromDateToEpochDate(date);
    assertVarcharToDateCoercion(fromType, date, expectedEpochDay);
}

private void assertVarcharToDateCoercion(Type fromType, String date, Long expected)
{
Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(DateType.DATE), new CoercionContext(NANOSECONDS, false)).orElseThrow()
Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(DATE), new CoercionContext(NANOSECONDS, false)).orElseThrow()
.apply(nativeValueToBlock(fromType, utf8Slice(date)));
assertThat(blockToNativeValue(DateType.DATE, coercedValue))
assertThat(blockToNativeValue(DATE, coercedValue))
.isEqualTo(expected);
}

/**
 * Coerces {@code date} (written as its epoch day into a DATE block) to {@code toType}
 * and asserts that the resulting varchar value equals {@code expected}.
 */
private void assertDateToVarcharCoercion(Type toType, LocalDate date, String expected)
{
    CoercionContext context = new CoercionContext(NANOSECONDS, false);
    Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(DATE), toHiveType(toType), context)
            .orElseThrow()
            .apply(nativeValueToBlock(DATE, date.toEpochDay()));

    Object actual = blockToNativeValue(VARCHAR, coercedValue);
    assertThat(actual).isEqualTo(utf8Slice(expected));
}

private long fromDateToEpochDate(String dateString)
{
LocalDate date = LocalDate.parse(dateString);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition)
"string_to_double",
"varchar_to_double_infinity",
"varchar_to_special_double",
"date_to_string",
"date_to_bounded_varchar",
"char_to_bigger_char",
"char_to_smaller_char",
"timestamp_millis_to_date",
Expand Down Expand Up @@ -237,6 +239,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType)
" '1234.01234', " +
" 'Infinity'," +
" 'NaN'," +
" DATE '2023-09-28', " +
" DATE '2000-04-13', " +
" 'abc', " +
" 'abc', " +
" TIMESTAMP '2022-12-31 23:59:59.999', " +
Expand Down Expand Up @@ -299,6 +303,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType)
" '0', " +
" '-Infinity'," +
" 'Invalid Double'," +
" DATE '2123-09-27', " +
" DATE '1900-01-01', " +
" '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " +
" '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " +
" TIMESTAMP '1970-01-01 00:00:00.123', " +
Expand Down Expand Up @@ -519,6 +525,12 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) {
.put("varchar_to_special_double", Arrays.asList(
coercedNaN == null ? null : Double.NaN,
null))
.put("date_to_string", ImmutableList.of(
"2023-09-28",
"2123-09-27"))
.put("date_to_bounded_varchar", ImmutableList.of(
"2000-04-13",
"1900-01-01"))
.put("char_to_bigger_char", ImmutableList.of(
"abc ",
"\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0 "))
Expand Down Expand Up @@ -977,6 +989,8 @@ private void assertProperAlteredTableSchema(String tableName)
row("string_to_double", "double"),
row("varchar_to_double_infinity", "double"),
row("varchar_to_special_double", "double"),
row("date_to_string", "varchar"),
row("date_to_bounded_varchar", "varchar(12)"),
row("char_to_bigger_char", "char(4)"),
row("char_to_smaller_char", "char(2)"),
row("timestamp_millis_to_date", "date"),
Expand Down Expand Up @@ -1055,6 +1069,8 @@ private void assertColumnTypes(
.put("string_to_double", DOUBLE)
.put("varchar_to_double_infinity", DOUBLE)
.put("varchar_to_special_double", DOUBLE)
.put("date_to_string", VARCHAR)
.put("date_to_bounded_varchar", VARCHAR)
.put("char_to_bigger_char", CHAR)
.put("char_to_smaller_char", CHAR)
.put("id", BIGINT)
Expand Down Expand Up @@ -1128,6 +1144,8 @@ private static void alterTableColumnTypes(String tableName)
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_bigger_varchar varchar_to_bigger_varchar varchar(4)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_smaller_varchar varchar_to_smaller_varchar varchar(2)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_date varchar_to_date date", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN date_to_string date_to_string string", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN date_to_bounded_varchar date_to_bounded_varchar varchar(12)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_distant_date varchar_to_distant_date date", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_double varchar_to_double double", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN string_to_double string_to_double double", tableName));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui
" string_to_double STRING," +
" varchar_to_double_infinity VARCHAR(40)," +
" varchar_to_special_double VARCHAR(40)," +
" date_to_string DATE," +
" date_to_bounded_varchar DATE," +
" char_to_bigger_char CHAR(3)," +
" char_to_smaller_char CHAR(3)," +
" timestamp_millis_to_date TIMESTAMP," +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ varchar_to_double VARCHAR(40),
string_to_double STRING,
varchar_to_double_infinity VARCHAR(40),
varchar_to_special_double VARCHAR(40),
date_to_string DATE,
date_to_bounded_varchar DATE,
char_to_bigger_char CHAR(3),
char_to_smaller_char CHAR(3),
timestamp_millis_to_date TIMESTAMP,
Expand Down

0 comments on commit 1979724

Please sign in to comment.