Skip to content

Commit

Permalink
Allow union data to conform to smaller union
Browse files Browse the repository at this point in the history
HIVE/AVRO: It is possible for data that is written using a 3-element
union to be read with a 2-element union, provided that either all
data types can be coerced (already possible) or the offending data
type(s) are not present in the data. This change delays all type errors
to read time to allow more type leniency.
  • Loading branch information
jklamer authored and hashhar committed Jan 8, 2024
1 parent 6f9f8cf commit 2e1de3b
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,29 @@ private static BlockBuildingDecoder createBlockBuildingDecoderForAction(Resolver
yield new ReaderUnionCoercedIntoRowBlockBuildingDecoder((Resolver.ReaderUnion) action, typeManager);
}
}
case ERROR -> throw new AvroTypeException("Resolution action returned with error " + action);
case ERROR -> new TypeErrorThrower((Resolver.ErrorAction) action);
case SKIP -> throw new IllegalStateException("Skips filtered by row step");
};
}

/**
 * Decoder that stands in for a resolution error action. Constructing it never fails;
 * the type error is raised only when a value is actually decoded through it.
 */
private static class TypeErrorThrower
        extends BlockBuildingDecoder
{
    private final Resolver.ErrorAction errorAction;

    /**
     * @param action the error action to report on decode; must not be null
     */
    public TypeErrorThrower(Resolver.ErrorAction action)
    {
        requireNonNull(action, "action is null");
        this.errorAction = action;
    }

    /**
     * Always fails: wraps an {@code AvroTypeException} describing the stored action
     * in an {@code IOException}. Neither argument is read.
     */
    @Override
    protected void decodeIntoBlock(Decoder decoder, BlockBuilder builder)
            throws IOException
    {
        String message = "Resolution action returned with error " + errorAction;
        AvroTypeException typeError = new AvroTypeException(message);
        throw new IOException(typeError);
    }
}

// Different plugins may have different Avro Schema to Type mappings
// that are currently transforming GenericDatumReader returned objects into their target type during the record reading process
// This block building decoder allows plugin writers to port that code directly and use within this reader
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import io.airlift.slice.Slice;
import io.trino.filesystem.TrinoInputFile;
import io.trino.spi.Page;
Expand Down Expand Up @@ -335,4 +336,38 @@ public void testCoercionOfUnionToStruct()
assertThat(totalRecords).isEqualTo(3);
}
}

/**
 * Writes a file whose writer schema declares a 3-branch union (null, int, string) but whose
 * records only ever use the null/int branches, then reads it back with a 2-branch union
 * (null, int) reader schema. The read must succeed and return every written record.
 */
@Test
public void testRead3UnionWith2UnionDataWith2Union()
        throws IOException, AvroTypeException
{
    int expectedRecords = 1000;

    Schema twoUnion = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.INT));
    Schema threeUnion = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.INT), Schema.create(Schema.Type.STRING));

    Schema twoUnionRecord = SchemaBuilder.builder()
            .record("aRecord")
            .fields()
            .name("aField")
            .type(twoUnion)
            .noDefault()
            .endRecord();

    Schema threeUnionRecord = SchemaBuilder.builder()
            .record("aRecord")
            .fields()
            .name("aField")
            .type(threeUnion)
            .noDefault()
            .endRecord();

    // write a file with the 3 union schema, using 2 union data
    TrinoInputFile inputFile = createWrittenFileWithData(
            threeUnionRecord,
            ImmutableList.copyOf(Iterables.transform(new RandomData(twoUnionRecord, expectedRecords), object -> (GenericRecord) object)));

    // read the file with the 2 union schema and ensure that no error is thrown
    int totalRecords = 0;
    try (AvroFileReader avroFileReader = new AvroFileReader(inputFile, twoUnionRecord, NoOpAvroTypeManager.INSTANCE)) {
        while (avroFileReader.hasNext()) {
            Page page = avroFileReader.next();
            assertThat(page).isNotNull();
            totalRecords += page.getPositionCount();
        }
    }
    // guard against a vacuous pass: an empty or truncated read must fail the test
    assertThat(totalRecords).isEqualTo(expectedRecords);
}
}

0 comments on commit 2e1de3b

Please sign in to comment.