Skip to content

Commit

Permalink
Fixed StatsTask fails due to ClassCastException
Browse files Browse the repository at this point in the history
  • Loading branch information
Asmoday committed Nov 7, 2022
1 parent e1d5910 commit e741867
Show file tree
Hide file tree
Showing 16 changed files with 186 additions and 47 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.metastore.columnstats;

import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;
import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector;
import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;

/**
* Utils class for columnstats package.
*/
public final class ColumnsStatsUtils {

private ColumnsStatsUtils(){}

/**
* Convertes to DateColumnStatsDataInspector if it's a DateColumnStatsData.
* @param cso ColumnStatisticsObj
* @return DateColumnStatsDataInspector
*/
public static DateColumnStatsDataInspector dateInspectorFromStats(ColumnStatisticsObj cso) {
DateColumnStatsDataInspector dateColumnStats;
if (cso.getStatsData().getDateStats() instanceof DateColumnStatsDataInspector) {
dateColumnStats =
(DateColumnStatsDataInspector)(cso.getStatsData().getDateStats());
} else {
dateColumnStats = new DateColumnStatsDataInspector(cso.getStatsData().getDateStats());
}
return dateColumnStats;
}

/**
* Convertes to StringColumnStatsDataInspector
* if it's a StringColumnStatsData.
* @param cso ColumnStatisticsObj
* @return StringColumnStatsDataInspector
*/
public static StringColumnStatsDataInspector stringInspectorFromStats(ColumnStatisticsObj cso) {
StringColumnStatsDataInspector columnStats;
if (cso.getStatsData().getStringStats() instanceof StringColumnStatsDataInspector) {
columnStats =
(StringColumnStatsDataInspector)(cso.getStatsData().getStringStats());
} else {
columnStats = new StringColumnStatsDataInspector(cso.getStatsData().getStringStats());
}
return columnStats;
}

/**
* Convertes to LongColumnStatsDataInspector if it's a LongColumnStatsData.
* @param cso ColumnStatisticsObj
* @return LongColumnStatsDataInspector
*/
public static LongColumnStatsDataInspector longInspectorFromStats(ColumnStatisticsObj cso) {
LongColumnStatsDataInspector columnStats;
if (cso.getStatsData().getLongStats() instanceof LongColumnStatsDataInspector) {
columnStats =
(LongColumnStatsDataInspector)(cso.getStatsData().getLongStats());
} else {
columnStats = new LongColumnStatsDataInspector(cso.getStatsData().getLongStats());
}
return columnStats;
}

/**
* Convertes to DoubleColumnStatsDataInspector
* if it's a DoubleColumnStatsData.
* @param cso ColumnStatisticsObj
* @return DoubleColumnStatsDataInspector
*/
public static DoubleColumnStatsDataInspector doubleInspectorFromStats(ColumnStatisticsObj cso) {
DoubleColumnStatsDataInspector columnStats;
if (cso.getStatsData().getDoubleStats() instanceof DoubleColumnStatsDataInspector) {
columnStats =
(DoubleColumnStatsDataInspector)(cso.getStatsData().getDoubleStats());
} else {
columnStats = new DoubleColumnStatsDataInspector(cso.getStatsData().getDoubleStats());
}
return columnStats;
}

/**
* Convertes to DecimalColumnStatsDataInspector
* if it's a DecimalColumnStatsData.
* @param cso ColumnStatisticsObj
* @return DecimalColumnStatsDataInspector
*/
public static DecimalColumnStatsDataInspector decimalInspectorFromStats(ColumnStatisticsObj cso) {
DecimalColumnStatsDataInspector columnStats;
if (cso.getStatsData().getDecimalStats() instanceof DecimalColumnStatsDataInspector) {
columnStats =
(DecimalColumnStatsDataInspector)(cso.getStatsData().getDecimalStats());
} else {
columnStats = new DecimalColumnStatsDataInspector(cso.getStatsData().getDecimalStats());
}
return columnStats;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.dateInspectorFromStats;

public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
IExtrapolatePartStatus {

Expand All @@ -62,8 +64,8 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
cso.getStatsData().getSetField());
LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats);
}
DateColumnStatsDataInspector dateColumnStats =
(DateColumnStatsDataInspector) cso.getStatsData().getDateStats();
DateColumnStatsDataInspector dateColumnStats = dateInspectorFromStats(cso);

if (dateColumnStats.getNdvEstimator() == null) {
ndvEstimator = null;
break;
Expand Down Expand Up @@ -95,9 +97,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
double densityAvgSum = 0.0;
for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
ColumnStatisticsObj cso = csp.getColStatsObj();
DateColumnStatsDataInspector newData =
(DateColumnStatsDataInspector) cso.getStatsData().getDateStats();
lowerBound = Math.max(lowerBound, newData.getNumDVs());
DateColumnStatsDataInspector newData = dateInspectorFromStats(cso);
higherBound += newData.getNumDVs();
densityAvgSum += (diff(newData.getHighValue(), newData.getLowValue()))
/ newData.getNumDVs();
Expand Down Expand Up @@ -174,8 +174,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
ColumnStatisticsObj cso = csp.getColStatsObj();
String partName = csp.getPartName();
DateColumnStatsDataInspector newData =
(DateColumnStatsDataInspector) cso.getStatsData().getDateStats();
DateColumnStatsDataInspector newData = dateInspectorFromStats(cso);
// newData.isSetBitVectors() should be true for sure because we
// already checked it before.
if (indexMap.get(partName) != curIndex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.decimalInspectorFromStats;

public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implements
IExtrapolatePartStatus {

Expand All @@ -65,8 +67,8 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName,
doAllPartitionContainStats);
}
DecimalColumnStatsDataInspector decimalColumnStatsData =
(DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats();
DecimalColumnStatsDataInspector decimalColumnStatsData = decimalInspectorFromStats(cso);

if (decimalColumnStatsData.getNdvEstimator() == null) {
ndvEstimator = null;
break;
Expand Down Expand Up @@ -98,8 +100,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
double densityAvgSum = 0.0;
for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
ColumnStatisticsObj cso = csp.getColStatsObj();
DecimalColumnStatsDataInspector newData =
(DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats();
DecimalColumnStatsDataInspector newData = decimalInspectorFromStats(cso);
lowerBound = Math.max(lowerBound, newData.getNumDVs());
higherBound += newData.getNumDVs();
densityAvgSum += (MetaStoreUtils.decimalToDouble(newData.getHighValue()) - MetaStoreUtils
Expand Down Expand Up @@ -187,8 +188,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
ColumnStatisticsObj cso = csp.getColStatsObj();
String partName = csp.getPartName();
DecimalColumnStatsDataInspector newData =
(DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats();
DecimalColumnStatsDataInspector newData = decimalInspectorFromStats(cso);
// newData.isSetBitVectors() should be true for sure because we
// already checked it before.
if (indexMap.get(partName) != curIndex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.doubleInspectorFromStats;

public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implements
IExtrapolatePartStatus {

Expand All @@ -63,7 +65,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
doAllPartitionContainStats);
}
DoubleColumnStatsDataInspector doubleColumnStatsData =
(DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats();
doubleInspectorFromStats(cso);
if (doubleColumnStatsData.getNdvEstimator() == null) {
ndvEstimator = null;
break;
Expand Down Expand Up @@ -95,8 +97,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
double densityAvgSum = 0.0;
for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
ColumnStatisticsObj cso = csp.getColStatsObj();
DoubleColumnStatsDataInspector newData =
(DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats();
DoubleColumnStatsDataInspector newData = doubleInspectorFromStats(cso);
lowerBound = Math.max(lowerBound, newData.getNumDVs());
higherBound += newData.getNumDVs();
densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
Expand Down Expand Up @@ -173,7 +174,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
ColumnStatisticsObj cso = csp.getColStatsObj();
String partName = csp.getPartName();
DoubleColumnStatsDataInspector newData =
(DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats();
doubleInspectorFromStats(cso);
// newData.isSetBitVectors() should be true for sure because we
// already checked it before.
if (indexMap.get(partName) != curIndex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.longInspectorFromStats;

public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
IExtrapolatePartStatus {

Expand All @@ -63,8 +65,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName,
doAllPartitionContainStats);
}
LongColumnStatsDataInspector longColumnStatsData =
(LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
LongColumnStatsDataInspector longColumnStatsData = longInspectorFromStats(cso);
if (longColumnStatsData.getNdvEstimator() == null) {
ndvEstimator = null;
break;
Expand Down Expand Up @@ -96,8 +97,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
double densityAvgSum = 0.0;
for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
ColumnStatisticsObj cso = csp.getColStatsObj();
LongColumnStatsDataInspector newData =
(LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
LongColumnStatsDataInspector newData = longInspectorFromStats(cso);
lowerBound = Math.max(lowerBound, newData.getNumDVs());
higherBound += newData.getNumDVs();
densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs();
Expand Down Expand Up @@ -174,8 +174,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
ColumnStatisticsObj cso = csp.getColStatsObj();
String partName = csp.getPartName();
LongColumnStatsDataInspector newData =
(LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
LongColumnStatsDataInspector newData = longInspectorFromStats(cso);
// newData.isSetBitVectors() should be true for sure because we
// already checked it before.
if (indexMap.get(partName) != curIndex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.stringInspectorFromStats;

public class StringColumnStatsAggregator extends ColumnStatsAggregator implements
IExtrapolatePartStatus {

Expand All @@ -63,8 +65,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName,
doAllPartitionContainStats);
}
StringColumnStatsDataInspector stringColumnStatsData =
(StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
StringColumnStatsDataInspector stringColumnStatsData = stringInspectorFromStats(cso);
if (stringColumnStatsData.getNdvEstimator() == null) {
ndvEstimator = null;
break;
Expand Down Expand Up @@ -93,8 +94,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
StringColumnStatsDataInspector aggregateData = null;
for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
ColumnStatisticsObj cso = csp.getColStatsObj();
StringColumnStatsDataInspector newData =
(StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
StringColumnStatsDataInspector newData = stringInspectorFromStats(cso);
if (ndvEstimator != null) {
ndvEstimator.mergeEstimators(newData.getNdvEstimator());
}
Expand Down Expand Up @@ -149,7 +149,7 @@ public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWit
ColumnStatisticsObj cso = csp.getColStatsObj();
String partName = csp.getPartName();
StringColumnStatsDataInspector newData =
(StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
stringInspectorFromStats(cso);
// newData.isSetBitVectors() should be true for sure because we
// already checked it before.
if (indexMap.get(partName) != curIndex) {
Expand Down Expand Up @@ -211,7 +211,8 @@ public void extrapolate(ColumnStatisticsData extrapolateData, int numParts,
int numPartsWithStats, Map<String, Double> adjustedIndexMap,
Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
int rightBorderInd = numParts;
StringColumnStatsDataInspector extrapolateStringData = new StringColumnStatsDataInspector();
StringColumnStatsDataInspector extrapolateStringData =
new StringColumnStatsDataInspector();
Map<String, StringColumnStatsData> extractedAdjustedStatsMap = new HashMap<>();
for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) {
extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getStringStats());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public DateColumnStatsDataInspector(DateColumnStatsDataInspector other) {
}
}

public DateColumnStatsDataInspector(DateColumnStatsData other) {
super(other);
}

@Override
public DateColumnStatsDataInspector deepCopy() {
return new DateColumnStatsDataInspector(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public DecimalColumnStatsDataInspector(DecimalColumnStatsDataInspector other) {
}
}

public DecimalColumnStatsDataInspector(DecimalColumnStatsData other) {
super(other);
}

@Override
public DecimalColumnStatsDataInspector deepCopy() {
return new DecimalColumnStatsDataInspector(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public DoubleColumnStatsDataInspector(DoubleColumnStatsDataInspector other) {
}
}

public DoubleColumnStatsDataInspector(DoubleColumnStatsData other) {
super(other);
}

@Override
public DoubleColumnStatsDataInspector deepCopy() {
return new DoubleColumnStatsDataInspector(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public LongColumnStatsDataInspector(LongColumnStatsDataInspector other) {
}
}

public LongColumnStatsDataInspector(LongColumnStatsData other) {
super(other);
}

@Override
public LongColumnStatsDataInspector deepCopy() {
return new LongColumnStatsDataInspector(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ public StringColumnStatsDataInspector(StringColumnStatsDataInspector other) {
}
}

public StringColumnStatsDataInspector(StringColumnStatsData other) {
super(other);
}

@Override
public StringColumnStatsDataInspector deepCopy() {
return new StringColumnStatsDataInspector(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
import org.apache.hadoop.hive.metastore.api.Date;
import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector;

import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.dateInspectorFromStats;

public class DateColumnStatsMerger extends ColumnStatsMerger {
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
DateColumnStatsDataInspector aggregateData =
(DateColumnStatsDataInspector) aggregateColStats.getStatsData().getDateStats();
DateColumnStatsDataInspector newData =
(DateColumnStatsDataInspector) newColStats.getStatsData().getDateStats();
DateColumnStatsDataInspector aggregateData = dateInspectorFromStats(aggregateColStats);
DateColumnStatsDataInspector newData = dateInspectorFromStats(newColStats);
Date lowValue = aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData
.getLowValue() : newData.getLowValue();
aggregateData.setLowValue(lowValue);
Expand Down
Loading

0 comments on commit e741867

Please sign in to comment.