/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hive.metastore.hbase.stats.merge;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
public class ColumnStatsMergerFactory {
private ColumnStatsMergerFactory() {
}
// we depend on the toString() method for javolution.util.FastCollection.
private static int countNumBitVectors(String s) {
if (s != null) {
return StringUtils.countMatches(s, "{");
} else {
return 0;
}
}
public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew,
ColumnStatisticsObj statsObjOld) {
ColumnStatsMerger agg;
_Fields typeNew = statsObjNew.getStatsData().getSetField();
_Fields typeOld = statsObjOld.getStatsData().getSetField();
// make sure that they have the same type
typeNew = typeNew == typeOld ? typeNew : null;
int numBitVectors = 0;
switch (typeNew) {
case BOOLEAN_STATS:
agg = new BooleanColumnStatsMerger();
break;
case LONG_STATS: {
agg = new LongColumnStatsMerger();
int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getLongStats().getBitVectors());
int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getLongStats().getBitVectors());
numBitVectors = nbvNew == nbvOld ? nbvNew : 0;
break;
}
case DOUBLE_STATS: {
agg = new DoubleColumnStatsMerger();
int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getDoubleStats().getBitVectors());
int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getDoubleStats().getBitVectors());
numBitVectors = nbvNew == nbvOld ? nbvNew : 0;
break;
}
case STRING_STATS: {
agg = new StringColumnStatsMerger();
int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getStringStats().getBitVectors());
int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getStringStats().getBitVectors());
numBitVectors = nbvNew == nbvOld ? nbvNew : 0;
break;
}
case BINARY_STATS:
agg = new BinaryColumnStatsMerger();
break;
case DECIMAL_STATS: {
agg = new DecimalColumnStatsMerger();
int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getDecimalStats().getBitVectors());
int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getDecimalStats().getBitVectors());
numBitVectors = nbvNew == nbvOld ? nbvNew : 0;
break;
}
default:
throw new RuntimeException("Woh, bad. Unknown stats type " + typeNew.toString());
}
if (numBitVectors > 0) {
agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors);
}
return agg;
}
public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
ColumnStatisticsObj cso = new ColumnStatisticsObj();
ColumnStatisticsData csd = new ColumnStatisticsData();
cso.setColName(colName);
cso.setColType(colType);
switch (type) {
case BOOLEAN_STATS:
csd.setBooleanStats(new BooleanColumnStatsData());
break;
case LONG_STATS:
csd.setLongStats(new LongColumnStatsData());
break;
case DOUBLE_STATS:
csd.setDoubleStats(new DoubleColumnStatsData());
break;
case STRING_STATS:
csd.setStringStats(new StringColumnStatsData());
break;
case BINARY_STATS:
csd.setBinaryStats(new BinaryColumnStatsData());
break;
case DECIMAL_STATS:
csd.setDecimalStats(new DecimalColumnStatsData());
break;
default:
throw new RuntimeException("Woh, bad. Unknown stats type!");
}
cso.setStatsData(csd);
return cso;
}
}