package com.klarna.hiverunner.data; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.apache.commons.beanutils.ConversionException; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hive.hcatalog.api.HCatTable; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.DefaultHCatRecord; import org.apache.hive.hcatalog.data.HCatRecord; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; import org.apache.hive.hcatalog.data.schema.HCatSchema; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.ImmutableMultimap.Builder; import com.google.common.collect.Multimap; class TableDataBuilder { private final Builder<Map<String, String>, HCatRecord> rowsBuilder = ImmutableMultimap.builder(); private final HCatSchema schema; private final List<HCatFieldSchema> partitionColumns; private HCatRecord row; private List<String> names; TableDataBuilder(HCatTable table) { schema = new HCatSchema(ImmutableList .<HCatFieldSchema> builder() .addAll(table.getCols()) .addAll(table.getPartCols()) .build()); partitionColumns = table.getPartCols(); withAllColumns(); } TableDataBuilder withColumns(String... names) { checkArgument(checkNotNull(names).length > 0, "Column names must be provided."); this.names = new ArrayList<>(names.length); for (String name : names) { checkColumn(name); this.names.add(name); } return this; } TableDataBuilder withAllColumns() { names = schema.getFieldNames(); return this; } TableDataBuilder newRow() { flushRow(); row = new DefaultHCatRecord(schema.size()); return this; } TableDataBuilder addRow(Object... values) { return newRow().setRow(values); } TableDataBuilder setRow(Object... values) { checkArgument(values.length == names.size(), "Expected %d values, got %d", names.size(), values.length); for (int i = 0; i < values.length; i++) { set(names.get(i), values[i]); } return this; } TableDataBuilder addRowsFromTsv(File file) { return addRowsFrom(file, new TsvFileParser()); } TableDataBuilder addRowsFromDelimited(File file, String delimiter, Object nullValue) { return addRowsFrom(file, new TsvFileParser().withDelimiter(delimiter).withNullValue(nullValue)); } TableDataBuilder addRowsFrom(File file, FileParser fileParser) { if (fileParser.hasColumnNames()) { checkArgument(names.equals(schema.getFieldNames()), "Manual column spec and header column spec are mutually exclusive"); List<String> columns = fileParser.getColumnNames(file); withColumns(columns.toArray(new String[columns.size()])); } return addRows(fileParser.parse(file, schema, names)); } private TableDataBuilder addRows(List<Object[]> rows) { for (Object[] row : rows) { addRow(row); } return this; } TableDataBuilder copyRow() { checkState(row != null, "No previous row to copy."); HCatRecord copy = new DefaultHCatRecord(new ArrayList<>(row.getAll())); flushRow(); row = copy; return this; } TableDataBuilder set(String name, Object value) { checkColumn(name); PrimitiveTypeInfo typeInfo; try { typeInfo = schema.get(name).getTypeInfo(); } catch (HCatException e) { throw new IllegalArgumentException("Error getting type info for " + name, e); } Object converted; try { converted = Converters.convert(value, typeInfo); } catch (ConversionException e) { throw new IllegalArgumentException("Invalid value for " + name + ". Got '" + value + "' (" + value.getClass().getSimpleName() + "). Expected " + typeInfo.getTypeName() + ".", e); } try { row.set(name, schema, converted); } catch (HCatException e) { throw new RuntimeException("Error setting value for " + name, e); } return this; } private Object get(String name) { checkColumn(name); try { return row.get(name, schema); } catch (HCatException e) { throw new RuntimeException("Error getting value for " + name, e); } } private void flushRow() { if (row != null) { rowsBuilder.put(createPartitionSpec(), row); } } private Map<String, String> createPartitionSpec() { ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); for (HCatFieldSchema partitionColumn : partitionColumns) { String name = partitionColumn.getName(); Object value = get(name); checkState(value != null, "Value for partition column %s must not be null.", name); builder.put(name, value.toString()); } return builder.build(); } Multimap<Map<String, String>, HCatRecord> build() { flushRow(); return rowsBuilder.build(); } private void checkColumn(String name) { checkArgument(schema.getFieldNames().contains(name), "Column %s does not exist", name); } }