/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.accumulo.mr;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;

import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.hive.accumulo.AccumuloHiveRow;
import org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.RecordReader;

import com.google.common.collect.Lists;

/**
 * Translates the {@link Key}/{@link Value} pairs produced by {@link AccumuloInputFormat} into a
 * {@link Writable} {@link AccumuloHiveRow} for consumption by the {@link AccumuloSerDe}.
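 *
 * <p>
 * A sketch of typical use (names here are illustrative; in practice this reader is constructed
 * by Hive's Accumulo input format wiring rather than by hand):
 * </p>
 *
 * <pre>{@code
 * // Assumed to come from AccumuloInputFormat via the Hive input format; it yields
 * // one iterator over the Key/Value pairs of each Accumulo row.
 * RecordReader<Text,PeekingIterator<Entry<Key,Value>>> wrapped = ...;
 * HiveAccumuloRecordReader reader = new HiveAccumuloRecordReader(wrapped, 0);
 *
 * Text rowId = reader.createKey();
 * AccumuloHiveRow row = reader.createValue();
 * while (reader.next(rowId, row)) {
 *   // row now holds the column family/qualifier/value tuples for one Accumulo row
 * }
 * reader.close();
 * }</pre>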
*/
public class HiveAccumuloRecordReader implements RecordReader<Text,AccumuloHiveRow> {

  // Reader over whole Accumulo rows: each record pairs a Text row ID with an
  // iterator over that row's Key/Value pairs, as produced by AccumuloInputFormat.
  private RecordReader<Text,PeekingIterator<Entry<Key,Value>>> recordReader;
  // Number of filter iterators configured on the scan; each adds one level of
  // whole-row encoding that next() must strip off before the row can be read.
  private int iteratorCount;

  public HiveAccumuloRecordReader(
      RecordReader<Text,PeekingIterator<Entry<Key,Value>>> recordReader, int iteratorCount) {
    this.recordReader = recordReader;
    this.iteratorCount = iteratorCount;
  }
  @Override
  public void close() throws IOException {
    recordReader.close();
  }

  @Override
  public Text createKey() {
    return new Text();
  }

  @Override
  public AccumuloHiveRow createValue() {
    return new AccumuloHiveRow();
  }

  @Override
  public long getPos() throws IOException {
    // The wrapped reader exposes no byte offset, so no meaningful position is reported.
    return 0;
  }

  @Override
  public float getProgress() throws IOException {
    return recordReader.getProgress();
  }
  @Override
  public boolean next(Text rowKey, AccumuloHiveRow row) throws IOException {
    Text key = recordReader.createKey();
    PeekingIterator<Map.Entry<Key,Value>> iter = recordReader.createValue();
    if (recordReader.next(key, iter)) {
      rowKey.set(key); // Also propagate the row ID to the caller's key.
      row.clear();
      row.setRowId(key.toString());
      List<Key> keys = Lists.newArrayList();
      List<Value> values = Lists.newArrayList();
      while (iter.hasNext()) { // Collect all key/value pairs for this row.
        Map.Entry<Key,Value> kv = iter.next();
        keys.add(kv.getKey());
        values.add(kv.getValue());
      }
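
      // When predicate pushdown is in play, each PrimitiveComparisonFilter on the
      // scan (a WholeRowIterator subclass) packs the entire row into a single
      // Key/Value pair, so a row that passed through N filter iterators arrives
      // wrapped N times. Illustratively (a sketch, not the actual wire format):
      //
      //   unencoded row:      {cf:q1 -> v1, cf:q2 -> v2}
      //   after one iterator: {rowId -> encode({cf:q1 -> v1, cf:q2 -> v2})}
      //
      // decodeRow() below reverses one level of that packing per configured iterator.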
      if (iteratorCount == 0) { // No encoded values; push the pairs directly to the row.
        pushToValue(keys, values, row);
      } else {
        for (int i = 0; i < iteratorCount; i++) { // Each iterator adds one level of encoding.
          // Entering each pass, the whole row is carried in a single Key/Value
          // pair, so only the first element holds data at this point.
          SortedMap<Key,Value> decoded = PrimitiveComparisonFilter.decodeRow(keys.get(0),
              values.get(0));
          keys = Lists.newArrayList(decoded.keySet());
          values = Lists.newArrayList(decoded.values());
        }
        pushToValue(keys, values, row); // After decoding, push to the row.
      }
      return true;
    } else {
      return false;
    }
  }

  /**
   * Flattens the key/value pairs into the {@link AccumuloHiveRow} for consumption by the
   * {@link AccumuloSerDe}.
   */
  private void pushToValue(List<Key> keys, List<Value> values, AccumuloHiveRow row)
      throws IOException {
    Iterator<Key> kIter = keys.iterator();
    Iterator<Value> vIter = values.iterator();
    while (kIter.hasNext()) {
      Key k = kIter.next();
      Value v = vIter.next();
      row.add(k.getColumnFamily().toString(), k.getColumnQualifier().toString(), v.get());
    }
  }
}