/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.accumulo.mr;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;

import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.hive.accumulo.AccumuloHiveRow;
import org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.RecordReader;

import com.google.common.collect.Lists;

/**
 * Translate the {@link Key} {@link Value} pairs from {@link AccumuloInputFormat} to a
 * {@link Writable} for consumption by the {@link AccumuloSerDe}.
 */
public class HiveAccumuloRecordReader implements RecordReader<Text,AccumuloHiveRow> {
  private RecordReader<Text,PeekingIterator<Entry<Key,Value>>> recordReader;
  private int iteratorCount;

  public HiveAccumuloRecordReader(
      RecordReader<Text,PeekingIterator<Entry<Key,Value>>> recordReader, int iteratorCount) {
    this.recordReader = recordReader;
    this.iteratorCount = iteratorCount;
  }

  @Override
  public void close() throws IOException {
    recordReader.close();
  }

  @Override
  public Text createKey() {
    return new Text();
  }

  @Override
  public AccumuloHiveRow createValue() {
    return new AccumuloHiveRow();
  }

  @Override
  public long getPos() throws IOException {
    return 0;
  }

  @Override
  public float getProgress() throws IOException {
    return recordReader.getProgress();
  }

  @Override
  public boolean next(Text rowKey, AccumuloHiveRow row) throws IOException {
    Text key = recordReader.createKey();
    PeekingIterator<Map.Entry<Key,Value>> iter = recordReader.createValue();
    if (recordReader.next(key, iter)) {
      row.clear();
      row.setRowId(key.toString());
      List<Key> keys = Lists.newArrayList();
      List<Value> values = Lists.newArrayList();
      while (iter.hasNext()) { // collect key/values for this row.
        Map.Entry<Key,Value> kv = iter.next();
        keys.add(kv.getKey());
        values.add(kv.getValue());
      }
      if (iteratorCount == 0) { // no encoded values, we can push directly to row.
        pushToValue(keys, values, row);
      } else {
        for (int i = 0; i < iteratorCount; i++) { // each iterator creates a level of encoding.
          SortedMap<Key,Value> decoded = PrimitiveComparisonFilter.decodeRow(keys.get(0),
              values.get(0));
          keys = Lists.newArrayList(decoded.keySet());
          values = Lists.newArrayList(decoded.values());
        }
        pushToValue(keys, values, row); // after decoding we can push to value.
      }
      return true;
    } else {
      return false;
    }
  }
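
  /*
   * A note on the decode loop in next(), inferred from the code rather than from
   * separate documentation: each PrimitiveComparisonFilter stacked on the scan
   * re-encodes the entire row as a single Key/Value pair, so when iteratorCount > 0
   * the collected lists hold one entry per encoding level, and each pass of the
   * loop unwraps one level by decoding the first (and, at that level, only) entry.
   */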
  // flatten key/value pairs into row object for use in Serde.
  private void pushToValue(List<Key> keys, List<Value> values, AccumuloHiveRow row)
      throws IOException {
    Iterator<Key> kIter = keys.iterator();
    Iterator<Value> vIter = values.iterator();
    while (kIter.hasNext()) {
      Key k = kIter.next();
      Value v = vIter.next();
      row.add(k.getColumnFamily().toString(), k.getColumnQualifier().toString(), v.get());
    }
  }
}
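
/*
 * Illustrative usage sketch, not part of the original class: it mirrors the
 * generic old-API (org.apache.hadoop.mapred) RecordReader consumption pattern
 * that a map task applies to this reader. The class and method names below are
 * hypothetical; only the RecordReader calls come from the interface implemented
 * above. Note that this implementation stores the row id on the AccumuloHiveRow
 * via setRowId() rather than populating the Text key passed to next().
 */
class HiveAccumuloRecordReaderExample {
  /** Drains a reader and returns the number of Accumulo rows it produced. */
  static long countRows(RecordReader<Text,AccumuloHiveRow> reader) throws IOException {
    Text rowKey = reader.createKey();           // reusable key holder, created once
    AccumuloHiveRow row = reader.createValue(); // reusable row holder, cleared on each next()
    long count = 0;
    while (reader.next(rowKey, row)) { // row now holds the flattened cells for one row
      count++;
    }
    reader.close();
    return count;
  }
}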