/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.pig.piggybank.storage.avro;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.ResolvingDecoder;
import org.apache.avro.util.Utf8;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DefaultTuple;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
/**
* An avro GenericDatumReader which reads in avro data and
* converts them to pig data: tuples, bags, etc.
*
*/
public class PigAvroDatumReader extends GenericDatumReader<Object> {
/**
* Construct where the writer's and reader's schemas are the same.
*/
public PigAvroDatumReader(Schema schema) {
super(schema);
}
/**
* Construct given writer's and reader's schema.
*/
public PigAvroDatumReader(Schema writer, Schema reader) throws IOException {
super(writer, reader);
}
/**
* Called to read a record instance. Overridden to read a pig tuple.
*/
@Override
protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) throws IOException {
// find out the order in which we will receive fields from the ResolvingDecoder
Field[] readOrderedFields = in.readFieldOrder();
/* create an empty tuple */
Tuple tuple = TupleFactory.getInstance().newTuple(readOrderedFields.length);
/* read fields and put in output order in tuple
* The ResolvingDecoder figures out the writer schema to reader schema mapping for us
*/
for (Field f : readOrderedFields) {
tuple.set(f.pos(), read(old, f.schema(), in));
}
return tuple;
}
/**
* Called to read a map instance. Overridden to read a pig map.
*/
protected Object readMap(Object old, Schema expected, ResolvingDecoder in) throws IOException {
Schema eValue = expected.getValueType();
long l = in.readMapStart();
Object map = newMap(old, (int) l);
if (l > 0) {
do {
for (int i = 0; i < l; i++) {
addToMap(map, readString(null, AvroStorageUtils.StringSchema, in),
read(null, eValue, in));
}
} while ((l = in.mapNext()) > 0);
}
return map;
}
/**
* Called to create an enum value. Overridden to create a pig string.
*/
@Override
protected Object createEnum(String symbol, Schema schema) {
return symbol;
}
/**
* Called by the default implementation of {@link #readArray} to retrieve a
* value from a reused instance.
*/
@Override
protected Object peekArray(Object array) {
return null;
}
/**
* Called by the default implementation of {@link #readArray} to add a
* value. Overridden to append to pig bag.
*/
@Override
protected void addToArray(Object array, long pos, Object e) {
if (e instanceof Tuple) {
((DataBag) array).add((Tuple) e);
} else {
Tuple t = new DefaultTuple();
t.append(e);
((DataBag) array).add(t);
}
}
/**
* Called to read a fixed value. Overridden to read a pig byte array.
*/
@Override
protected Object readFixed(Object old, Schema expected, Decoder in) throws IOException {
GenericFixed fixed = (GenericFixed) super.readFixed(old, expected, in);
DataByteArray byteArray = new DataByteArray(fixed.bytes());
return byteArray;
}
/**
* Called to create new record instances. Overridden to return a new tuple.
*/
@Override
protected Object newRecord(Object old, Schema schema) {
return TupleFactory.getInstance().newTuple();
}
/**
* Called to create new array instances. Overridden to return a new bag.
*/
@Override
protected Object newArray(Object old, int size, Schema schema) {
return BagFactory.getInstance().newDefaultBag();
}
/**
* Called to read strings. Overridden to return a pig string.
*/
@Override
protected Object readString(Object old, Schema expected, Decoder in) throws IOException {
return super.readString(old, expected, in).toString();
}
/**
* Called to read byte arrays. Overridden to return a pig byte array.
*/
@Override
protected Object readBytes(Object old, Decoder in) throws IOException {
ByteBuffer buf = (ByteBuffer) super.readBytes(old, in);
DataByteArray byteArray = new DataByteArray(buf.array());
return byteArray;
}
}