/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.util.avro;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import org.apache.avro.generic.GenericData;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.joda.time.DateTime;
/**
 * Utility class for AvroStorage; contains static methods
 * for converting Pig objects (tuples and bags) into Avro objects.
 */
public class AvroStorageDataConversionUtilities {

  /**
   * Packs a Pig Tuple into an Avro record.
   *
   * <p>Each field of the schema is filled from the tuple element at the
   * field's position. A {@code null} value for a field whose schema is a
   * nullable union is stored as {@code null}; otherwise the union is
   * simplified with
   * {@code AvroStorageSchemaConversionUtilities.removeSimpleUnion} before
   * the value is converted. Nested tuples and bags are converted
   * recursively, byte arrays become {@link ByteBuffer} or
   * {@link GenericData.Fixed} values, and Pig datetime values are stored
   * as the {@code long} returned by {@code DateTime.getMillis()}. Every
   * other value is stored unchanged.
   *
   * @param t the Pig tuple to pack into the avro object
   * @param s The avro schema for which to determine the type
   * @return the avro record corresponding to the input tuple
   * @throws IOException if the tuple cannot be converted; an
   *     {@code IOException} raised during conversion is propagated
   *     unchanged, any other exception is wrapped as its cause
   */
  public static GenericData.Record packIntoAvro(final Tuple t, final Schema s)
      throws IOException {
    try {
      GenericData.Record record = new GenericData.Record(s);
      for (Field f : s.getFields()) {
        final int pos = f.pos();
        Object o = t.get(pos);
        Schema innerSchema = f.schema();
        if (AvroStorageSchemaConversionUtilities.isNullableUnion(innerSchema)) {
          if (o == null) {
            record.put(pos, null);
            continue;
          }
          // Non-null value: convert against the simplified (non-union) schema.
          innerSchema = AvroStorageSchemaConversionUtilities
              .removeSimpleUnion(innerSchema);
        }
        switch (innerSchema.getType()) {
        case RECORD:
          record.put(pos, packIntoAvro((Tuple) o, innerSchema));
          break;
        case ARRAY:
          record.put(pos, packIntoAvro((DataBag) o, innerSchema));
          break;
        case BYTES:
          record.put(pos, ByteBuffer.wrap(((DataByteArray) o).get()));
          break;
        case FIXED:
          record.put(pos, new GenericData.Fixed(
              innerSchema, ((DataByteArray) o).get()));
          break;
        default:
          if (t.getType(pos) == DataType.DATETIME) {
            // Datetime values are written as their millisecond value.
            record.put(pos, ((DateTime) o).getMillis());
          } else {
            record.put(pos, o);
          }
        }
      }
      return record;
    } catch (IOException e) {
      // Already the declared exception type (for example from a nested
      // conversion): rethrow as is rather than stacking another generic
      // wrapper on top of the informative message.
      throw e;
    } catch (Exception e) {
      throw new IOException(
          "exception in AvroStorageDataConversionUtilities.packIntoAvro", e);
    }
  }

  /**
   * Packs a Pig DataBag into an Avro array.
   *
   * <p>When the element type of the array schema is a record, every tuple
   * of the bag is converted with {@link #packIntoAvro(Tuple, Schema)}.
   * Otherwise each tuple must contain exactly one field, and that field's
   * value is added to the array unchanged.
   *
   * @param db the Pig databag to pack into the avro array
   * @param s The avro schema for which to determine the type
   * @return the avro array corresponding to the input bag
   * @throws IOException if a tuple cannot be packed into the array's
   *     element type; an {@code IOException} raised during conversion is
   *     propagated unchanged, any other exception is wrapped as its cause
   */
  public static GenericData.Array<Object> packIntoAvro(
      final DataBag db, final Schema s) throws IOException {
    try {
      // The bag size is a long while the array capacity is an int; the
      // narrowing cast matches the previous Long.intValue() conversion.
      GenericData.Array<Object> array
          = new GenericData.Array<Object>((int) db.size(), s);
      // Loop-invariant: look the element schema up once, after the array
      // constructor has seen the schema.
      final Schema elementSchema = s.getElementType();
      final boolean recordElements = elementSchema != null
          && elementSchema.getType() == Type.RECORD;
      for (Tuple t : db) {
        if (recordElements) {
          array.add(packIntoAvro(t, elementSchema));
        } else if (t.size() == 1) {
          array.add(t.get(0));
        } else {
          throw new IOException(
              "AvroStorageDataConversionUtilities.packIntoAvro: Can't pack "
                  + t + " into schema " + s);
        }
      }
      return array;
    } catch (IOException e) {
      // Keep the descriptive "Can't pack" message (and any nested
      // conversion failure) visible to the caller instead of burying it
      // under a generic wrapper.
      throw e;
    } catch (Exception e) {
      throw new IOException(
          "exception in AvroStorageDataConversionUtilities.packIntoAvro", e);
    }
  }
}