/** * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.cdk.data.hbase.avro; import com.cloudera.cdk.data.DatasetException; import com.cloudera.cdk.data.SchemaValidationException; import com.cloudera.cdk.data.hbase.impl.EntityComposer; import com.cloudera.cdk.data.hbase.impl.EntitySchema.FieldMapping; import com.cloudera.cdk.data.hbase.impl.MappingType; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.generic.IndexedRecord; /** * An EntityComposer implementation for Avro records. It will handle both * SpecificRecord entities and GenericRecord entities. * * @param <E> * The type of the entity */ public class AvroEntityComposer<E extends IndexedRecord> implements EntityComposer<E> { /** * The Avro schema for the Avro records this EntityComposer will compose. */ private final AvroEntitySchema avroSchema; /** * Boolean to indicate whether this is a specific record or generic record * composer. TODO: Eventually use an enum type when we support more than two * types of Avro records. */ private final boolean specific; /** * An AvroRecordBuilderFactory that can produce AvroRecordBuilders for this * composer to compose Avro entities. */ private final AvroRecordBuilderFactory<E> recordBuilderFactory; /** * A mapping of entity field names to AvroRecordBuilderFactories for any * keyAsColumn mapped fields that are Avro record types. These are needed to * get builders that can construct the keyAsColumn field values from their * parts. */ private final Map<String, AvroRecordBuilderFactory<E>> kacRecordBuilderFactories; /** * The number of key parts in the entity schema. */ private final int keyPartCount; /** * AvroEntityComposer constructor. * * @param avroEntitySchema * The schema for the Avro entities this composer composes. * @param specific * True if this composer composes Specific records. Otherwise, it * composes Generic records. */ public AvroEntityComposer(AvroEntitySchema avroEntitySchema, boolean specific) { this.avroSchema = avroEntitySchema; this.specific = specific; this.recordBuilderFactory = buildAvroRecordBuilderFactory(avroEntitySchema .getAvroSchema()); this.kacRecordBuilderFactories = new HashMap<String, AvroRecordBuilderFactory<E>>(); int keyPartCount = 0; for (FieldMapping fieldMapping : avroEntitySchema.getFieldMappings()) { if (fieldMapping.getMappingType() == MappingType.KEY) { keyPartCount++; } } this.keyPartCount = keyPartCount; initRecordBuilderFactories(); } @Override public Builder<E> getBuilder() { return new Builder<E>() { private final AvroRecordBuilder<E> recordBuilder = recordBuilderFactory .getBuilder(); @Override public com.cloudera.cdk.data.hbase.impl.EntityComposer.Builder<E> put( String fieldName, Object value) { recordBuilder.put(fieldName, value); return this; } @Override public E build() { return recordBuilder.build(); } }; } @Override public Object extractField(E entity, String fieldName) { Schema schema = avroSchema.getAvroSchema(); Field field = schema.getField(fieldName); if (field == null) { throw new SchemaValidationException("No field named " + fieldName + " in schema " + schema); } Object fieldValue = entity.get(field.pos()); if (fieldValue == null) { // if the field value is null, and the field is a primitive type, // we should make the field represent java's default type. This // can happen when using GenericRecord. SpecificRecord has it's // fields represented by members of a class, so a SpecificRecord's // primitive fields will never be null. We are doing this so // GenericRecord acts like SpecificRecord in this case. fieldValue = getDefaultPrimitive(field); } return fieldValue; } @SuppressWarnings("unchecked") @Override public Map<CharSequence, Object> extractKeyAsColumnValues(String fieldName, Object fieldValue) { Schema schema = avroSchema.getAvroSchema(); Field field = schema.getField(fieldName); if (field == null) { throw new SchemaValidationException("No field named " + fieldName + " in schema " + schema); } if (field.schema().getType() == Schema.Type.MAP) { return new HashMap<CharSequence, Object>( (Map<CharSequence, Object>) fieldValue); } else if (field.schema().getType() == Schema.Type.RECORD) { Map<CharSequence, Object> keyAsColumnValues = new HashMap<CharSequence, Object>(); IndexedRecord avroRecord = (IndexedRecord) fieldValue; for (Field avroRecordField : avroRecord.getSchema().getFields()) { keyAsColumnValues.put(avroRecordField.name(), avroRecord.get(avroRecordField.pos())); } return keyAsColumnValues; } else { throw new SchemaValidationException( "Only MAP or RECORD type valid for keyAsColumn fields. Found " + field.schema().getType()); } } @Override public Object buildKeyAsColumnField(String fieldName, Map<CharSequence, Object> keyAsColumnValues) { Schema schema = avroSchema.getAvroSchema(); Field field = schema.getField(fieldName); if (field == null) { throw new SchemaValidationException("No field named " + fieldName + " in schema " + schema); } Schema.Type fieldType = field.schema().getType(); if (fieldType == Schema.Type.MAP) { Map<CharSequence, Object> retMap = new HashMap<CharSequence, Object>(); for (Entry<CharSequence, Object> entry : keyAsColumnValues.entrySet()) { retMap.put(entry.getKey(), entry.getValue()); } return retMap; } else if (fieldType == Schema.Type.RECORD) { AvroRecordBuilder<E> builder = kacRecordBuilderFactories.get(fieldName) .getBuilder(); for (Entry<CharSequence, Object> keyAsColumnEntry : keyAsColumnValues .entrySet()) { builder.put(keyAsColumnEntry.getKey().toString(), keyAsColumnEntry.getValue()); } return builder.build(); } else { throw new SchemaValidationException( "Only MAP or RECORD type valid for keyAsColumn fields. Found " + fieldType); } } /** * Initialize the AvroRecordBuilderFactories for all keyAsColumn mapped fields * that are record types. We need to be able to get record builders for these * since the records are broken across many columns, and need to be * constructed by the composer. */ private void initRecordBuilderFactories() { for (FieldMapping fieldMapping : avroSchema.getFieldMappings()) { if (fieldMapping.getMappingType() == MappingType.KEY_AS_COLUMN) { String fieldName = fieldMapping.getFieldName(); Schema fieldSchema = avroSchema.getAvroSchema().getField(fieldName) .schema(); Schema.Type fieldSchemaType = fieldSchema.getType(); if (fieldSchemaType == Schema.Type.RECORD) { AvroRecordBuilderFactory<E> factory = buildAvroRecordBuilderFactory(fieldSchema); kacRecordBuilderFactories.put(fieldName, factory); } } } } /** * Build the appropriate AvroRecordBuilderFactory for this instance. Avro has * many different record types, of which we support two: Specific and Generic. * * @param schema * The Avro schema needed to construct the AvroRecordBuilderFactory. * @return The constructed AvroRecordBuilderFactory. */ @SuppressWarnings({ "unchecked", "rawtypes" }) private AvroRecordBuilderFactory<E> buildAvroRecordBuilderFactory( Schema schema) { if (specific) { Class<E> specificClass; String className = schema.getFullName(); try { specificClass = (Class<E>) Class.forName(className); } catch (ClassNotFoundException e) { throw new DatasetException("Could not get Class instance for " + className); } return new SpecificAvroRecordBuilderFactory(specificClass); } else { return (AvroRecordBuilderFactory<E>) new GenericAvroRecordBuilderFactory( schema); } } /** * Get's the default value for the primitive types. This matches the default * Java would assign to the following primitive types: * * int, long, boolean, float, and double. * * If field is any other type, this method will return null. * * @param field * The Schema field * @return The default value for the schema field's type, or null if the type * of field is not a primitive type. */ private Object getDefaultPrimitive(Schema.Field field) { Schema.Type type = field.schema().getType(); if (type == Schema.Type.INT) { return 0; } else if (type == Schema.Type.LONG) { return 0L; } else if (type == Schema.Type.BOOLEAN) { return false; } else if (type == Schema.Type.FLOAT) { return 0.0f; } else if (type == Schema.Type.DOUBLE) { return 0.0d; } else { // not a primitive type, so return null return null; } } @Override public List<Object> getPartitionKeyParts(E entity) { Object[] parts = new Object[keyPartCount]; for (FieldMapping fieldMapping : avroSchema.getFieldMappings()) { if (fieldMapping.getMappingType() == MappingType.KEY) { int pos = avroSchema.getAvroSchema() .getField(fieldMapping.getFieldName()).pos(); parts[Integer.parseInt(fieldMapping.getMappingValue())] = entity.get(pos); } } return Arrays.asList(parts); } }