/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.hbase.avro;
import org.kitesdk.data.ColumnMapping;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.PartitionStrategy;
import org.kitesdk.data.hbase.impl.KeyEntitySchemaParser;
/**
* This implementation parses the AvroKeySchema and AvroEntitySchema from Avro
* schemas. The entities can contain metadata in annotations of the Avro record
* and Avro record fields.
*
* Each field must have a mapping annotation, which specifies how that field is
* mapped to an HBase column.
*
* Allowed mapping types are "key", "column", "keyAsColumn", and "occVersion".
*
* The key mapping type on a field indicates that there is an identity field
* partitioner on the field. The identity field partitioners are taken in order.
*
* The column mapping type on a field tells this entity mapper to map that field
* to the fully_qualified_column.
*
* The keyAsColumn mapping type on a field tells the entity mapper to map each
* key of the value type to a column in the specified column_family. This
* annotation is only allowed on map and record types.
*
* The occVersion mapping type on a field indicates that the entity participates
* in optimistic concurrency control, and the field is the version number that
* is automatically incremented by the system to validate that there are no
* write conflicts.
*
* Here is an example schema:
*
* <pre>
*
* {
* "name": "record_name",
* "type": "record",
* "partitions": [
* { "sourceName": "field1", "type": "identity" }
* ],
* "fields": [
* {
* "name": "field1",
* "type": "int",
* "mapping": { "type": "column", "value": "meta:field1" }
* },
*
* {
* "name": "field2",
* "type": { "type": "map", "values": "string" },
* "mapping": { "type": "keyAsColumn", "value": "map_family" }
* }
* ]
* }
*
* </pre>
*
* An Avro instance of this schema would have its field1 value encoded to the
* meta:field1 column. Each key/value pair of the field2 map type would have its
* value mapped to the map_family:[key] column. To also participate in
* optimistic concurrency control, the schema would additionally need a field
* with the occVersion mapping type; this example does not declare one.
*/
public class AvroKeyEntitySchemaParser implements
    KeyEntitySchemaParser<AvroKeySchema, AvroEntitySchema> {

  /**
   * Parses a key schema from a raw schema literal.
   *
   * A DatasetDescriptor is built from the literal, and the resulting
   * AvroKeySchema wraps the descriptor's schema and partition strategy.
   *
   * @param rawSchema the schema literal handed to the descriptor builder
   * @return an AvroKeySchema backed by the built descriptor
   */
  @Override
  public AvroKeySchema parseKeySchema(String rawSchema) {
    return toKeySchema(builderFor(rawSchema).build());
  }

  /**
   * Parses a key schema from a raw schema literal and an explicitly supplied
   * partition strategy.
   *
   * @param rawSchema the schema literal handed to the descriptor builder
   * @param partitionStrategy the partition strategy handed to the descriptor
   *          builder
   * @return an AvroKeySchema backed by the built descriptor
   */
  @Override
  public AvroKeySchema parseKeySchema(String rawSchema,
      PartitionStrategy partitionStrategy) {
    // go through DatasetDescriptor.Builder because it checks consistency
    return toKeySchema(
        builderFor(rawSchema).partitionStrategy(partitionStrategy).build());
  }

  /**
   * Parses an entity schema from a raw schema literal.
   *
   * A DatasetDescriptor is built from the literal, and the resulting
   * AvroEntitySchema wraps the descriptor's schema, the raw literal itself,
   * and the descriptor's column mapping.
   *
   * @param rawSchema the schema literal handed to the descriptor builder
   * @return an AvroEntitySchema backed by the built descriptor
   */
  @Override
  public AvroEntitySchema parseEntitySchema(String rawSchema) {
    return toEntitySchema(builderFor(rawSchema).build(), rawSchema);
  }

  /**
   * Parses an entity schema from a raw schema literal and an explicitly
   * supplied column mapping.
   *
   * @param rawSchema the schema literal handed to the descriptor builder
   * @param columnMapping the column mapping handed to the descriptor builder
   * @return an AvroEntitySchema backed by the built descriptor
   */
  @Override
  public AvroEntitySchema parseEntitySchema(String rawSchema,
      ColumnMapping columnMapping) {
    // go through DatasetDescriptor.Builder because it checks consistency
    return toEntitySchema(
        builderFor(rawSchema).columnMapping(columnMapping).build(),
        rawSchema);
  }

  /** Starts a descriptor builder that already carries the schema literal. */
  private static DatasetDescriptor.Builder builderFor(String rawSchema) {
    return new DatasetDescriptor.Builder().schemaLiteral(rawSchema);
  }

  /** Wraps a descriptor's schema and partition strategy as a key schema. */
  private static AvroKeySchema toKeySchema(DatasetDescriptor descriptor) {
    return new AvroKeySchema(
        descriptor.getSchema(), descriptor.getPartitionStrategy());
  }

  /**
   * Wraps a descriptor's schema and column mapping, together with the raw
   * literal it was built from, as an entity schema.
   */
  private static AvroEntitySchema toEntitySchema(
      DatasetDescriptor descriptor, String rawSchema) {
    return new AvroEntitySchema(
        descriptor.getSchema(), rawSchema, descriptor.getColumnMapping());
  }
}