/**
* Copyright 2014 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
import com.google.common.base.Splitter;
import com.google.common.collect.Maps;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.Iterator;
import java.util.Map;
import org.apache.avro.Schema;
import org.kitesdk.data.ColumnMapping;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.FieldMapping;
import org.kitesdk.data.ValidationException;
/**
 * Parser for {@link ColumnMapping}. Will parse the mapping annotation from
 * Avro schemas, and will parse the ColumnMapping JSON format. An
 * example of that is:
 *
 * <pre>
 * [
 *   { "source": "field1", "type": "column", "family": "cf", "qualifier": "field1" },
 *   { "source": "field2", "type": "keyAsColumn", "family": "kac", "prefix": "p_" },
 *   { "source": "field3", "type": "counter", "family": "cf", "qualifier": "count" },
 *   { "source": "field4", "type": "occVersion" }
 * ]
 * </pre>
 *
 * For backward-compatibility, the family and the qualifier (or, for
 * keyAsColumn mappings, the prefix) may also be given as a single
 * colon-separated "value" property:
 *
 * <pre>
 * [
 *   { "source": "field1", "type": "column", "value": "cf:field1" },
 *   { "source": "field2", "type": "keyAsColumn", "value": "kac:" },
 *   { "source": "field3", "type": "occVersion" }
 * ]
 * </pre>
 *
 * Explicit "family" and "qualifier" properties replace whatever was read
 * from "value".
 */
public class ColumnMappingParser {

  // name of the json node when embedded in a schema
  private static final String MAPPING = "mapping";

  // property constants
  private static final String TYPE = "type";
  private static final String SOURCE = "source";
  private static final String FAMILY = "family";
  private static final String QUALIFIER = "qualifier";
  private static final String PREFIX = "prefix";
  private static final String VALUE = "value";

  // values of the "type" property, shared by the parser and the serializer
  private static final String KEY_TYPE = "key";
  private static final String OCC_VERSION_TYPE = "occVersion";
  private static final String COLUMN_TYPE = "column";
  private static final String KEY_AS_COLUMN_TYPE = "keyAsColumn";
  private static final String COUNTER_TYPE = "counter";

  // splits "family:rest" into at most two parts, keeping any further ':' in
  // the second part
  private static final Splitter VALUE_SPLITTER = Splitter.on(":").limit(2);

  // reused for serialization instead of being re-created on every call
  private static final JsonFactory JSON_FACTORY = new JsonFactory();
  private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

  /**
   * Parses the Mapping Descriptor as a JSON string.
   *
   * @param mappingDescriptor
   *          The mapping descriptor as a JSON string
   * @return ColumnMapping
   */
  public static ColumnMapping parse(String mappingDescriptor) {
    return buildColumnMapping(JsonUtil.parse(mappingDescriptor));
  }

  /**
   * Parses the Mapping Descriptor from a File
   *
   * @param file
   *          The File that contains the Mapping Descriptor in JSON format.
   * @return ColumnMapping.
   */
  public static ColumnMapping parse(File file) {
    return buildColumnMapping(JsonUtil.parse(file));
  }

  /**
   * Parses the Mapping Descriptor from an input stream
   *
   * @param in
   *          The input stream that contains the Mapping Descriptor in JSON
   *          format.
   * @return ColumnMapping.
   */
  public static ColumnMapping parse(InputStream in) {
    return buildColumnMapping(JsonUtil.parse(in));
  }

  /**
   * Checks whether the schema carries a schema-level "mapping" property.
   *
   * @param schema
   *          The schema to inspect
   * @return true if the schema has a "mapping" property
   */
  public static boolean hasEmbeddedColumnMapping(Schema schema) {
    return schema.getJsonProp(MAPPING) != null;
  }

  /**
   * Returns a copy of the schema without the schema-level "mapping" property.
   * The copy is produced by re-parsing the schema's JSON form.
   *
   * @param schema
   *          The schema to strip
   * @return a newly parsed Schema without the "mapping" property
   */
  public static Schema removeEmbeddedMapping(Schema schema) {
    // TODO: avoid embedding mappings in the schema
    // Avro considers Props read-only and uses an older Jackson version
    // Parse the Schema as a String because Avro uses com.codehaus.jackson
    ObjectNode schemaJson = JsonUtil.parse(schema.toString(), ObjectNode.class);
    schemaJson.remove(MAPPING);
    return new Schema.Parser().parse(schemaJson.toString());
  }

  /**
   * Parses the ColumnMapping stored in the schema-level "mapping" property.
   *
   * @param schema
   *          A schema with an embedded column mapping
   * @return ColumnMapping
   * @throws ValidationException
   *           if the schema has no "mapping" property
   */
  public static ColumnMapping parseFromSchema(Schema schema) {
    // fail with a descriptive error rather than a NullPointerException
    ValidationException.check(hasEmbeddedColumnMapping(schema),
        "Schema does not have an embedded column mapping");
    // parse the String because Avro uses com.codehaus.jackson
    return parse(schema.getJsonProp(MAPPING).toString());
  }

  /**
   * Checks whether any field of a record schema carries a "mapping" property.
   *
   * @param schema
   *          The schema to inspect
   * @return true if the schema is a record and at least one of its fields has
   *         a "mapping" property; false otherwise, including for non-records
   */
  public static boolean hasEmbeddedFieldMappings(Schema schema) {
    if (Schema.Type.RECORD == schema.getType()) {
      for (Schema.Field field : schema.getFields()) {
        if (field.getJsonProp(MAPPING) != null) {
          return true;
        }
      }
    }
    return false;
  }

  /**
   * Builds a ColumnMapping from the "mapping" properties of a record schema's
   * fields. Each annotated field's name is used as the mapping source; fields
   * without a "mapping" property are skipped.
   *
   * @param schema
   *          A record schema
   * @return ColumnMapping
   * @throws IllegalArgumentException
   *           if the schema is not a record
   */
  public static ColumnMapping parseFromSchemaFields(Schema schema) {
    if (Schema.Type.RECORD == schema.getType()) {
      ColumnMapping.Builder builder = new ColumnMapping.Builder();
      for (Schema.Field field : schema.getFields()) {
        if (field.getJsonProp(MAPPING) != null) {
          // parse the String because Avro uses com.codehaus.jackson
          builder.fieldMapping(parseFieldMapping(field.name(),
              JsonUtil.parse(field.getJsonProp(MAPPING).toString())));
        }
      }
      return builder.build();
    }
    throw new IllegalArgumentException(
        "Cannot parse field-level mappings from non-Record");
  }

  /**
   * Returns a copy of the schema with the given mapping stored in its
   * schema-level "mapping" property. The copy is produced by re-parsing the
   * schema's JSON form.
   *
   * @param schema
   *          The schema to annotate
   * @param mapping
   *          The ColumnMapping to embed
   * @return a newly parsed Schema that carries the mapping
   */
  public static Schema embedColumnMapping(Schema schema, ColumnMapping mapping) {
    // TODO: avoid embedding mappings in the schema
    // Avro considers Props read-only and uses an older Jackson version
    // Parse the Schema as a String because Avro uses com.codehaus.jackson
    ObjectNode schemaJson = JsonUtil.parse(schema.toString(), ObjectNode.class);
    schemaJson.set(MAPPING, toJson(mapping));
    return new Schema.Parser().parse(schemaJson.toString());
  }

  /**
   * Collects the key mappings declared on a record schema's fields. Only
   * fields whose mapping type is KEY and whose mapping has a "value" property
   * are included; the integer form of that "value" is the map key.
   *
   * @param schema
   *          A record schema
   * @return a map from the mapping's integer "value" to its FieldMapping;
   *         if two fields share a value, the later field replaces the earlier
   * @throws IllegalArgumentException
   *           if the schema is not a record
   */
  public static Map<Integer, FieldMapping> parseKeyMappingsFromSchemaFields(
      Schema schema) {
    Map<Integer, FieldMapping> keyMappings = Maps.newHashMap();
    if (Schema.Type.RECORD == schema.getType()) {
      for (Schema.Field field : schema.getFields()) {
        if (field.getJsonProp(MAPPING) != null) {
          // parse the String because Avro uses com.codehaus.jackson
          JsonNode mappingNode = JsonUtil.parse(
              field.getJsonProp(MAPPING).toString());
          FieldMapping fm = parseFieldMapping(field.name(), mappingNode);
          if (FieldMapping.MappingType.KEY == fm.getMappingType() &&
              mappingNode.has(VALUE)) {
            Integer index = mappingNode.get(VALUE).asInt();
            keyMappings.put(index, fm);
          }
        }
      }
      return keyMappings;
    }
    throw new IllegalArgumentException(
        "Cannot parse field-level mappings from non-Record");
  }

  /**
   * Parses a FieldMapping from a JSON object that names its own source field
   * in a "source" property.
   *
   * @param mappingNode
   *          The JSON object describing the mapping
   * @return FieldMapping
   * @throws ValidationException
   *           if the node is not a JSON object, has no "source", or is not a
   *           valid mapping
   */
  public static FieldMapping parseFieldMapping(JsonNode mappingNode) {
    ValidationException.check(mappingNode.isObject(),
        "A column mapping must be a JSON record");
    ValidationException.check(mappingNode.has(SOURCE),
        "Column mappings must have a %s.", SOURCE);
    String source = mappingNode.get(SOURCE).asText();
    return parseFieldMapping(source, mappingNode);
  }

  /**
   * Parses the FieldMapping from an annotated schema field.
   *
   * @param source
   *          The source field name for this mapping
   * @param mappingNode
   *          The value of the "mapping" node
   * @return FieldMapping
   * @throws ValidationException
   *           if the node is not a JSON object, has no "type", has an unknown
   *           type, or lacks a property that its type requires
   */
  public static FieldMapping parseFieldMapping(String source, JsonNode mappingNode) {
    ValidationException.check(mappingNode.isObject(),
        "A column mapping must be a JSON record");
    ValidationException.check(mappingNode.has(TYPE),
        "Column mappings must have a %s.", TYPE);
    String type = mappingNode.get(TYPE).asText();

    // return easy cases
    if (OCC_VERSION_TYPE.equals(type)) {
      return FieldMapping.version(source);
    } else if (KEY_TYPE.equals(type)) {
      return FieldMapping.key(source);
    }

    String family = null;
    String qualifier = null;
    String prefix = null;

    // for backward-compatibility, check for "value": "fam:qual"
    if (mappingNode.has(VALUE)) {
      // avoids String#split because of odd cases, like ":".split(":")
      String value = mappingNode.get(VALUE).asText();
      Iterator<String> values = VALUE_SPLITTER.split(value).iterator();
      if (values.hasNext()) {
        family = values.next();
      }
      if (values.hasNext()) {
        // the second part is a prefix for keyAsColumn, a qualifier otherwise
        if (KEY_AS_COLUMN_TYPE.equals(type)) {
          prefix = values.next();
          if (prefix.isEmpty()) {
            prefix = null;
          }
        } else {
          qualifier = values.next();
        }
      }
    }

    // replace any existing values with explicit family and qualifier
    if (mappingNode.has(FAMILY)) {
      family = mappingNode.get(FAMILY).textValue();
    }
    if (mappingNode.has(QUALIFIER)) {
      qualifier = mappingNode.get(QUALIFIER).textValue();
    }

    if (COLUMN_TYPE.equals(type)) {
      ValidationException.check(family != null && !family.isEmpty(),
          "Column mapping %s must have a %s", source, FAMILY);
      ValidationException.check(qualifier != null && !qualifier.isEmpty(),
          "Column mapping %s must have a %s", source, QUALIFIER);
      return FieldMapping.column(source, family, qualifier);

    } else if (KEY_AS_COLUMN_TYPE.equals(type)) {
      ValidationException.check(family != null && !family.isEmpty(),
          "Column mapping %s must have a %s", source, FAMILY);
      ValidationException.check(qualifier == null,
          "Key-as-column mapping %s cannot have a %s", source, QUALIFIER);
      // an explicit prefix replaces one read from "value"; empty means none
      if (mappingNode.has(PREFIX)) {
        prefix = mappingNode.get(PREFIX).asText();
        if (prefix.isEmpty()) {
          prefix = null;
        }
      }
      return FieldMapping.keyAsColumn(source, family, prefix);

    } else if (COUNTER_TYPE.equals(type)) {
      ValidationException.check(family != null && !family.isEmpty(),
          "Counter mapping %s must have a %s", source, FAMILY);
      ValidationException.check(qualifier != null && !qualifier.isEmpty(),
          "Counter mapping %s must have a %s", source, QUALIFIER);
      return FieldMapping.counter(source, family, qualifier);

    } else {
      throw new ValidationException("Invalid mapping type: " + type);
    }
  }

  /**
   * Builds a ColumnMapping from a JSON array in which every element is a
   * field mapping object with a "source" property.
   */
  private static ColumnMapping buildColumnMapping(JsonNode node) {
    ValidationException.check(node.isArray(),
        "Must be a JSON array of column mappings");
    ColumnMapping.Builder builder = new ColumnMapping.Builder();
    for (JsonNode fieldMappingNode : node) {
      builder.fieldMapping(parseFieldMapping(fieldMappingNode));
    }
    return builder.build();
  }

  /**
   * Converts a FieldMapping to its JSON object form, using explicit
   * "family", "qualifier" and "prefix" properties rather than "value".
   */
  private static JsonNode toJson(FieldMapping fm) {
    ObjectNode fieldMapping = JsonNodeFactory.instance.objectNode();
    fieldMapping.set(SOURCE, TextNode.valueOf(fm.getFieldName()));
    switch (fm.getMappingType()) {
      case KEY:
        fieldMapping.set(TYPE, TextNode.valueOf(KEY_TYPE));
        break;
      case KEY_AS_COLUMN:
        fieldMapping.set(TYPE, TextNode.valueOf(KEY_AS_COLUMN_TYPE));
        fieldMapping.set(FAMILY, TextNode.valueOf(fm.getFamilyAsString()));
        // the prefix is optional and omitted when not set
        if (fm.getPrefix() != null) {
          fieldMapping.set(PREFIX, TextNode.valueOf(fm.getPrefix()));
        }
        break;
      case COLUMN:
        fieldMapping.set(TYPE, TextNode.valueOf(COLUMN_TYPE));
        fieldMapping.set(FAMILY, TextNode.valueOf(fm.getFamilyAsString()));
        fieldMapping.set(QUALIFIER, TextNode.valueOf(fm.getQualifierAsString()));
        break;
      case COUNTER:
        fieldMapping.set(TYPE, TextNode.valueOf(COUNTER_TYPE));
        fieldMapping.set(FAMILY, TextNode.valueOf(fm.getFamilyAsString()));
        fieldMapping.set(QUALIFIER, TextNode.valueOf(fm.getQualifierAsString()));
        break;
      case OCC_VERSION:
        fieldMapping.set(TYPE, TextNode.valueOf(OCC_VERSION_TYPE));
        break;
      default:
        throw new ValidationException(
            "Unknown mapping type: " + fm.getMappingType());
    }
    return fieldMapping;
  }

  /**
   * Converts a ColumnMapping to a JSON array with one object per field
   * mapping.
   */
  private static JsonNode toJson(ColumnMapping mapping) {
    ArrayNode mappingJson = JsonNodeFactory.instance.arrayNode();
    for (FieldMapping fm : mapping.getFieldMappings()) {
      mappingJson.add(toJson(fm));
    }
    return mappingJson;
  }

  /**
   * Serializes a single FieldMapping as a compact JSON object.
   *
   * @param mapping
   *          The FieldMapping to serialize
   * @return the JSON string
   * @throws DatasetIOException
   *           if the JSON cannot be written
   */
  public static String toString(FieldMapping mapping) {
    return writeJson(toJson(mapping), false);
  }

  /**
   * Serializes a ColumnMapping as a JSON array of field mappings.
   *
   * @param mapping
   *          The ColumnMapping to serialize
   * @param pretty
   *          Whether to use the default pretty printer
   * @return the JSON string
   * @throws DatasetIOException
   *           if the JSON cannot be written
   */
  public static String toString(ColumnMapping mapping, boolean pretty) {
    return writeJson(toJson(mapping), pretty);
  }

  /**
   * Writes a JSON tree to a String, optionally pretty-printed.
   */
  private static String writeJson(JsonNode json, boolean pretty) {
    StringWriter writer = new StringWriter();
    try {
      JsonGenerator gen = JSON_FACTORY.createGenerator(writer);
      try {
        if (pretty) {
          gen.useDefaultPrettyPrinter();
        }
        gen.setCodec(JSON_MAPPER);
        gen.writeTree(json);
      } finally {
        // closing flushes buffered output to the writer, and must also
        // happen when writing fails
        gen.close();
      }
    } catch (IOException e) {
      throw new DatasetIOException("Cannot write to JSON generator", e);
    }
    return writer.toString();
  }
}