/** * Copyright (c) 2013, Cloudera, Inc. All Rights Reserved. * * Cloudera, Inc. licenses this file to you under the Apache License, * Version 2.0 (the "License"). You may not use this file except in * compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for * the specific language governing permissions and limitations under the * License. */ package com.cloudera.science.avro.common; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.codehaus.jackson.JsonNode; import org.codehaus.jackson.map.ObjectMapper; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; /** * */ public class JsonConverter { private static final Log LOG = LogFactory.getLog(JsonConverter.class); private static final Set<Schema.Type> SUPPORTED_TYPES = ImmutableSet.of( Schema.Type.RECORD, Schema.Type.ARRAY, Schema.Type.MAP, Schema.Type.INT, Schema.Type.LONG, Schema.Type.BOOLEAN, Schema.Type.FLOAT, Schema.Type.DOUBLE, Schema.Type.STRING); private final ObjectMapper mapper = new ObjectMapper(); private final Schema baseSchema; private int logMessageCounter = 0; public JsonConverter(Schema schema) { this.baseSchema = checkSchema(schema, true); } private Schema checkSchema(Schema schema, boolean mustBeRecord) { if (!mustBeRecord) { if (!SUPPORTED_TYPES.contains(schema.getType())) { throw new IllegalArgumentException("Unsupported type: " + schema.getType()); } if (schema.getType() != Schema.Type.RECORD) { return schema; } } for (Schema.Field f : schema.getFields()) { Schema fs = f.schema(); if (isNullableSchema(fs)) { fs = getNonNull(fs); } Schema.Type st = fs.getType(); if (!SUPPORTED_TYPES.contains(st)) { throw new IllegalArgumentException(String.format( "Unsupported type '%s' for field '%s'", st.toString(), f.name())); } switch (st) { case RECORD: checkSchema(fs, true); break; case MAP: checkSchema(fs.getValueType(), false); break; case ARRAY: checkSchema(fs.getElementType(), false); default: break; // No need to check primitives } } return schema; } @SuppressWarnings("unchecked") public GenericRecord convert(String json) throws IOException { return convert(mapper.readValue(json, Map.class), baseSchema); } @SuppressWarnings({ "unchecked", "rawtypes" }) private GenericRecord convert(Map<String, Object> raw, Schema schema) throws IOException { GenericRecord result = new GenericData.Record(schema); Set<String> usedFields = Sets.newHashSet(); for (Schema.Field f : schema.getFields()) { String name = f.name(); if (raw.containsKey(name)) { result.put(f.pos(), typeConvert(raw.get(name), name, f.schema())); usedFields.add(name); } else { JsonNode defaultValue = f.defaultValue(); if (defaultValue == null) { if (isNullableSchema(f.schema())) { result.put(f.pos(), null); } else { throw new IllegalArgumentException( "No default value provided for non-nullable field: " + f.name()); } } else { Schema fieldSchema = f.schema(); if (isNullableSchema(fieldSchema)) { fieldSchema = getNonNull(fieldSchema); } Object value = null; switch (fieldSchema.getType()) { case BOOLEAN: value = defaultValue.getValueAsBoolean(); break; case DOUBLE: value = defaultValue.getValueAsDouble(); break; case FLOAT: value = (float) defaultValue.getValueAsDouble(); break; case INT: value = defaultValue.getValueAsInt(); break; case LONG: value = defaultValue.getValueAsLong(); break; case STRING: value = defaultValue.getValueAsText(); break; case MAP: Map<String, Object> fieldMap = mapper.readValue( defaultValue.getValueAsText(), Map.class); Map<String, Object> mvalue = Maps.newHashMap(); for (Map.Entry<String, Object> e : fieldMap.entrySet()) { mvalue.put(e.getKey(), typeConvert(e.getValue(), name, fieldSchema.getValueType())); } value = mvalue; break; case ARRAY: List fieldArray = mapper.readValue( defaultValue.getValueAsText(), List.class); List lvalue = Lists.newArrayList(); for (Object elem : fieldArray) { lvalue.add(typeConvert(elem, name, fieldSchema.getElementType())); } value = lvalue; break; case RECORD: Map<String, Object> fieldRec = mapper.readValue( defaultValue.getValueAsText(), Map.class); value = convert(fieldRec, fieldSchema); break; default: throw new IllegalArgumentException( "JsonConverter cannot handle type: " + fieldSchema.getType()); } result.put(f.pos(), value); } } } if (usedFields.size() < raw.size()) { // Log a notification about unused fields if (logMessageCounter % 1000 == 0) { LOG.warn("Ignoring unused JSON fields: " + Sets.difference(raw.keySet(), usedFields)); } logMessageCounter++; } return result; } @SuppressWarnings({ "unchecked", "rawtypes" }) private Object typeConvert(Object value, String name, Schema schema) throws IOException { if (isNullableSchema(schema)) { if (value == null) { return null; } else { schema = getNonNull(schema); } } else if (value == null) { // Always fail on null for non-nullable schemas throw new JsonConversionException(null, name, schema); } switch (schema.getType()) { case BOOLEAN: if (value instanceof Boolean) { return (Boolean) value; } else if (value instanceof String) { return Boolean.valueOf((String) value); } else if (value instanceof Number) { return ((Number) value).intValue() == 0 ? Boolean.FALSE : Boolean.TRUE; } break; case DOUBLE: if (value instanceof Number) { return ((Number) value).doubleValue(); } else if (value instanceof String) { return Double.valueOf((String) value); } break; case FLOAT: if (value instanceof Number) { return ((Number) value).floatValue(); } else if (value instanceof String) { return Float.valueOf((String) value); } break; case INT: if (value instanceof Number) { return ((Number) value).intValue(); } else if (value instanceof String) { return Integer.valueOf((String) value); } break; case LONG: if (value instanceof Number) { return ((Number) value).longValue(); } else if (value instanceof String) { return Long.valueOf((String) value); } break; case STRING: return value.toString(); case RECORD: return convert((Map<String, Object>) value, schema); case ARRAY: Schema elementSchema = schema.getElementType(); List listRes = new ArrayList(); for (Object v : (List) value) { listRes.add(typeConvert(v, name, elementSchema)); } return listRes; case MAP: Schema valueSchema = schema.getValueType(); Map<String, Object> mapRes = new HashMap<String, Object>(); for (Map.Entry<String, Object> v : ((Map<String, Object>) value).entrySet()) { mapRes.put(v.getKey(), typeConvert(v.getValue(), name, valueSchema)); } return mapRes; default: throw new IllegalArgumentException( "JsonConverter cannot handle type: " + schema.getType()); } throw new JsonConversionException(value, name, schema); } private boolean isNullableSchema(Schema schema) { return schema.getType().equals(Schema.Type.UNION) && schema.getTypes().size() == 2 && (schema.getTypes().get(0).getType().equals(Schema.Type.NULL) || schema.getTypes().get(1).getType().equals(Schema.Type.NULL)); } private Schema getNonNull(Schema schema) { List<Schema> types = schema.getTypes(); return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0); } @SuppressWarnings("serial") private static class JsonConversionException extends RuntimeException { private Object value; private String fieldName; private Schema schema; public JsonConversionException(Object value, String fieldName, Schema schema) { this.value = value; this.fieldName = fieldName; this.schema = schema; } @Override public String toString() { return String.format("Type conversion error for field %s, %s for %s", fieldName, value, schema); } } }