/*
* Copyright © 2014-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.internal.io;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.internal.guava.reflect.TypeToken;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.net.URI;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
/**
 * Base class for {@link SchemaGenerator} implementations. It builds the {@link Schema} for
 * simple types, enums, Java arrays, parameterized collections and parameterized maps by itself.
 * Every other class is treated as a record, whose schema generation is left to the subclass
 * through {@link #generateRecord(TypeToken, Set, boolean)}.
 */
public abstract class AbstractSchemaGenerator implements SchemaGenerator {

  /**
   * Lookup table from a raw Java class to the {@link Schema} of the simple type representing it.
   */
  private static final Map<Class<?>, Schema> SIMPLE_SCHEMAS = createSimpleSchemas();

  /**
   * Builds the read-only lookup table of simple type schemas.
   *
   * @return an unmodifiable map from Java class to its simple {@link Schema}
   */
  private static Map<Class<?>, Schema> createSimpleSchemas() {
    Map<Class<?>, Schema> schemas = new HashMap<>();

    // Primitives and their boxed counterparts. byte, char and short are all carried as INT.
    register(schemas, Schema.Type.BOOLEAN, Boolean.TYPE, Boolean.class);
    register(schemas, Schema.Type.INT,
             Byte.TYPE, Character.TYPE, Short.TYPE, Integer.TYPE,
             Byte.class, Character.class, Short.class, Integer.class);
    register(schemas, Schema.Type.LONG, Long.TYPE, Long.class);
    register(schemas, Schema.Type.FLOAT, Float.TYPE, Float.class);
    register(schemas, Schema.Type.DOUBLE, Double.TYPE, Double.class);

    // Text and binary types.
    register(schemas, Schema.Type.STRING, String.class);
    register(schemas, Schema.Type.BYTES, byte[].class, ByteBuffer.class);

    // Some extra ones for some common built-in types. Need corresponding handling in DatumReader/Writer
    register(schemas, Schema.Type.STRING, URI.class, URL.class);
    register(schemas, Schema.Type.BYTES, UUID.class);

    return Collections.unmodifiableMap(schemas);
  }

  /**
   * Maps each of the given Java classes to a schema of the given simple type.
   *
   * @param target map to add the entries to
   * @param schemaType simple schema type used for all the given classes
   * @param javaTypes Java classes to be represented by the given schema type
   */
  private static void register(Map<Class<?>, Schema> target, Schema.Type schemaType, Class<?>... javaTypes) {
    for (Class<?> javaType : javaTypes) {
      target.put(javaType, Schema.of(schemaType));
    }
  }

  /**
   * Wraps the given schema into a union with the NULL schema.
   *
   * @param schema schema to be made nullable
   * @return a union schema of the given schema and NULL
   */
  private static Schema nullable(Schema schema) {
    return Schema.unionOf(schema, Schema.of(Schema.Type.NULL));
  }

  /**
   * Generates the schema of the given type, with recursive types accepted.
   */
  @Override
  public final Schema generate(Type type) throws UnsupportedTypeException {
    return generate(type, true);
  }

  /**
   * Generates the schema of the given type, starting with no known record.
   */
  @Override
  public final Schema generate(Type type, boolean acceptRecursiveTypes) throws UnsupportedTypeException {
    return doGenerate(TypeToken.of(type), Collections.<String>emptySet(), acceptRecursiveTypes);
  }

  /**
   * Performs the schema generation, recursing into component types of arrays, collections and maps.
   * The type is examined in this order: simple type, enum, Java array, {@link Collection}, {@link Map}
   * and finally record.
   *
   * @param typeToken Encapsulates the Java type for generating a {@link Schema}.
   * @param knownRecords Set of record names that already have the schema generated. It is used for
   *                     recursive class field references.
   * @param acceptRecursion Whether to tolerate type recursion. If false, an {@link UnsupportedTypeException}
   *                        is thrown when a record name that is already known is encountered again.
   * @return A {@link Schema} representing the given java {@link Type}.
   * @throws UnsupportedTypeException Indicates schema generation is not supported for the given java {@link Type}.
   */
  protected final Schema doGenerate(TypeToken<?> typeToken, Set<String> knownRecords, boolean acceptRecursion)
    throws UnsupportedTypeException {

    Type javaType = typeToken.getType();
    Class<?> rawClass = typeToken.getRawType();

    // Simple types are resolved straight from the lookup table.
    Schema simpleSchema = SIMPLE_SCHEMAS.get(rawClass);
    if (simpleSchema != null) {
      return simpleSchema;
    }

    // Enum class becomes an ENUM schema built from the enum class itself.
    if (rawClass.isEnum()) {
      @SuppressWarnings("unchecked")
      Class<Enum<?>> enumClass = (Class<Enum<?>>) rawClass;
      return Schema.enumWith(enumClass);
    }

    // Java array becomes an ARRAY schema.
    if (rawClass.isArray()) {
      return arraySchema(rawClass.getComponentType(), knownRecords, acceptRecursion);
    }

    // From here on, only plain classes and parameterized types can be handled.
    boolean parameterized = javaType instanceof ParameterizedType;
    if (!parameterized && !(javaType instanceof Class)) {
      throw new UnsupportedTypeException("Type " + javaType + " is not supported. " +
                                           "Only Class or ParameterizedType are supported.");
    }

    // Parameterized Collection becomes an ARRAY schema.
    if (Collection.class.isAssignableFrom(rawClass)) {
      if (!parameterized) {
        throw new UnsupportedTypeException("Only supports parameterized Collection type.");
      }
      return collectionSchema(typeToken, (ParameterizedType) javaType, knownRecords, acceptRecursion);
    }

    // Parameterized Map becomes a MAP schema.
    if (Map.class.isAssignableFrom(rawClass)) {
      if (!parameterized) {
        throw new UnsupportedTypeException("Only supports parameterized Map type.");
      }
      return mapSchema(typeToken, (ParameterizedType) javaType, knownRecords, acceptRecursion);
    }

    // Anything else is a record, named after the class.
    String recordName = rawClass.getName();
    if (!knownRecords.contains(recordName)) {
      // First time this record is seen, let the child class generate the RECORD schema.
      return generateRecord(typeToken, knownRecords, acceptRecursion);
    }
    if (!acceptRecursion) {
      throw new UnsupportedTypeException("Recursive type not supported for class " + recordName);
    }
    // Record already seen before, refer to it by name only.
    return Schema.recordOf(recordName);
  }

  /**
   * Creates the ARRAY schema for a Java array having the given component class. Elements of
   * a primitive array use the component schema directly, while elements of any other array are nullable.
   *
   * @param componentClass raw component class of the array
   * @param knownRecords Set of record names that already have the schema generated
   * @param acceptRecursion Whether to tolerate type recursion
   * @return the ARRAY schema
   * @throws UnsupportedTypeException if schema generation is not supported for the component class
   */
  private Schema arraySchema(Class<?> componentClass, Set<String> knownRecords, boolean acceptRecursion)
    throws UnsupportedTypeException {
    Schema elementSchema = doGenerate(TypeToken.of(componentClass), knownRecords, acceptRecursion);
    return componentClass.isPrimitive()
      ? Schema.arrayOf(elementSchema)
      : Schema.arrayOf(nullable(elementSchema));
  }

  /**
   * Creates the ARRAY schema for a parameterized {@link Collection}. The element schema is generated
   * from the first type argument and is made nullable.
   *
   * @param collectionToken type token of the collection type
   * @param collectionType the same collection type, as a {@link ParameterizedType}
   * @param knownRecords Set of record names that already have the schema generated
   * @param acceptRecursion Whether to tolerate type recursion
   * @return the ARRAY schema
   * @throws UnsupportedTypeException if schema generation is not supported for the element type
   */
  private Schema collectionSchema(TypeToken<?> collectionToken, ParameterizedType collectionType,
                                  Set<String> knownRecords, boolean acceptRecursion)
    throws UnsupportedTypeException {
    Type elementType = collectionType.getActualTypeArguments()[0];
    TypeToken<?> elementToken = collectionToken.resolveType(elementType);
    Schema elementSchema = doGenerate(elementToken, knownRecords, acceptRecursion);
    return Schema.arrayOf(nullable(elementSchema));
  }

  /**
   * Creates the MAP schema for a parameterized {@link Map}. The key and value schemas are generated
   * from the first and second type arguments respectively. Only the value schema is made nullable.
   *
   * @param mapToken type token of the map type
   * @param mapType the same map type, as a {@link ParameterizedType}
   * @param knownRecords Set of record names that already have the schema generated
   * @param acceptRecursion Whether to tolerate type recursion
   * @return the MAP schema
   * @throws UnsupportedTypeException if schema generation is not supported for the key or the value type
   */
  private Schema mapSchema(TypeToken<?> mapToken, ParameterizedType mapType,
                           Set<String> knownRecords, boolean acceptRecursion)
    throws UnsupportedTypeException {
    Type[] typeArguments = mapType.getActualTypeArguments();
    TypeToken<?> keyToken = mapToken.resolveType(typeArguments[0]);
    TypeToken<?> valueToken = mapToken.resolveType(typeArguments[1]);

    // The value schema is generated ahead of the key schema.
    Schema valueSchema = doGenerate(valueToken, knownRecords, acceptRecursion);
    Schema keySchema = doGenerate(keyToken, knownRecords, acceptRecursion);
    return Schema.mapOf(keySchema, nullable(valueSchema));
  }

  /**
   * Generates a RECORD schema of the given type.
   *
   * @param typeToken Type of the record.
   * @param knownRecords Set of record names that already have the schema generated.
   * @param acceptRecursiveTypes Whether to tolerate type recursion. If false, will throw UnsupportedTypeException if
   *                             a recursive type is encountered.
   * @return An instance of {@link Schema}
   * @throws UnsupportedTypeException if a RECORD schema cannot be generated for the given type
   */
  protected abstract Schema generateRecord(TypeToken<?> typeToken,
                                           Set<String> knownRecords,
                                           boolean acceptRecursiveTypes) throws UnsupportedTypeException;
}