/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.dataset2.lib.table; import co.cask.cdap.api.data.schema.Schema; import co.cask.cdap.api.data.schema.UnsupportedTypeException; import co.cask.cdap.api.dataset.DatasetAdmin; import co.cask.cdap.api.dataset.DatasetContext; import co.cask.cdap.api.dataset.DatasetDefinition; import co.cask.cdap.api.dataset.DatasetProperties; import co.cask.cdap.api.dataset.DatasetSpecification; import co.cask.cdap.api.dataset.lib.AbstractDatasetDefinition; import co.cask.cdap.api.dataset.lib.ObjectMappedTable; import co.cask.cdap.api.dataset.lib.ObjectMappedTableProperties; import co.cask.cdap.api.dataset.table.Table; import co.cask.cdap.internal.io.TypeRepresentation; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.gson.Gson; import java.io.IOException; import java.util.List; import java.util.Map; /** * DatasetDefinition for {@link ObjectMappedTableDataset}. */ public class ObjectMappedTableDefinition extends AbstractDatasetDefinition<ObjectMappedTable, DatasetAdmin> { private static final Gson GSON = new Gson(); private static final String TABLE_NAME = "objects"; private final DatasetDefinition<? extends Table, ?> tableDef; public ObjectMappedTableDefinition(String name, DatasetDefinition<? extends Table, ?> tableDef) { super(name); Preconditions.checkArgument(tableDef != null, "Table definition is required"); this.tableDef = tableDef; } @Override public DatasetSpecification configure(String instanceName, DatasetProperties properties) { Map<String, String> props = properties.getProperties(); Preconditions.checkArgument(props.containsKey(ObjectMappedTableProperties.OBJECT_TYPE)); // schema can normally be derived from the type. However, we cannot use the Type in this method because // this is called by the system, where the Type is often not available. for example, if somebody creates // an ObjectMappedTable<Purchase> where Purchase is a class internal to their app. // we require schema here because we want to validate it to make sure it is supported. Preconditions.checkArgument(props.containsKey(ObjectMappedTableProperties.OBJECT_SCHEMA)); Preconditions.checkArgument(props.containsKey(ObjectMappedTableProperties.ROW_KEY_EXPLORE_NAME)); Preconditions.checkArgument(props.containsKey(ObjectMappedTableProperties.ROW_KEY_EXPLORE_TYPE)); try { Schema objectSchema = ObjectMappedTableProperties.getObjectSchema(props); validateSchema(objectSchema); String keyName = ObjectMappedTableProperties.getRowKeyExploreName(props); Schema.Type keyType = ObjectMappedTableProperties.getRowKeyExploreType(props); Schema fullSchema = addKeyToSchema(objectSchema, keyName, keyType); DatasetProperties fullProperties = DatasetProperties.builder() .addAll(properties.getProperties()) .add(Table.PROPERTY_SCHEMA, fullSchema.toString()) .add(Table.PROPERTY_SCHEMA_ROW_FIELD, keyName) .build(); return DatasetSpecification.builder(instanceName, getName()) .properties(fullProperties.getProperties()) .datasets(tableDef.configure(TABLE_NAME, fullProperties)) .build(); } catch (IOException e) { throw new IllegalArgumentException("Could not parse schema.", e); } catch (UnsupportedTypeException e) { throw new IllegalArgumentException("Schema is of an unsupported type.", e); } } @Override public DatasetAdmin getAdmin(DatasetContext datasetContext, DatasetSpecification spec, ClassLoader classLoader) throws IOException { return tableDef.getAdmin(datasetContext, spec.getSpecification(TABLE_NAME), classLoader); } @Override public ObjectMappedTableDataset<?> getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException { String keyName = ObjectMappedTableProperties.getRowKeyExploreName(spec.getProperties()); DatasetSpecification tableSpec = spec.getSpecification(TABLE_NAME); // if the table spec did not have schema, this is an ObjectMappedTable from CDAP 2.8. // add the schema and row key as arguments so that explore will work // TODO: remove after CDAP-2122 is done if (!tableSpec.getProperties().containsKey(Table.PROPERTY_SCHEMA)) { tableSpec = DatasetSpecification.builder(tableSpec.getName(), tableSpec.getType()) .properties(tableSpec.getProperties()) .property(Table.PROPERTY_SCHEMA, spec.getProperty(Table.PROPERTY_SCHEMA)) .property(Table.PROPERTY_SCHEMA_ROW_FIELD, keyName) .datasets(tableSpec.getSpecifications().values()) .build(); } // reconstruct the table schema here because of backwards compatibility Table table = tableDef.getDataset(datasetContext, tableSpec, arguments, classLoader); Map<String, String> properties = spec.getProperties(); TypeRepresentation typeRep = GSON.fromJson( ObjectMappedTableProperties.getObjectTypeRepresentation(properties), TypeRepresentation.class); Schema objSchema = ObjectMappedTableProperties.getObjectSchema(properties); return new ObjectMappedTableDataset(spec.getName(), table, typeRep, objSchema, classLoader); } private void validateSchema(Schema schema) throws UnsupportedTypeException { Schema.Type type = schema.isNullable() ? schema.getNonNullable().getType() : schema.getType(); if (type != Schema.Type.RECORD) { throw new UnsupportedTypeException("Unsupported type " + type + ". Must be a record."); } for (Schema.Field field : schema.getFields()) { Schema fieldSchema = field.getSchema(); Schema.Type fieldType = fieldSchema.isNullable() ? fieldSchema.getNonNullable().getType() : fieldSchema.getType(); if (!fieldType.isSimpleType()) { throw new UnsupportedTypeException( String.format("Field %s is of unsupported type %s." + " Must be a simple type (boolean, int, long, float, double, string, bytes).", field.getName(), fieldType.toString())); } } } // we want to include the key as a column in the table for exploration, so add it to the schema private Schema addKeyToSchema(Schema schema, String keyName, Schema.Type keyType) { List<Schema.Field> fields = Lists.newArrayListWithCapacity(schema.getFields().size() + 1); fields.add(Schema.Field.of(keyName, Schema.of(keyType))); for (Schema.Field objectField : schema.getFields()) { // have to lowercase since Hive will lowercase if (keyName.toLowerCase().equals(objectField.getName().toLowerCase())) { throw new IllegalArgumentException( "Row key " + keyName + " cannot use the same column name as an object field."); } fields.add(objectField); } return Schema.recordOf("record", fields); } }