/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.dataset.lib;
import co.cask.cdap.api.annotation.Beta;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.internal.io.ReflectionSchemaGenerator;
import co.cask.cdap.internal.io.SchemaGenerator;
import co.cask.cdap.internal.io.TypeRepresentation;
import com.google.gson.Gson;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.Map;
/**
* Helper to build properties for an {@link ObjectMappedTable}.
*/
@Beta
public class ObjectMappedTableProperties {
private static final SchemaGenerator schemaGenerator = new ReflectionSchemaGenerator();
/**
* The type of object in the table.
*/
public static final String OBJECT_TYPE = "object.type";
/**
* The schema of objects in the table. This schema does not include the row key.
*/
public static final String OBJECT_SCHEMA = "object.schema";
/**
* The name of the Hive table column for the key of objects stored in the table.
* See {@link Builder#setRowKeyExploreName(String)} for details.
*/
public static final String ROW_KEY_EXPLORE_NAME = "row.key.explore.name";
/**
* The type of the Hive table column for the row key of objects stored in the table.
* See {@link Builder#setRowKeyExploreType(Schema.Type)} for details.
*/
public static final String ROW_KEY_EXPLORE_TYPE = "row.key.explore.type";
public static Builder builder() {
return new Builder();
}
/**
* @return The serialized representation of the type of objects in the table.
*/
public static String getObjectTypeRepresentation(Map<String, String> properties) {
return properties.get(OBJECT_TYPE);
}
/**
* @return The schema of objects in the table.
*/
public static Schema getObjectSchema(Map<String, String> properties) throws IOException {
return Schema.parseJson(properties.get(OBJECT_SCHEMA));
}
/**
* @return The name of the key to use when exploring the table.
*/
public static String getRowKeyExploreName(Map<String, String> properties) {
return properties.get(ROW_KEY_EXPLORE_NAME);
}
/**
* @return The type of the key when exploring the table.
*/
public static Schema.Type getRowKeyExploreType(Map<String, String> properties) {
return Schema.Type.valueOf(properties.get(ROW_KEY_EXPLORE_TYPE));
}
/**
* A Builder to construct properties for {@link ObjectMappedTable} datasets.
*/
public static class Builder extends DatasetProperties.Builder {
private final Gson gson = new Gson();
/**
* Package visible default constructor, to allow sub-classing by other datasets in this package.
*/
Builder() {
add(ROW_KEY_EXPLORE_NAME, "rowkey");
add(ROW_KEY_EXPLORE_TYPE, Schema.Type.BYTES.name());
}
/**
* Sets the type of object stored in the table. The schema of the Hive table for an ObjectMappedTable
* is derived from the object type set here and the row key explore name set by
* {@link #setRowKeyExploreName(String)}.
*
* For example, if the type set here has three fields - "id", "name", and "price", the corresponding Hive table
* for this Dataset will contain four columns - "rowkey", "id", "name", and "price".
*/
public Builder setType(Type type) throws UnsupportedTypeException {
add(OBJECT_TYPE, gson.toJson(new TypeRepresentation(type)));
add(OBJECT_SCHEMA, schemaGenerator.generate(type, false).toString());
return this;
}
/**
* Sets the row key column name in the corresponding Hive table for an ObjectMappedTable.
* The schema of the Hive table for an ObjectMappedTable is derived from the object type set by
* {@link #setType(Type)} and the row key explore name set here. The name set here cannot be the same
* as any of the fields in the object type.
*
* For example, if you are storing an Object with a single string field named "id", the corresponding
* Hive table will have a schema of (rowkey binary, id string). If you set the name of the row key to "name",
* the corresponding Hive table will instead have the schema (name binary, id string).
*/
public Builder setRowKeyExploreName(String name) {
add(ROW_KEY_EXPLORE_NAME, name);
return this;
}
/**
* Sets the column type for the row key column in the corresponding Hive table for an ObjectMappedTable.
* By default, the type of the row key in your Hive table will be binary. You can set the type using this
* method. Only {@link co.cask.cdap.api.data.schema.Schema.Type#BYTES Schema.Type.BYTES} and
* {@link co.cask.cdap.api.data.schema.Schema.Type#STRING Schema.Type.STRING} are allowed.
*
* For example, if you are storing an Object with a single string field named "id", the corresponding
* Hive table will have a schema of (rowkey binary, id string). If you set the type to a string using this method,
* the corresponding Hive table will instead have the schema (rowkey string, id string).
*/
public Builder setRowKeyExploreType(Schema.Type type) {
if (type != Schema.Type.BYTES && type != Schema.Type.STRING) {
throw new IllegalArgumentException("Key type must be bytes or string.");
}
add(ROW_KEY_EXPLORE_TYPE, type.name());
return this;
}
/**
* Create a DatasetProperties from this builder.
*/
public DatasetProperties build() {
return super.build();
}
}
}