/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.dataset.lib;
import co.cask.cdap.api.dataset.DatasetProperties;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
* Helper to build properties for files datasets.
*/
public class FileSetProperties {
/**
* The base path of the dataset.
*/
public static final String BASE_PATH = "base.path";
/**
* Whether the files in the dataset are "external", that is, managed by an external process.
*/
public static final String DATA_EXTERNAL = "data.external";
/**
* The name of the input format class.
*/
public static final String INPUT_FORMAT = "input.format";
/**
* The name of the output format class.
*/
public static final String OUTPUT_FORMAT = "output.format";
/**
* Prefix for additional properties for the input format. They are added to the
* Hadoop configuration, with the prefix stripped from the name.
*/
public static final String INPUT_PROPERTIES_PREFIX = "input.properties.";
/**
* Prefix for additional properties for the output format. They are added to the
* Hadoop configuration, with the prefix stripped from the name.
*/
public static final String OUTPUT_PROPERTIES_PREFIX = "output.properties.";
/**
* Whether this dataset should be enabled for explore.
*/
public static final String PROPERTY_ENABLE_EXPLORE_ON_CREATE = "explore.enabled";
/**
* The format to use for the explore table. Currently, only text is supported.
*/
public static final String PROPERTY_EXPLORE_FORMAT = "explore.format";
/**
* The schema to use for the explore table. This should have the form: column type, ...
*/
public static final String PROPERTY_EXPLORE_SCHEMA = "explore.schema";
/**
* The serde to use for the Hive table.
*/
public static final String PROPERTY_EXPLORE_SERDE = "explore.serde";
/**
* The input format to use for the Hive table.
*/
public static final String PROPERTY_EXPLORE_INPUT_FORMAT = "explore.input.format";
/**
* The output format to use for the Hive table.
*/
public static final String PROPERTY_EXPLORE_OUTPUT_FORMAT = "explore.output.format";
/**
* Prefix used to store additional table properties for Hive.
*/
public static final String PROPERTY_EXPLORE_TABLE_PROPERTY_PREFIX = "explore.table.property.";
public static Builder builder() {
return new Builder();
}
/**
* @return the base path configured in the properties.
*/
public static String getBasePath(Map<String, String> properties) {
return properties.get(BASE_PATH);
}
/**
* @return the input format configured in the properties.
*/
public static String getInputFormat(Map<String, String> properties) {
return properties.get(INPUT_FORMAT);
}
/**
* @return whether the data (the files) in this dataset are considered external.
*/
public static boolean isDataExternal(Map<String, String> properties) {
return Boolean.valueOf(properties.get(DATA_EXTERNAL));
}
/**
* @return the output format configured in the properties.
*/
public static String getOutputFormat(Map<String, String> properties) {
return properties.get(OUTPUT_FORMAT);
}
/**
* @return the input format properties configured in the properties.
*/
public static Map<String, String> getInputProperties(Map<String, String> properties) {
return propertiesWithPrefix(properties, INPUT_PROPERTIES_PREFIX);
}
/**
* @return the output format properties configured in the properties.
*/
public static Map<String, String> getOutputProperties(Map<String, String> properties) {
return propertiesWithPrefix(properties, OUTPUT_PROPERTIES_PREFIX);
}
/**
* @return whether explore is enabled by the properties.
*/
public static boolean isExploreEnabled(Map<String, String> properties) {
// Boolean.valueOf returns false if the value is null
return Boolean.valueOf(properties.get(PROPERTY_ENABLE_EXPLORE_ON_CREATE));
}
/**
* @return the format of the explore table.
*/
public static String getExploreFormat(Map<String, String> properties) {
return properties.get(PROPERTY_EXPLORE_FORMAT);
}
/**
* @return the schema of the explore table.
*/
public static String getExploreSchema(Map<String, String> properties) {
return properties.get(PROPERTY_EXPLORE_SCHEMA);
}
/**
* @return the properties for the explore format
*/
public static Map<String, String> getExploreFormatProperties(Map<String, String> properties) {
String format = getExploreFormat(properties);
if (format == null) {
return Collections.emptyMap();
}
return propertiesWithPrefix(properties, String.format("%s.%s.", PROPERTY_EXPLORE_FORMAT, format));
}
/**
* @return the class name of the serde configured in the properties.
*/
public static String getSerDe(Map<String, String> properties) {
return properties.get(PROPERTY_EXPLORE_SERDE);
}
/**
* @return the class name of the input format to be used in Hive.
* Note that this can be different than the input format used
* for the file set itself.
*/
public static String getExploreInputFormat(Map<String, String> properties) {
return properties.get(PROPERTY_EXPLORE_INPUT_FORMAT);
}
/**
* @return the class name of the output format to be used in Hive.
* Note that this can be different than the output format used
* for the file set itself.
*/
public static String getExploreOutputFormat(Map<String, String> properties) {
return properties.get(PROPERTY_EXPLORE_OUTPUT_FORMAT);
}
/**
* @return the Hive table properties configured in the properties.
*/
public static Map<String, String> getTableProperties(Map<String, String> properties) {
return propertiesWithPrefix(properties, PROPERTY_EXPLORE_TABLE_PROPERTY_PREFIX);
}
/**
* @return a map of all properties whose key begins with the given prefix, without that prefix.
*/
public static Map<String, String> propertiesWithPrefix(Map<String, String> properties, String prefix) {
Map<String, String> result = new HashMap<>();
for (Map.Entry<String, String> entry : properties.entrySet()) {
if (entry.getKey().startsWith(prefix)) {
result.put(entry.getKey().substring(prefix.length()), entry.getValue());
}
}
return result;
}
/**
* A Builder to construct properties for FileSet datasets.
*/
public static class Builder extends DatasetProperties.Builder {
private String format = null;
/**
* Package visible default constructor, to allow sub-classing by other datasets in this package.
*/
Builder() { }
/**
* Sets the base path for the file dataset.
*/
public Builder setBasePath(String path) {
add(BASE_PATH, path);
return this;
}
/**
* Configures whether the files (the data) in this fileset are managed externally.
*/
public Builder setDataExternal(boolean isExternal) {
add(DATA_EXTERNAL, Boolean.toString(isExternal));
return this;
}
/**
* Sets the output format of the file dataset.
*/
public Builder setOutputFormat(Class<?> outputFormatClass) {
setOutputFormat(outputFormatClass.getName());
return this;
}
/**
* Sets the output format of the file dataset.
*/
public Builder setOutputFormat(String className) {
add(OUTPUT_FORMAT, className);
return this;
}
/**
* Sets the input format of the file dataset.
*/
public Builder setInputFormat(Class<?> inputFormatClass) {
setInputFormat(inputFormatClass.getName());
return this;
}
/**
* Sets the input format of the file dataset.
*/
public Builder setInputFormat(String className) {
add(INPUT_FORMAT, className);
return this;
}
/**
* Sets a property for the input format of the file dataset.
*/
public Builder setInputProperty(String name, String value) {
add(INPUT_PROPERTIES_PREFIX + name, value);
return this;
}
/**
* Sets a property for the output format of the file dataset.
*/
public Builder setOutputProperty(String name, String value) {
add(OUTPUT_PROPERTIES_PREFIX + name, value);
return this;
}
/**
* Enable explore for this dataset.
*/
public Builder setEnableExploreOnCreate(boolean enabled) {
add(PROPERTY_ENABLE_EXPLORE_ON_CREATE, Boolean.toString(enabled));
return this;
}
/**
* Set the format for the Hive table.
* @param format currently, only "text" and "csv" are supported.
*/
public Builder setExploreFormat(String format) {
add(PROPERTY_EXPLORE_FORMAT, format);
this.format = format;
return this;
}
/**
* Set the schema for the Hive table.
* @param schema a Hive schema string of the form: field type, ...
*/
public Builder setExploreSchema(String schema) {
add(PROPERTY_EXPLORE_SCHEMA, schema);
return this;
}
/**
* Set a property for the table format.
* This may only be a called after setting the format using {@link #setExploreFormat}.
*/
public Builder setExploreFormatProperty(String name, String value) {
if (format == null) {
throw new IllegalStateException("explore format has not been set");
}
add(String.format("%s.%s.%s", PROPERTY_EXPLORE_FORMAT, format, name), value);
return this;
}
/**
* Set the class name of the SerDe used to create the Hive table.
*/
public Builder setSerDe(String className) {
add(PROPERTY_EXPLORE_SERDE, className);
return this;
}
/**
* Set the class name of the SerDe used to create the Hive table.
*/
public Builder setSerDe(Class<?> serde) {
return setSerDe(serde.getName());
}
/**
* Set the input format used to create the Hive table.
* Note that this can be different than the input format used
* for the file set itself.
*/
public Builder setExploreInputFormat(String className) {
add(PROPERTY_EXPLORE_INPUT_FORMAT, className);
return this;
}
/**
* Set the input format used to create the Hive table.
* Note that this can be different than the input format used
* for the file set itself.
*/
public Builder setExploreInputFormat(Class<?> inputFormat) {
return setExploreInputFormat(inputFormat.getName());
}
/**
* Set the output format used to create the Hive table.
* Note that this can be different than the output format used
* for the file set itself.
*/
public Builder setExploreOutputFormat(String className) {
add(PROPERTY_EXPLORE_OUTPUT_FORMAT, className);
return this;
}
/**
* Set the output format used to create the Hive table.
* Note that this can be different than the output format used
* for the file set itself.
*/
public Builder setExploreOutputFormat(Class<?> outputFormat) {
return setExploreOutputFormat(outputFormat.getName());
}
/**
* Set a table property to be added to the Hive table. Multiple properties can be set.
*/
public Builder setTableProperty(String name, String value) {
add(PROPERTY_EXPLORE_TABLE_PROPERTY_PREFIX + name, value);
return this;
}
/**
* Create a DatasetProperties from this builder.
*/
public DatasetProperties build() {
return super.build();
}
}
}