/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.format;
import co.cask.cdap.api.data.format.FormatSpecification;
import co.cask.cdap.api.data.format.Formats;
import co.cask.cdap.api.data.format.RecordFormat;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import java.util.Map;
/**
* Creates {@link RecordFormat} objects given the name of a format. Names are first checked against standard names like
* "CSV" or "TSV". If they are not a standard name, they are interpreted as fully qualified class names.
*/
public final class RecordFormats {
// We may eventually want this mapping to be derived from the config.
private static final Map<String, Class<? extends RecordFormat>> NAME_CLASS_MAP =
ImmutableMap.<String, Class<? extends RecordFormat>>builder()
.put(Formats.TEXT, TextRecordFormat.class)
.put(Formats.CSV, DelimitedStringsRecordFormat.class)
.put(Formats.TSV, DelimitedStringsRecordFormat.class)
.put(Formats.AVRO, AvroRecordFormat.class)
.put(Formats.COMBINED_LOG_FORMAT, CombinedLogRecordFormat.class)
.put(Formats.GROK, GrokRecordFormat.class)
.put(Formats.SYSLOG, GrokRecordFormat.class)
.build();
private static final Map<String, Map<String, String>> NAME_SETTINGS_MAP =
ImmutableMap.<String, Map<String, String>>builder()
.put(Formats.CSV, ImmutableMap.of(DelimitedStringsRecordFormat.DELIMITER, ","))
.put(Formats.TSV, ImmutableMap.of(DelimitedStringsRecordFormat.DELIMITER, "\t"))
.put(Formats.SYSLOG, GrokRecordFormat.settings("%{SYSLOGLINE:syslogline}"))
.build();
private static final Map<String, Schema> DEFAULT_SCHEMA_MAP =
ImmutableMap.<String, Schema>builder()
.put(Formats.SYSLOG, Schema.recordOf(
"streamEvent",
Schema.Field.of("timestamp", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
Schema.Field.of("logsource", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
Schema.Field.of("program", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
Schema.Field.of("message", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
Schema.Field.of("pid", Schema.nullableOf(Schema.of(Schema.Type.STRING)))))
.build();
/**
* Create an initialized record format for the given format specification. The name in the specification is
* first checked against standard names like "CSV" or "TSV". If it is a standard name, the corresponding
* format will be created, with specification settings applied on top of default settings.
* For example, "CSV" will map to the {@link DelimitedStringsRecordFormat}, with a comma as the delimiter,
* whereas "TSV" will map to the {@link DelimitedStringsRecordFormat}, with a tab as the delimiter.
* If the name is not a standard name, it is interpreted as a class name.
*
* @param spec the specification for the format to create and initialize
* @param <FROM> Type of underlying object the format reads
* @param <TO> Type of object the format reads the underlying object into
* @return Initialized {@link RecordFormat} based on the given name
* @throws IllegalAccessException if there was an illegal access when instantiating the record format
* @throws InstantiationException if there was an exception instantiating the record format
* @throws ClassNotFoundException if the record format class could not be found
* @throws UnsupportedTypeException if the specification is not supported by the format
*/
public static <FROM, TO> RecordFormat<FROM, TO> createInitializedFormat(FormatSpecification spec)
throws IllegalAccessException, InstantiationException, ClassNotFoundException, UnsupportedTypeException {
String name = spec.getName();
// check if it's a standard class
Class<? extends RecordFormat> formatClass = NAME_CLASS_MAP.get(name.toLowerCase());
@SuppressWarnings("unchecked")
RecordFormat<FROM, TO> format = (RecordFormat<FROM, TO>) (formatClass == null ?
Class.forName(name).newInstance() : formatClass.newInstance());
// compute actual settings: use default settings if present
Map<String, String> settings;
Map<String, String> defaultSettings = NAME_SETTINGS_MAP.get(name.toLowerCase());
if (defaultSettings != null) {
settings = Maps.newHashMap(defaultSettings);
if (spec.getSettings() != null) {
settings.putAll(spec.getSettings());
}
} else {
settings = spec.getSettings();
}
// compute actual schema
Schema schema;
Schema defaultSchema = DEFAULT_SCHEMA_MAP.get(name.toLowerCase());
if (defaultSchema != null && spec.getSchema() == null) {
schema = defaultSchema;
} else {
schema = spec.getSchema();
}
FormatSpecification actualSpec = new FormatSpecification(name, schema, settings);
format.initialize(actualSpec);
return format;
}
}