/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.data.format;
import co.cask.cdap.api.annotation.Beta;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import java.util.Collections;
import java.util.Map;
import javax.annotation.Nullable;
/**
 * Abstract base class specifying how to read data in some format into java objects.
 * A format may supply a default schema, which may be as simple as a byte array; formats without a default
 * require a schema to be provided at initialization.
 *
 * <p>{@link #initialize(FormatSpecification)} resolves the schema (requested or default), validates it,
 * stores it, and then calls {@link #configure(Map)} with the settings.
 *
 * @param <FROM> the raw data to read from.
 * @param <TO> object to format the data into.
 */
@Beta
public abstract class RecordFormat<FROM, TO> {

  /**
   * Schema used by this format. Assigned by {@link #initialize(FormatSpecification)} once the schema has
   * passed validation, and before {@link #configure(Map)} is invoked.
   */
  protected Schema schema;

  /**
   * Read data from the input format to the output type.
   *
   * @param input input object to read.
   * @return formatted input.
   * @throws UnexpectedFormatException if the input object could not be read because it is of an unexpected format.
   */
  public abstract TO read(FROM input) throws UnexpectedFormatException;

  /**
   * Get the default schema for the format. The default is used if no schema is provided in the call
   * to {@link #initialize(FormatSpecification)}. Should return null if there is no default schema, meaning
   * a schema must be provided during initialization of the format.
   *
   * @return the default schema for the format, or null if a schema must be provided to the format
   */
  @Nullable
  protected abstract Schema getDefaultSchema();

  /**
   * Validate the given schema, throwing an exception if it is unsupported. It can be assumed that the input schema
   * is not null and is a record of at least one field, because {@link #initialize(FormatSpecification)} checks
   * both before calling this method.
   *
   * @param schema the schema to validate for the format
   * @throws UnsupportedTypeException if the schema is not supported
   */
  protected abstract void validateSchema(Schema schema) throws UnsupportedTypeException;

  /**
   * Initialize the format with the given desired schema and properties.
   * Guaranteed to be called once before any other method is called.
   *
   * <p>The schema from the specification is used if present, otherwise {@link #getDefaultSchema()}.
   * The resolved schema must be a record with at least one field and must pass
   * {@link #validateSchema(Schema)}; only then is it stored and {@link #configure(Map)} called.
   *
   * @param formatSpecification the specification for the format, containing the desired schema and settings;
   *                            may be null, in which case the default schema and empty settings are used
   * @throws UnsupportedTypeException if no schema is available, or the desired schema and properties
   *                                  are not supported
   */
  public void initialize(@Nullable FormatSpecification formatSpecification) throws UnsupportedTypeException {
    Schema desiredSchema = null;
    Map<String, String> settings = null;
    if (formatSpecification != null) {
      desiredSchema = formatSpecification.getSchema();
      settings = formatSpecification.getSettings();
    }
    // Never hand a null map to configure(): a specification without settings is treated
    // the same way as no specification at all.
    if (settings == null) {
      settings = Collections.emptyMap();
    }

    if (desiredSchema == null) {
      desiredSchema = getDefaultSchema();
    }
    if (desiredSchema == null) {
      String msg = "A schema must be provided to the format: ";
      if (formatSpecification != null) {
        msg += formatSpecification.getName();
      }
      throw new UnsupportedTypeException(msg);
    }

    // Generic structural check first, so validateSchema() implementations can rely on it.
    validateIsRecord(desiredSchema);
    validateSchema(desiredSchema);

    // The schema is stored before configure() runs so that configure() can consult it.
    this.schema = desiredSchema;
    configure(settings);
  }

  /**
   * Configure the format with the given properties. Guaranteed to be called once before any call to
   * {@link #read(Object)} is made, and after a schema for the format has been set.
   *
   * @param settings the settings to configure the format with; never null when invoked from
   *                 {@link #initialize(FormatSpecification)}
   */
  protected void configure(Map<String, String> settings) {
    // do nothing by default
  }

  /**
   * Get the schema of the format.
   *
   * @return the schema of the format, as currently held in the {@link #schema} field.
   */
  public Schema getSchema() {
    return schema;
  }

  /**
   * Check that the schema is a record containing at least one field.
   *
   * @param schema the schema to check, must not be null
   * @throws UnsupportedTypeException if the schema is not a record or has no fields
   */
  private static void validateIsRecord(Schema schema) throws UnsupportedTypeException {
    // The type check short-circuits, so getFields() is only consulted for record schemas.
    if (schema.getType() != Schema.Type.RECORD || schema.getFields().isEmpty()) {
      throw new UnsupportedTypeException("Schema must be a record with at least one field.");
    }
  }
}