/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.format;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.table.Put;
import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import javax.annotation.Nullable;
/**
 * Transforms {@link StructuredRecord}s into {@link Put}s.
 *
 * <p>The value of one record field, identified by name at construction time, is used to build the row key
 * of the Put. Every other field of the record is added to the Put under the field's own name. When an
 * output schema was supplied, record fields that do not appear in that schema are skipped.
 */
public class RecordPutTransformer {
  private final String rowField;
  private final Schema outputSchema;

  /**
   * Creates a transformer that keys each Put on the given record field.
   *
   * @param rowField name of the record field whose value becomes the row key
   * @param outputSchema optional schema used to filter which record fields are written; when non-null it is
   *                     validated up front
   * @throws IllegalArgumentException if {@code outputSchema} is non-null and is not a record, does not
   *                                  contain {@code rowField}, or contains a field that is not a simple
   *                                  (or nullable simple) type
   */
  public RecordPutTransformer(String rowField, @Nullable Schema outputSchema) {
    if (outputSchema != null) {
      validateSchema(rowField, outputSchema);
    }
    this.rowField = rowField;
    this.outputSchema = outputSchema;
  }

  /**
   * Checks that the output schema is a record, contains the row field, and consists only of simple or
   * nullable simple fields.
   *
   * @throws IllegalArgumentException if any of those conditions does not hold
   */
  private static void validateSchema(String rowField, Schema outputSchema) {
    if (outputSchema.getType() != Schema.Type.RECORD) {
      throw new IllegalArgumentException(
        String.format("Schema must be a record instead of '%s'.", outputSchema.getType()));
    }
    Schema.Field schemaRowField = outputSchema.getField(rowField);
    if (schemaRowField == null) {
      throw new IllegalArgumentException("Row field must be present in the schema");
    }
    // Checked separately from the loop below so that a bad row field gets its own, more specific message.
    if (!schemaRowField.getSchema().isSimpleOrNullableSimple()) {
      throw new IllegalArgumentException("Row field must be a simple type");
    }
    for (Schema.Field field : outputSchema.getFields()) {
      if (!field.getSchema().isSimpleOrNullableSimple()) {
        throw new IllegalArgumentException(
          "Schema must only contain simple fields (boolean, int, long, float, double, bytes, string)");
      }
    }
  }

  /**
   * Converts a record into a Put.
   *
   * <p>The row key is built from the record's row field. All remaining record fields are added to the Put,
   * except those that are missing from the output schema when one was supplied.
   *
   * @param record the record to convert; its schema must be a record schema containing the row field
   * @return a Put keyed on the row field value and holding the remaining fields
   * @throws IllegalArgumentException if the record schema is not a record, the row field is missing from
   *                                  it, the row key value is null, a non-nullable field holds a null
   *                                  value, or a field has an unsupported type
   */
  public Put toPut(StructuredRecord record) {
    Schema recordSchema = record.getSchema();
    Preconditions.checkArgument(recordSchema.getType() == Schema.Type.RECORD, "input must be a record.");
    Schema.Field keyField = getKeyField(recordSchema);
    Preconditions.checkArgument(keyField != null, "Could not find key field in record.");

    Put output = createPut(record, keyField);
    String keyFieldName = keyField.getName();
    for (Schema.Field field : recordSchema.getFields()) {
      String fieldName = field.getName();
      // The key field is already encoded in the row key.
      if (fieldName.equals(keyFieldName)) {
        continue;
      }
      // Skip fields that are not present in the Output Schema
      if (outputSchema != null && outputSchema.getField(fieldName) == null) {
        continue;
      }
      setField(output, field, record.get(fieldName));
    }
    return output;
  }

  /**
   * Adds a single field value to the Put under the field's name.
   *
   * @throws IllegalArgumentException if the value is null but the field is not nullable, or the field is
   *                                  of an unsupported type
   */
  @SuppressWarnings("ConstantConditions")
  private void setField(Put put, Schema.Field field, Object val) {
    String fieldName = field.getName();
    // have to handle nulls differently. In a Put object, it's only valid to use the add(byte[], byte[])
    // for null values, as the other add methods take boolean vs Boolean, int vs Integer, etc.
    if (val == null) {
      // Fail with a message naming the field rather than letting the unboxing casts below throw a bare
      // NullPointerException.
      Preconditions.checkArgument(field.getSchema().isNullable(),
                                  "Field %s is not nullable but its value is null.", fieldName);
      put.add(fieldName, (byte[]) null);
      return;
    }

    Schema.Type type = validateAndGetType(field);
    switch (type) {
      case BOOLEAN:
        put.add(fieldName, (Boolean) val);
        break;
      case INT:
        put.add(fieldName, (Integer) val);
        break;
      case LONG:
        put.add(fieldName, (Long) val);
        break;
      case FLOAT:
        put.add(fieldName, (Float) val);
        break;
      case DOUBLE:
        put.add(fieldName, (Double) val);
        break;
      case BYTES:
        // Bytes values may arrive either as a ByteBuffer or as a raw byte array.
        if (val instanceof ByteBuffer) {
          put.add(fieldName, Bytes.toBytes((ByteBuffer) val));
        } else {
          put.add(fieldName, (byte[]) val);
        }
        break;
      case STRING:
        put.add(fieldName, (String) val);
        break;
      default:
        throw new IllegalArgumentException("Field " + fieldName + " is of unsupported type " + type);
    }
  }

  /**
   * Returns the non-nullable type of the field after checking that it is a simple type.
   *
   * @throws IllegalArgumentException if the (non-nullable) type is not a simple type
   */
  private Schema.Type validateAndGetType(Schema.Field field) {
    Schema fieldSchema = field.getSchema();
    Schema.Type type = fieldSchema.isNullable() ? fieldSchema.getNonNullable().getType() : fieldSchema.getType();
    Preconditions.checkArgument(type.isSimpleType(),
                                "only simple types are supported (boolean, int, long, float, double, bytes, string).");
    return type;
  }

  /**
   * Creates a Put whose row key is the byte encoding of the record's key field value.
   *
   * @throws IllegalArgumentException if the key value is null or the key field is of an unsupported type
   */
  @SuppressWarnings("ConstantConditions")
  private Put createPut(StructuredRecord record, Schema.Field keyField) {
    Object val = record.get(keyField.getName());
    Preconditions.checkArgument(val != null, "Row key cannot be null.");

    Schema.Type keyType = validateAndGetType(keyField);
    switch (keyType) {
      case BOOLEAN:
        return new Put(Bytes.toBytes((Boolean) val));
      case INT:
        return new Put(Bytes.toBytes((Integer) val));
      case LONG:
        return new Put(Bytes.toBytes((Long) val));
      case FLOAT:
        return new Put(Bytes.toBytes((Float) val));
      case DOUBLE:
        return new Put(Bytes.toBytes((Double) val));
      case BYTES:
        // Bytes values may arrive either as a ByteBuffer or as a raw byte array.
        if (val instanceof ByteBuffer) {
          return new Put(Bytes.toBytes((ByteBuffer) val));
        }
        return new Put((byte[]) val);
      case STRING:
        return new Put(Bytes.toBytes((String) val));
      default:
        throw new IllegalArgumentException("Row key is of unsupported type " + keyType);
    }
  }

  /**
   * Looks up the row field in the given record schema.
   *
   * @return the field named by {@code rowField}, or null if the schema has no such field
   */
  @Nullable
  private Schema.Field getKeyField(Schema recordSchema) {
    return recordSchema.getField(rowField);
  }
}