/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.format;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.spi.stream.AbstractStreamEventRecordFormat;
import com.google.common.base.Charsets;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
/**
 * Stream record format that interprets the entire body as a single string.
 *
 * <p>The event body is decoded with a configurable charset (UTF-8 unless the {@link #CHARSET}
 * setting says otherwise) and stored in the single string field of the record schema.
 */
public class TextRecordFormat extends AbstractStreamEventRecordFormat<StructuredRecord> {
  /** Settings key naming the charset used to decode the event body. */
  public static final String CHARSET = "charset";

  /** Field name used by the default schema, and by {@link #read} until {@link #configure} runs. */
  private static final String DEFAULT_FIELD_NAME = "body";

  private Charset charset = Charsets.UTF_8;
  private String fieldName = DEFAULT_FIELD_NAME;

  /**
   * Decodes the body of the given event with the current charset and wraps it in a record.
   *
   * @param event the stream event whose body is decoded
   * @return a record built against {@code schema} with the decoded body stored under the
   *     current field name
   */
  @Override
  public StructuredRecord read(StreamEvent event) {
    // NOTE(review): `schema` is not declared in this class; presumably it is inherited from the
    // superclass and already set by the time read() is called -- confirm.
    String bodyAsStr = Bytes.toString(event.getBody(), charset);
    return StructuredRecord.builder(schema).set(fieldName, bodyAsStr).build();
  }

  /**
   * Returns the schema used when none is requested: a record named {@code stringBody} with a
   * single string field named {@code body}.
   */
  @Override
  protected Schema getDefaultSchema() {
    return Schema.recordOf("stringBody",
                           Schema.Field.of(DEFAULT_FIELD_NAME, Schema.of(Schema.Type.STRING)));
  }

  /**
   * Checks that the desired schema is a record with exactly one field of type string.
   *
   * @param desiredSchema the schema to validate
   * @throws UnsupportedTypeException if the schema is not a record, does not have exactly one
   *     field, or that field is not of type string
   */
  @Override
  protected void validateSchema(Schema desiredSchema) throws UnsupportedTypeException {
    // Only record schemas are inspected for fields; any other schema type (or a missing field
    // list) is rejected with UnsupportedTypeException instead of failing on a null dereference.
    List<Schema.Field> fields =
      desiredSchema.getType() == Schema.Type.RECORD ? desiredSchema.getFields() : null;
    if (fields == null
      || fields.size() != 1
      || fields.get(0).getSchema().getType() != Schema.Type.STRING) {
      throw new UnsupportedTypeException("Schema must be a record with a single string field.");
    }
  }

  /**
   * Applies format settings: an optional charset, and the field name taken from the schema.
   *
   * @param settings format settings; the {@link #CHARSET} entry, when present, names the charset
   *     used to decode event bodies
   * @throws java.nio.charset.IllegalCharsetNameException if the charset name is illegal
   * @throws java.nio.charset.UnsupportedCharsetException if the named charset is not supported
   */
  @Override
  protected void configure(Map<String, String> settings) {
    String charsetStr = settings.get(CHARSET);
    if (charsetStr != null) {
      this.charset = Charset.forName(charsetStr);
    }
    // NOTE(review): assumes `schema` has been set and has passed validateSchema before this is
    // called, so it has a first field -- confirm against the superclass.
    this.fieldName = schema.getFields().get(0).getName();
  }
}