/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi.filesystem;
import com.google.common.base.Objects;
import com.google.common.io.Closeables;
import java.io.IOException;
import java.util.Arrays;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.reflect.ReflectDatumWriter;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.kitesdk.compat.Hadoop;
import org.kitesdk.data.CompressionType;
import org.kitesdk.data.DatasetRecordException;
import org.kitesdk.data.Formats;
/**
 * A {@link FileSystemWriter.FileAppender} that writes entities to an Avro
 * data file on a Hadoop {@code FileSystem} using reflection-based datum
 * writing.
 *
 * <p>Lifecycle: {@link #open()} must be called before {@link #append(Object)};
 * {@link #close()} finishes the file. Not thread-safe.
 */
class AvroAppender<E> implements FileSystemWriter.FileAppender<E> {

  // Compression level passed to the deflate codec; 9 is maximum compression.
  private static final int DEFLATE_LEVEL = 9;

  private final Schema schema;
  private final FileSystem fileSystem;
  private final Path path;
  private final boolean enableCompression;
  private final CompressionType compressionType;

  // Initialized by open(); null until then.
  private FSDataOutputStream out = null;
  private DataFileWriter<E> dataFileWriter = null;
  private DatumWriter<E> writer = null;

  /**
   * Creates an appender that will write Avro records with the given
   * {@code schema} to {@code path} on {@code fileSystem}.
   *
   * @param fileSystem the Hadoop file system to write to
   * @param path the destination file path (overwritten if it exists)
   * @param schema the Avro schema for appended entities
   * @param compressionType the codec to use; {@code Uncompressed} disables
   *     compression
   */
  public AvroAppender(FileSystem fileSystem, Path path, Schema schema,
      CompressionType compressionType) {
    this.fileSystem = fileSystem;
    this.path = path;
    this.schema = schema;
    this.enableCompression = compressionType != CompressionType.Uncompressed;
    this.compressionType = compressionType;
  }

  /**
   * Opens the output stream and writes the Avro file header.
   *
   * @throws IOException if the file cannot be created or the header written
   */
  @Override
  public void open() throws IOException {
    writer = new ReflectDatumWriter<E>();
    dataFileWriter = new DataFileWriter<E>(writer);

    if (enableCompression) {
      dataFileWriter.setCodec(getCodecFactory());
    }

    out = fileSystem.create(path, true /* overwrite */);
    boolean threw = true;
    try {
      dataFileWriter.create(schema, out);
      threw = false;
    } finally {
      if (threw) {
        // create failed before the writer took ownership of the stream:
        // close it here so the file handle/lease is not leaked
        Closeables.close(out, true);
      }
    }
  }

  /**
   * Appends a single entity to the open Avro file.
   *
   * @throws DatasetRecordException if the record cannot be serialized
   * @throws IOException if the underlying write fails
   */
  @Override
  public void append(E entity) throws IOException {
    try {
      dataFileWriter.append(entity);
    } catch (DataFileWriter.AppendWriteException e) {
      throw new DatasetRecordException("Failed to append record", e);
    }
  }

  /** Returns the current position in the underlying output stream. */
  @Override
  public long pos() throws IOException {
    return out.getPos();
  }

  @Override
  public void flush() throws IOException {
    // Avro sync forces the end of the current block so the data is recoverable
    dataFileWriter.flush();
    Hadoop.FSDataOutputStream.hflush.invoke(out);
  }

  @Override
  public void sync() throws IOException {
    flush();
    // hsync additionally persists the data to disk on the datanodes
    Hadoop.FSDataOutputStream.hsync.invoke(out);
  }

  /**
   * Closes the Avro writer, which also closes the underlying stream.
   * Safe to call if {@link #open()} was never invoked.
   */
  @Override
  public void close() throws IOException {
    Closeables.close(dataFileWriter, false);
  }

  @Override
  public void cleanup() throws IOException {
    // No cleanup tasks needed
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this)
        .add("path", path)
        .add("schema", schema)
        .add("fileSystem", fileSystem)
        .add("enableCompression", enableCompression)
        .add("dataFileWriter", dataFileWriter)
        .add("writer", writer)
        .toString();
  }

  /**
   * Maps the configured {@link CompressionType} to an Avro codec.
   *
   * @throws IllegalArgumentException if the type is not supported for Avro
   */
  private CodecFactory getCodecFactory() {
    switch (compressionType) {
      case Snappy:
        return CodecFactory.snappyCodec();
      case Deflate:
        return CodecFactory.deflateCodec(DEFLATE_LEVEL);
      case Bzip2:
        return CodecFactory.bzip2Codec();
      default:
        throw new IllegalArgumentException(String.format(
            "Unsupported compression format %s. Supported formats: %s",
            compressionType.getName(), Arrays.toString(
                Formats.AVRO.getSupportedCompressionTypes().toArray())));
    }
  }
}