/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi.filesystem;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData.Record;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.Test;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.Flushable;
import org.kitesdk.data.Syncable;
import org.kitesdk.data.spi.ReaderWriterState;
/**
 * Tests {@link FileSystemWriter} behavior with the Avro file format,
 * including flush/sync support and durability of flushed records.
 */
public class TestAvroWriter extends TestFileSystemWriters {
  /**
   * Builds an Avro {@link FileSystemWriter} for the test directory,
   * configured with a 10-second roll interval and a 32 MB target file size.
   */
  @Override
  public FileSystemWriter<Record> newWriter(Path directory, Schema datasetSchema, Schema writerSchema) {
    return FileSystemWriter.newWriter(fs, directory, 100, 2 * 1024 * 1024,
        new DatasetDescriptor.Builder()
            .property(
                "kite.writer.roll-interval-seconds", String.valueOf(10))
            .property(
                "kite.writer.target-file-size",
                String.valueOf(32 * 1024 * 1024)) // 32 MB
            .schema(datasetSchema)
            .format("avro")
            .build(), writerSchema);
  }

  /** Opens a reader over the single Avro data file at {@code path}. */
  @Override
  public DatasetReader<Record> newReader(Path path, Schema schema) {
    return new FileSystemDatasetReader<Record>(fs, path, schema, Record.class);
  }

  @Test
  public void testIsFlushable() {
    Assert.assertTrue(fsWriter instanceof Flushable);
  }

  @Test
  public void testIsSyncable() {
    Assert.assertTrue(fsWriter instanceof Syncable);
  }

  /**
   * Verifies that records written before a flush are committed even when the
   * writer is closed from the ERROR state, while unflushed records written
   * after the flush are discarded.
   */
  @Test
  public void testCommitFlushedRecords() throws IOException {
    init(fsWriter);

    List<Record> written = Lists.newArrayList();
    for (long i = 0; i < 10000; i += 1) {
      Record record = record(i, "test-" + i);
      fsWriter.write(record);
      written.add(record);
    }

    ((Flushable) fsWriter).flush();

    // these records are never flushed and must not survive the close below
    for (long i = 10000; i < 11000; i += 1) {
      fsWriter.write(record(i, "test-" + i));
    }

    // put the writer into an error state, simulating either:
    // 1. A failed record with an IOException or unknown RuntimeException
    // 2. A failed flush or sync for IncrementableWriters
    fsWriter.state = ReaderWriterState.ERROR;

    fsWriter.close();

    FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden());
    Assert.assertEquals("Should contain a visible data file", 1, stats.length);

    // was: raw (Iterator) cast and a reader that was never closed
    DatasetReader<Record> reader = newReader(stats[0].getPath(), TEST_SCHEMA);
    try {
      Assert.assertEquals("Should match written records",
          written, Lists.newArrayList((Iterator<Record>) init(reader)));
    } finally {
      reader.close();
    }
  }
}