/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.data.filesystem;
import com.cloudera.cdk.data.TestDatasetReaders;
import com.cloudera.cdk.data.DatasetReader;
import com.cloudera.cdk.data.DatasetReaderException;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import org.apache.avro.generic.GenericData.Record;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.codehaus.jackson.node.JsonNodeFactory;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import static com.cloudera.cdk.data.filesystem.DatasetTestUtilities.*;
import org.apache.avro.generic.GenericData;
public class TestFileSystemDatasetReader extends TestDatasetReaders {
@Override
public DatasetReader newReader() throws IOException {
return new FileSystemDatasetReader<String>(
FileSystem.getLocal(new Configuration()),
new Path(Resources.getResource("data/strings-100.avro").getFile()),
STRING_SCHEMA);
}
@Override
public int getTotalRecords() {
return 100;
}
@Override
public DatasetTestUtilities.RecordValidator getValidator() {
return new DatasetTestUtilities.RecordValidator<GenericData.Record>() {
@Override
public void validate(GenericData.Record record, int recordNum) {
Assert.assertEquals(String.valueOf(recordNum), record.get("text"));
}
};
}
private FileSystem fileSystem;
@Before
public void setUp() throws IOException {
fileSystem = FileSystem.getLocal(new Configuration());
}
@Test
public void testEvolvedSchema() throws IOException {
Schema schema = Schema.createRecord("mystring", null, null, false);
schema.setFields(Lists.newArrayList(
new Field("text", Schema.create(Type.STRING), null, null),
new Field("text2", Schema.create(Type.STRING), null,
JsonNodeFactory.instance.textNode("N/A"))));
FileSystemDatasetReader<Record> reader = new FileSystemDatasetReader<Record>(
fileSystem, new Path(Resources.getResource("data/strings-100.avro")
.getFile()), schema);
checkReaderBehavior(reader, 100, new RecordValidator<Record>() {
@Override
public void validate(Record record, int recordNum) {
Assert.assertEquals(String.valueOf(recordNum), record.get("text"));
Assert.assertEquals("N/A", record.get("text2"));
}
});
}
@Test(expected = IllegalArgumentException.class)
public void testNullFileSystem() {
DatasetReader<String> reader = new FileSystemDatasetReader<String>(
null, new Path("/tmp/does-not-exist.avro"), STRING_SCHEMA);
}
@Test(expected = IllegalArgumentException.class)
public void testNullFile() {
DatasetReader<String> reader = new FileSystemDatasetReader<String>(
fileSystem, null, STRING_SCHEMA);
}
@Test(expected = DatasetReaderException.class)
public void testMissingFile() {
DatasetReader<String> reader = new FileSystemDatasetReader<String>(
fileSystem, new Path("/tmp/does-not-exist.avro"), STRING_SCHEMA);
// the reader should not fail until open()
Assert.assertNotNull(reader);
reader.open();
}
@Test(expected = DatasetReaderException.class)
public void testEmptyFile() throws IOException {
final Path emptyFile = new Path("/tmp/empty-file.avro");
// outside the try block; if this fails then it isn't correct to remove it
Assert.assertTrue("Failed to create a new empty file",
fileSystem.createNewFile(emptyFile));
try {
DatasetReader<String> reader = new FileSystemDatasetReader<String>(
fileSystem, emptyFile, STRING_SCHEMA);
// the reader should not fail until open()
Assert.assertNotNull(reader);
reader.open();
} finally {
Assert.assertTrue("Failed to clean up empty file",
fileSystem.delete(emptyFile, true));
}
}
}