/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package org.apache.avro.hadoop.file;

import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.hadoop.io.AvroKeyValue;
import org.apache.avro.io.DatumReader;
import org.apache.avro.mapred.FsInput;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.specific.SpecificData;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests for {@link SortedKeyValueFile}: write/read round trips, index-file
 * layout, codec selection, sort-order enforcement, and use of an alternate
 * (reflect) data model.
 */
public class TestSortedKeyValueFile {
  private static final Logger LOG = LoggerFactory.getLogger(TestSortedKeyValueFile.class);

  @Rule
  public TemporaryFolder mTempDir = new TemporaryFolder();

  /** Appending keys out of sorted order must be rejected by the writer. */
  @Test(expected = IllegalArgumentException.class)
  public void testWriteOutOfSortedOrder() throws IOException {
    LOG.debug("Writing some records to a SortedKeyValueFile...");

    Configuration conf = new Configuration();
    SortedKeyValueFile.Writer.Options options = new SortedKeyValueFile.Writer.Options()
        .withKeySchema(Schema.create(Schema.Type.STRING))
        .withValueSchema(Schema.create(Schema.Type.STRING))
        .withConfiguration(conf)
        .withPath(new Path(mTempDir.getRoot().getPath(), "myfile"))
        .withIndexInterval(2);  // Index every other record.

    SortedKeyValueFile.Writer<CharSequence, CharSequence> writer =
        new SortedKeyValueFile.Writer<CharSequence, CharSequence>(options);

    Utf8 key = new Utf8();  // re-use key, to test copied

    try {
      writer.append(key.set("banana"), "Banana");
      writer.append(key.set("apple"), "Apple");  // Ruh, roh!
    } finally {
      writer.close();
    }
  }

  /** Each named codec must be recorded in the data file's metadata. */
  @Test
  public void testNamedCodecs() throws IOException {
    Configuration conf = new Configuration();
    Path myfile = new Path(mTempDir.getRoot().getPath(), "myfile");
    Schema key = Schema.create(Schema.Type.STRING);
    Schema value = Schema.create(Schema.Type.STRING);
    Schema recordSchema = AvroKeyValue.getSchema(key, value);
    DatumReader<GenericRecord> datumReader =
        SpecificData.get().createDatumReader(recordSchema);
    DataFileReader<GenericRecord> reader;

    SortedKeyValueFile.Writer.Options options = new SortedKeyValueFile.Writer.Options()
        .withKeySchema(key)
        .withValueSchema(value)
        .withConfiguration(conf)
        .withPath(myfile);

    SortedKeyValueFile.Writer<CharSequence, CharSequence> writer;

    for (String codec : new String[]{"null", "deflate", "snappy", "bzip2"}) {
      // Parameterized logging; the original concatenation was missing a space
      // ("Using nullcodec...").
      LOG.debug("Using {} codec for a SortedKeyValueFile...", codec);
      options.withCodec(codec);

      writer = new SortedKeyValueFile.Writer<CharSequence, CharSequence>(options);
      writer.close();

      reader = new DataFileReader<GenericRecord>(
          new FsInput(new Path(myfile, SortedKeyValueFile.DATA_FILENAME), conf),
          datumReader);
      // Close in finally so a failed assertion does not leak the reader.
      try {
        assertEquals(codec, reader.getMetaString("avro.codec"));
      } finally {
        reader.close();
      }
    }
  }

  /** A codec supplied as a CodecFactory (not a name) must also be honored. */
  @Test
  public void testDeflateClassCodec() throws IOException {
    Configuration conf = new Configuration();
    Path myfile = new Path(mTempDir.getRoot().getPath(), "myfile");
    Schema key = Schema.create(Schema.Type.STRING);
    Schema value = Schema.create(Schema.Type.STRING);
    Schema recordSchema = AvroKeyValue.getSchema(key, value);
    DatumReader<GenericRecord> datumReader =
        SpecificData.get().createDatumReader(recordSchema);
    DataFileReader<GenericRecord> reader;

    LOG.debug("Using CodecFactory.deflateCodec() for a SortedKeyValueFile...");
    SortedKeyValueFile.Writer.Options options = new SortedKeyValueFile.Writer.Options()
        .withKeySchema(key)
        .withValueSchema(value)
        .withConfiguration(conf)
        .withPath(myfile)
        .withCodec(CodecFactory.deflateCodec(9));

    SortedKeyValueFile.Writer<CharSequence, CharSequence> writer =
        new SortedKeyValueFile.Writer<CharSequence, CharSequence>(options);
    writer.close();

    reader = new DataFileReader<GenericRecord>(
        new FsInput(new Path(myfile, SortedKeyValueFile.DATA_FILENAME), conf),
        datumReader);
    // Close in finally so a failed assertion does not leak the reader.
    try {
      assertEquals("deflate", reader.getMetaString("avro.codec"));
    } finally {
      reader.close();
    }
  }

  /** An unknown codec name must raise AvroRuntimeException with a clear message. */
  @Test
  public void testBadCodec() throws IOException {
    LOG.debug("Using a bad codec for a SortedKeyValueFile...");

    try {
      new SortedKeyValueFile.Writer.Options().withCodec("foobar");
      // Original version passed silently when no exception was thrown.
      fail("Expected AvroRuntimeException for unrecognized codec 'foobar'");
    } catch (AvroRuntimeException e) {
      assertEquals("Unrecognized codec: foobar", e.getMessage());
    }
  }

  /** Writing records produces a directory with a correct index and data file. */
  @Test
  public void testWriter() throws IOException {
    LOG.debug("Writing some records to a SortedKeyValueFile...");

    Configuration conf = new Configuration();
    SortedKeyValueFile.Writer.Options options = new SortedKeyValueFile.Writer.Options()
        .withKeySchema(Schema.create(Schema.Type.STRING))
        .withValueSchema(Schema.create(Schema.Type.STRING))
        .withConfiguration(conf)
        .withPath(new Path(mTempDir.getRoot().getPath(), "myfile"))
        .withIndexInterval(2);  // Index every other record.

    SortedKeyValueFile.Writer<CharSequence, CharSequence> writer =
        new SortedKeyValueFile.Writer<CharSequence, CharSequence>(options);

    try {
      writer.append("apple", "Apple");  // Will be indexed.
      writer.append("banana", "Banana");
      writer.append("carrot", "Carrot");  // Will be indexed.
      writer.append("durian", "Durian");
    } finally {
      writer.close();
    }

    LOG.debug("Checking the generated directory...");
    File directory = new File(mTempDir.getRoot().getPath(), "myfile");
    assertTrue(directory.exists());

    LOG.debug("Checking the generated index file...");
    File indexFile = new File(directory, SortedKeyValueFile.INDEX_FILENAME);
    // Index records map key -> byte position (LONG) in the data file.
    DatumReader<GenericRecord> indexReader = new GenericDatumReader<GenericRecord>(
        AvroKeyValue.getSchema(options.getKeySchema(), Schema.create(Schema.Type.LONG)));
    FileReader<GenericRecord> indexFileReader =
        DataFileReader.openReader(indexFile, indexReader);

    List<AvroKeyValue<CharSequence, Long>> indexRecords =
        new ArrayList<AvroKeyValue<CharSequence, Long>>();
    try {
      for (GenericRecord indexRecord : indexFileReader) {
        indexRecords.add(new AvroKeyValue<CharSequence, Long>(indexRecord));
      }
    } finally {
      indexFileReader.close();
    }

    // With four records and an index interval of 2, exactly two are indexed.
    assertEquals(2, indexRecords.size());
    assertEquals("apple", indexRecords.get(0).getKey().toString());
    LOG.debug("apple's position in the file: " + indexRecords.get(0).getValue());
    assertEquals("carrot", indexRecords.get(1).getKey().toString());
    LOG.debug("carrot's position in the file: " + indexRecords.get(1).getValue());

    LOG.debug("Checking the generated data file...");
    File dataFile = new File(directory, SortedKeyValueFile.DATA_FILENAME);
    DatumReader<GenericRecord> dataReader = new GenericDatumReader<GenericRecord>(
        AvroKeyValue.getSchema(options.getKeySchema(), options.getValueSchema()));
    DataFileReader<GenericRecord> dataFileReader =
        new DataFileReader<GenericRecord>(dataFile, dataReader);

    try {
      // Seeking to an indexed position must land exactly on that record.
      dataFileReader.seek(indexRecords.get(0).getValue());
      assertTrue(dataFileReader.hasNext());
      AvroKeyValue<CharSequence, CharSequence> appleRecord =
          new AvroKeyValue<CharSequence, CharSequence>(dataFileReader.next());
      assertEquals("apple", appleRecord.getKey().toString());
      assertEquals("Apple", appleRecord.getValue().toString());

      dataFileReader.seek(indexRecords.get(1).getValue());
      assertTrue(dataFileReader.hasNext());
      AvroKeyValue<CharSequence, CharSequence> carrotRecord =
          new AvroKeyValue<CharSequence, CharSequence>(dataFileReader.next());
      assertEquals("carrot", carrotRecord.getKey().toString());
      assertEquals("Carrot", carrotRecord.getValue().toString());

      // The unindexed record after "carrot" is reachable by sequential read.
      assertTrue(dataFileReader.hasNext());
      AvroKeyValue<CharSequence, CharSequence> durianRecord =
          new AvroKeyValue<CharSequence, CharSequence>(dataFileReader.next());
      assertEquals("durian", durianRecord.getKey().toString());
      assertEquals("Durian", durianRecord.getValue().toString());
    } finally {
      dataFileReader.close();
    }
  }

  /** Reader lookups return values for present keys and null for absent ones. */
  @Test
  public void testReader() throws IOException {
    Configuration conf = new Configuration();
    SortedKeyValueFile.Writer.Options writerOptions = new SortedKeyValueFile.Writer.Options()
        .withKeySchema(Schema.create(Schema.Type.STRING))
        .withValueSchema(Schema.create(Schema.Type.STRING))
        .withConfiguration(conf)
        .withPath(new Path(mTempDir.getRoot().getPath(), "myfile"))
        .withIndexInterval(2);  // Index every other record.

    SortedKeyValueFile.Writer<CharSequence, CharSequence> writer =
        new SortedKeyValueFile.Writer<CharSequence, CharSequence>(writerOptions);

    try {
      writer.append("apple", "Apple");  // Will be indexed.
      writer.append("banana", "Banana");
      writer.append("carrot", "Carrot");  // Will be indexed.
      writer.append("durian", "Durian");
    } finally {
      writer.close();
    }

    LOG.debug("Reading the file back using a reader...");
    SortedKeyValueFile.Reader.Options readerOptions = new SortedKeyValueFile.Reader.Options()
        .withKeySchema(Schema.create(Schema.Type.STRING))
        .withValueSchema(Schema.create(Schema.Type.STRING))
        .withConfiguration(conf)
        .withPath(new Path(mTempDir.getRoot().getPath(), "myfile"));

    SortedKeyValueFile.Reader<CharSequence, CharSequence> reader =
        new SortedKeyValueFile.Reader<CharSequence, CharSequence>(readerOptions);

    try {
      assertEquals("Carrot", reader.get("carrot").toString());
      assertEquals("Banana", reader.get("banana").toString());
      // Probes before the first key, between keys, and after the last key.
      assertNull(reader.get("a-vegetable"));
      assertNull(reader.get("beet"));
      assertNull(reader.get("zzz"));
    } finally {
      reader.close();
    }
  }

  /** Simple comparable string wrapper used to exercise the reflect data model. */
  public static class Stringy implements Comparable<Stringy> {
    private String s;

    public Stringy() {}

    public Stringy(String s) {
      this.s = s;
    }

    @Override
    public String toString() {
      return s;
    }

    @Override
    public int hashCode() {
      return s.hashCode();
    }

    @Override
    public boolean equals(Object that) {
      // Null-safe: equals(null) must return false, not throw NPE.
      return that != null && this.s.equals(that.toString());
    }

    @Override
    public int compareTo(Stringy that) {
      return this.s.compareTo(that.s);
    }
  }

  /** Round trip using ReflectData instead of the default data model. */
  @Test
  public void testAlternateModel() throws Exception {
    LOG.debug("Writing some reflect records...");

    ReflectData model = ReflectData.get();
    Configuration conf = new Configuration();
    SortedKeyValueFile.Writer.Options options = new SortedKeyValueFile.Writer.Options()
        .withKeySchema(model.getSchema(Stringy.class))
        .withValueSchema(model.getSchema(Stringy.class))
        .withConfiguration(conf)
        .withPath(new Path(mTempDir.getRoot().getPath(), "reflect"))
        .withDataModel(model)
        .withIndexInterval(2);

    SortedKeyValueFile.Writer<Stringy, Stringy> writer =
        new SortedKeyValueFile.Writer<Stringy, Stringy>(options);

    try {
      writer.append(new Stringy("apple"), new Stringy("Apple"));
      writer.append(new Stringy("banana"), new Stringy("Banana"));
      writer.append(new Stringy("carrot"), new Stringy("Carrot"));
      writer.append(new Stringy("durian"), new Stringy("Durian"));
    } finally {
      writer.close();
    }

    LOG.debug("Reading the file back using a reader...");
    SortedKeyValueFile.Reader.Options readerOptions = new SortedKeyValueFile.Reader.Options()
        .withKeySchema(model.getSchema(Stringy.class))
        .withValueSchema(model.getSchema(Stringy.class))
        .withConfiguration(conf)
        .withPath(new Path(mTempDir.getRoot().getPath(), "reflect"))
        .withDataModel(model);

    SortedKeyValueFile.Reader<Stringy, Stringy> reader =
        new SortedKeyValueFile.Reader<Stringy, Stringy>(readerOptions);

    try {
      assertEquals(new Stringy("Carrot"), reader.get(new Stringy("carrot")));
      assertEquals(new Stringy("Banana"), reader.get(new Stringy("banana")));
      assertNull(reader.get(new Stringy("a-vegetable")));
      assertNull(reader.get(new Stringy("beet")));
      assertNull(reader.get(new Stringy("zzz")));
    } finally {
      reader.close();
    }
  }
}