/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.kvstore.lib;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import com.google.common.collect.Lists;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.kiji.mapreduce.kvstore.KeyValueStoreReader;
import org.kiji.schema.KijiClientTest;
import org.kiji.schema.avro.Node;
public class TestAvroRecordKeyValueStore extends KijiClientTest {
private static final Logger LOG = LoggerFactory.getLogger(TestAvroRecordKeyValueStore.class);
/** The path to an existing test avro file of specific records (Nodes). */
public static final String NODE_AVRO_FILE = "org/kiji/mapreduce/kvstore/simple.avro";
/**
 * Opens a store over the bundled file of specific {@link Node} records, keyed on the
 * "label" field, and expects the labels "foo" and "hello" to resolve to records carrying
 * the same label while an unknown label is reported as absent.
 */
@Test
public void testSpecificAvroRecordKeyValueStore() throws IOException, InterruptedException {
  // Resolve the test resource through the class loader and wrap its URI in a Path.
  final String resourceUri =
      getClass().getClassLoader().getResource(NODE_AVRO_FILE).toString();
  final Path nodeFile = new Path(resourceUri);

  final AvroRecordKeyValueStore<CharSequence, Node> nodeStore = AvroRecordKeyValueStore
      .builder()
      .withConfiguration(getConf())
      .withInputPath(nodeFile)
      .withReaderSchema(Node.SCHEMA$)
      .withKeyFieldName("label")
      .build();

  final KeyValueStoreReader<CharSequence, Node> nodeReader = nodeStore.open();
  try {
    // Each expected label must be present and map to a Node with that same label.
    for (final String label : new String[] {"foo", "hello"}) {
      assertTrue(nodeReader.containsKey(label));
      final Node match = nodeReader.get(label);
      assertEquals(label, match.getLabel().toString());
    }
    assertFalse(nodeReader.containsKey("does-not-exist"));
  } finally {
    nodeReader.close();
  }
}
/**
 * Builds the record schema shared by the generic Avro files written in this test.
 *
 * <p>The record is named "record" and has an int field "key" followed by two string
 * fields, "blah" and "value".</p>
 *
 * @return a Schema to use for files containing generic records.
 */
private Schema getGenericSchema() {
  final Schema recordSchema = Schema.createRecord("record", null, null, false);

  // Declare the fields one at a time, in the order they appear in the record.
  final Schema.Field keyField =
      new Schema.Field("key", Schema.create(Schema.Type.INT), null, null);
  final Schema.Field blahField =
      new Schema.Field("blah", Schema.create(Schema.Type.STRING), null, null);
  final Schema.Field valueField =
      new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null);

  recordSchema.setFields(Lists.newArrayList(keyField, blahField, valueField));
  return recordSchema;
}
/**
 * Writes "generic.avro" into the local temp dir, holding generic records with a 'key',
 * 'blah', and 'value' field.
 *
 * <p>Three records are appended in order: (1, "one"), (2, "two") and (2, "deux"). The last
 * one deliberately repeats key 2 so the file contains a duplicate key.</p>
 *
 * @return the path of the file that was written.
 * @throws IOException if the file cannot be created or written.
 */
private Path writeGenericRecordAvroFile() throws IOException {
  final File avroFile = new File(getLocalTempDir(), "generic.avro");
  final Schema schema = getGenericSchema();

  // Open a writer.
  final GenericDatumWriter<GenericRecord> datumWriter =
      new GenericDatumWriter<GenericRecord>(schema);
  final DataFileWriter<GenericRecord> avroOut =
      new DataFileWriter<GenericRecord>(datumWriter).create(schema, avroFile);

  // Parallel arrays: keys[i] is stored with values[i]; 'blah' is constant for every row.
  final int[] keys = {1, 2, 2};
  final String[] values = {"one", "two", "deux"};
  try {
    for (int i = 0; i < keys.length; i++) {
      final GenericData.Record row = new GenericData.Record(schema);
      row.put("key", keys[i]);
      row.put("blah", "blah");
      row.put("value", values[i]);
      avroOut.append(row);
    }
  } finally {
    // Always close the writer, even if an append failed.
    avroOut.close();
  }

  return new Path(avroFile.getPath());
}
/**
 * Writes "generic2.avro" into the local temp dir, containing additional keys and values.
 *
 * <p>Two records are appended in order: key 3 ("blergh", "three") and key 2
 * ("blah", "TWOTWO"). Key 2 also appears in the file produced by
 * {@code writeGenericRecordAvroFile()}.</p>
 *
 * @return the path of the file that was written.
 * @throws IOException if the file cannot be created or written.
 */
private Path writeSecondAvroFile() throws IOException {
  final File avroFile = new File(getLocalTempDir(), "generic2.avro");
  final Schema schema = getGenericSchema();

  // Open a writer.
  final GenericDatumWriter<GenericRecord> datumWriter =
      new GenericDatumWriter<GenericRecord>(schema);
  final DataFileWriter<GenericRecord> avroOut =
      new DataFileWriter<GenericRecord>(datumWriter).create(schema, avroFile);

  // Each row is {key, blah, value}, in the order the records are appended.
  final Object[][] rows = {
    {3, "blergh", "three"},
    {2, "blah", "TWOTWO"},
  };
  try {
    for (final Object[] row : rows) {
      final GenericData.Record entry = new GenericData.Record(schema);
      entry.put("key", row[0]);
      entry.put("blah", row[1]);
      entry.put("value", row[2]);
      avroOut.append(entry);
    }
  } finally {
    // Always close the writer, even if an append failed.
    avroOut.close();
  }

  return new Path(avroFile.getPath());
}
/**
 * Opens a store over the generic-record file with a reader schema that names only the
 * 'key' and 'value' fields, then expects key 1 to map to "one" and key 2 to map to "two".
 */
@Test
public void testGenericAvroRecordKeyValueStore() throws IOException, InterruptedException {
  // The reader schema leaves out the 'blah' field that the writer schema contains.
  final Schema projection = Schema.createRecord("record", null, null, false);
  final Schema.Field keyField =
      new Schema.Field("key", Schema.create(Schema.Type.INT), null, null);
  final Schema.Field valueField =
      new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null);
  projection.setFields(Lists.newArrayList(keyField, valueField));

  // Write the input file, then open a store on it.
  final Path inputFile = writeGenericRecordAvroFile();
  final AvroRecordKeyValueStore<Integer, GenericRecord> kvStore = AvroRecordKeyValueStore
      .builder()
      .withConfiguration(getConf())
      .withInputPath(inputFile)
      .withReaderSchema(projection)
      .withKeyFieldName("key")
      .build();

  final KeyValueStoreReader<Integer, GenericRecord> kvReader = kvStore.open();
  try {
    assertTrue(kvReader.containsKey(1));
    final GenericRecord recordOne = kvReader.get(1);
    assertEquals("one", recordOne.get("value").toString());

    assertTrue(kvReader.containsKey(2));
    final GenericRecord recordTwo = kvReader.get(2);
    assertEquals("The first record in the file with key 2 should be mapped as the value.",
        "two", recordTwo.get("value").toString());
  } finally {
    kvReader.close();
  }
}
/**
 * Opens a store over the generic-record file without supplying a reader schema, then
 * expects key 1 to map to "one" and key 2 to map to "two".
 */
@Test
public void testAvroRecordKVStoreWithoutSchema() throws IOException, InterruptedException {
  // Write the input file, then open a store on it; no withReaderSchema() call is made.
  final Path inputFile = writeGenericRecordAvroFile();
  final AvroRecordKeyValueStore<Integer, GenericRecord> kvStore = AvroRecordKeyValueStore
      .builder()
      .withConfiguration(getConf())
      .withInputPath(inputFile)
      .withKeyFieldName("key")
      .build();

  final KeyValueStoreReader<Integer, GenericRecord> kvReader = kvStore.open();
  try {
    assertTrue(kvReader.containsKey(1));
    final GenericRecord recordOne = kvReader.get(1);
    assertEquals("one", recordOne.get("value").toString());

    assertTrue(kvReader.containsKey(2));
    final GenericRecord recordTwo = kvReader.get(2);
    assertEquals("The first record in the file with key 2 should be mapped as the value.",
        "two", recordTwo.get("value").toString());
  } finally {
    kvReader.close();
  }
}
/**
 * Opens a store over two explicitly listed input files and expects keys from both files
 * to resolve. Key 2 appears in both files; the test expects the value "two", which is the
 * first key-2 record of the first input file.
 */
@Test
public void testMultipleInputFiles() throws IOException, InterruptedException {
  final Path firstFile = writeGenericRecordAvroFile();
  final Path secondFile = writeSecondAvroFile();

  // Input paths are registered in order: first file, then second file.
  final AvroRecordKeyValueStore<Integer, GenericRecord> kvStore = AvroRecordKeyValueStore
      .builder()
      .withConfiguration(getConf())
      .withInputPath(firstFile)
      .withInputPath(secondFile)
      .withKeyFieldName("key")
      .build();

  final KeyValueStoreReader<Integer, GenericRecord> kvReader = kvStore.open();
  try {
    // Key 1 is written only to the first file.
    assertTrue(kvReader.containsKey(1));
    final GenericRecord fromFirstFile = kvReader.get(1);
    assertEquals("one", fromFirstFile.get("value").toString());

    // Key 3 is written only to the second file.
    assertTrue(kvReader.containsKey(3));
    final GenericRecord fromSecondFile = kvReader.get(3);
    assertEquals("three", fromSecondFile.get("value").toString());

    // Key 2 is written to both files.
    assertTrue(kvReader.containsKey(2));
    final GenericRecord sharedKey = kvReader.get(2);
    assertEquals("The first record in the first file with key 2 should be mapped as the value.",
        "two", sharedKey.get("value").toString());
  } finally {
    kvReader.close();
  }
}
/**
 * Passes only the temp directory as the input path (rather than naming each file) and
 * expects keys written to both Avro files in that directory to be readable.
 */
@Test
public void testExpandInputDir() throws IOException, InterruptedException {
  // Populate the temp directory with both files; the returned paths are not needed
  // because only the directory itself is handed to the store.
  writeGenericRecordAvroFile();
  writeSecondAvroFile();

  final Path inputDir = new Path("file:" + getLocalTempDir());
  final AvroRecordKeyValueStore<Integer, GenericRecord> kvStore = AvroRecordKeyValueStore
      .builder()
      .withConfiguration(getConf())
      .withInputPath(inputDir)
      .withKeyFieldName("key")
      .build();

  final KeyValueStoreReader<Integer, GenericRecord> kvReader = kvStore.open();
  try {
    // Keys 1 and 3 each live in exactly one file, so their values are unambiguous.
    final int[] uniqueKeys = {1, 3};
    final String[] expectedValues = {"one", "three"};
    for (int i = 0; i < uniqueKeys.length; i++) {
      assertTrue(kvReader.containsKey(uniqueKeys[i]));
      assertEquals(expectedValues[i], kvReader.get(uniqueKeys[i]).get("value").toString());
    }

    // Key 2 is present in both files. No ordering is specified for the files within
    // the directory, so its value could be "two" or "TWOTWO" depending on the
    // filesystem; only its presence is asserted.
    assertTrue(kvReader.containsKey(2));
  } finally {
    kvReader.close();
  }
}
/**
 * Passes a "*.avro" glob under the temp directory as the input path and expects keys
 * written to both Avro files matching the glob to be readable.
 */
@Test
public void testExpandInputGlob() throws IOException, InterruptedException {
  // Populate the temp directory with both files; only the glob is handed to the store.
  writeGenericRecordAvroFile();
  writeSecondAvroFile();

  final Path inputGlob = new Path("file:" + getLocalTempDir(), "*.avro");
  LOG.info("Using input glob: {}", inputGlob);

  final AvroRecordKeyValueStore<Integer, GenericRecord> kvStore = AvroRecordKeyValueStore
      .builder()
      .withConfiguration(getConf())
      .withInputPath(inputGlob)
      .withKeyFieldName("key")
      .build();

  final KeyValueStoreReader<Integer, GenericRecord> kvReader = kvStore.open();
  try {
    // Keys 1 and 3 each live in exactly one file, so their values are unambiguous.
    final int[] uniqueKeys = {1, 3};
    final String[] expectedValues = {"one", "three"};
    for (int i = 0; i < uniqueKeys.length; i++) {
      assertTrue(kvReader.containsKey(uniqueKeys[i]));
      assertEquals(expectedValues[i], kvReader.get(uniqueKeys[i]).get("value").toString());
    }

    // Key 2 is present in both files. No ordering is specified for the files matched
    // by the glob, so its value could be "two" or "TWOTWO" depending on the
    // filesystem; only its presence is asserted.
    assertTrue(kvReader.containsKey(2));
  } finally {
    kvReader.close();
  }
}
}