/**
* Copyright 2010 TransPac Software, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bixolabs.simpledb;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.mapred.JobConf;
import org.junit.After;
import org.junit.Test;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.SequenceFile;
import cascading.tap.Lfs;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;
import com.bixolabs.aws.SimpleDB;
import com.bixolabs.aws.TestUtils;
/**
 * Integration tests for {@link SimpleDBTap} / {@link SimpleDBScheme}.
 *
 * <p>Apart from {@link #testSchemeChecks()}, every test talks to SimpleDB using the
 * credentials supplied by {@link TestUtils}. Each test writes tuples from a local
 * sequence file into one or more shards of the test domain and then verifies the
 * result either through domain metadata or by reading the data back through a second
 * flow. Domains whose name starts with {@link TestUtils#TEST_DOMAIN_NAME} are deleted
 * after every test.
 */
public class SimpleDBTapIntegrationTest {

    /**
     * Size in bytes of the implicit item hash attribute that is stored alongside the
     * explicitly written attributes of every item (a zero-padded value, per the size
     * assertions in these tests).
     */
    private static final int ITEM_HASH_LENGTH = 11;

    /**
     * Best-effort cleanup: deletes every domain whose name starts with the test domain
     * name. Failures never fail the test, but they are reported on stderr so that
     * leaked domains do not go unnoticed.
     */
    @After
    public void tearDown() {
        try {
            SimpleDB sdb = makeSimpleDB();
            for (String domain : sdb.listDomains()) {
                if (!domain.startsWith(TestUtils.TEST_DOMAIN_NAME)) {
                    continue;
                }

                try {
                    sdb.deleteDomain(domain);
                } catch (Exception e) {
                    // Keep going so that the remaining test domains still get removed.
                    System.err.println("Unable to delete test domain " + domain + ": " + e);
                }
            }
        } catch (Exception e) {
            System.err.println("Unable to clean up test domains: " + e);
        }
    }

    /** Verifies that invalid scheme/item field combinations are rejected by the constructor. */
    @Test
    public void testSchemeChecks() {
        assertSchemeRejected("Exception should be thrown when scheme field is empty",
                        new Fields(), new Fields("a"));
        assertSchemeRejected("Exception should be thrown when item field isn't exactly one item",
                        new Fields("a", "b", "c"), new Fields("a", "d"));
        assertSchemeRejected("Exception should be thrown when item field isn't in scheme fields",
                        new Fields("a", "b", "c"), new Fields("d"));
    }

    /**
     * Writes tuples across several shards (dropping one source field) and checks the
     * item count, attribute count and attribute size summed over all shards.
     */
    @Test
    public void testWritingTuples() throws Exception {
        final String in = "build/test/SimpleDBTapTest/testWritingTuples/in";
        final int numShards = 4;
        final int numItems = 10;
        final String domainName = TestUtils.TEST_DOMAIN_NAME;
        final Fields testFields = new Fields("key", "rank", "value", "extra");

        Lfs lfsSource = makeSourceTuples(in, testFields, numItems);
        Pipe pipe = new Pipe("test");

        // Skip the "extra" field when writing out tuples, to further complicate things
        Fields schemeFields = new Fields("key", "rank", "value");
        SimpleDBScheme scheme = new SimpleDBScheme(schemeFields, new Fields("key"));
        Tap sink = makeSimpleDBTap(scheme, domainName, numShards);
        runFlow(lfsSource, sink, pipe);

        SimpleDB sdb = makeSimpleDB();
        int actualItems = 0;
        int actualAttributeValues = 0;
        int actualAttributeValuesSize = 0;
        for (String shardName : SimpleDBUtils.getShardNames(domainName, numShards)) {
            Map<String, String> metadata = sdb.domainMetaData(shardName);
            actualItems += Integer.parseInt(metadata.get("ItemCount"));
            actualAttributeValues += Integer.parseInt(metadata.get("AttributeValueCount"));
            actualAttributeValuesSize += Integer.parseInt(metadata.get("AttributeValuesSizeBytes"));
        }

        // There's one hidden attribute (the item hash) for each item, so we have
        // the two attributes we write out ("rank", "value") plus the item hash.
        final int expectedAttributeValues = numItems * (2 + 1);

        // 10 ranks and 10 values and 10 item hashes
        final int expectedAttributeBytes = (numItems * "rank-value-0".length())
                        + (numItems * "value-value-0".length())
                        + (numItems * ITEM_HASH_LENGTH);

        assertEquals(numItems, actualItems);
        assertEquals(expectedAttributeValues, actualAttributeValues);
        assertEquals(expectedAttributeBytes, actualAttributeValuesSize);
    }

    /** Writes 25 items into a single shard and checks the resulting item count. */
    @Test
    public void testBatchWriting() throws Exception {
        final int numItems = 25;
        final int numShards = 1;
        final String domainName = TestUtils.TEST_DOMAIN_NAME;
        final Fields itemField = new Fields("key");
        final Fields attrFields = new Fields("value");
        final Fields testFields = itemField.append(attrFields);
        final String in = "build/test/SimpleDBTapTest/testBatchWriting/in";

        Lfs lfsSource = makeSourceTuples(in, testFields, numItems);
        Pipe pipe = new Pipe("test");
        SimpleDBScheme scheme = new SimpleDBScheme(testFields, itemField);
        Tap sdbSink = makeSimpleDBTap(scheme, domainName, numShards);
        runFlow(lfsSource, sdbSink, pipe);

        // Now verify how much was written out.
        SimpleDB sdb = makeSimpleDB();
        String shardName = SimpleDBUtils.getShardNames(domainName, numShards).get(0);
        Map<String, String> metadata = sdb.domainMetaData(shardName);
        assertEquals(String.valueOf(numItems), metadata.get("ItemCount"));
    }

    /** Writes the same item twice with different values and checks that only the latest value is read back. */
    @Test
    public void testUpdatingItem() throws Exception {
        final int numShards = 1;
        final String domainName = TestUtils.TEST_DOMAIN_NAME;
        final Fields testFields = new Fields("key", "value");
        final String in = "build/test/SimpleDBTapTest/testUpdatingItem/in";
        final String out = "build/test/SimpleDBTapTest/testUpdatingItem/out";

        Pipe pipe = new Pipe("test");
        SimpleDBScheme scheme = new SimpleDBScheme(testFields, new Fields("key"));
        Tap sink = makeSimpleDBTap(scheme, domainName, numShards);

        // Do an initial write
        Lfs lfs = new Lfs(new SequenceFile(testFields), in, SinkMode.REPLACE);
        writeTuples(lfs, new Tuple("key-0", "value-0"));
        runFlow(lfs, sink, pipe);

        // Update that same entry with a new value
        writeTuples(lfs, new Tuple("key-0", "value-1"));
        runFlow(lfs, sink, pipe);

        // Read in that item, and verify the value.
        Tap sdbSource = makeSimpleDBTap(scheme, domainName, numShards);
        Tap lfsSink = new Lfs(new SequenceFile(testFields), out, SinkMode.REPLACE);
        runFlow(sdbSource, lfsSink, pipe);

        // Make sure there is exactly one item, and that it carries the updated value
        TupleEntryIterator sinkIter = lfsSink.openForRead(new JobConf());
        try {
            assertTrue(sinkIter.hasNext());
            TupleEntry entry = sinkIter.next();
            assertEquals("value-1", entry.getString("value"));
            assertFalse(sinkIter.hasNext());
        } finally {
            sinkIter.close();
        }
    }

    /** Runs two separate write flows with distinct keys and checks that both items end up in the shard. */
    @Test
    public void testWritingMultipleTimes() throws Exception {
        final int numShards = 1;
        final int numItems = 2;
        final String domainName = TestUtils.TEST_DOMAIN_NAME;
        final Fields testFields = new Fields("key", "value");

        Pipe pipe = new Pipe("test");
        SimpleDBScheme scheme = new SimpleDBScheme(testFields, new Fields("key"));
        Tap sink = makeSimpleDBTap(scheme, domainName, numShards);

        // Do an initial write
        String in = "build/test/SimpleDBTapTest/testWritingMultipleTimes/in1";
        Lfs lfs = new Lfs(new SequenceFile(testFields), in, SinkMode.REPLACE);
        writeTuples(lfs, new Tuple("key-0", "value-0"));
        runFlow(lfs, sink, pipe);

        // Now do a second write
        in = "build/test/SimpleDBTapTest/testWritingMultipleTimes/in2";
        lfs = new Lfs(new SequenceFile(testFields), in, SinkMode.REPLACE);
        writeTuples(lfs, new Tuple("key-1", "value-1"));
        runFlow(lfs, sink, pipe);

        // Make sure both values are in the table now.
        SimpleDB sdb = makeSimpleDB();
        List<String> shardNames = SimpleDBUtils.getShardNames(domainName, numShards);
        assertEquals(1, shardNames.size());
        String shardName = shardNames.get(0);

        Map<String, String> metadata = sdb.domainMetaData(shardName);
        assertEquals(String.valueOf(numItems), metadata.get("ItemCount"));

        // We have two attributes per each item, the one we explicitly write
        // out ("value") and the implicit item hash
        assertEquals(String.valueOf(numItems * 2), metadata.get("AttributeValueCount"));

        // 2 values of 7 bytes ("value-X") each, plus 2 item hashes (36 bytes in total)
        final int expectedBytes = numItems * ("value-0".length() + ITEM_HASH_LENGTH);
        assertEquals(String.valueOf(expectedBytes), metadata.get("AttributeValuesSizeBytes"));

        for (int i = 0; i < numItems; i++) {
            String[] values = sdb.getAttribute(shardName, "key-" + i, "value");
            assertEquals(1, values.length);
            assertEquals("value-" + i, values[0]);
        }
    }

    /** Writes 100 records, reads them back through a second flow and compares them with the source. */
    @Test
    public void testWriteThenRead() throws Exception {
        final int numShards = 1;
        final int numRecords = 100;
        final String domainName = TestUtils.TEST_DOMAIN_NAME;
        final Fields testFields = new Fields("key", "value");
        final String in = "build/test/SimpleDBTapTest/testWriteThenRead/in";
        final String out = "build/test/SimpleDBTapTest/testWriteThenRead/out";

        Lfs lfsSource = makeSourceTuples(in, testFields, numRecords);
        Pipe pipe = new Pipe("test");
        SimpleDBScheme scheme = new SimpleDBScheme(testFields, new Fields("key"));
        Tap sdbSink = makeSimpleDBTap(scheme, domainName, numShards);
        runFlow(lfsSource, sdbSink, pipe);

        // Now read back in the values.
        Tap sdbSource = makeSimpleDBTap(scheme, domainName, numShards);
        Tap lfsSink = new Lfs(new SequenceFile(testFields), out, SinkMode.REPLACE);
        runFlow(sdbSource, lfsSink, pipe);

        // Now verify that what we read in matches what we wrote out.
        List<Tuple> sourceTuples = readTuples(lfsSource, numRecords);
        List<Tuple> sinkTuples = readTuples(lfsSink, numRecords);
        assertEquals(numRecords, sourceTuples.size());

        // The comparison loop below is driven by the sink size, so without this check
        // the test would pass even if nothing at all was read back.
        assertFalse("No tuples were read back from SimpleDB", sinkTuples.isEmpty());

        // TODO CSc Uncommenting the following line (and changing the for loop below)
        // causes this test to fail because sinkTuples.size() suddenly returns 102
        // instead of 100 (though only when run via the command line). I modified the
        // code in an attempt to find out more about the extra sink tuples, but this
        // version succeeds. ARRRRGGGGGHHHHH!
        // assertEquals(sourceTuples.size(), sinkTuples.size());

        Comparator<Tuple> byFirstField = new Comparator<Tuple>() {
            @Override
            public int compare(Tuple o1, Tuple o2) {
                return o1.getString(0).compareTo(o2.getString(0));
            }
        };
        Collections.sort(sourceTuples, byFirstField);
        Collections.sort(sinkTuples, byFirstField);

        // TODO CSc Need this version of the code to find out more about extra sink
        // tuples, but unfortunately the current version succeeds.
        // for (int i = 0; i < numRecords; i++) {
        for (int i = 0; i < sinkTuples.size(); i++) {
            if (i >= sourceTuples.size()) {
                fail("Extra sink tuple:" + sinkTuples.get(i));
            }

            assertEquals(sourceTuples.get(i), sinkTuples.get(i));
        }
    }

    /** Checks that boolean, int, long and double values can be read back after being stored. */
    @Test
    public void testRoundTripTypeConversion() throws Exception {
        final int numShards = 1;
        final String domainName = TestUtils.TEST_DOMAIN_NAME;
        final Fields itemField = new Fields("key");
        final Fields attrFields = new Fields("booleanValue", "intValue", "longValue", "doubleValue");
        final Fields testFields = itemField.append(attrFields);
        final String in = "build/test/SimpleDBTapTest/testRoundTripTypeConversion/in";
        final String out = "build/test/SimpleDBTapTest/testRoundTripTypeConversion/out";

        Lfs lfs = new Lfs(new SequenceFile(testFields), in, SinkMode.REPLACE);
        writeTuples(lfs, new Tuple("key", true, 100, Long.MAX_VALUE, 1.0));

        Pipe pipe = new Pipe("test");
        SimpleDBScheme scheme = new SimpleDBScheme(testFields, itemField);
        Tap sdbSink = makeSimpleDBTap(scheme, domainName, numShards);
        runFlow(lfs, sdbSink, pipe);

        // Now read back in the values.
        Tap sdbSource = makeSimpleDBTap(scheme, domainName, numShards);
        Tap lfsSink = new Lfs(new SequenceFile(testFields), out, SinkMode.REPLACE);
        runFlow(sdbSource, lfsSink, pipe);

        TupleEntryIterator sinkTuples = lfsSink.openForRead(new JobConf());
        try {
            assertTrue(sinkTuples.hasNext());
            TupleEntry t = sinkTuples.next();
            assertTrue(t.getBoolean("booleanValue"));
            assertEquals(100, t.getInteger("intValue"));
            assertEquals(Long.MAX_VALUE, t.getLong("longValue"));
            assertEquals(1.0, t.getDouble("doubleValue"), .00001);
        } finally {
            sinkTuples.close();
        }
    }

    /** Writes 10 records over 5 shards and checks that a select limit of 2 yields exactly 2 tuples. */
    @Test
    public void testSelectLimit() throws Exception {
        final int numShards = 5;
        final int numRecords = 10;
        final int selectLimit = 2;
        final String domainName = TestUtils.TEST_DOMAIN_NAME;
        final Fields testFields = new Fields("key", "value");
        final String in = "build/test/SimpleDBTapTest/testSelectLimit/in";
        final String out = "build/test/SimpleDBTapTest/testSelectLimit/out";

        Lfs lfsSource = makeSourceTuples(in, testFields, numRecords);
        Pipe pipe = new Pipe("test");
        SimpleDBScheme scheme = new SimpleDBScheme(testFields, new Fields("key"));
        Tap sdbSink = makeSimpleDBTap(scheme, domainName, numShards);
        runFlow(lfsSource, sdbSink, pipe);

        // Now read back in the values, with a limit
        scheme.setSelectLimit(selectLimit);
        Tap sdbSource = makeSimpleDBTap(scheme, domainName, numShards);
        Tap lfsSink = new Lfs(new SequenceFile(testFields), out, SinkMode.REPLACE);
        runFlow(sdbSource, lfsSink, pipe);

        // Make sure we got back what we expected.
        assertEquals(selectLimit, readTuples(lfsSink, selectLimit).size());
    }

    /** Writes records whose field names need XML escaping and reads back a subset selected by a query. */
    @Test
    public void testQuery() throws Exception {
        final int numShards = 1;
        final int numRecords = 11; // 0...10
        final String domainName = TestUtils.TEST_DOMAIN_NAME;

        // validate XML unescaping by forcing key/value names to be escaped
        final Fields testFields = new Fields("key-<&;>", "value-<&;>");
        final String in = "build/test/SimpleDBTapTest/testQuery/in";
        final String out = "build/test/SimpleDBTapTest/testQuery/out";

        Lfs lfsSource = makeSourceTuples(in, testFields, numRecords);
        Pipe pipe = new Pipe("test");
        SimpleDBScheme scheme = new SimpleDBScheme(testFields, new Fields("key-<&;>"));
        Tap sdbSink = makeSimpleDBTap(scheme, domainName, numShards);
        runFlow(lfsSource, sdbSink, pipe);

        // Now read back in the values, using a query that only selects the items
        // for records 1 and 10 (the only keys starting with "...-value-1")
        scheme.setQuery("itemName() like 'key-<&;>-value-1%'");
        Tap sdbSource = makeSimpleDBTap(scheme, domainName, numShards);
        Tap lfsSink = new Lfs(new SequenceFile(testFields), out, SinkMode.REPLACE);
        runFlow(sdbSource, lfsSink, pipe);

        // Make sure we got back what we expected.
        TupleEntryIterator sinkTuples = lfsSink.openForRead(new JobConf());
        try {
            for (int i = 0; i < 2; i++) {
                assertTrue(sinkTuples.hasNext());
                TupleEntry t = sinkTuples.next();
                assertTrue("value: " + t.getString("key-<&;>"),
                                t.getString("key-<&;>").startsWith("key-<&;>-value-1"));
            }

            assertFalse(sinkTuples.hasNext());
        } finally {
            sinkTuples.close();
        }
    }

    /**
     * Asserts that constructing a {@link SimpleDBScheme} from the given fields throws.
     *
     * @param message failure message used when no exception is thrown
     * @param schemeFields fields passed as the scheme fields
     * @param itemField fields passed as the item field
     */
    private static void assertSchemeRejected(String message, Fields schemeFields, Fields itemField) {
        try {
            new SimpleDBScheme(schemeFields, itemField);
        } catch (Exception expected) {
            // The constructor rejected the fields, which is what we want.
            return;
        }

        fail(message);
    }

    /** Creates a SimpleDB client using the test credentials. */
    private static SimpleDB makeSimpleDB() throws Exception {
        return new SimpleDB(TestUtils.getAccessKeyID(), TestUtils.getSecretAccessKey());
    }

    /** Creates a SimpleDB tap for the given scheme, domain and shard count using the test credentials. */
    private static Tap makeSimpleDBTap(SimpleDBScheme scheme, String domainName, int numShards) throws Exception {
        return new SimpleDBTap(scheme, TestUtils.getAccessKeyID(), TestUtils.getSecretAccessKey(),
                        domainName, numShards);
    }

    /** Connects source and sink through the given pipe and runs the resulting flow to completion. */
    private static void runFlow(Tap source, Tap sink, Pipe pipe) throws Exception {
        Flow flow = new FlowConnector().connect(source, sink, pipe);
        flow.complete();
    }

    /**
     * Writes the given tuples to the local tap, closing the collector even if a write fails.
     *
     * @param lfs local tap to write to
     * @param tuples tuples to write, in order
     */
    private static void writeTuples(Lfs lfs, Tuple... tuples) throws IOException {
        TupleEntryCollector writer = lfs.openForWrite(new JobConf());
        try {
            for (Tuple tuple : tuples) {
                writer.add(tuple);
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Reads every tuple from the tap, closing the iterator when done.
     *
     * @param tap tap to read from
     * @param expectedSize initial capacity hint for the returned list
     * @return copies of all tuples found in the tap, in iteration order
     */
    private static List<Tuple> readTuples(Tap tap, int expectedSize) throws IOException {
        List<Tuple> result = new ArrayList<Tuple>(expectedSize);
        TupleEntryIterator iter = tap.openForRead(new JobConf());
        try {
            while (iter.hasNext()) {
                // Copy the tuple, as the original code did, rather than keeping a
                // reference to whatever instance the iterator hands out.
                result.add(new Tuple(iter.next().getTuple()));
            }
        } finally {
            iter.close();
        }

        return result;
    }

    /**
     * Creates a local sequence file containing {@code numTuples} tuples. Every field of
     * tuple {@code i} holds the string {@code "<field name>-value-<i>"}.
     *
     * @param path local path of the sequence file (replaced if it already exists)
     * @param fields fields of the generated tuples
     * @param numTuples number of tuples to generate
     * @return the tap that was written to, ready to be used as a flow source
     */
    private Lfs makeSourceTuples(String path, Fields fields, int numTuples) throws IOException {
        Lfs lfs = new Lfs(new SequenceFile(fields), path, SinkMode.REPLACE);
        TupleEntryCollector writer = lfs.openForWrite(new JobConf());
        try {
            for (int i = 0; i < numTuples; i++) {
                Tuple t = new Tuple();
                for (int j = 0; j < fields.size(); j++) {
                    t.add(fields.get(j).toString() + "-value-" + i);
                }

                writer.add(t);
            }
        } finally {
            writer.close();
        }

        return lfs;
    }
}