package com.yahoo.glimmer.indexing.preprocessor;
/*
* Copyright (c) 2012 Yahoo! Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
* See accompanying LICENSE file.
*/
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapreduce.Reducer;
import org.hamcrest.BaseMatcher;
import org.hamcrest.Description;
import org.jmock.Expectations;
import org.jmock.Mockery;
import org.jmock.Sequence;
import org.jmock.lib.legacy.ClassImposteriser;
import org.junit.Before;
import org.junit.Test;
import com.yahoo.glimmer.indexing.preprocessor.ResourceRecordWriter.OUTPUT;
import com.yahoo.glimmer.indexing.preprocessor.ResourceRecordWriter.OutputCount;
import com.yahoo.glimmer.util.BySubjectRecord;
import com.yahoo.glimmer.util.BySubjectRecord.BySubjectRecordException;
public class ResourcesReducerTest {
// jMock mockery that creates the mock reducer context and verifies its expectations.
private Mockery context;
// Mocked Hadoop reducer context handed to ResourcesReducer.reduce() in every test.
private Reducer<Text, Text, Text, Object>.Context mrContext;
// Real counters returned by the mocked getCounter() calls so each test can assert on
// the final value. Each one is paired with the ResourcesReducer.Counters constant noted.
// Returned for Counters.DUPLICATE_RELATIONS.
private Counter duplicateMatchCounter;
// Returned for Counters.KEYS.
private Counter keysCounter;
// Returned for Counters.VALUES.
private Counter valuesCounter;
// Returned for Counters.KEY_SUBJECT.
private Counter keySubjectCounter;
// Returned for Counters.KEY_PREDICATE.
private Counter keyPredicateCounter;
// Returned for Counters.KEY_OBJECT.
private Counter keyObjectCounter;
// Returned for Counters.KEY_CONTEXT.
private Counter keyContextCounter;
/**
 * Hamcrest matcher that accepts an {@link OutputCount} only when both its
 * output kind and its count equal the values given at construction.
 */
public static class OutputCountMatcher extends BaseMatcher<OutputCount> {
    private final OUTPUT expectedOutput;
    private final int expectedCount;

    /**
     * @param output the output kind the matched object must carry
     * @param count the count the matched object must carry
     */
    public OutputCountMatcher(OUTPUT output, int count) {
        this.expectedOutput = output;
        this.expectedCount = count;
    }

    /**
     * @return true only for an OutputCount whose output and count both equal the expected values
     */
    @Override
    public boolean matches(Object object) {
        // Anything that is not an OutputCount (including null) can never match.
        if (!(object instanceof OutputCount)) {
            return false;
        }
        final OutputCount candidate = (OutputCount) object;
        return candidate.output == expectedOutput && candidate.count == expectedCount;
    }

    @Override
    public void describeTo(Description desc) {
        desc.appendText("OutputCount matching " + expectedOutput + ", " + expectedCount);
    }
}
/**
 * Hamcrest matcher that compares a candidate object against a
 * {@link BySubjectRecord} parsed from its textual form via {@link #set(String)}.
 */
public static class BySubjectRecordMatcher extends BaseMatcher<BySubjectRecord> {
    private BySubjectRecord expectedRecord;

    /**
     * Parses {@code string} into the record this matcher compares against.
     *
     * @param string textual record, passed as bytes to {@code BySubjectRecord.readFrom}
     * @return this matcher, so it can be configured inline inside an expectation
     * @throws BySubjectRecordException propagated from {@code readFrom}
     */
    public BySubjectRecordMatcher set(String string) throws BySubjectRecordException {
        // Encode with a fixed charset instead of the platform default so the test
        // behaves the same on every machine. UTF-8 is what Hadoop's Text uses;
        // NOTE(review): assumed to be what readFrom expects - confirm.
        byte[] bytes = string.getBytes(java.nio.charset.Charset.forName("UTF-8"));
        // Parse into a local first so a failed parse never leaves a half-read
        // record installed as the expected value.
        BySubjectRecord parsed = new BySubjectRecord();
        parsed.readFrom(bytes, 0, bytes.length);
        expectedRecord = parsed;
        return this;
    }

    /**
     * @return true when an expected record has been set and it equals {@code object}
     */
    @Override
    public boolean matches(Object object) {
        // A matcher that was never configured matches nothing rather than
        // failing with a NullPointerException inside the mock dispatch.
        return expectedRecord != null && expectedRecord.equals(object);
    }

    @Override
    public void describeTo(Description desc) {
        desc.appendText("BySubjectRecord matching " + expectedRecord);
    }
}
/**
 * Creates a fresh class-imposterising mockery, the mock reducer context and
 * a new set of counters before every test.
 */
@SuppressWarnings("unchecked")
@Before
public void before() {
    final Mockery mockery = new Mockery();
    // ClassImposteriser is needed because Reducer.Context is a class, not an interface.
    mockery.setImposteriser(ClassImposteriser.INSTANCE);
    context = mockery;
    mrContext = mockery.mock(Reducer.Context.class, "mrContext");

    // One real counter per ResourcesReducer.Counters value the tests inspect.
    keysCounter = new Counter();
    valuesCounter = new Counter();
    duplicateMatchCounter = new Counter();
    keySubjectCounter = new Counter();
    keyPredicateCounter = new Counter();
    keyObjectCounter = new Counter();
    keyContextCounter = new Counter();
}
/**
 * Reduces a single subject key whose five values are relation lines, the
 * last two identical. Expects one OUTPUT.ALL write and one BySubjectRecord
 * write holding the four distinct relations, with the repeated relation
 * tallied in DUPLICATE_RELATIONS.
 */
@Test
public void subjectText() throws IOException, InterruptedException, BySubjectRecordException {
    context.checking(new Expectations() {
        {
            one(mrContext).getCounter(ResourcesReducer.Counters.KEYS);
            will(returnValue(keysCounter));
            one(mrContext).write(with(new TextMatcher("http://some/subject/uri")), with(new OutputCountMatcher(OUTPUT.ALL, 0)));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.VALUES);
            will(returnValue(valuesCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_SUBJECT);
            will(returnValue(keySubjectCounter));
            one(mrContext).write(
                    with(new TextMatcher("http://some/subject/uri")),
                    with(new BySubjectRecordMatcher().set("0\t-1\thttp://some/subject/uri\t"
                            + "<http://some/predicate/uri/1> <http://some/object/uri1> <http://some/context/uri1> .\t"
                            + "<http://some/predicate/uri/2> <http://some/object/uri2> <http://some/context/uri2> .\t"
                            + "<http://some/predicate/uri/3> \"Some literal value\" <http://some/context/uri3> .\t"
                            + "<http://some/predicate/uri/4> \"Duplicate value\" <http://some/context/uri4> .\t")));
            one(mrContext).getCounter(ResourcesReducer.Counters.DUPLICATE_RELATIONS);
            will(returnValue(duplicateMatchCounter));
        }
    });

    ResourcesReducer reducer = new ResourcesReducer();
    Iterable<Text> values = new TextReuseIterable("<http://some/predicate/uri/1> <http://some/object/uri1> <http://some/context/uri1> .",
            "<http://some/predicate/uri/2> <http://some/object/uri2> <http://some/context/uri2> .",
            "<http://some/predicate/uri/3> \"Some literal value\" <http://some/context/uri3> .",
            "<http://some/predicate/uri/4> \"Duplicate value\" <http://some/context/uri4> .",
            "<http://some/predicate/uri/4> \"Duplicate value\" <http://some/context/uri4> .");
    reducer.reduce(new Text("http://some/subject/uri"), values, mrContext);
    context.assertIsSatisfied();

    assertEquals(1L, duplicateMatchCounter.getValue());
    assertEquals(1L, keysCounter.getValue());
    assertEquals(5L, valuesCounter.getValue());
    assertEquals(4L, keySubjectCounter.getValue());
    assertEquals(0L, keyPredicateCounter.getValue());
    assertEquals(0L, keyObjectCounter.getValue());
    // No KEY_CONTEXT lookup is expected on the mock, so this counter must stay untouched.
    assertEquals(0L, keyContextCounter.getValue());
}
/**
 * Reduces a key whose two values are both the "PREDICATE" marker. Expects an
 * OUTPUT.ALL write followed by an OUTPUT.PREDICATE write with a count of 2,
 * and only the KEYS, VALUES and KEY_PREDICATE counters to move.
 */
@Test
public void predicateText() throws IOException, InterruptedException {
    context.checking(new Expectations() {
        {
            one(mrContext).getCounter(ResourcesReducer.Counters.KEYS);
            will(returnValue(keysCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.VALUES);
            will(returnValue(valuesCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_PREDICATE);
            will(returnValue(keyPredicateCounter));
            one(mrContext).write(with(new TextMatcher("http://some/resource/uri")), with(new OutputCountMatcher(OUTPUT.ALL, 0)));
            one(mrContext).write(with(new TextMatcher("http://some/resource/uri")), with(new OutputCountMatcher(OUTPUT.PREDICATE, 2)));
        }
    });

    ResourcesReducer reducer = new ResourcesReducer();
    Iterable<Text> values = new TextReuseIterable("PREDICATE", "PREDICATE");
    reducer.reduce(new Text("http://some/resource/uri"), values, mrContext);
    context.assertIsSatisfied();

    assertEquals(0L, duplicateMatchCounter.getValue());
    assertEquals(1L, keysCounter.getValue());
    assertEquals(2L, valuesCounter.getValue());
    assertEquals(0L, keySubjectCounter.getValue());
    assertEquals(2L, keyPredicateCounter.getValue());
    assertEquals(0L, keyObjectCounter.getValue());
    // No KEY_CONTEXT lookup is expected on the mock, so this counter must stay untouched.
    assertEquals(0L, keyContextCounter.getValue());
}
/**
 * Reduces a key whose two values are both the "OBJECT" marker and checks for
 * an OUTPUT.ALL write plus an OUTPUT.OBJECT write with a count of 2.
 */
@Test
public void objectText() throws IOException, InterruptedException {
    final String resourceUri = "http://some/resource/uri";

    context.checking(new Expectations() {
        {
            one(mrContext).getCounter(ResourcesReducer.Counters.KEYS);
            will(returnValue(keysCounter));

            allowing(mrContext).getCounter(ResourcesReducer.Counters.VALUES);
            will(returnValue(valuesCounter));

            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_OBJECT);
            will(returnValue(keyObjectCounter));

            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_CONTEXT);
            will(returnValue(keyContextCounter));

            one(mrContext).write(with(new TextMatcher(resourceUri)), with(new OutputCountMatcher(OUTPUT.ALL, 0)));
            one(mrContext).write(with(new TextMatcher(resourceUri)), with(new OutputCountMatcher(OUTPUT.OBJECT, 2)));
        }
    });

    final ResourcesReducer underTest = new ResourcesReducer();
    underTest.reduce(new Text(resourceUri), new TextReuseIterable("OBJECT", "OBJECT"), mrContext);
    context.assertIsSatisfied();

    // Only the key, value and object tallies may have moved.
    assertEquals(0L, duplicateMatchCounter.getValue());
    assertEquals(1L, keysCounter.getValue());
    assertEquals(2L, valuesCounter.getValue());
    assertEquals(0L, keySubjectCounter.getValue());
    assertEquals(0L, keyPredicateCounter.getValue());
    assertEquals(2L, keyObjectCounter.getValue());
    assertEquals(0L, keyContextCounter.getValue());
}
/**
 * Reduces a key whose four values are all the "CONTEXT" marker and checks for
 * an OUTPUT.ALL write plus an OUTPUT.CONTEXT write with a count of 4.
 */
@Test
public void contextText() throws IOException, InterruptedException {
    final String resourceUri = "http://some/resource/uri";

    context.checking(new Expectations() {
        {
            one(mrContext).getCounter(ResourcesReducer.Counters.KEYS);
            will(returnValue(keysCounter));

            allowing(mrContext).getCounter(ResourcesReducer.Counters.VALUES);
            will(returnValue(valuesCounter));

            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_CONTEXT);
            will(returnValue(keyContextCounter));

            one(mrContext).write(with(new TextMatcher(resourceUri)), with(new OutputCountMatcher(OUTPUT.ALL, 0)));
            one(mrContext).write(with(new TextMatcher(resourceUri)), with(new OutputCountMatcher(OUTPUT.CONTEXT, 4)));
        }
    });

    final ResourcesReducer underTest = new ResourcesReducer();
    underTest.reduce(new Text(resourceUri), new TextReuseIterable("CONTEXT", "CONTEXT", "CONTEXT", "CONTEXT"), mrContext);
    context.assertIsSatisfied();

    // Only the key, value and context tallies may have moved.
    assertEquals(0L, duplicateMatchCounter.getValue());
    assertEquals(1L, keysCounter.getValue());
    assertEquals(4L, valuesCounter.getValue());
    assertEquals(0L, keySubjectCounter.getValue());
    assertEquals(0L, keyPredicateCounter.getValue());
    assertEquals(0L, keyObjectCounter.getValue());
    assertEquals(4L, keyContextCounter.getValue());
}
/**
 * Reduces two keys back to back with the same reducer instance. The first
 * key mixes relation lines with PREDICATE, OBJECT and CONTEXT markers; the
 * second carries relation lines only. All writes are required to arrive in
 * the declared sequence.
 */
@Test
public void predicateObectContextText() throws IOException, InterruptedException, BySubjectRecordException {
    final String firstUri = "http://some/resource/uri1";
    final String secondUri = "http://some/resource/uri2";

    // Expected subject records, built up front to keep the expectation block readable.
    final BySubjectRecordMatcher firstRecord = new BySubjectRecordMatcher()
            .set("0\t-1\thttp://some/resource/uri1\t<http://predicate1> <http://object1> <context> ."
                    + "\t<http://predicate2> <http://object2> <context> .\t");
    final BySubjectRecordMatcher secondRecord = new BySubjectRecordMatcher()
            .set("1\t0\thttp://some/resource/uri2\t<http://predicateX> <http://objectX> <context> ."
                    + "\t<http://predicateY> <http://objectY> <context> .\t");

    final Sequence writeOrder = context.sequence("sequence");

    context.checking(new Expectations() {
        {
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEYS);
            will(returnValue(keysCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.VALUES);
            will(returnValue(valuesCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_SUBJECT);
            will(returnValue(keySubjectCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_PREDICATE);
            will(returnValue(keyPredicateCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_OBJECT);
            will(returnValue(keyObjectCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_CONTEXT);
            will(returnValue(keyContextCounter));

            // Writes for the first key.
            one(mrContext).write(with(new TextMatcher(firstUri)), with(new OutputCountMatcher(OUTPUT.ALL, 0)));
            inSequence(writeOrder);
            one(mrContext).write(with(new TextMatcher(firstUri)), with(firstRecord));
            inSequence(writeOrder);
            one(mrContext).write(with(new TextMatcher(firstUri)), with(new OutputCountMatcher(OUTPUT.PREDICATE, 1)));
            inSequence(writeOrder);
            one(mrContext).write(with(new TextMatcher(firstUri)), with(new OutputCountMatcher(OUTPUT.OBJECT, 1)));
            inSequence(writeOrder);
            one(mrContext).write(with(new TextMatcher(firstUri)), with(new OutputCountMatcher(OUTPUT.CONTEXT, 1)));
            inSequence(writeOrder);

            // Writes for the second key.
            one(mrContext).write(with(new TextMatcher(secondUri)), with(new OutputCountMatcher(OUTPUT.ALL, 0)));
            inSequence(writeOrder);
            one(mrContext).write(with(new TextMatcher(secondUri)), with(secondRecord));
            inSequence(writeOrder);
        }
    });

    final ResourcesReducer underTest = new ResourcesReducer();

    final Iterable<Text> firstValues = new TextReuseIterable("PREDICATE", "<http://predicate1> <http://object1> <context> .",
            "<http://predicate2> <http://object2> <context> .", "OBJECT", "CONTEXT");
    underTest.reduce(new Text(firstUri), firstValues, mrContext);

    final Iterable<Text> secondValues = new TextReuseIterable("<http://predicateX> <http://objectX> <context> .",
            "<http://predicateY> <http://objectY> <context> .");
    underTest.reduce(new Text(secondUri), secondValues, mrContext);

    context.assertIsSatisfied();

    // Totals across both reduce calls.
    assertEquals(0L, duplicateMatchCounter.getValue());
    assertEquals(2L, keysCounter.getValue());
    assertEquals(7L, valuesCounter.getValue());
    assertEquals(4L, keySubjectCounter.getValue());
    assertEquals(1L, keyPredicateCounter.getValue());
    assertEquals(1L, keyObjectCounter.getValue());
    assertEquals(1L, keyContextCounter.getValue());
}
/**
 * Reduces three keys in turn with one reducer instance: a blank-node key that
 * is both a subject and an object, a context-only key, and a second
 * blank-node subject.
 */
@Test
public void subjectAndObjectText() throws IOException, InterruptedException, BySubjectRecordException {
    final String firstSubject = "bnodeSubject1";
    final String contextUri = "http://some/context/uri1";
    final String secondSubject = "bnodeSubject2";

    // Expected subject records, built before the expectations that use them.
    final BySubjectRecordMatcher firstRecord = new BySubjectRecordMatcher()
            .set("0\t-1\tbnodeSubject1\t<http://some/predicate/uri/1> <http://some/object/uri1> <http://some/context/uri1> .\t"
                    + "<http://some/predicate/uri/2> _:bnode2 <http://some/context/uri2> .\t");
    final BySubjectRecordMatcher secondRecord = new BySubjectRecordMatcher()
            .set("2\t0\tbnodeSubject2\t<http://some/predicate/uri/3> _:bnode3 <http://some/context/uri1> .\t");

    context.checking(new Expectations() {
        {
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEYS);
            will(returnValue(keysCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.VALUES);
            will(returnValue(valuesCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_SUBJECT);
            will(returnValue(keySubjectCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_OBJECT);
            will(returnValue(keyObjectCounter));
            allowing(mrContext).getCounter(ResourcesReducer.Counters.KEY_CONTEXT);
            will(returnValue(keyContextCounter));

            // First blank-node key: subject record plus an object count.
            one(mrContext).write(with(new TextMatcher(firstSubject)), with(new OutputCountMatcher(OUTPUT.ALL, 0)));
            one(mrContext).write(with(new TextMatcher(firstSubject)), with(firstRecord));
            one(mrContext).write(with(new TextMatcher(firstSubject)), with(new OutputCountMatcher(OUTPUT.OBJECT, 2)));

            // Context-only key.
            one(mrContext).write(with(new TextMatcher(contextUri)), with(new OutputCountMatcher(OUTPUT.ALL, 0)));
            one(mrContext).write(with(new TextMatcher(contextUri)), with(new OutputCountMatcher(OUTPUT.CONTEXT, 1)));

            // Second blank-node key: subject record only.
            one(mrContext).write(with(new TextMatcher(secondSubject)), with(new OutputCountMatcher(OUTPUT.ALL, 0)));
            one(mrContext).write(with(new TextMatcher(secondSubject)), with(secondRecord));
        }
    });

    final ResourcesReducer underTest = new ResourcesReducer();

    final Iterable<Text> firstValues = new TextReuseIterable("<http://some/predicate/uri/1> <http://some/object/uri1> <http://some/context/uri1> .",
            "OBJECT", "<http://some/predicate/uri/2> _:bnode2 <http://some/context/uri2> .", "OBJECT");
    underTest.reduce(new Text(firstSubject), firstValues, mrContext);

    underTest.reduce(new Text(contextUri), new TextReuseIterable("CONTEXT"), mrContext);

    final Iterable<Text> secondValues = new TextReuseIterable("<http://some/predicate/uri/3> _:bnode3 <http://some/context/uri1> .");
    underTest.reduce(new Text(secondSubject), secondValues, mrContext);

    context.assertIsSatisfied();

    // Totals across all three reduce calls.
    assertEquals(0L, duplicateMatchCounter.getValue());
    assertEquals(3L, keysCounter.getValue());
    assertEquals(6L, valuesCounter.getValue());
    assertEquals(3L, keySubjectCounter.getValue());
    assertEquals(0L, keyPredicateCounter.getValue());
    assertEquals(2L, keyObjectCounter.getValue());
    assertEquals(1L, keyContextCounter.getValue());
}
/**
 * Iterable whose iterators hand back the same {@link Text} instance from
 * every call to next(), refilled with the next string each time. Object
 * reuse like this is common in Hadoop; this class simulates it for the
 * reducer under test.
 *
 * @author tep
 */
private static class TextReuseIterable implements Iterable<Text> {
    private final List<String> strings;

    public TextReuseIterable(String... strings) {
        this.strings = Arrays.asList(strings);
    }

    /**
     * @return a new iterator over the strings, with its own reused Text instance
     */
    @Override
    public Iterator<Text> iterator() {
        // Both pieces of state are created per iterator() call and captured below.
        final Iterator<String> source = strings.iterator();
        final Text shared = new Text();

        return new Iterator<Text>() {
            @Override
            public boolean hasNext() {
                return source.hasNext();
            }

            @Override
            public Text next() {
                // Overwrite the shared instance instead of allocating a new Text.
                shared.set(source.next());
                return shared;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }
}
}