package com.yahoo.glimmer.indexing.generator;
/*
* Copyright (c) 2012 Yahoo! Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
* See accompanying LICENSE file.
*/
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.hamcrest.BaseMatcher;
import org.hamcrest.Description;
import org.jmock.Expectations;
import org.jmock.Mockery;
import org.jmock.lib.legacy.ClassImposteriser;
import org.junit.Before;
import org.junit.Test;
import com.yahoo.glimmer.indexing.generator.TermValue.Type;
/**
 * Unit test for {@link TermReduce}.
 *
 * The test drives the reducer directly with hand-built {@link TermKey} /
 * {@link TermValue} groups and uses a jMock-mocked reducer context to check
 * which records the reducer writes.
 */
public class TermReduceTest {
    /** jMock mockery that owns the mocked reducer context and its expectations. */
    private Mockery mockery;
    /** Mocked Hadoop context handed to the reducer under test. */
    private Reducer<TermKey, TermValue, IntWritable, IndexRecordWriterValue>.Context reducerContext;

    /**
     * Creates a fresh mockery and a mocked reducer context before each test.
     * The class imposteriser is installed because the context is mocked from
     * a class literal rather than an interface.
     */
    @SuppressWarnings("unchecked")
    @Before
    public void before() throws IOException, URISyntaxException {
        mockery = new Mockery();
        mockery.setImposteriser(ClassImposteriser.INSTANCE);
        reducerContext = mockery.mock(Context.class, "reducerContext");
    }

    /**
     * Reduces three terms (each followed by its alignment group, key index -1)
     * plus one group of document sizes, and verifies the records written to
     * the context.
     */
    @Test
    public void treeTermsTest() throws Exception {
        mockery.checking(new Expectations() {{
            allowing(reducerContext).setStatus(with(any(String.class)));

            // term1 in index 0: term record followed by one record per document.
            one(reducerContext).write(
                    with(new IntWritable(0)),
                    with(new IndexRecordWriterTermValueMatcher("term1", 3, 6, 15 + 12 + 18)));
            one(reducerContext).write(
                    with(new IntWritable(0)),
                    with(new IndexRecordWriterDocValueMatcher(3, 11, 15)));
            one(reducerContext).write(
                    with(new IntWritable(0)),
                    with(new IndexRecordWriterDocValueMatcher(4, 12)));
            one(reducerContext).write(
                    with(new IntWritable(0)),
                    with(new IndexRecordWriterDocValueMatcher(7, 14, 17, 18)));

            // Alignment for term1, without counts or positions.
            one(reducerContext).write(
                    with(new IntWritable(-1)),
                    with(new IndexRecordWriterTermValueMatcher("term1", 1, 0, 0)));
            one(reducerContext).write(
                    with(new IntWritable(-1)),
                    with(new IndexRecordWriterDocValueMatcher(0)));

            // term2 in index 1.
            one(reducerContext).write(
                    with(new IntWritable(1)),
                    with(new IndexRecordWriterTermValueMatcher("term2", 2, 4, 35)));
            one(reducerContext).write(
                    with(new IntWritable(1)),
                    with(new IndexRecordWriterDocValueMatcher(1, 10, 19)));
            one(reducerContext).write(
                    with(new IntWritable(1)),
                    with(new IndexRecordWriterDocValueMatcher(7, 13, 16)));

            // Alignment for term2, without counts or positions.
            one(reducerContext).write(
                    with(new IntWritable(-1)),
                    with(new IndexRecordWriterTermValueMatcher("term2", 1, 0, 0)));
            one(reducerContext).write(
                    with(new IntWritable(-1)),
                    with(new IndexRecordWriterDocValueMatcher(1)));

            // term3 in index 0.
            one(reducerContext).write(
                    with(new IntWritable(0)),
                    with(new IndexRecordWriterTermValueMatcher("term3", 1, 2, 7)));
            one(reducerContext).write(
                    with(new IntWritable(0)),
                    with(new IndexRecordWriterDocValueMatcher(2, 5, 7)));

            // term3 in index 1.
            one(reducerContext).write(
                    with(new IntWritable(1)),
                    with(new IndexRecordWriterTermValueMatcher("term3", 1, 2, 11)));
            one(reducerContext).write(
                    with(new IntWritable(1)),
                    with(new IndexRecordWriterDocValueMatcher(2, 10, 11)));

            // Alignment for term3, without counts or positions.
            one(reducerContext).write(
                    with(new IntWritable(-1)),
                    with(new IndexRecordWriterTermValueMatcher("term3", 2, 0, 0)));
            one(reducerContext).write(
                    with(new IntWritable(-1)),
                    with(new IndexRecordWriterDocValueMatcher(0)));
            one(reducerContext).write(
                    with(new IntWritable(-1)),
                    with(new IndexRecordWriterDocValueMatcher(1)));

            // Doc sizes, written under index 2.
            one(reducerContext).write(
                    with(new IntWritable(2)),
                    with(new IndexRecordWriterSizeValueMatcher(20, 4)));
            one(reducerContext).write(
                    with(new IntWritable(2)),
                    with(new IndexRecordWriterSizeValueMatcher(24, 3)));
            one(reducerContext).write(
                    with(new IntWritable(2)),
                    with(new IndexRecordWriterSizeValueMatcher(27, 2)));
        }});

        TermReduce reducer = new TermReduce();
        reducer.setup(reducerContext);

        // term1, index 0: three stats entries followed by six occurrences.
        reduce(reducer, new TermKey("term1", 0, null),
                new TermValue(Type.TERM_STATS, 2, 15),
                new TermValue(Type.TERM_STATS, 1, 12),
                new TermValue(Type.TERM_STATS, 3, 18),
                new TermValue(Type.OCCURRENCE, 3, 11),
                new TermValue(Type.OCCURRENCE, 3, 15),
                new TermValue(Type.OCCURRENCE, 4, 12),
                new TermValue(Type.OCCURRENCE, 7, 14),
                new TermValue(Type.OCCURRENCE, 7, 17),
                new TermValue(Type.OCCURRENCE, 7, 18));

        // Alignment. Term1 is indexed in predicate id (index id) 0.
        reduce(reducer, new TermKey("term1", -1, null),
                new TermValue(Type.INDEX_ID, 0),
                new TermValue(Type.INDEX_ID, 0),
                new TermValue(Type.INDEX_ID, 0));

        // term2, index 1.
        reduce(reducer, new TermKey("term2", 1, null),
                new TermValue(Type.TERM_STATS, 2, 19),
                new TermValue(Type.TERM_STATS, 2, 16),
                new TermValue(Type.OCCURRENCE, 1, 10),
                new TermValue(Type.OCCURRENCE, 1, 19),
                new TermValue(Type.OCCURRENCE, 7, 13),
                new TermValue(Type.OCCURRENCE, 7, 16));

        // Alignment. Term2 is indexed in predicate id (index id) 1.
        reduce(reducer, new TermKey("term2", -1, null),
                new TermValue(Type.INDEX_ID, 1),
                new TermValue(Type.INDEX_ID, 1));

        // term3, index 0.
        reduce(reducer, new TermKey("term3", 0, null),
                new TermValue(Type.TERM_STATS, 2, 7),
                new TermValue(Type.OCCURRENCE, 2, 5),
                new TermValue(Type.OCCURRENCE, 2, 7));

        // term3, index 1.
        reduce(reducer, new TermKey("term3", 1, null),
                new TermValue(Type.TERM_STATS, 2, 11),
                new TermValue(Type.OCCURRENCE, 2, 10),
                new TermValue(Type.OCCURRENCE, 2, 11));

        // Alignment. Term3 is indexed in predicate ids (index ids) 0 & 1.
        reduce(reducer, new TermKey("term3", -1, null),
                new TermValue(Type.INDEX_ID, 0),
                new TermValue(Type.INDEX_ID, 0),
                new TermValue(Type.INDEX_ID, 1),
                new TermValue(Type.INDEX_ID, 1));

        // Doc sizes.
        reduce(reducer, new TermKey(TermKey.DOC_SIZE_TERM, 2, null),
                new TermValue(Type.DOC_SIZE, 20, 4),
                new TermValue(Type.DOC_SIZE, 24, 3),
                new TermValue(Type.DOC_SIZE, 27, 2));

        mockery.assertIsSatisfied();
    }

    /**
     * Collects the given values, in order, into a list and passes them to the
     * reducer under the given key, using the mocked context.
     *
     * @param reducer    the reducer under test
     * @param key        the key the values are grouped under
     * @param termValues the values for that key, in the order they are to be reduced
     */
    private void reduce(TermReduce reducer, TermKey key, TermValue... termValues) throws Exception {
        ArrayList<TermValue> group = new ArrayList<TermValue>();
        for (TermValue termValue : termValues) {
            group.add(termValue);
        }
        reducer.reduce(key, group, reducerContext);
    }

    /**
     * Base matcher that accepts an object when it is {@code equals()} to a
     * pre-built expected value and describes itself with that value's
     * {@code toString()}.
     */
    private abstract static class ExpectedValueMatcher<T> extends BaseMatcher<T> {
        private final T expected;

        ExpectedValueMatcher(T expected) {
            this.expected = expected;
        }

        @Override
        public boolean matches(Object actual) {
            return expected.equals(actual);
        }

        @Override
        public void describeTo(Description description) {
            description.appendText(expected.toString());
        }
    }

    /** Matches an {@link IndexRecordWriterTermValue} equal to one built from the given fields. */
    private static class IndexRecordWriterTermValueMatcher extends ExpectedValueMatcher<IndexRecordWriterTermValue> {
        public IndexRecordWriterTermValueMatcher(String term, int termFrequency, int occurrenceCount, long sumOfMaxTermPositions) {
            super(expectedTermValue(term, termFrequency, occurrenceCount, sumOfMaxTermPositions));
        }

        private static IndexRecordWriterTermValue expectedTermValue(String term, int termFrequency, int occurrenceCount,
                long sumOfMaxTermPositions) {
            IndexRecordWriterTermValue expected = new IndexRecordWriterTermValue();
            expected.setTerm(term);
            expected.setTermFrequency(termFrequency);
            expected.setOccurrenceCount(occurrenceCount);
            expected.setSumOfMaxTermPositions(sumOfMaxTermPositions);
            return expected;
        }
    }

    /** Matches an {@link IndexRecordWriterSizeValue} equal to one built from the given document and size. */
    private static class IndexRecordWriterSizeValueMatcher extends ExpectedValueMatcher<IndexRecordWriterSizeValue> {
        public IndexRecordWriterSizeValueMatcher(long document, int size) {
            super(expectedSizeValue(document, size));
        }

        private static IndexRecordWriterSizeValue expectedSizeValue(long document, int size) {
            IndexRecordWriterSizeValue expected = new IndexRecordWriterSizeValue();
            expected.setDocument(document);
            expected.setSize(size);
            return expected;
        }
    }

    /**
     * Matches an {@link IndexRecordWriterDocValue} equal to one built from the
     * given document and occurrences (added in the order given).
     */
    private static class IndexRecordWriterDocValueMatcher extends ExpectedValueMatcher<IndexRecordWriterDocValue> {
        public IndexRecordWriterDocValueMatcher(long document, int ... occurrences) {
            super(expectedDocValue(document, occurrences));
        }

        private static IndexRecordWriterDocValue expectedDocValue(long document, int[] occurrences) {
            // NOTE(review): the constructor argument 1 is presumably an initial capacity - confirm.
            IndexRecordWriterDocValue expected = new IndexRecordWriterDocValue(1);
            expected.setDocument(document);
            for (int occurrence : occurrences) {
                expected.addOccurrence(occurrence);
            }
            return expected;
        }
    }
}