/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.vectorizer.collocations.llr;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.stats.LogLikelihood;
import org.apache.mahout.vectorizer.collocations.llr.LLRReducer.LLCallback;
import org.easymock.EasyMock;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Unit test for {@link LLRReducer}.
 *
 * <p>The reducer is driven with a mocked Hadoop {@code Reducer.Context} and a mocked
 * {@link LLCallback}, so the test can check both the contingency-table counts handed to
 * the log-likelihood callback and the keys written to the context.
 *
 * <p>TODO Add negative test cases.
 */
public final class LLRReducerTest extends MahoutTestCase {

  private static final Logger log =
      LoggerFactory.getLogger(LLRReducerTest.class);

  /** Mocked reducer context; records the expected configuration lookup and writes. */
  private Reducer<Gram, Gram, Text, DoubleWritable>.Context context;

  /** Mocked callback on which the expected (k11, k12, k21, k22) calls are recorded. */
  private LLCallback ll;

  /** Real callback the mock delegates to; logs its arguments and computes the actual LLR. */
  private LLCallback cl;

  /**
   * Creates fresh mocks for the context and the log-likelihood callback, plus the
   * logging delegate that produces real log-likelihood values.
   */
  @Override
  @Before
  @SuppressWarnings("unchecked") // mocking the raw Reducer.Context class requires an unchecked conversion
  public void setUp() throws Exception {
    super.setUp();
    context = EasyMock.createMock(Reducer.Context.class);
    ll = EasyMock.createMock(LLCallback.class);
    cl = new LLCallback() {
      @Override
      public double logLikelihoodRatio(long k11, long k12, long k21, long k22) {
        log.info("k11:{} k12:{} k21:{} k22:{}", new Object[] {k11, k12, k21, k22});
        return LogLikelihood.logLikelihoodRatio(k11, k12, k21, k22);
      }
    };
  }

  /**
   * Feeds six n-grams (each with its head and tail sub-gram) through the reducer and
   * checks that the callback receives the expected counts and that one record per n-gram
   * is written to the context.
   */
  @Test
  public void testReduce() throws Exception {
    LLRReducer reducer = new LLRReducer(ll);

    // test input, input[*][0] is the key,
    // input[*][1..n] are the values passed in via
    // the iterator.
    Gram[][] input = {
        {new Gram("the best", 1, Gram.Type.NGRAM), new Gram("the", 2, Gram.Type.HEAD), new Gram("best", 1, Gram.Type.TAIL) },
        {new Gram("best of", 1, Gram.Type.NGRAM), new Gram("best", 1, Gram.Type.HEAD), new Gram("of", 2, Gram.Type.TAIL) },
        {new Gram("of times", 2, Gram.Type.NGRAM), new Gram("of", 2, Gram.Type.HEAD), new Gram("times", 2, Gram.Type.TAIL) },
        {new Gram("times the", 1, Gram.Type.NGRAM), new Gram("times", 1, Gram.Type.HEAD), new Gram("the", 1, Gram.Type.TAIL) },
        {new Gram("the worst", 1, Gram.Type.NGRAM), new Gram("the", 2, Gram.Type.HEAD), new Gram("worst", 1, Gram.Type.TAIL) },
        {new Gram("worst of", 1, Gram.Type.NGRAM), new Gram("worst", 1, Gram.Type.HEAD), new Gram("of", 2, Gram.Type.TAIL) }
    };

    // Expected callback arguments, one row per input row, in (k11, k12, k21, k22) order.
    int[][] expectations = {
        // A+B, A+!B, !A+B, !A+!B
        {1, 1, 0, 5}, // the best
        {1, 0, 1, 5}, // best of
        {2, 0, 0, 5}, // of times
        {1, 0, 0, 6}, // times the
        {1, 1, 0, 5}, // the worst
        {1, 0, 1, 5}  // worst of
    };

    Configuration config = new Configuration();
    config.set(LLRReducer.NGRAM_TOTAL, "7");
    EasyMock.expect(context.getConfiguration()).andReturn(config);

    // Record, per n-gram, one write keyed by the n-gram text (any score accepted) and one
    // callback invocation with the expected counts, answered by the real implementation.
    for (int i = 0; i < expectations.length; i++) {
      int[] expected = expectations[i];
      context.write(EasyMock.eq(new Text(input[i][0].getString())), (DoubleWritable) EasyMock.anyObject());
      EasyMock.expect(ll.logLikelihoodRatio(expected[0], expected[1], expected[2], expected[3])).andDelegateTo(cl);
    }

    EasyMock.replay(context, ll);

    reducer.setup(context);
    for (Gram[] row : input) {
      // Everything after the key is handed to the reducer as its values.
      Collection<Gram> values = new LinkedList<Gram>(Arrays.asList(row).subList(1, row.length));
      reducer.reduce(row[0], values, context);
    }

    // Verify the context as well as the callback: otherwise the recorded write()
    // expectations are never enforced and a reducer that emits nothing would still pass.
    EasyMock.verify(context, ll);
  }
}