/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.clustering.spectral.common;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.clustering.spectral.eigencuts.EigencutsKeys;
import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.DistributedRowMatrix.MatrixEntryWritable;
import org.junit.Test;
/**
* <p>Tests the affinity matrix input M/R task.</p>
*
* <p>The tricky item with this task is that the format of the input
* must be correct; it must take the form of a graph input, and for the
* current implementation, the input must be symmetric, i.e. the weight
* from node A to B = the weight from node B to A. This is not explicitly
* enforced within the task itself (since, as of the time these tests were
* written, we have not yet decided on a final rule regarding the
* symmetry/non-symmetry of the affinity matrix, so we are unofficially
* enforcing symmetry). Input looks something like this:</p>
*
* <pre>0, 0, 0
* 0, 1, 10
* 0, 2, 20
* ...
* 1, 0, 10
* 2, 0, 20
* ...</pre>
*
* <p>The mapper's task is simply to convert each line of text into a
* DistributedRowMatrix entry, allowing the reducer to join each entry
* of the same row into a VectorWritable.</p>
*
* <p>Exceptions are thrown in cases of bad input format: if there are
* more or fewer than 3 numbers per line, or any of the numbers are missing.</p>
*/
public class TestAffinityMatrixInputJob extends MahoutTestCase {
private static final String [] RAW = {"0,0,0", "0,1,5", "0,2,10", "1,0,5", "1,1,0",
"1,2,20", "2,0,10", "2,1,20", "2,2,0"};
private static final int RAW_DIMENSIONS = 3;
@Test
public void testAffinityMatrixInputMapper() throws Exception {
AffinityMatrixInputMapper mapper = new AffinityMatrixInputMapper();
Configuration conf = new Configuration();
conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, RAW_DIMENSIONS);
// set up the dummy writer and the M/R context
DummyRecordWriter<IntWritable, MatrixEntryWritable> writer =
new DummyRecordWriter<IntWritable, MatrixEntryWritable>();
Mapper<LongWritable, Text, IntWritable, MatrixEntryWritable>.Context
context = DummyRecordWriter.build(mapper, conf, writer);
// loop through all the points and test each one is converted
// successfully to a DistributedRowMatrix.MatrixEntry
for (String s : RAW) {
mapper.map(new LongWritable(), new Text(s), context);
}
// test the data was successfully constructed
assertEquals("Number of map results", RAW_DIMENSIONS, writer.getData().size());
Set<IntWritable> keys = writer.getData().keySet();
for (IntWritable i : keys) {
List<MatrixEntryWritable> row = writer.getData().get(i);
assertEquals("Number of items in row", RAW_DIMENSIONS, row.size());
}
}
@Test
public void testAffinitymatrixInputReducer() throws Exception {
AffinityMatrixInputMapper mapper = new AffinityMatrixInputMapper();
Configuration conf = new Configuration();
conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, RAW_DIMENSIONS);
// set up the dummy writer and the M/R context
DummyRecordWriter<IntWritable, MatrixEntryWritable> mapWriter =
new DummyRecordWriter<IntWritable, MatrixEntryWritable>();
Mapper<LongWritable, Text, IntWritable, MatrixEntryWritable>.Context
mapContext = DummyRecordWriter.build(mapper, conf, mapWriter);
// loop through all the points and test each one is converted
// successfully to a DistributedRowMatrix.MatrixEntry
for (String s : RAW) {
mapper.map(new LongWritable(), new Text(s), mapContext);
}
// store the data for checking later
Map<IntWritable, List<MatrixEntryWritable>> map = mapWriter.getData();
// now reduce the data
AffinityMatrixInputReducer reducer = new AffinityMatrixInputReducer();
DummyRecordWriter<IntWritable, VectorWritable> redWriter =
new DummyRecordWriter<IntWritable, VectorWritable>();
Reducer<IntWritable, MatrixEntryWritable,
IntWritable, VectorWritable>.Context redContext = DummyRecordWriter
.build(reducer, conf, redWriter, IntWritable.class, MatrixEntryWritable.class);
for (IntWritable key : mapWriter.getKeys()) {
reducer.reduce(key, mapWriter.getValue(key), redContext);
}
// check that all the elements are correctly ordered
assertEquals("Number of reduce results", RAW_DIMENSIONS, redWriter.getData().size());
for (IntWritable row : redWriter.getKeys()) {
List<VectorWritable> list = redWriter.getValue(row);
assertEquals("Should only be one vector", 1, list.size());
// check that the elements in the array are correctly ordered
Vector v = list.get(0).get();
for (Vector.Element e : v) {
// find this value in the original map
MatrixEntryWritable toCompare = new MatrixEntryWritable();
toCompare.setRow(-1);
toCompare.setCol(e.index());
toCompare.setVal(e.get());
assertTrue("This entry was correctly placed in its row", map.get(row).contains(toCompare));
}
}
}
}