/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.clustering.spectral.eigencuts;

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.junit.Test;

/**
 * <p>Tests the Eigencuts M/R process for generating perturbation sensitivities
 * in a weighted graph.</p>
 *
 * <p>This process requires a lot of inputs. Please read the
 * EigencutsSensitivityJob javadocs for more information on these variables.
 * For now, the hard-coded fixtures below describe a small four-node graph:
 * its diagonal degree vector, eigenvectors, and eigenvalues (the affinity
 * matrix they were derived from is retained in the comment below).</p>
 */
public class TestEigencutsSensitivityJob extends MahoutTestCase {

  /*
  private final double [][] affinity = {
      {0, 0.9748, 0.6926, 0.6065},
      {0.9748, 0, 0.7178, 0.6350},
      {0.6926, 0.7178, 0, 0.9898},
      {0.6065, 0.6350, 0.9898, 0}};
  */

  private final double [] diagonal = {2.2739, 2.3276, 2.4002, 2.2313};

  private final double [][] eigenvectors = {
      {-0.4963, -0.5021, -0.5099, -0.4916},
      {-0.5143, -0.4841, 0.4519, 0.5449},
      {-0.6858, 0.7140, -0.1146, 0.0820},
      {0.1372, -0.0616, -0.7230, 0.6743}};

  private final double [] eigenvalues = {1.000, -0.1470, -0.4238, -0.4293};

  /**
   * This is the toughest step, primarily because of the intensity of the
   * calculations performed and the amount of data required. Four parameters
   * in particular must be set prior to running the mapper: the list of
   * eigenvalues, the vector representing the diagonal matrix, and the
   * scalars beta0 and epsilon. Once the mapper is executed, it iterates
   * over the matrix of all corresponding eigenvectors.
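   *
   * <p>A minimal sketch of the wiring this test performs (beta0 = 2.0 and
   * epsilon = 0.25 are arbitrary fixture values chosen for this test, not
   * recommended defaults):</p>
   *
   * <pre>{@code
   * EigencutsSensitivityMapper mapper = new EigencutsSensitivityMapper();
   * mapper.setup(2.0, 0.25, new DenseVector(eigenvalues), new DenseVector(diagonal));
   * // each map() call then consumes one row of the eigenvector matrix and
   * // emits a single EigencutsSensitivityNode for that row
   * }</pre>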
   * @throws Exception
   */
  @Test
  public void testEigencutsSensitivityMapper() throws Exception {
    EigencutsSensitivityMapper mapper = new EigencutsSensitivityMapper();
    Configuration conf = new Configuration();

    // construct the writers
    DummyRecordWriter<IntWritable, EigencutsSensitivityNode> writer =
        new DummyRecordWriter<IntWritable, EigencutsSensitivityNode>();
    Mapper<IntWritable, VectorWritable, IntWritable, EigencutsSensitivityNode>.Context
        context = DummyRecordWriter.build(mapper, conf, writer);
    mapper.setup(2.0, 0.25, new DenseVector(eigenvalues), new DenseVector(diagonal));

    // perform the mapping
    for (int i = 0; i < eigenvectors.length; i++) {
      VectorWritable row = new VectorWritable(new DenseVector(eigenvectors[i]));
      mapper.map(new IntWritable(i), row, context);
    }

    // check that the results line up: one node per row, each with the
    // expected sensitivity of roughly -0.48
    for (IntWritable key : writer.getKeys()) {
      List<EigencutsSensitivityNode> list = writer.getValue(key);
      assertEquals("Only one result per row", 1, list.size());
      EigencutsSensitivityNode item = list.get(0);
      assertTrue("Sensitivity values are correct",
          Math.abs(item.getSensitivity() + 0.48) < 0.01);
    }
  }

  /**
   * This step simply assembles the sensitivities into one coherent matrix.
   * @throws Exception
   */
  @Test
  public void testEigencutsSensitivityReducer() throws Exception {
    EigencutsSensitivityMapper mapper = new EigencutsSensitivityMapper();
    Configuration conf = new Configuration();
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, eigenvectors.length);

    // construct the writers
    DummyRecordWriter<IntWritable, EigencutsSensitivityNode> mapWriter =
        new DummyRecordWriter<IntWritable, EigencutsSensitivityNode>();
    Mapper<IntWritable, VectorWritable, IntWritable, EigencutsSensitivityNode>.Context
        mapContext = DummyRecordWriter.build(mapper, conf, mapWriter);
    mapper.setup(2.0, 0.25, new DenseVector(eigenvalues), new DenseVector(diagonal));

    // perform the mapping
    for (int i = 0; i < eigenvectors.length; i++) {
      VectorWritable row = new VectorWritable(new DenseVector(eigenvectors[i]));
      mapper.map(new IntWritable(i), row, mapContext);
    }

    // set up the values for the reducer
    conf.set(EigencutsKeys.DELTA, "1.0");
    conf.set(EigencutsKeys.TAU, "-0.1");
    EigencutsSensitivityReducer reducer = new EigencutsSensitivityReducer();

    // set up the writers
    DummyRecordWriter<IntWritable, VectorWritable> redWriter =
        new DummyRecordWriter<IntWritable, VectorWritable>();
    Reducer<IntWritable, EigencutsSensitivityNode, IntWritable, VectorWritable>.Context
        redContext = DummyRecordWriter.build(reducer, conf, redWriter,
            IntWritable.class, EigencutsSensitivityNode.class);

    // perform the reduction
    for (IntWritable key : mapWriter.getKeys()) {
      reducer.reduce(key, mapWriter.getValue(key), redContext);
    }

    // since all the sensitivities were below the threshold,
    // each of them should have survived
    for (IntWritable key : redWriter.getKeys()) {
      List<VectorWritable> list = redWriter.getValue(key);
      assertEquals("One item in the list", 1, list.size());
      Vector item = list.get(0).get();
      // there should be only one non-zero item per vector, so the sum of its
      // elements is that item's sensitivity
      assertTrue("One non-zero item in the array",
          Math.abs(item.zSum() + 0.48) < 0.01);
    }
  }
}
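
// A minimal sketch of the survival check the reducer test exercises above,
// assuming the reducer derives its cutoff from tau and delta (with
// delta = 1.0 as set in the test, the cutoff reduces to tau itself; the
// exact combination is an assumption here, see EigencutsSensitivityReducer
// for the authoritative logic):
//
//   double cutoff = -0.1;                     // tau, given delta = 1.0
//   double sensitivity = -0.48;               // approximate per-row value
//   boolean survives = sensitivity < cutoff;  // -0.48 < -0.1: it survives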