/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.clustering.spectral.eigencuts;

import java.io.IOException;
import java.util.Map;

import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.clustering.spectral.common.VectorCache;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.function.Functions;

/**
 * Computes pairwise sensitivities for one eigenvector and keeps only the
 * entries that survive non-maximal suppression.
 *
 * <p>Each input record is a (row index, vector) pair. The row index selects an
 * entry of the cached eigenvalue vector; the vector's elements are combined
 * pairwise with the cached diagonal vector to produce a sensitivity for every
 * (i, j) pair. For each i the smallest sensitivity is kept, and among all i
 * that chose the same column j only the smallest one is emitted.</p>
 */
public class EigencutsSensitivityMapper extends
    Mapper<IntWritable, VectorWritable, IntWritable, EigencutsSensitivityNode> {

  private Vector eigenvalues;
  private Vector diagonal;
  private double beta0;
  private double epsilon;

  /**
   * Reads {@code beta0} and {@code epsilon} from the job configuration and
   * loads the eigenvalue and diagonal vectors through {@link VectorCache}.
   *
   * @param context the Hadoop task context supplying the configuration
   * @throws IOException          propagated from the superclass or the cache load
   * @throws InterruptedException propagated from the superclass
   */
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration config = context.getConfiguration();
    beta0 = Double.parseDouble(config.get(EigencutsKeys.BETA));
    epsilon = Double.parseDouble(config.get(EigencutsKeys.EPSILON));

    // read in the two vectors from the cache
    // NOTE(review): both vectors are loaded with the identical call
    // VectorCache.load(config). Unless that call advances through the cached
    // files on successive invocations, eigenvalues and diagonal end up being
    // the same vector -- confirm against VectorCache and the job driver.
    eigenvalues = VectorCache.load(config);
    diagonal = VectorCache.load(config);

    // Any other vector implementation is copied into a
    // SequentialAccessSparseVector before use.
    if (!(eigenvalues instanceof SequentialAccessSparseVector || eigenvalues instanceof DenseVector)) {
      eigenvalues = new SequentialAccessSparseVector(eigenvalues);
    }
    if (!(diagonal instanceof SequentialAccessSparseVector || diagonal instanceof DenseVector)) {
      diagonal = new SequentialAccessSparseVector(diagonal);
    }
  }

  /**
   * Emits the suppressed sensitivities for a single eigenvector.
   *
   * <p>The eigenvector is skipped entirely when the absolute eigenvalue is
   * {@code >= 1.0} or when {@code -log(2) / log(|lambda|)} does not exceed
   * {@code epsilon * beta0}.</p>
   *
   * @param row     index of this eigenvector; used to look up its eigenvalue
   * @param vw      wrapper around the eigenvector
   * @param context output collector; receives (node row, node) pairs
   * @throws IOException          propagated from {@code context.write}
   * @throws InterruptedException propagated from {@code context.write}
   */
  @Override
  protected void map(IntWritable row, VectorWritable vw, Context context)
    throws IOException, InterruptedException {

    // first, does this particular eigenvector even pass the required threshold?
    double eigenvalue = Math.abs(eigenvalues.get(row.get()));
    double betak = -Functions.LOGARITHM.apply(2) / Functions.LOGARITHM.apply(eigenvalue);
    if (eigenvalue >= 1.0 || betak <= epsilon * beta0) {
      // doesn't pass the threshold! quit
      return;
    }

    // This factor depends only on the eigenvalue and beta0, so compute it once
    // instead of once per (i, j) pair.
    double scale = sensitivityScale(eigenvalue);

    // go through the vector, performing the calculations
    // sadly, no way to get around n^2 computations
    Map<Integer, EigencutsSensitivityNode> columns = Maps.newHashMap();
    Vector ev = vw.get();
    int n = ev.size();
    for (int i = 0; i < n; i++) {
      // Row-side inputs are constant across the inner loop.
      double evi = ev.get(i);
      double diagi = diagonal.get(i);

      // perform non-maximal suppression: find the smallest value in the row
      double minsij = Double.MAX_VALUE;
      int minInd = -1;
      for (int j = 0; j < n; j++) {
        double sij = scale * eigenvalueDerivative(eigenvalue, evi, ev.get(j), diagi, diagonal.get(j));
        if (sij < minsij) {
          minsij = sij;
          minInd = j;
        }
      }

      // No sensitivity in this row was below Double.MAX_VALUE (e.g. every
      // value was NaN), so there is no valid column to report; do not emit a
      // node carrying the -1 sentinel.
      if (minInd < 0) {
        continue;
      }

      // is this the smallest value in the column?
      EigencutsSensitivityNode current = columns.get(minInd);
      if (current == null || current.getSensitivity() > minsij) {
        columns.put(minInd, new EigencutsSensitivityNode(i, minInd, minsij));
      }
    }

    // write whatever values made it through
    for (EigencutsSensitivityNode e : columns.values()) {
      context.write(new IntWritable(e.getRow()), e);
    }
  }

  /**
   * Computes the part of the sensitivity that depends only on the eigenvalue:
   *
   * <pre>
   *   log(2) / (lambda * log(lambda) * log(lambda^beta0 / 2))
   * </pre>
   *
   * @param eigenvalue absolute value of the eigenvalue (lambda)
   * @return the scale factor applied to every pairwise term
   */
  private double sensitivityScale(double eigenvalue) {
    return Functions.LOGARITHM.apply(2)
        / (eigenvalue * Functions.LOGARITHM.apply(eigenvalue)
            * Functions.LOGARITHM.apply(Functions.POW.apply(eigenvalue, beta0) / 2));
  }

  /**
   * Computes the pairwise part of the sensitivity:
   *
   * <pre>
   *   -(u_i / sqrt(d_i) - u_j / sqrt(d_j))^2
   *     + (1 - lambda) * (u_i^2 / d_i + u_j^2 / d_j)
   * </pre>
   *
   * <p>NOTE(review): the comment this replaces grouped the leading minus over
   * both terms, whereas the code has always negated only the squared
   * difference. The code's behavior is preserved here; confirm the intended
   * form against the Eigencuts derivation.</p>
   *
   * @param eigenvalue absolute value of the eigenvalue (lambda)
   * @param evi        eigenvector element u_i
   * @param evj        eigenvector element u_j
   * @param diagi      diagonal element d_i
   * @param diagj      diagonal element d_j
   * @return the pairwise term, before scaling by {@link #sensitivityScale(double)}
   */
  private double eigenvalueDerivative(double eigenvalue, double evi, double evj,
                                      double diagi, double diagj) {
    double difference = evi / Functions.SQRT.apply(diagi) - evj / Functions.SQRT.apply(diagj);
    return -Functions.POW.apply(difference, 2)
        + (1.0 - eigenvalue) * (Functions.POW.apply(evi, 2) / diagi + Functions.POW.apply(evj, 2) / diagj);
  }

  /**
   * Utility helper method, used for unit testing. Sets the mapper's state
   * directly instead of reading it from a Hadoop configuration.
   *
   * @param beta0       value to use for beta0
   * @param epsilon     value to use for epsilon
   * @param eigenvalues eigenvalue vector, indexed by eigenvector row
   * @param diagonal    diagonal vector, indexed by eigenvector element
   */
  void setup(double beta0, double epsilon, Vector eigenvalues, Vector diagonal) {
    this.beta0 = beta0;
    this.epsilon = epsilon;
    this.eigenvalues = eigenvalues;
    this.diagonal = diagonal;
  }
}