/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math.decomposer.hebbian;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.decomposer.AsyncEigenVerifier;
import org.apache.mahout.math.decomposer.SingularVectorVerifier;
import org.apache.mahout.math.decomposer.SolverTest;
import org.junit.Test;
/**
* This test is woefully inadequate, and also requires tons of memory, because it's part
* unit test, part performance test, and part comparison test (between the Hebbian and Lanczos
* approaches).
* TODO: make better.
*/
public final class TestHebbianSolver extends SolverTest {
public static long timeSolver(Matrix corpus,
double convergence,
int maxNumPasses,
TrainingState state) {
return timeSolver(corpus,
convergence,
maxNumPasses,
10,
state);
}
public static long timeSolver(Matrix corpus,
double convergence,
int maxNumPasses,
int desiredRank,
TrainingState state) {
HebbianUpdater updater = new HebbianUpdater();
SingularVectorVerifier verifier = new AsyncEigenVerifier();
HebbianSolver solver = new HebbianSolver(updater,
verifier,
convergence,
maxNumPasses);
long start = System.nanoTime();
TrainingState finalState = solver.solve(corpus, desiredRank);
assertNotNull(finalState);
state.setCurrentEigens(finalState.getCurrentEigens());
state.setCurrentEigenValues(finalState.getCurrentEigenValues());
long time = 0L;
time += System.nanoTime() - start;
assertEquals(state.getCurrentEigens().numRows(), desiredRank);
return time / 1000000L;
}
public static long timeSolver(Matrix corpus, TrainingState state) {
return timeSolver(corpus, state, 10);
}
public static long timeSolver(Matrix corpus, TrainingState state, int rank) {
return timeSolver(corpus, 0.01, 20, rank, state);
}
@Test
public void testHebbianSolver() {
int numColumns = 800;
Matrix corpus = randomSequentialAccessSparseMatrix(1000, 900, numColumns, 30, 1.0);
int rank = 50;
Matrix eigens = new DenseMatrix(rank, numColumns);
TrainingState state = new TrainingState(eigens, null);
long optimizedTime = timeSolver(corpus,
0.00001,
5,
rank,
state);
eigens = state.getCurrentEigens();
assertEigen(eigens, corpus, 0.05, false);
assertOrthonormal(eigens, 1.0e-6);
System.out.println("Avg solving (Hebbian) time in ms: " + optimizedTime);
}
/*
public void testSolverWithSerialization() throws Exception
{
_corpusProjectionsVectorFactory = new DenseMapVectorFactory();
_eigensVectorFactory = new DenseMapVectorFactory();
timeSolver(TMP_EIGEN_DIR,
0.001,
5,
new TrainingState(null, null));
File eigenDir = new File(TMP_EIGEN_DIR + File.separator + HebbianSolver.EIGEN_VECT_DIR);
DiskBufferedDoubleMatrix eigens = new DiskBufferedDoubleMatrix(eigenDir, 10);
DoubleMatrix inMemoryMatrix = new HashMapDoubleMatrix(_corpusProjectionsVectorFactory, eigens);
for(Entry<Integer, MapVector> diskEntry : eigens)
{
for(Entry<Integer, MapVector> inMemoryEntry : inMemoryMatrix)
{
if(diskEntry.getKey() - inMemoryEntry.getKey() == 0)
{
assertTrue("vector with index : " + diskEntry.getKey() + " is not the same on disk as in memory",
Math.abs(1 - diskEntry.getValue().dot(inMemoryEntry.getValue())) < 1e-6);
}
else
{
assertTrue("vector with index : " + diskEntry.getKey()
+ " is not orthogonal to memory vect with index : " + inMemoryEntry.getKey(),
Math.abs(diskEntry.getValue().dot(inMemoryEntry.getValue())) < 1e-6);
}
}
}
File corpusDir = new File(TMP_EIGEN_DIR + File.separator + "corpus");
corpusDir.mkdir();
// TODO: persist to disk?
// DiskBufferedDoubleMatrix.persistChunk(corpusDir, corpus, true);
// eigens.delete();
// DiskBufferedDoubleMatrix.delete(new File(TMP_EIGEN_DIR));
}
*/
/*
public void testHebbianVersusLanczos() throws Exception
{
_corpusProjectionsVectorFactory = new DenseMapVectorFactory();
_eigensVectorFactory = new DenseMapVectorFactory();
int desiredRank = 200;
long time = timeSolver(TMP_EIGEN_DIR,
0.00001,
5,
desiredRank,
new TrainingState());
System.out.println("Hebbian time: " + time + "ms");
File eigenDir = new File(TMP_EIGEN_DIR + File.separator + HebbianSolver.EIGEN_VECT_DIR);
DiskBufferedDoubleMatrix eigens = new DiskBufferedDoubleMatrix(eigenDir, 10);
DoubleMatrix2D srm = asSparseDoubleMatrix2D(corpus);
long timeA = System.nanoTime();
EigenvalueDecomposition asSparseRealDecomp = new EigenvalueDecomposition(srm);
for(int i=0; i<desiredRank; i++)
asSparseRealDecomp.getEigenvector(i);
System.out.println("CommonsMath time: " + (System.nanoTime() - timeA)/TimingConstants.NANOS_IN_MILLI + "ms");
// System.out.println("Hebbian results:");
// printEigenVerify(eigens, corpus);
DoubleMatrix lanczosEigenVectors = new HashMapDoubleMatrix(new HashMapVectorFactory());
List<Double> lanczosEigenValues = new ArrayList<Double>();
LanczosSolver solver = new LanczosSolver();
solver.solve(corpus, desiredRank*5, lanczosEigenVectors, lanczosEigenValues);
for(TimingSection section : LanczosSolver.TimingSection.values())
{
System.out.println("Lanczos " + section.toString() + " = " + (int)(solver.getTimeMillis(section)/1000) + " seconds");
}
// System.out.println("\nLanczos results:");
// printEigenVerify(lanczosEigenVectors, corpus);
}
private DoubleMatrix2D asSparseDoubleMatrix2D(Matrix corpus)
{
DoubleMatrix2D result = new DenseDoubleMatrix2D(corpus.numRows(), corpus.numRows());
for(int i=0; i<corpus.numRows(); i++) {
for(int j=i; j<corpus.numRows(); j++) {
double v = corpus.getRow(i).dot(corpus.getRow(j));
result.set(i, j, v);
result.set(j, i, v);
}
}
return result;
}
public static void printEigenVerify(DoubleMatrix eigens, DoubleMatrix corpus)
{
for(Map.Entry<Integer, MapVector> entry : eigens)
{
MapVector eigen = entry.getValue();
MapVector afterMultiply = corpus.timesSquared(eigen);
double norm = afterMultiply.norm();
double error = 1 - eigen.dot(afterMultiply) / (eigen.norm() * afterMultiply.norm());
System.out.println(entry.getKey() + ": error = " + error + ", eVal = " + (norm / eigen.norm()));
}
}
*/
}