/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math.ssvd;
import com.google.common.collect.Lists;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.DiagonalMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.MatrixWritable;
import org.apache.mahout.math.RandomTrinaryMatrix;
import org.apache.mahout.math.SingularValueDecomposition;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.Functions;
import org.junit.Before;
import org.junit.Test;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
 * Tests for {@link SequentialOutOfCoreSvd} and {@link SequentialBigSvd} that use a synthetic
 * low-rank matrix, optionally written to disk as a series of row slices.
 */
public final class SequentialOutOfCoreSvdTest extends MahoutTestCase {

  /** Scratch directory that holds the on-disk slices of A and the U/V blocks produced by the SVD. */
  private File tmpDir;

  /**
   * Creates the temporary directory used by the on-disk test.
   *
   * @throws Exception If the superclass set-up or the directory creation fails.
   */
  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    tmpDir = getTestTempDir("matrix");
  }

  /**
   * Checks that the out-of-core decomposition yields the same leading six singular values as
   * {@link SequentialBigSvd} and that {@code U * diag(s) * V'} reproduces the input matrix.
   *
   * @throws IOException If the matrix slices or the resulting blocks cannot be written or read.
   */
  @Test
  public void testSingularValues() throws IOException {
    Matrix A = lowRankMatrix(tmpDir, "A", 200, 970, 1020);

    List<File> partsOfA = listFilesMatching(tmpDir, "A-.*");

    // rearrange A to make sure we don't depend on lexical ordering.
    partsOfA = Lists.reverse(partsOfA);
    SequentialOutOfCoreSvd s = new SequentialOutOfCoreSvd(partsOfA, tmpDir, 100, 210);
    SequentialBigSvd svd = new SequentialBigSvd(A, 100);

    Vector reference = new DenseVector(svd.getSingularValues()).viewPart(0, 6);
    Vector actual = s.getSingularValues().viewPart(0, 6);
    // Compare the largest ABSOLUTE difference. The plain maximum of the signed differences
    // would be 0 whenever every mismatch is negative, letting a wrong result slip through.
    assertEquals(0, reference.minus(actual).aggregate(Functions.MAX, Functions.ABS), 1.0e-9);

    s.computeU(partsOfA, tmpDir);
    Matrix u = readBlockMatrix(listFilesMatching(tmpDir, "U-.*"));

    s.computeV(tmpDir, A.columnSize());
    Matrix v = readBlockMatrix(listFilesMatching(tmpDir, "V-.*"));

    // The values in A are pretty big so this is a pretty tight relative tolerance
    Matrix reconstruction = u.times(new DiagonalMatrix(s.getSingularValues())).times(v.transpose());
    assertEquals(0, A.minus(reconstruction).aggregate(Functions.PLUS, Functions.ABS), 1.0e-7);
  }

  /**
   * Lists the entries of a directory whose names match a regular expression.
   *
   * @param dir     The directory to scan.
   * @param pattern A regular expression that the entire file name has to match.
   * @return The matching files, in whatever order the file system reports them.
   * @throws IOException If the directory cannot be listed.
   */
  private static List<File> listFilesMatching(File dir, final String pattern) throws IOException {
    File[] matches = dir.listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File file, String fileName) {
        return fileName.matches(pattern);
      }
    });
    // File.listFiles signals "not a directory" or an I/O failure by returning null
    if (matches == null) {
      throw new IOException("Unable to list files in " + dir);
    }
    return Arrays.asList(matches);
  }

  /**
   * Reads a list of files that contain a column of blocks.  It is assumed that the files
   * can be sorted lexicographically to determine the order they should be stacked.  It
   * is also assumed here that all blocks will be the same size except the last one which
   * may be shorter than the others.
   *
   * @param files The list of files to read.  The list itself is left untouched.
   * @return The row-wise concatenation of the matrices in the files.
   * @throws IOException              If we can't read the sub-matrices.
   * @throws IllegalArgumentException If the list of files is empty.
   */
  private static Matrix readBlockMatrix(List<File> files) throws IOException {
    if (files.isEmpty()) {
      throw new IllegalArgumentException("No block files to read");
    }

    // force correct ordering, working on a copy so the caller's list is not reordered
    List<File> ordered = Lists.newArrayList(files);
    Collections.sort(ordered);

    // initially, we don't know what size buffer to hold
    int nrows = -1;
    int ncols = -1;
    Matrix r = null;

    MatrixWritable m = new MatrixWritable();

    int row = 0;
    for (File file : ordered) {
      DataInputStream in = new DataInputStream(new FileInputStream(file));
      try {
        m.readFields(in);
      } finally {
        // release the file handle even when the block cannot be parsed
        in.close();
      }
      Matrix block = m.get();
      if (r == null) {
        // now we can set an upper bound on how large our result will be
        nrows = block.rowSize() * ordered.size();
        ncols = block.columnSize();
        r = new DenseMatrix(nrows, ncols);
      }
      r.viewPart(row, block.rowSize(), 0, ncols).assign(block);
      row += block.rowSize();
    }

    // at the end, row will have the true size of the result
    if (row != nrows) {
      // and if that isn't the size of the buffer, we need to crop the result a bit
      r = r.viewPart(0, row, 0, ncols);
    }
    return r;
  }

  /**
   * Compares the first three left singular vectors from {@link SequentialBigSvd} with those of
   * the exact {@link SingularValueDecomposition}.  Absolute values are compared because the sign
   * of a singular vector is arbitrary.
   *
   * @throws IOException Declared by the matrix factory; not expected for an in-memory matrix.
   */
  @Test
  public void testLeftVectors() throws IOException {
    Matrix A = lowRankMatrixInMemory(20, 20);

    SequentialBigSvd s = new SequentialBigSvd(A, 6);
    SingularValueDecomposition svd = new SingularValueDecomposition(A);

    // can only check first few singular vectors
    Matrix u1 = svd.getU().viewPart(0, 20, 0, 3).assign(Functions.ABS);
    Matrix u2 = s.getU().viewPart(0, 20, 0, 3).assign(Functions.ABS);
    assertEquals(u1, u2);
  }

  /**
   * Builds the synthetic low-rank matrix without writing anything to disk.
   *
   * @param rows    Number of rows of the result.
   * @param columns Number of columns of the result.
   * @return The generated matrix.
   * @throws IOException Declared by {@link #lowRankMatrix}; no files are written on this path.
   */
  private static Matrix lowRankMatrixInMemory(int rows, int columns) throws IOException {
    return lowRankMatrix(null, null, 0, rows, columns);
  }

  /**
   * Asserts that two matrices agree element by element to within 1e-10.
   *
   * @param u1 The expected matrix.
   * @param u2 The actual matrix.
   */
  private static void assertEquals(Matrix u1, Matrix u2) {
    assertEquals(0.0, u1.minus(u2).aggregate(Functions.MAX, Functions.ABS), 1.0e-10);
  }

  /**
   * Compares the first three right singular vectors from {@link SequentialBigSvd} with those of
   * the exact {@link SingularValueDecomposition}.  Absolute values are compared because the sign
   * of a singular vector is arbitrary.
   *
   * @throws IOException Declared by the matrix factory; not expected for an in-memory matrix.
   */
  @Test
  public void testRightVectors() throws IOException {
    Matrix A = lowRankMatrixInMemory(20, 20);

    SequentialBigSvd s = new SequentialBigSvd(A, 6);
    SingularValueDecomposition svd = new SingularValueDecomposition(A);

    Matrix v1 = svd.getV().viewPart(0, 20, 0, 3).assign(Functions.ABS);
    Matrix v2 = s.getV().viewPart(0, 20, 0, 3).assign(Functions.ABS);
    assertEquals(v1, v2);
  }

  /**
   * Builds the product {@code u * d * v'} where {@code u} and {@code v} are
   * {@link RandomTrinaryMatrix} instances and {@code d} is a 10x10 matrix whose only entries
   * set here are 5, 3, 1 and 0.5 on the leading diagonal.  When a directory is supplied the
   * result is additionally written there as consecutive row slices named
   * {@code <aBase>-<first row of the slice, zero padded to 9 digits>}.
   *
   * @param tmpDir       Directory to write the slices to, or null to keep the matrix in memory only.
   * @param aBase        File name prefix for the slices; only used if tmpDir is not null.
   * @param rowsPerSlice Maximum number of rows per slice; must be positive if tmpDir is not null.
   * @param rows         Number of rows of the result.
   * @param columns      Number of columns of the result.
   * @return The generated matrix.
   * @throws IOException              If a slice cannot be written.
   * @throws IllegalArgumentException If slices are requested with a non-positive rowsPerSlice.
   */
  private static Matrix lowRankMatrix(File tmpDir, String aBase, int rowsPerSlice, int rows, int columns) throws IOException {
    int rank = 10;
    Matrix u = new RandomTrinaryMatrix(1, rows, rank, false);
    Matrix d = new DenseMatrix(rank, rank);
    d.set(0, 0, 5);
    d.set(1, 1, 3);
    d.set(2, 2, 1);
    d.set(3, 3, 0.5);
    Matrix v = new RandomTrinaryMatrix(2, columns, rank, false);
    Matrix a = u.times(d).times(v.transpose());

    if (tmpDir != null) {
      if (rowsPerSlice <= 0) {
        // a non-positive step would keep the slicing loop below from ever terminating
        throw new IllegalArgumentException("rowsPerSlice must be positive but was " + rowsPerSlice);
      }
      for (int i = 0; i < a.rowSize(); i += rowsPerSlice) {
        // the last slice may hold fewer than rowsPerSlice rows
        MatrixWritable m = new MatrixWritable(a.viewPart(i, Math.min(a.rowSize() - i, rowsPerSlice), 0, a.columnSize()));
        DataOutputStream out = new DataOutputStream(new FileOutputStream(new File(tmpDir, String.format("%s-%09d", aBase, i))));
        try {
          m.write(out);
        } finally {
          out.close();
        }
      }
    }
    return a;
  }
}