/*******************************************************************************
* Copyright 2007, 2009 Jorge Villalon (jorge.villalon@uai.cl)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package tml.test;
import static org.junit.Assert.assertEquals;
import org.junit.BeforeClass;
import org.junit.Test;
import tml.Configuration;
import tml.corpus.Corpus;
import tml.corpus.SearchResultsCorpus;
import tml.corpus.CorpusParameters.DimensionalityReduction;
import tml.utils.LuceneUtils;
import tml.vectorspace.TermWeighting.GlobalWeight;
import tml.vectorspace.TermWeighting.LocalWeight;
import Jama.Matrix;
public class ValidateIntroToLSATest extends AbstractTmlIndexingTest {
private static String[] documents = {
"c1","c2","c3","c4","c5","m1","m2","m3","m4"
};
private static String[] terms = {
"computer",
"ep", // stemmed version of EPS
"graph",
"human",
"interface",
"minors",
"response",
"survey",
"system",
"time",
"trees",
"user"
};
private static double[][] U = {
{0.24,0.04,-0.16,-0.59,-0.11,-0.25,-0.3,0.06,0.49},
{0.3,-0.14,0.33,0.19,0.11,0.27,0.03,-0.02,-0.17},
{0.04,0.62,0.22,0,-0.07,0.11,0.16,-0.68,0.23},
{0.22,-0.11,0.29,-0.41,-0.11,-0.34,0.52,-0.06,-0.41},
{0.2,-0.07,0.14,-0.55,0.28,0.5,-0.07,-0.01,-0.11},
{0.03,0.45,0.14,-0.01,-0.3,0.28,0.34,0.68,0.18},
{0.27,0.11,-0.43,0.07,0.08,-0.17,0.28,-0.02,-0.05},
{0.21,0.27,-0.18,-0.03,-0.54,0.08,-0.47,-0.04,-0.58},
{0.64,-0.17,0.36,0.33,-0.16,-0.21,-0.17,0.03,0.27},
{0.27,0.11,-0.43,0.07,0.08,-0.17,0.28,-0.02,-0.05},
{0.01,0.49,0.23,0.03,0.59,-0.39,-0.29,0.25,-0.23},
{0.4,0.06,-0.34,0.1,0.33,0.38,0,0,0.01}
};
private static double[] S = {
3.34,
2.54,
2.35,
1.64,
1.50,
1.31,
0.85,
0.56,
0.36
};
private static double[][] V = {
{0.2,-0.06,0.11,-0.95,0.05,-0.08,0.18,-0.01,-0.06},
{0.61,0.17,-0.5,-0.03,-0.21,-0.26,-0.43,0.05,0.24},
{0.46,-0.13,0.21,0.04,0.38,0.72,-0.24,0.01,0.02},
{0.54,-0.23,0.57,0.27,-0.21,-0.37,0.26,-0.02,-0.08},
{0.28,0.11,-0.51,0.15,0.33,0.03,0.67,-0.06,-0.26},
{0,0.19,0.1,0.02,0.39,-0.3,-0.34,0.45,-0.62},
{0.01,0.44,0.19,0.02,0.35,-0.21,-0.15,-0.76,0.02},
{0.02,0.62,0.25,0.01,0.15,0,0.25,0.45,0.52},
{0.08,0.53,0.08,-0.03,-0.6,0.36,0.04,-0.07,-0.45},
};
private static double[][] termdoc = {
{1, 1, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 1, 1, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 1, 1, 1},
{1, 0, 0, 1, 0, 0, 0, 0, 0},
{1, 0, 1, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 1, 1},
{0, 1, 0, 0, 1, 0, 0, 0, 0},
{0, 1, 0, 0, 0, 0, 0, 0, 1},
{0, 1, 1, 2, 0, 0, 0, 0, 0},
{0, 1, 0, 0, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 1, 1, 1, 0},
{0, 1, 1, 0, 1, 0, 0, 0, 0}
};
private static double[][] termdocK = {
{0.15,0.51,0.36,0.41,0.24,0.02,0.06,0.09,0.12},
{0.22,0.55,0.51,0.63,0.24,-0.07,-0.14,-0.2,-0.11},
{-0.06,0.34,-0.15,-0.3,0.2,0.31,0.69,0.98,0.85},
{0.16,0.4,0.38,0.47,0.18,-0.05,-0.12,-0.16,-0.09},
{0.14,0.37,0.33,0.4,0.16,-0.03,-0.07,-0.1,-0.04},
{-0.04,0.25,-0.1,-0.21,0.15,0.22,0.5,0.71,0.62},
{0.16,0.58,0.38,0.42,0.28,0.06,0.13,0.19,0.22},
{0.1,0.53,0.23,0.21,0.27,0.14,0.31,0.44,0.42},
{0.45,1.23,1.05,1.27,0.56,-0.07,-0.15,-0.21,-0.05},
{0.16,0.58,0.38,0.42,0.28,0.06,0.13,0.19,0.22},
{-0.06,0.23,-0.14,-0.27,0.14,0.24,0.55,0.77,0.66},
{0.26,0.84,0.61,0.7,0.39,0.03,0.08,0.12,0.19}
};
private static Corpus corpus = null;
private static Corpus corpusLanczos = null;
@BeforeClass
public static void setUpBeforeClass() throws Exception {
AbstractTmlIndexingTest.setUpBeforeClass();
documentsFolder = Configuration.getTmlFolder() + "/corpora/introLSA";
repository.addDocumentsInFolder(documentsFolder);
corpus = new SearchResultsCorpus("type:document");
corpus.getParameters().setTermWeightLocal(LocalWeight.TF);
corpus.getParameters().setTermWeightGlobal(GlobalWeight.None);
corpus.getParameters().setDimensionalityReduction(DimensionalityReduction.NO);
corpus.getParameters().setDimensionalityReductionThreshold(2);
corpus.load(repository);
corpusLanczos = new SearchResultsCorpus("type:document");
corpusLanczos.getParameters().setTermWeightLocal(LocalWeight.TF);
corpusLanczos.getParameters().setTermWeightGlobal(GlobalWeight.None);
corpusLanczos.getParameters().setDimensionalityReduction(DimensionalityReduction.NO);
corpusLanczos.getParameters().setDimensionalityReductionThreshold(2);
corpusLanczos.getParameters().setLanczosSVD(true);
corpusLanczos.load(repository);
}
@Test
public void validateTermsAndDocuments() throws Exception {
for(int i=0;i<corpus.getTerms().length; i++) {
String term = corpus.getTerms()[i];
assertEquals(term, LuceneUtils.stemWords(terms[i]));
}
for(int i=0;i<corpus.getPassages().length; i++) {
String passage = corpus.getPassages()[i];
assertEquals(passage, documents[i]);
}
}
@Test
public void validateTermDocMatrix() {
Matrix actual = corpus.getTermDocMatrix();
Matrix expected = new Matrix(termdoc);
assertEquals(expected.getRowDimension(), actual.getRowDimension());
assertEquals(expected.getColumnDimension(), actual.getColumnDimension());
for(int i=0; i<actual.getRowDimension(); i++) {
for(int j=0; j<actual.getColumnDimension(); j++) {
assertEquals(expected.get(i, j), actual.get(i, j), 0.01);
}
}
}
@Test
public void validateU() {
Matrix expected = corpus.getSemanticSpace().getUk();
Matrix actual = new Matrix(U);
for(int i=0; i<actual.getRowDimension(); i++) {
for(int j=0; j<actual.getColumnDimension(); j++) {
assertEquals(Math.abs(expected.get(i, j)), Math.abs(actual.get(i, j)), 0.01);
}
}
}
@Test
public void validateS() {
Matrix expected = corpus.getSemanticSpace().getSk();
Matrix actual = new Matrix(S.length, S.length);
for(int i=0; i<S.length;i++)
actual.set(i, i, S[i]);
for(int i=0; i<actual.getRowDimension(); i++) {
for(int j=0; j<actual.getColumnDimension(); j++) {
assertEquals(expected.get(i, j), actual.get(i, j), 0.01);
}
}
}
@Test
public void validateV() {
Matrix actual = corpus.getSemanticSpace().getVk();
Matrix expected = new Matrix(V);
expected.print(5, 2);
actual.print(5, 2);
for(int i=0; i<actual.getRowDimension(); i++) {
for(int j=0; j<actual.getColumnDimension(); j++) {
assertEquals(Math.abs(expected.get(i, j)), Math.abs(actual.get(i, j)), 0.01);
}
}
}
@Test
public void validateTermDocMatrixReduced() throws Exception {
corpus.getParameters().setDimensionalityReduction(DimensionalityReduction.NUM);
corpus.load(repository);
corpus.getSemanticSpace().calculate();
Matrix actual = corpus.getSemanticSpace().getTermsDocuments();
Matrix expected = new Matrix(termdocK);
for(int i=0; i<actual.getRowDimension(); i++) {
for(int j=0; j<actual.getColumnDimension(); j++) {
assertEquals(expected.get(i, j), actual.get(i, j), 0.05);
}
}
}
// TODO: Fix Lanczos SVD and then uncomment the tests
/* @Test
public void validateULanczos() {
Matrix expected = corpusLanczos.getSemanticSpace().getUk();
Matrix actual = new Matrix(U);
for(int i=0; i<actual.getRowDimension(); i++) {
for(int j=0; j<actual.getColumnDimension(); j++) {
assertEquals(Math.abs(expected.get(i, j)), Math.abs(actual.get(i, j)), 0.01);
}
}
}
@Test
public void validateSLanczos() {
Matrix expected = corpusLanczos.getSemanticSpace().getSk();
Matrix actual = new Matrix(S.length, S.length);
for(int i=0; i<S.length;i++)
actual.set(i, i, S[i]);
for(int i=0; i<actual.getRowDimension(); i++) {
for(int j=0; j<actual.getColumnDimension(); j++) {
assertEquals(expected.get(i, j), actual.get(i, j), 0.01);
}
}
}
@Test
public void validateVLanczos() {
Matrix actual = corpusLanczos.getSemanticSpace().getVk();
Matrix expected = new Matrix(V);
expected.print(5, 2);
actual.print(5, 2);
for(int i=0; i<actual.getRowDimension(); i++) {
for(int j=0; j<actual.getColumnDimension(); j++) {
assertEquals(Math.abs(expected.get(i, j)), Math.abs(actual.get(i, j)), 0.01);
}
}
}
@Test
public void validateTermDocMatrixReducedLanczos() throws Exception {
corpusLanczos.getParameters().setDimensionalityReduction(DimensionalityReduction.NUM);
corpusLanczos.load(repository);
corpusLanczos.getSemanticSpace().calculate();
Matrix actual = corpusLanczos.getSemanticSpace().getTermsDocuments();
Matrix expected = new Matrix(termdocK);
assertEquals(expected.getRowDimension(), actual.getRowDimension());
assertEquals(expected.getColumnDimension(), actual.getColumnDimension());
for(int i=0; i<actual.getRowDimension(); i++) {
for(int j=0; j<actual.getColumnDimension(); j++) {
assertEquals(expected.get(i, j), actual.get(i, j), 0.05);
}
}
}*/
}