/*******************************************************************************
* Copyright 2007, 2009 Jorge Villalon (jorge.villalon@uai.cl)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package tml.vectorspace.operations;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import tml.utils.MatrixUtils;
import tml.vectorspace.operations.results.PassageDistancesResult;
import Jama.Matrix;
/**
 * Calculates the distance between every pair of consecutive passages
 * (passage {@code i} and passage {@code i + 1}) of a {@code Corpus}.
 * <p>
 * The value stored for each pair is the dot product of the two passages'
 * row-normalized vectors taken from {@code Vk * Sk} of the corpus semantic
 * space. Assuming {@code MatrixUtils.normalizeRows} scales every row to unit
 * length (to be confirmed against that helper), this is the cosine of the
 * angle between the two passages.
 *
 * @author Jorge Villalon
 *
 */
public class PassageDistances extends
        AbstractOperation<PassageDistancesResult> {

    /** Pattern used to render distances in the results table. */
    private static final String DISTANCE_PATTERN = "#0.000";

    /**
     * Creates the operation and sets its display name.
     */
    public PassageDistances() {
        this.name = "Segment distances analysis";
    }

    /**
     * Returns the same data as {@link #getResultsTable()}.
     *
     * @return one row per result, see {@link #getResultsTable()}
     */
    public Object[][] getInnerData() {
        return getResultsTable();
    }

    /**
     * Builds a table with one row per result and three columns: the id of
     * document A, the id of document B and the distance formatted with the
     * pattern {@code #0.000}.
     *
     * @return the results as a table, in the order of the results list
     */
    @Override
    public Object[][] getResultsTable() {
        Object[][] data = new Object[this.results.size()][3];
        // One formatter is enough for the whole table; it is local to this
        // call, so no state is shared between threads.
        DecimalFormat df = new DecimalFormat(DISTANCE_PATTERN);
        int i = 0;
        for (PassageDistancesResult result : this.results) {
            double value = result.getDistance();
            data[i][0] = result.getDocumentAId();
            data[i][1] = result.getDocumentBId();
            data[i][2] = df.format(value);
            i++;
        }
        return data;
    }

    /**
     * Returns the column titles matching {@link #getResultsTable()}.
     *
     * @return the three column headers
     */
    @Override
    public Object[] getResultsTableHeader() {
        Object[] data = new Object[3];
        data[0] = "Document A";
        data[1] = "Document B";
        data[2] = "Distance";
        return data;
    }

    /**
     * Runs the operation: computes the distance between each passage and the
     * one that follows it, stores one result per pair ordered by the id of
     * document A, and sets the summary result to the mean of those distances.
     * <p>
     * When the corpus has fewer than two passages there are no pairs, the
     * results list is empty and the mean evaluates to {@code 0.0 / 0}, i.e.
     * {@code NaN}.
     *
     * @throws Exception if the parent operation fails to start
     */
    @Override
    public void start() throws Exception {
        super.start();

        double distanceSum = 0;
        this.results = new ArrayList<PassageDistancesResult>();

        // Passage vectors (rows of Vk * Sk), normalized row by row; the
        // product with the transpose then holds every pairwise dot product.
        Matrix m = this.corpus.getSemanticSpace().getVk().times(this.corpus.getSemanticSpace().getSk());
        m = MatrixUtils.normalizeRows(m);
        m = m.times(m.transpose());

        // Only consecutive passages are compared, so a single pass over the
        // rows is enough.
        int passages = m.getRowDimension();
        for (int doc1 = 0; doc1 + 1 < passages; doc1++) {
            int doc2 = doc1 + 1;
            PassageDistancesResult result = new PassageDistancesResult();
            result.setDistance(m.get(doc1, doc2));
            result.setDocumentAId(doc1);
            result.setDocumentBId(doc2);
            distanceSum += result.getDistance();
            this.results.add(result);
        }

        this.summaryResult = distanceSum / results.size();

        // Order by the id of document A. Both arguments must be compared on
        // the same key, otherwise compare(x, x) != 0 and the comparator
        // contract is broken.
        Collections.sort(this.results,
                new Comparator<PassageDistancesResult>() {
                    @Override
                    public int compare(
                            final PassageDistancesResult arg0,
                            final PassageDistancesResult arg1) {
                        return Integer.compare(arg0.getDocumentAId(),
                                arg1.getDocumentAId());
                    }
                });

        super.end();
    }
}