package at.lux.retrieval.suffixtreemodel;
import at.lux.fotoretrieval.lucene.Path;
import at.lux.fotoretrieval.lucene.Graph;
import at.lux.fotoretrieval.lucene.GraphPathExtractor;
import junit.framework.TestCase;
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
import java.text.DecimalFormat;
/**
 * Tests path extraction from graphs and the similarity measures of {@link SuffixTree}.
 * <p>
 * Date: 15.02.2006 <br>
 * Time: 20:47:52 <br>
 * Know-Center Graz, Inffeldgasse 21a, 8010 Graz, AUSTRIA <br>
 *
 * @author Mathias Lux, mlux@know-center.at
 */
public class SuffixTreeTest extends TestCase {
    /** Sample graph description: six nodes and seven relations. */
    final static String GRAPH1 = "[9] [15] [26] [30] [31] [32] [locationOf 30 9] [locationOf 9 32] [locationOf 9 31] [locationOf 32 30] [locationOf 9 15] [locationOf 9 26] [timeOf 31 30]";
    /** Sample graph description: GRAPH1 without node 32 and the two relations touching it. */
    final static String GRAPH2 = "[9] [15] [26] [30] [31] [locationOf 30 9] [locationOf 9 31] [locationOf 9 15] [locationOf 9 26] [timeOf 31 30]";

    /** Number of index documents compared pairwise in {@link #testTfIDF()}. */
    private static final int MATRIX_SIZE = 5;

    /**
     * Prints the paths extracted from GRAPH1 for the length arguments 1 to 6 and
     * finally for the argument -1. Nothing is asserted; the test only checks that
     * extraction runs without throwing.
     */
    public void testPathExtraction() {
        Graph g = new Graph(GRAPH1);
        for (int length = 1; length < 7; length++) {
            printPaths("Path length " + length, GraphPathExtractor.extractPaths(g.toString(), length));
        }
        // -1 is labelled "all lengths" by this test's own output.
        printPaths("Path of all lengths", GraphPathExtractor.extractPaths(g.toString(), -1));
    }

    /**
     * Adds the same graph document twice and expects a similarity of exactly 1
     * for the Unweighted and TermFrequency measures.
     */
    public void testStcOnSameGraph() {
        Graph g = new Graph(GRAPH1);
        SuffixTree st = new SuffixTree();
        st.addDocument(createSuffixTreeDocument(g));
        st.addDocument(createSuffixTreeDocument(g));
        double similarity = st.getSimilarity(SuffixTree.SimilarityType.Unweighted);
        System.out.println("Unweighted similarity = " + similarity);
        // Delta 0.0 keeps the original exact comparison but reports both values on failure.
        assertEquals("Unweighted similarity of identical documents", 1.0, similarity, 0.0);

        st.resetSimilarity();
        st.addDocument(createSuffixTreeDocument(g));
        st.addDocument(createSuffixTreeDocument(g));
        // NOTE(review): the no-argument getSimilarity() presumably uses a default
        // similarity type - confirm against SuffixTree.
        assertEquals("Similarity after reset", similarity, st.getSimilarity(), 0.0);
        similarity = st.getSimilarity(SuffixTree.SimilarityType.TermFrequency);
        System.out.println("TermFrequency similarity = " + similarity);
        assertEquals("TermFrequency similarity of identical documents", 1.0, similarity, 0.0);
    }

    /**
     * Prints the Unweighted and TermFrequency similarity of GRAPH1 and GRAPH2.
     * No expected value is pinned for two different graphs, so nothing is
     * asserted; the values are printed for manual inspection.
     */
    public void testStcOnTwoGraphs() {
        SuffixTree st = new SuffixTree();
        st.addDocument(createSuffixTreeDocument(new Graph(GRAPH1)));
        st.addDocument(createSuffixTreeDocument(new Graph(GRAPH2)));
        double similarity = st.getSimilarity(SuffixTree.SimilarityType.Unweighted);
        System.out.println("Unweighted similarity = " + similarity);
        similarity = st.getSimilarity(SuffixTree.SimilarityType.TermFrequency);
        System.out.println("TermFrequency similarity = " + similarity);
    }

    /**
     * Builds the text document that is fed into the suffix tree for a graph: one
     * line per extracted path, each path string stripped of its first and last
     * character and terminated by a newline.
     *
     * @param g graph to convert
     * @return the newline-separated path strings
     */
    private String createSuffixTreeDocument(Graph g) {
        Path[] paths = GraphPathExtractor.extractPaths(g.toString(), -1);
        StringBuilder sb = new StringBuilder(256);
        for (Path path : paths) {
            String pathString = path.toString();
            // Drop the enclosing first and last character of the path's string form.
            sb.append(pathString.substring(1, pathString.length() - 1));
            sb.append('\n');
        }
        return sb.toString();
    }

    /**
     * Adds GRAPH1, GRAPH2 and every graph stored in the index at
     * "testdata/idx_paths" as corpus documents, then prints the TFIDF similarity
     * of the first two index documents and pairwise similarity matrices of the
     * first {@link #MATRIX_SIZE} index documents for the TermFrequency,
     * Unweighted and TFIDF measures. Nothing is asserted.
     *
     * @throws IOException if the index cannot be opened, read or closed
     */
    public void testTfIDF() throws IOException {
        SuffixTree st = new SuffixTree();
        st.addCorpusDocument(createSuffixTreeDocument(new Graph(GRAPH1)));
        st.addCorpusDocument(createSuffixTreeDocument(new Graph(GRAPH2)));

        // train on corpus:
        IndexReader reader = IndexReader.open("testdata/idx_paths");
        try {
            System.out.println("Reading graphs from index ...");
            for (int i = 0; i < reader.numDocs(); i++) {
                st.addCorpusDocument(readSuffixTreeDocument(reader, i));
            }
            System.out.println("Adding docs ...");
            st.addDocument(readSuffixTreeDocument(reader, 0));
            st.addDocument(readSuffixTreeDocument(reader, 1));
            System.out.println("Getting similarity ...");
            double similarity = st.getSimilarity(SuffixTree.SimilarityType.TFIDF);
            System.out.println("similarity = " + similarity);

            DecimalFormat df = (DecimalFormat) DecimalFormat.getInstance();
            df.setMaximumFractionDigits(2);
            df.setMinimumFractionDigits(2);

            printSimilarityMatrix(st, reader, SuffixTree.SimilarityType.TermFrequency, df);
            System.out.println("--");
            printSimilarityMatrix(st, reader, SuffixTree.SimilarityType.Unweighted, df);
            System.out.println("--");
            printSimilarityMatrix(st, reader, SuffixTree.SimilarityType.TFIDF, df);
        } finally {
            // Release the index even when reading or a similarity computation throws.
            reader.close();
        }
    }

    /** Prints a header line, one line per path and a closing "---" separator. */
    private void printPaths(String header, Path[] paths) {
        System.out.println(header);
        for (Path path : paths) {
            System.out.println(path);
        }
        System.out.println("---");
    }

    /** Builds the suffix tree document for the "graph" field of one index document. */
    private String readSuffixTreeDocument(IndexReader reader, int docId) throws IOException {
        String graphString = reader.document(docId).getField("graph").stringValue();
        return createSuffixTreeDocument(new Graph(graphString));
    }

    /**
     * Computes the pairwise similarities of the first {@link #MATRIX_SIZE} index
     * documents with the given measure, then prints them as tab-separated rows.
     */
    private void printSimilarityMatrix(SuffixTree st, IndexReader reader,
                                       SuffixTree.SimilarityType type, DecimalFormat df)
            throws IOException {
        double[][] matrix = new double[MATRIX_SIZE][MATRIX_SIZE];
        // Fill the whole matrix first so nothing is printed if a computation fails.
        for (int i = 0; i < matrix.length; i++) {
            for (int j = 0; j < matrix[i].length; j++) {
                st.resetSimilarity();
                st.addDocument(readSuffixTreeDocument(reader, i));
                st.addDocument(readSuffixTreeDocument(reader, j));
                matrix[i][j] = st.getSimilarity(type);
            }
        }
        for (double[] row : matrix) {
            for (double value : row) {
                System.out.print(df.format(value) + "\t");
            }
            System.out.println("");
        }
    }
}