/*
* This file is part of Caliph & Emir.
*
* Caliph & Emir is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Caliph & Emir is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Caliph & Emir; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright statement:
* --------------------
* (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
* http://www.juggle.at, http://caliph-emir.sourceforge.net
*/
package at.lux.retrieval.clustering;
import at.lux.fotoretrieval.lucene.Relation;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Suffix tree variant that works on paths instead of natural language text: every line of
 * the input is treated as one sentence (one path) and every whitespace separated element of
 * a line is one token. Tokens consisting only of digits are treated as nodes, all other
 * tokens are treated as relations.
 * <p/>
 * Date: 18.10.2005 <br>
 * Time: 09:32:39 <br>
 *
 * @author Mathias Lux, mathias@juggle.at
 */
public class PathSuffixTree extends AbstractSuffixTree {
    /**
     * Defines the way relations are handled in this tree. Either they are left out
     * ({@code NoRelations}), or they are only used as types with no direction indicator
     * ({@code UndirectedRelation}), or they are used as they appear in the path
     * ({@code FullRelations}).
     */
    public enum Type {NoRelations, UndirectedRelation, FullRelations}

    /**
     * Matches tokens that consist of digits only; such tokens are treated as nodes.
     * Compiled once here instead of once per token via {@link String#matches(String)}.
     */
    private static final Pattern NODE_PATTERN = Pattern.compile("\\d+");

    private Type type = Type.NoRelations;

    /**
     * Creates a tree using the default type
     * {@link at.lux.retrieval.clustering.PathSuffixTree.Type} NoRelations and sets
     * MIN_SENTENCE_SIZE (declared outside this class, presumably in the super class) to 0.
     */
    public PathSuffixTree() {
        super();
        MIN_SENTENCE_SIZE = 0;
    }

    /**
     * Create a new PathSuffixTree of given type.
     * <p/>
     * NOTE(review): unlike the no-argument constructor this one does not set
     * MIN_SENTENCE_SIZE to 0 -- confirm whether that difference is intended.
     *
     * @param type defines how relations are handled when a path is tokenized.
     */
    public PathSuffixTree(Type type) {
        super();
        this.type = type;
    }

    /**
     * The tokens of the path are created here. The sentence is split at whitespace and,
     * depending on the {@link at.lux.retrieval.clustering.PathSuffixTree.Type} selected in
     * the constructor, the elements are handled as follows:
     * <ul>
     * <li>NoRelations: only the nodes (digit-only tokens) are kept.</li>
     * <li>UndirectedRelation: nodes are kept as they are, every relation is kept but
     * replaced by its inverse if it is no key of {@code Relation.relationMapping}.</li>
     * <li>FullRelations (and an unset type): all tokens are kept as they appear.</li>
     * </ul>
     * Empty elements, which are produced by leading or consecutive whitespace and by an
     * empty sentence, are never returned as tokens and are never handed to
     * {@code Relation} as relation names.
     *
     * @param sentence gives the sentence to tokenize.
     * @return the tokens, an empty array if the sentence contains none.
     */
    protected String[] getTokens(String sentence) {
        String[] parts = sentence.split("\\s");
        List<String> tokens = new ArrayList<String>(parts.length);
        for (int i = 0; i < parts.length; i++) {
            String part = parts[i];
            if (part.length() == 0) {
                // artifact of splitting at single whitespace characters, not a real token
                continue;
            }
            boolean isNode = NODE_PATTERN.matcher(part).matches();
            if (type == Type.NoRelations) {
                // strip all relations: only nodes survive
                if (isNode) {
                    tokens.add(part);
                }
            } else if (type == Type.UndirectedRelation) {
                tokens.add(isNode ? part : toUndirected(part));
            } else {
                // FullRelations (or no type set): keep the path as it is
                tokens.add(part);
            }
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Returns the relation itself if it is a key of {@code Relation.relationMapping},
     * otherwise the result of {@code Relation.invertRelationType} for it. Presumably this
     * maps a relation and its inverse onto the same token -- verify against Relation.
     */
    private static String toUndirected(String relation) {
        if (Relation.relationMapping.containsKey(relation)) {
            return relation;
        }
        return Relation.invertRelationType(relation);
    }

    /**
     * The paths are provided within a String, where each line represents one path.
     * This single path is interpreted as sentence.
     *
     * @param phrase the paths, separated by newline characters.
     * @return the paths, one single path per array element.
     */
    protected String[] getSentences(String phrase) {
        return phrase.split("\\n");
    }

    /**
     * No filtering takes place for paths.
     *
     * @param tokens the tokens of one path.
     * @return the very same array that was passed in.
     */
    protected String[] filterTokens(String[] tokens) {
        return tokens;
    }
}