PathSuffixTree.java example

Explorer

CaliphEmir-master
- caliphemir
  - src
    - at
  - test
    - at
      - lux
        fotoannotation
        ComponentsTest.java
        FileTreeTest.java
        fotoretrieval
        FastMapGraphTest.java
        FastMapTest.java
        GraphConstructionTest.java
        PointPanel.java
        SimilarityMatrixTest.java
        lucene
        GraphAnalyzerTest.java
        LabeledGraphTest.java
        panels
        TestConfigurationDialog.java
        retrievalengines
        GraphTest.java
        LucenePathIndexRetrievalEngineTest.java
        LuceneRetrievalEngineTest.java
        graphviz
        SpringEmbedderTest.java
        SpringEmbedderVis.java
        imageanalysis
        ColorLayoutTest.java
        ColorStructureTest.java
        ColorTest.java
        DominantColorTest.java
        EdgeHistogramTest.java
        ScalableColorTest.java
        db
        DerbyTest.java
        imaging
        BmpReaderTest.java
        PpmReaderTest.java
        retrieval
        StcTest.java
        evaluation
        SuffixTreeEvaluation.java
        graphisomorphism
        FastSubgraphIsomorphismTest.java
        SubgraphIsomorphismTest.java
        metrics
        BooleanNodeDistanceFunctionTest.java
        SimpleEdgeDistanceFunctionTest.java
        TermVectorNodeDistanceFunctionTest.java
        suffixtreemodel
        SuffixTreeTest.java
        vectorspace
        ElementTextVectorSimilarityTest.java
        GraphVectorSimilarityTest.java

/*
 * This file is part of Caliph & Emir.
 *
 * Caliph & Emir is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Caliph & Emir is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Caliph & Emir; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
 * http://www.juggle.at, http://caliph-emir.sourceforge.net
 */
package at.lux.retrieval.clustering;

import at.lux.fotoretrieval.lucene.Relation;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Suffix tree whose "sentences" are graph paths: every line of the input
 * phrase is one path, and every whitespace separated entry of a line is one
 * token (either a numeric node id or a relation name).
 * <p/>
 * Date: 18.10.2005 <br>
 * Time: 09:32:39 <br>
 *
 * @author Mathias Lux, mathias@juggle.at
 */
public class PathSuffixTree extends AbstractSuffixTree{
    /**
     * Defines the way relations are handled in this tree. Either they are left out or they
     * are only used as types with no direction indicator, or they are used as they appear in
     * the path.
     */
    public enum Type {NoRelations, UndirectedRelation, FullRelations}

    /** Tokens consisting solely of digits are treated as node ids; compiled once, not per token. */
    private static final Pattern NODE_ID = Pattern.compile("\\d+");

    private final Type type;


    /**
     * Creates a tree using the default type
     * {@link at.lux.retrieval.clustering.PathSuffixTree.Type#NoRelations},
     * i.e. only the node ids of a path are used as tokens.
     * The minimum sentence size is set to 0 by this constructor.
     */
    public PathSuffixTree() {
        super();
        this.type = Type.NoRelations;
        MIN_SENTENCE_SIZE = 0;
    }

    /**
     * Create a new PathSuffixTree of given type.
     * <p/>
     * NOTE(review): unlike the no-argument constructor this one does not set
     * MIN_SENTENCE_SIZE to 0 - confirm against AbstractSuffixTree whether
     * that difference is intended.
     *
     * @param type selects how relation tokens are handled by {@link #getTokens(String)}.
     */
    public PathSuffixTree(Type type) {
        super();
        this.type = type;
    }


    /**
     * The Tokens of the Path are created here. The sentence is split at
     * whitespace; empty entries caused by leading or repeated whitespace are
     * skipped. Purely numeric tokens (node ids) are always kept. All other
     * tokens are treated as relations and handled depending on the
     * {@link at.lux.retrieval.clustering.PathSuffixTree.Type} given in the
     * constructor:
     * <ul>
     * <li>NoRelations: relations are dropped,</li>
     * <li>FullRelations: relations are kept as they appear in the path,</li>
     * <li>UndirectedRelation: relations that are no key of
     * {@link Relation#relationMapping} are replaced by the result of
     * {@link Relation#invertRelationType}.</li>
     * </ul>
     * @param sentence gives the sentence (one path) to tokenize.
     * @return the tokens, never containing empty strings.
     */
    protected String[] getTokens(String sentence) {
        String[] raw = sentence.split("\\s");
        List<String> tokens = new ArrayList<String>(raw.length);
        for (String token : raw) {
            if (token.length() == 0) {
                // split("\\s") yields empty strings for leading or consecutive
                // whitespace; they are neither node nor relation.
                continue;
            }
            if (NODE_ID.matcher(token).matches()) {
                tokens.add(token);
            } else if (type == Type.UndirectedRelation) {
                tokens.add(toUndirected(token));
            } else if (type != Type.NoRelations) {
                // FullRelations (or no type set): keep the relation untouched.
                tokens.add(token);
            }
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Maps a relation name to its direction independent form: keys of the
     * mapping table are returned unchanged, everything else is inverted.
     */
    private static String toUndirected(String relation) {
        if (Relation.relationMapping.containsKey(relation)) {
            return relation;
        }
        return Relation.invertRelationType(relation);
    }

    /**
     * The paths are provided within a String, where each
     * line represents one path. This single path is interpreted as sentence.
     * @param phrase all paths, separated by newline characters.
     * @return the paths, one array entry per line.
     */
    protected String[] getSentences(String phrase) {
        return phrase.split("\\n");
    }

    /**
     * No token filtering is applied for paths.
     * @param tokens the tokens of one path.
     * @return the given array, unchanged.
     */
    protected String[] filterTokens(String[] tokens) {
        return tokens;
    }

}