StcNode.java example

Explorer

CaliphEmir-master
- caliphemir
  - src
    - at
  - test
    - at
      - lux
        fotoannotation
        ComponentsTest.java
        FileTreeTest.java
        fotoretrieval
        FastMapGraphTest.java
        FastMapTest.java
        GraphConstructionTest.java
        PointPanel.java
        SimilarityMatrixTest.java
        lucene
        GraphAnalyzerTest.java
        LabeledGraphTest.java
        panels
        TestConfigurationDialog.java
        retrievalengines
        GraphTest.java
        LucenePathIndexRetrievalEngineTest.java
        LuceneRetrievalEngineTest.java
        graphviz
        SpringEmbedderTest.java
        SpringEmbedderVis.java
        imageanalysis
        ColorLayoutTest.java
        ColorStructureTest.java
        ColorTest.java
        DominantColorTest.java
        EdgeHistogramTest.java
        ScalableColorTest.java
        db
        DerbyTest.java
        imaging
        BmpReaderTest.java
        PpmReaderTest.java
        retrieval
        StcTest.java
        evaluation
        SuffixTreeEvaluation.java
        graphisomorphism
        FastSubgraphIsomorphismTest.java
        SubgraphIsomorphismTest.java
        metrics
        BooleanNodeDistanceFunctionTest.java
        SimpleEdgeDistanceFunctionTest.java
        TermVectorNodeDistanceFunctionTest.java
        suffixtreemodel
        SuffixTreeTest.java
        vectorspace
        ElementTextVectorSimilarityTest.java
        GraphVectorSimilarityTest.java

/*
 * This file is part of Caliph & Emir.
 *
 * Caliph & Emir is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Caliph & Emir is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Caliph & Emir; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
 * http://www.juggle.at, http://caliph-emir.sourceforge.net
 */
package at.lux.retrieval.clustering.suffixtree;

import at.lux.retrieval.clustering.AbstractSuffixTree;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

/**
 * Node of the suffix tree in package {@code at.lux.retrieval.clustering.suffixtree}.
 * <p>
 * A node stores
 * <ul>
 * <li>its child nodes, keyed by the phrase token that labels the edge to the child,</li>
 * <li>the documents whose inserted phrase ends exactly at this node, and</li>
 * <li>a cache of all documents stored in the subtree below this node.</li>
 * </ul>
 * While phrases are inserted, the node registers and unregisters itself as a
 * {@code BaseCluster} with the {@code AbstractSuffixTree} passed to
 * {@link #add(List, StcDocument, int, String, AbstractSuffixTree)}.
 *
 * @author Mathias Lux, mathias@juggle.at
 *         Date: 03.06.2004
 *         Time: 15:30:41
 */
public class StcNode {
    /** Child nodes, keyed by the (untrimmed) phrase token labelling the edge to the child. */
    private final HashMap<String, StcNode> children = new HashMap<String, StcNode>();
    /** Documents whose inserted phrase ends exactly at this node. */
    private final HashSet<StcDocument> documents = new HashSet<StcDocument>();
    /** Cache: every document stored at any node below this one. */
    private final HashSet<StcDocument> allChildrensDocuments = new HashSet<StcDocument>();

    /**
     * Creates an empty node without children and without documents.
     */
    public StcNode() {
    }

    /**
     * Inserts a phrase for a document into the subtree rooted at this node and keeps
     * the base cluster bookkeeping of {@code tree} in sync.
     * <p>
     * The steps are, in this order:
     * <ol>
     * <li>if this node is currently registered as base cluster, the registration is removed;</li>
     * <li>the first token of {@code phrase} is removed and the rest is handed to the matching
     * child (created on demand), or - if {@code phrase} is empty - the document is stored here;</li>
     * <li>if this node now qualifies as base cluster and has a non-empty prefix, a new
     * {@code BaseCluster} is created and registered, provided its score is greater than zero.</li>
     * </ol>
     * The consistency checks are plain {@code assert} statements and therefore only
     * evaluated when assertions are enabled.
     *
     * @param phrase      remaining tokens to insert; <b>this list is consumed</b>: one leading
     *                    element is removed per tree level, so it must support {@code remove(int)}
     * @param stcDocument document the phrase belongs to
     * @param suffix      not evaluated by this class, only passed on to the child nodes
     * @param prefix      trimmed tokens on the path from the root to this node, joined by single
     *                    blanks; the empty string for the root
     * @param tree        tree whose node-to-cluster index, base clusters, touched clusters and
     *                    similarity matrix are updated
     */
    public void add(List<String> phrase, StcDocument stcDocument, int suffix, String prefix, AbstractSuffixTree tree) {
        unregisterBaseCluster(tree);
        insertPhrase(phrase, stcDocument, suffix, prefix, tree);
        registerBaseCluster(prefix, tree);
    }

    /**
     * Removes the base cluster registered for this node (if any) from all structures of the tree.
     * This has to happen before the node's document sets change.
     */
    private void unregisterBaseCluster(AbstractSuffixTree tree) {
        if (!tree.getNodeToClusterIndex().containsKey(this)) {
            return;
        }
        // remove from the index and retrieve the BaseCluster in one step:
        BaseCluster cluster = tree.getNodeToClusterIndex().remove(this);
        assert cluster != null;
        tree.getSimilarityMatrix().removeBaseCluster(cluster);
        boolean clusterRemove = tree.getBaseClusters().remove(cluster);
        assert clusterRemove;
        tree.getTouchedClusters().remove(cluster);
    }

    /**
     * Descends along the first token of the phrase, or stores the document here when the
     * phrase is exhausted.
     */
    private void insertPhrase(List<String> phrase, StcDocument stcDocument, int suffix, String prefix, AbstractSuffixTree tree) {
        if (phrase.isEmpty()) {
            // the phrase ends at this node:
            documents.add(stcDocument);
            return;
        }

        boolean hasPrefix = prefix.length() > 0;
        // the untrimmed token is the edge label, the trimmed one extends the prefix:
        String edgeLabel = phrase.remove(0);
        String token = edgeLabel.trim();
        String newPrefix = hasPrefix ? prefix + " " + token : token;

        StcNode child = children.get(edgeLabel);
        if (child == null) {
            // edge does not exist -> create the node, fill it, then attach it
            child = new StcNode();
            child.add(phrase, stcDocument, suffix, newPrefix, tree);
            children.put(edgeLabel, child);
        } else {
            // edge with this label already exists -> insert below it
            child.add(phrase, stcDocument, suffix, newPrefix, tree);
        }

        // Keep the cache up to date. Every earlier insertion through this node already put
        // its document into the cache, so the document just inserted is the only element of
        // the child's subtree that can still be missing - no need to copy the child's set.
        allChildrensDocuments.add(stcDocument);
    }

    /**
     * Registers this node as base cluster if it qualifies after the insertion; otherwise
     * verifies that no stale registration is left in the index.
     */
    private void registerBaseCluster(String prefix, AbstractSuffixTree tree) {
        if (isBaseClusterCandidate() && prefix.length() > 0) {
            BaseCluster cluster = new BaseCluster(getAllChildrensDocuments(), this, prefix, tree.getIndex(), tree.getStopwords());
            if (cluster.getScore() > 0.0) {
                boolean addSuccessful = tree.getBaseClusters().add(cluster);
                assert addSuccessful;
                BaseCluster previousValue = tree.getNodeToClusterIndex().put(this, cluster);
                assert previousValue == null;
                boolean addToTouchedClusters = tree.getTouchedClusters().add(cluster);
                assert addToTouchedClusters;
                // the similarity matrix has to know the cluster so its values can be recalculated:
                tree.getSimilarityMatrix().addBaseCluster(cluster);
            }
        } else {
            // consistency check only: a node is not expected to change from being a base
            // cluster to being none.
            BaseCluster wasCluster = tree.getNodeToClusterIndex().get(this);
            assert wasCluster == null;
        }
    }

    /**
     * Returns the documents of this node together with the documents of all descending nodes.
     *
     * @return a newly created set; modifying it does not affect this node
     */
    private HashSet<StcDocument> getAllChildrensDocuments() {
        HashSet<StcDocument> result = new HashSet<StcDocument>(allChildrensDocuments);
        result.addAll(documents);
        return result;
    }

    /**
     * Checks whether this node can be a base cluster. All of the following must hold:
     * <ul>
     * <li>the cached subtree documents and the node's own documents count more than one in sum,</li>
     * <li>at least one of the node's own documents is not contained in the subtree cache,</li>
     * <li>the number of children plus the number of own documents is greater than one.</li>
     * </ul>
     * NOTE(review): {@code containsAll} is true for an empty collection, so a node without own
     * documents is never reported as candidate - confirm that this is intended.
     *
     * @return true if the node qualifies as base cluster, false if not.
     */
    private boolean isBaseClusterCandidate() {
        return allChildrensDocuments.size() + documents.size() > 1
                && !allChildrensDocuments.containsAll(documents)
                && children.size() + documents.size() > 1;
    }

}