MatrixBasedTermSimilarityNetwork.java example

Explorer
Foundry-master
- Components
/*
 * File:                MatrixBasedTermSimilarityNetwork.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright March 18, 2009, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive 
 * license for use of this work by or on behalf of the U.S. Government. Export 
 * of this program may require a license from the United States Government. 
 * See CopyrightHistory.txt for complete details.
 * 
 */

package gov.sandia.cognition.text.term.relation;

import gov.sandia.cognition.math.matrix.DimensionalityMismatchException;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.VectorEntry;
import gov.sandia.cognition.text.relation.RelationNetwork;
import gov.sandia.cognition.text.term.IndexedTerm;
import gov.sandia.cognition.text.term.Term;
import gov.sandia.cognition.text.term.TermIndex;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;

/**
 * A relation network between terms based on their similarity. The similarity
 * values are stored in an underlying matrix.
 * 
 * @author  Justin Basilico
 * @since   3.0
 */
public class MatrixBasedTermSimilarityNetwork
    implements RelationNetwork<IndexedTerm, IndexedTermSimilarityRelation>
{

    /** The index of terms. */
    protected TermIndex termIndex;

    /** The similarities between terms. */
    protected Matrix similarities;

    /**
     * Creates a new {@code MatrixBasedTermSimilarityNetwork}.
     *
     * @param   termIndex
     *      The index of terms that contains the nodes of the network.
     * @param   similarities
     *      The square matrix of similarities between terms. Must have a number
     *      of rows and columns equal to the number of terms in the term index.
     * @throws  DimensionalityMismatchException
     *      If the number of rows or the number of columns of the matrix
     *      differs from the number of terms in the term index.
     */
    public MatrixBasedTermSimilarityNetwork(
        final TermIndex termIndex,
        final Matrix similarities)
    {
        super();

        // Look the term count up once; both dimensions are compared to it.
        final int termCount = termIndex.getTermCount();

        if (similarities.getNumRows() != termCount ||
            similarities.getNumColumns() != termCount)
        {
            throw new DimensionalityMismatchException(
                  "the number of terms in the term index must match the "
                + "dimensions of the square similarities matrix");
        }

        this.setTermIndex(termIndex);
        this.setSimilarities(similarities);
    }

    /**
     * Gets the similarity between the two given terms.
     *
     * @param   sourceTerm
     *      The source term.
     * @param   targetTerm
     *      The target term.
     * @return
     *      The similarity between the two given terms if both exist in the
     *      term index. Otherwise, 0.0 is returned.
     */
    public double getSimilarity(
        final Term sourceTerm,
        final Term targetTerm)
    {
        final IndexedTerm source = this.termIndex.getIndexedTerm(sourceTerm);
        final IndexedTerm target = this.termIndex.getIndexedTerm(targetTerm);

        if (source == null || target == null)
        {
            // At least one of the terms has no entry in the index.
            return 0.0;
        }

        return this.getSimilarity(source, target);
    }

    /**
     * Gets the similarity between the two given indexed terms.
     *
     * @param   source
     *      The source term.
     * @param   target
     *      The target term.
     * @return
     *      The similarity stored in the matrix at the indices of the two
     *      terms. If either term is null, 0.0 is returned.
     */
    public double getSimilarity(
        final IndexedTerm source,
        final IndexedTerm target)
    {
        if (source == null || target == null)
        {
            return 0.0;
        }

        // TODO: Should we enforce that the indexed terms are valid here? IE:
        // That they actually match with the term index.
        return this.getSimilarity(source.getIndex(), target.getIndex());
    }

    /**
     * Gets the similarity between the terms at the two given indices.
     *
     * @param   sourceIndex
     *      The index of the source term (the matrix row).
     * @param   targetIndex
     *      The index of the target term (the matrix column).
     * @return
     *      The element of the similarities matrix at the given row and
     *      column. The indices are not validated here; they are passed
     *      directly to the matrix.
     */
    public double getSimilarity(
        final int sourceIndex,
        final int targetIndex)
    {
        // TODO: Should we enforce the indices here or just let the matrix
        // barf on them?
        return this.similarities.getElement(sourceIndex, targetIndex);
    }

    /**
     * Gets the number of objects (terms) in the network.
     *
     * @return
     *      The number of terms in the term index.
     */
    public int getObjectCount()
    {
        return this.termIndex.getTermCount();
    }

    /**
     * Gets the objects (terms) in the network.
     *
     * @return
     *      A new set holding the terms of the term index. Changing the
     *      returned set does not change this network's fields.
     */
    public Set<IndexedTerm> getObjects()
    {
        return new LinkedHashSet<IndexedTerm>(this.termIndex.getTerms());
    }

    /**
     * Determines if the given object is a node of this network.
     *
     * @param   o
     *      The object to test. May be null.
     * @return
     *      True if the object is an {@code IndexedTerm} that the term index
     *      reports as one of its indexed terms; otherwise, false.
     */
    public boolean isObject(
        final Object o)
    {
        // instanceof is false for null, so no separate null check is needed.
        return o instanceof IndexedTerm &&
            this.termIndex.hasIndexedTerm((IndexedTerm) o);
    }

    /**
     * Determines if there is a relation from the source to the target. A
     * relation exists when the similarity between the two is not 0.0.
     *
     * @param   source
     *      The source term. May be null.
     * @param   target
     *      The target term. May be null.
     * @return
     *      True if both terms are non-null and their similarity is not 0.0;
     *      otherwise, false.
     */
    public boolean hasRelation(
        final IndexedTerm source,
        final IndexedTerm target)
    {
        return source != null && target != null
            && this.getSimilarity(source, target) != 0.0;
    }

    /**
     * Gets the relation from the source to the target, if there is one.
     *
     * @param   source
     *      The source term. May be null.
     * @param   target
     *      The target term. May be null.
     * @return
     *      A relation carrying the similarity between the two terms, or null
     *      if either term is null or the similarity is 0.0.
     */
    public IndexedTermSimilarityRelation getRelation(
        final IndexedTerm source,
        final IndexedTerm target)
    {
        if (source == null || target == null)
        {
            return null;
        }

        // Go through getSimilarity, as hasRelation does, so that the two
        // methods always agree on whether a relation exists.
        final double similarity = this.getSimilarity(source, target);

        if (similarity == 0.0)
        {
            // A zero similarity means there is no relation.
            return null;
        }

        return new IndexedTermSimilarityRelation(source, target, similarity);
    }

    /**
     * Gets all the relations from the source to the target. There is at most
     * one such relation in this network.
     *
     * @param   source
     *      The source term. May be null.
     * @param   target
     *      The target term. May be null.
     * @return
     *      A singleton set with the relation between the two terms, or an
     *      empty set if there is no such relation.
     */
    public Set<IndexedTermSimilarityRelation> getAllRelations(
        final IndexedTerm source,
        final IndexedTerm target)
    {
        // This is a singleton relationship.
        final IndexedTermSimilarityRelation relation =
            this.getRelation(source, target);

        if (relation == null)
        {
            return Collections.emptySet();
        }

        return Collections.singleton(relation);
    }

    /**
     * Gets the source of the given relation.
     *
     * @param   relation
     *      The relation. May be null.
     * @return
     *      The source of the relation, or null if the relation is null.
     */
    public IndexedTerm getRelationSource(
        final IndexedTermSimilarityRelation relation)
    {
        if (relation == null)
        {
            return null;
        }

        return relation.getSource();
    }

    /**
     * Gets the target of the given relation.
     *
     * @param   relation
     *      The relation. May be null.
     * @return
     *      The target of the relation, or null if the relation is null.
     */
    public IndexedTerm getRelationTarget(
        final IndexedTermSimilarityRelation relation)
    {
        if (relation == null)
        {
            return null;
        }

        return relation.getTarget();
    }

    /**
     * Gets all the relations that involve the given term, as either the
     * source or the target.
     *
     * @param   term
     *      The term. May be null.
     * @return
     *      A new set with the relations from the term followed by the
     *      relations to the term. It is empty if the term is null.
     */
    public Set<IndexedTermSimilarityRelation> relationsOf(
        final IndexedTerm term)
    {
        final LinkedHashSet<IndexedTermSimilarityRelation> result =
            new LinkedHashSet<IndexedTermSimilarityRelation>();

        // The set is meant to collapse the self-relation, which shows up in
        // both the outgoing and the incoming relations.
        // NOTE(review): this relies on IndexedTermSimilarityRelation defining
        // value-based equals/hashCode - confirm.
        result.addAll(this.relationsFrom(term));
        result.addAll(this.relationsTo(term));
        return result;
    }

    /**
     * Gets all the relations where the given term is the source.
     *
     * @param   source
     *      The source term. May be null.
     * @return
     *      A new set with one relation per non-zero entry visited in the
     *      matrix row of the source term. It is empty if the source is null.
     */
    public Set<IndexedTermSimilarityRelation> relationsFrom(
        final IndexedTerm source)
    {
        final LinkedHashSet<IndexedTermSimilarityRelation> result =
            new LinkedHashSet<IndexedTermSimilarityRelation>();

        if (source == null)
        {
            // Consistent with getRelation and hasRelation: a null term has
            // no relations.
            return result;
        }

        // Walk the row of the source term; each entry is a possible target.
        for (VectorEntry entry
            : this.similarities.getRow(source.getIndex()))
        {
            final double similarity = entry.getValue();

            if (similarity != 0.0)
            {
                final IndexedTerm target = this.termIndex.getIndexedTerm(
                    entry.getIndex());
                result.add(new IndexedTermSimilarityRelation(
                    source, target, similarity));
            }
            // else - We ignore zero similarities.
        }

        return result;
    }

    /**
     * Gets all the relations where the given term is the target.
     *
     * @param   target
     *      The target term. May be null.
     * @return
     *      A new set with one relation per non-zero entry visited in the
     *      matrix column of the target term. It is empty if the target is
     *      null.
     */
    public Set<IndexedTermSimilarityRelation> relationsTo(
        final IndexedTerm target)
    {
        final LinkedHashSet<IndexedTermSimilarityRelation> result =
            new LinkedHashSet<IndexedTermSimilarityRelation>();

        if (target == null)
        {
            // Consistent with getRelation and hasRelation: a null term has
            // no relations.
            return result;
        }

        // Walk the column of the target term; each entry is a possible
        // source.
        for (VectorEntry entry
            : this.similarities.getColumn(target.getIndex()))
        {
            final double similarity = entry.getValue();

            if (similarity != 0.0)
            {
                final IndexedTerm source = this.termIndex.getIndexedTerm(
                    entry.getIndex());
                result.add(new IndexedTermSimilarityRelation(
                    source, target, similarity));
            }
            // else - We ignore zero similarities.
        }

        return result;
    }

    /**
     * Gets the index of terms.
     *
     * @return
     *      The index of terms.
     */
    public TermIndex getTermIndex()
    {
        return this.termIndex;
    }

    /**
     * Sets the index of terms.
     *
     * @param   termIndex
     *      The index of terms.
     */
    protected void setTermIndex(
        final TermIndex termIndex)
    {
        this.termIndex = termIndex;
    }

    /**
     * Gets the similarities between terms.
     *
     * @return
     *      The similarities between terms.
     */
    public Matrix getSimilarities()
    {
        return this.similarities;
    }

    /**
     * Sets the similarities between terms. The dimensions of the matrix are
     * not checked against the term index here; that check is only done in
     * the constructor.
     *
     * @param   similarities
     *      The similarities between terms.
     */
    protected void setSimilarities(
        final Matrix similarities)
    {
        this.similarities = similarities;
    }

}