/*
* Copyright 2004-2010 Information & Software Engineering Group (188/1)
* Institute of Software Technology and Interactive Systems
* Vienna University of Technology, Austria
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package at.tuwien.ifs.somtoolbox.data;
import at.tuwien.ifs.somtoolbox.apps.helper.SOMLibInputMerger;
import at.tuwien.ifs.somtoolbox.util.StringUtils;
/**
* This class represents one element or attribute of the {@link TemplateVector}.
*
* @author Rudolf Mayer
* @version $Id: TemplateVectorElement.java 3883 2010-11-02 17:13:23Z frank $
*/
public class TemplateVectorElement implements Comparable<TemplateVectorElement> {

    /** Sentinel stored in the statistic fields while no value has been set. */
    private static final int UNSET = -1;

    /** The template vector this element was created for. */
    private final TemplateVector tv;

    /**
     * The label or name associated with this attribute.
     */
    private String label;

    /**
     * Indicates in how many documents or feature vectors this attribute is present, i.e. has a non-zero input vector
     * value. {@code -1} while unset.
     */
    private int documentFrequency = UNSET;

    /**
     * The term frequency in the whole collection - how often does this attribute show up in the whole collection of
     * feature vectors, i.e. a counter for the attribute, the sum of all values of the attribute (sum across all feature
     * vectors). {@code -1} while unset.
     */
    private int collectionTermFrequency = UNSET;

    /**
     * Minimum value of this attribute in the collection of feature vectors. {@code -1} while unset.
     */
    private int minimumTermFrequency = UNSET;

    /**
     * Maximum value of this attribute in the collection of feature vectors. {@code -1} while unset.
     */
    private int maximumTermFrequency = UNSET;

    /**
     * Mean value of this attribute in the collection of feature vectors. {@code -1} while unset.
     */
    private double meanTermFrequency = UNSET;

    /**
     * Optional comment for this attribute.
     */
    private String comment = null;

    /** Index given at construction time (presumably the position within the template vector - TODO confirm). */
    private int index;

    /**
     * Gets the index.
     *
     * @return the index this element was constructed with
     */
    public int getIndex() {
        return index;
    }

    /**
     * Creates an element without document frequency and collection term frequency information (both are left at
     * {@code -1}).
     *
     * @param tv the template vector this element belongs to
     * @param label the label or name of the attribute
     * @param index the index of the attribute
     */
    public TemplateVectorElement(TemplateVector tv, String label, int index) {
        this(tv, label, index, UNSET, UNSET);
    }

    /**
     * Creates an element with the given frequency information. Minimum, maximum and mean term frequency stay unset
     * ({@code -1}) until assigned through their setters.
     *
     * @param tv the template vector this element belongs to
     * @param label the label or name of the attribute
     * @param index the index of the attribute
     * @param documentFrequency the document frequency, or {@code -1} if unknown
     * @param documentTermFrequency the value stored as the collection term frequency, or {@code -1} if unknown
     */
    public TemplateVectorElement(TemplateVector tv, String label, int index, int documentFrequency,
            int documentTermFrequency) {
        this.tv = tv;
        this.index = index;
        this.label = label;
        this.documentFrequency = documentFrequency;
        this.collectionTermFrequency = documentTermFrequency;
    }

    /**
     * Gets the document frequency.
     *
     * @return the document frequency, or {@code -1} if unset
     */
    public int getDocumentFrequency() {
        return documentFrequency;
    }

    /**
     * Sets the document frequency.
     *
     * @param documentFrequency the new document frequency
     */
    public void setDocumentFrequency(int documentFrequency) {
        this.documentFrequency = documentFrequency;
    }

    /**
     * Gets the term frequency in the whole collection.
     *
     * @return the frequency of this term in the whole collection, or {@code -1} if unset
     */
    public int getCollectionTermFrequency() {
        return collectionTermFrequency;
    }

    /**
     * Sets the term frequency in the whole collection.
     *
     * @param collectionTermFrequency the new collection term frequency
     */
    public void setCollectionTermFrequency(int collectionTermFrequency) {
        this.collectionTermFrequency = collectionTermFrequency;
    }

    /**
     * Gets the label.
     *
     * @return the label of this attribute
     */
    public String getLabel() {
        return label;
    }

    /**
     * Sets the label.
     *
     * @param label the new label of this attribute
     */
    protected void setLabel(String label) {
        this.label = label;
    }

    /**
     * Gets the comment.
     *
     * @return the comment attached to this attribute, may be {@code null}
     */
    public String getComment() {
        return comment;
    }

    /**
     * Sets the comment.
     *
     * @param comment the new comment for this attribute
     */
    public void setComment(String comment) {
        this.comment = comment;
    }

    /**
     * Gets the maximum tf.
     *
     * @return the maximum value of this attribute in the collection of feature vectors, or {@code -1} if unset
     */
    public int getMaximumTermFrequency() {
        return maximumTermFrequency;
    }

    /**
     * Sets the maximum tf.
     *
     * @param maximumTermFrequency the new maximum term frequency
     */
    public void setMaximumTermFrequency(int maximumTermFrequency) {
        this.maximumTermFrequency = maximumTermFrequency;
    }

    /**
     * Gets the mean tf.
     *
     * @return the mean value of this attribute in the collection of feature vectors, or {@code -1} if unset
     */
    public double getMeanTermFrequency() {
        return meanTermFrequency;
    }

    /**
     * Sets the mean tf.
     *
     * @param meanTermFrequency the new mean term frequency
     */
    public void setMeanTermFrequency(double meanTermFrequency) {
        this.meanTermFrequency = meanTermFrequency;
    }

    /**
     * Gets the minimum tf.
     *
     * @return the minimum value of this attribute in the collection of feature vectors, or {@code -1} if unset
     */
    public int getMinimumTermFrequency() {
        return minimumTermFrequency;
    }

    /**
     * Sets the minimum tf.
     *
     * @param minimumTermFrequency the new minimum term frequency
     */
    public void setMinimumTermFrequency(int minimumTermFrequency) {
        this.minimumTermFrequency = minimumTermFrequency;
    }

    /**
     * Compares two {@link TemplateVectorElement}s by comparing the two labels.
     *
     * @see String#compareTo(String)
     */
    @Override
    public int compareTo(TemplateVectorElement o) {
        return label.compareTo(o.getLabel());
    }

    /**
     * Returns the label, padded with spaces up to the longest string length reported by the template vector, followed
     * by one tab-separated entry for every statistic that is set (i.e. differs from {@code -1}) and for a non-blank
     * comment.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(label).append(StringUtils.getSpaces(tv.getLongestStringLength() - label.length()));
        if (documentFrequency != UNSET) {
            sb.append("\tdf: ").append(documentFrequency);
        }
        if (collectionTermFrequency != UNSET) {
            sb.append("\ttf: ").append(collectionTermFrequency);
        }
        if (minimumTermFrequency != UNSET) {
            sb.append("\tminTf: ").append(minimumTermFrequency);
        }
        if (maximumTermFrequency != UNSET) {
            sb.append("\tmaxTf: ").append(maximumTermFrequency);
        }
        if (meanTermFrequency != UNSET) {
            sb.append("\tmeanTf: ").append(StringUtils.format(meanTermFrequency, 1));
        }
        if (org.apache.commons.lang.StringUtils.isNotBlank(comment)) {
            sb.append("\tcomment: ").append(comment);
        }
        return sb.toString();
    }

    /**
     * Merge the statistical information of the current template vector element with another element, used e.g. in
     * {@link SOMLibInputMerger}.
     * <p>
     * A statistic that is unset ({@code -1}) on one side does not distort the result: sums, minimum and maximum then
     * take the value of the side that is set, and a statistic that is unset on both sides stays unset. For the mean,
     * the result is the average of both means weighted by the number of vectors of the respective template vector; a
     * mean that is unset on only one side contributes as 0.
     * <p>
     * (The misspelled method name is kept for compatibility with existing callers.)
     *
     * @param other the element whose statistics are merged into this one; it is not modified
     */
    public void mergeStatiscticsWithOtherElement(TemplateVectorElement other) {
        setCollectionTermFrequency(sumOfSet(getCollectionTermFrequency(), other.getCollectionTermFrequency()));
        setDocumentFrequency(sumOfSet(getDocumentFrequency(), other.getDocumentFrequency()));
        setMaximumTermFrequency(maxOfSet(getMaximumTermFrequency(), other.getMaximumTermFrequency()));
        setMinimumTermFrequency(minOfSet(getMinimumTermFrequency(), other.getMinimumTermFrequency()));

        double ownMean = getMeanTermFrequency();
        double otherMean = other.getMeanTermFrequency();
        if (ownMean == UNSET && otherMean == UNSET) {
            return; // no mean known on either side, keep it unset
        }
        double ownVectors = tv.numVectors();
        double otherVectors = other.tv.numVectors();
        double totalVectors = ownVectors + otherVectors;
        if (totalVectors == 0) {
            return; // nothing to weight by; dividing would store NaN
        }
        double ownPart = ownMean == UNSET ? 0 : ownVectors * ownMean;
        double otherPart = otherMean == UNSET ? 0 : otherVectors * otherMean;
        setMeanTermFrequency((ownPart + otherPart) / totalVectors);
    }

    /** Adds two counters, ignoring a side that is unset; yields unset if both are. */
    private static int sumOfSet(int a, int b) {
        if (a == UNSET) {
            return b;
        }
        return b == UNSET ? a : a + b;
    }

    /** Returns the larger of two values, ignoring a side that is unset; yields unset if both are. */
    private static int maxOfSet(int a, int b) {
        if (a == UNSET) {
            return b;
        }
        return b == UNSET ? a : Math.max(a, b);
    }

    /** Returns the smaller of two values, ignoring a side that is unset; yields unset if both are. */
    private static int minOfSet(int a, int b) {
        if (a == UNSET) {
            return b;
        }
        return b == UNSET ? a : Math.min(a, b);
    }

    /** Returns the {@link TemplateVector} this element is associated to. */
    public TemplateVector getTemplateVector() {
        return tv;
    }
}