/*
* Copyright 2004-2010 Information & Software Engineering Group (188/1)
* Institute of Software Technology and Interactive Systems
* Vienna University of Technology, Austria
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package at.tuwien.ifs.somtoolbox.data;
import java.util.ArrayList;
import java.util.Hashtable;
import at.tuwien.ifs.somtoolbox.util.StringUtils;
import at.tuwien.ifs.somtoolbox.util.VectorTools;
/**
* This abstract implementation provides basic support for operating on a {@link TemplateVector}. Sub-classes have to
* implement constructors and methods to read and create a template vector, e.g. from a file or a database.
*
* @author Michael Dittenbach
* @author Rudolf Mayer
* @version $Id: AbstractSOMLibTemplateVector.java 3883 2010-11-02 17:13:23Z frank $
*/
public abstract class AbstractSOMLibTemplateVector implements TemplateVector {
/**
* The dimension of the template vector, i.e. the number of attributes.
*/
protected int dim = 0;
protected int numInfo = 0;
protected int numVectors = 0;
protected String templateFileName = null;
/**
* The attributes of the template vector.
*/
protected TemplateVectorElement[] elements = null;
/**
* A mapping label --> attribute to allow fast access.
*/
protected Hashtable<String, TemplateVectorElement> elementMap = new Hashtable<String, TemplateVectorElement>();
protected int longestStringLength = -1;
@Override
public int dim() {
return dim;
}
@Override
public int numVectors() {
return numVectors;
}
@Override
public int numinfo() {
return numInfo;
}
@Override
public String getLabel(int i) {
return elements[i].getLabel();
}
@Override
public String[] getLabels() {
String[] res = new String[elements.length];
for (int i = 0; i < res.length; i++) {
res[i] = elements[i].getLabel();
}
return res;
}
@Override
public ArrayList<String> getLabelsAsList() {
ArrayList<String> res = new ArrayList<String>(elements.length);
for (TemplateVectorElement element : elements) {
res.add(element.getLabel());
}
return res;
}
@Override
public int getIndexOfFeature(String label) {
if (containsLabel(label)) {
return elementMap.get(label).getIndex();
} else {
return -1;
}
}
public int getIndex(String label) {
TemplateVectorElement templateVectorElement = elementMap.get(label);
if (templateVectorElement != null) {
return templateVectorElement.getIndex();
} else {
return -1;
}
}
public TemplateVectorElement getElement(String label) {
return elementMap.get(label);
}
@Override
public boolean containsLabel(String label) {
return elementMap.containsKey(label);
}
/**
* @param label the name of the term.
* @return The document frequency of the given term
*/
public int getDocumentFrequency(String label) {
return elementMap.get(label).getDocumentFrequency();
}
/**
* @param queryTerms A map containing <label, frequency> pairs for each term.
* @return A vector according to the tfxidf weighting scheme
*/
public double[] getTFxIDFVectorFromTerms(Hashtable<String, Integer> queryTerms) {
double[] vector = new double[dim];
for (int i = 0; i < dim; i++) {
if (queryTerms.get(elements[i].getLabel()) != null) {
double tf = queryTerms.get(elements[i].getLabel()).intValue();
vector[i] = tf * Math.log((double) elements.length / (double) elements[i].getDocumentFrequency());
} else {
vector[i] = 0;
}
}
// FIXME: normalise only when input is normalised?
vector = VectorTools.normaliseVectorToUnitLength(vector);
return vector;
}
@Override
public TemplateVectorElement getElement(int index) {
return elements[index];
}
@Override
public int getLongestStringLength() {
if (longestStringLength == -1) {
longestStringLength = StringUtils.getLongestStringLength(elementMap.keySet());
}
return longestStringLength;
}
@Override
public void incNumVectors(int numVectors) {
this.numVectors += numVectors;
}
}