/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
A FeatureVector for which you can efficiently get the feature with
highest value, and other ranks.
@author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
@author David North <a href="mailto:dtn-mallet@corefiling.co.uk">dtn-mallet@corefiling.co.uk</a>
*/
package cc.mallet.types;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
/**
 * A {@link FeatureVector} whose entries can be retrieved by rank, where rank 0
 * is the entry with the highest value.
 * <p>
 * The ranking is built lazily: ranked accessors run a selection sort only as
 * far as the requested rank and remember how far they got, so asking for the
 * top few entries does not pay for a full sort. {@link #set(int, double)}
 * always throws {@link UnsupportedOperationException}.
 */
public class RankedFeatureVector extends FeatureVector
{
    /**
     * {@code rankOrder[r]} is the location (position in {@code values}) of the
     * entry holding rank {@code r}. It is {@code null} until the first sort, and
     * only positions {@code 0..sortedTo} are guaranteed to be in rank order.
     */
    int[] rankOrder;

    /** Value of {@link #sortedTo} meaning "nothing has been sorted yet". */
    private static final int SORTINIT = -1;

    /** Highest position of {@link #rankOrder} that is known to be sorted. */
    int sortedTo = SORTINIT; /* Extent of latest sort */

    /**
     * Creates a ranked vector by passing the given indices and values straight
     * to the {@link FeatureVector} constructor.
     */
    public RankedFeatureVector (final Alphabet dict,
                                final int[] indices,
                                final double[] values)
    {
        super (dict, indices, values);
    }

    /**
     * Creates a ranked vector by passing the given values straight to the
     * {@link FeatureVector} constructor.
     */
    public RankedFeatureVector (final Alphabet dict, final double[] values)
    {
        super (dict, values);
    }

    /** Returns a fresh copy of {@code a[begin .. begin+length-1]}. */
    private static double[] subArray (final double[] a, final int begin, final int length)
    {
        double[] ret = new double[length];
        System.arraycopy(a, begin, ret, 0, length);
        return ret;
    }

    /**
     * Creates a ranked vector over a copy of {@code length} values taken from
     * {@code values}, starting at position {@code begin}.
     */
    public RankedFeatureVector (final Alphabet dict, final double[] values, final int begin, final int length)
    {
        super (dict, subArray(values, begin, length));
    }

    /** Creates a ranked vector from the {@code values} array of a {@link DenseVector}. */
    public RankedFeatureVector (final Alphabet dict, final DenseVector v)
    {
        this (dict, v.values);
    }

    /**
     * Creates a ranked vector from the first {@code v.size} entries of an
     * {@link AugmentableFeatureVector}.
     */
    public RankedFeatureVector (final Alphabet dict, final AugmentableFeatureVector v)
    {
        // NOTE(review): the three 'true' flags are interpreted by the FeatureVector
        // constructor, which is not visible from this file - confirm their meaning there.
        super (dict, v.indices, v.values, v.size, v.size,
               true, true, true);
    }

    /** Creates a ranked vector from the indices and values of a {@link SparseVector}. */
    public RankedFeatureVector (final Alphabet dict, final SparseVector v)
    {
        super (dict, v.indices, v.values);
    }

    /**
     * Ranks every entry at once, highest value first, using the library sort.
     * The sort is stable, so entries with equal values keep their original
     * relative order. The result is recorded as fully sorted so that later
     * ranked accesses reuse it instead of sorting again.
     */
    protected void setRankOrder ()
    {
        this.rankOrder = new int[values.length];
        final List<EntryWithOriginalIndex> rankedEntries =
            new ArrayList<EntryWithOriginalIndex>(values.length);
        for (int i = 0; i < rankOrder.length; i++) {
            assert (!Double.isNaN(values[i]));
            rankedEntries.add(new EntryWithOriginalIndex(values[i], i));
        }
        Collections.sort(rankedEntries);
        int i = 0;
        for (EntryWithOriginalIndex entry : rankedEntries) {
            rankOrder[i++] = entry._originalIndex;
        }
        // Everything is in rank order now; without this the next ranked access
        // would discard the work above and start over.
        sortedTo = values.length - 1;
    }

    /**
     * Makes sure ranks {@code 0..extent} are in descending order of value.
     *
     * @param extent highest rank that must be valid afterwards; values past
     *               the end of the vector are clamped to the last position
     * @param reset  if true, discard any existing ranking and sort from scratch
     */
    protected void setRankOrder (final int extent, final boolean reset)
    {
        extendRankOrder (extent, reset, true);
    }

    //added by Limin Yao, rank the elements ascendingly, the smaller is in the front
    /**
     * Makes sure ranks {@code 0..extent} are in ascending order of value.
     * <p>
     * This shares its progress marker with the descending sort, so pass
     * {@code reset = true} when switching direction; continuing an existing
     * ranking in the other direction yields a mixed ordering.
     *
     * @param extent highest rank that must be valid afterwards; values past
     *               the end of the vector are clamped to the last position
     * @param reset  if true, discard any existing ranking and sort from scratch
     */
    protected void setReverseRankOrder (final int extent, final boolean reset)
    {
        extendRankOrder (extent, reset, false);
    }

    /**
     * Shared partial selection sort behind both ranking directions. Continues
     * from {@link #sortedTo} unless a fresh start is needed or requested.
     */
    private void extendRankOrder (final int extent, final boolean reset, final boolean descending)
    {
        // Selection sort places one element per pass, so sorting up to position
        // n-1 orders the whole array; clamp larger requests to that.
        final int sortExtent = (extent >= values.length) ? values.length - 1 : extent;
        if (sortedTo == SORTINIT || reset || rankOrder == null) {
            this.rankOrder = new int[values.length];
            for (int i = 0; i < rankOrder.length; i++) {
                rankOrder[i] = i;
                assert (!Double.isNaN(values[i]));
            }
            // The permutation is the identity again, so earlier progress is void.
            // Leaving sortedTo untouched here would make a reset resume mid-array.
            sortedTo = SORTINIT;
        }
        for (int i = sortedTo + 1; i <= sortExtent; i++) {
            int bestPosition = i;
            double best = values[rankOrder[i]];
            for (int j = i + 1; j < rankOrder.length; j++) {
                final double candidate = values[rankOrder[j]];
                if (descending ? candidate > best : candidate < best) {
                    best = candidate;
                    bestPosition = j;
                }
            }
            // Move the winner of this pass into position i.
            final int winner = rankOrder[bestPosition];
            rankOrder[bestPosition] = rankOrder[i];
            rankOrder[i] = winner;
            sortedTo = i;
        }
    }

    /** Extends the descending ranking up to {@code extent} without resetting it. */
    protected void setRankOrder (final int extent) {
        setRankOrder(extent, false);
    }

    /** Returns the feature index of the top-ranked entry. */
    public int getMaxValuedIndex ()
    {
        return getIndexAtRank(0); // was return rankOrder[0];
    }

    /** Returns the dictionary entry of the top-ranked feature. */
    public Object getMaxValuedObject ()
    {
        return dictionary.lookupObject (getMaxValuedIndex());
    }

    /**
     * Returns the feature index of the best-ranked entry whose feature is
     * contained in {@code fs}.
     *
     * @param fs features to consider; {@code null} means all features
     * @throws ArrayIndexOutOfBoundsException if no entry of this vector is in {@code fs}
     */
    public int getMaxValuedIndexIn (final FeatureSelection fs)
    {
        if (fs == null) {
            return getMaxValuedIndex();
        }
        assert (fs.getAlphabet() == dictionary);
        return getIndexAtRank (firstRankIn (fs));
    }

    /** Returns the dictionary entry for {@link #getMaxValuedIndexIn(FeatureSelection)}. */
    public Object getMaxValuedObjectIn (final FeatureSelection fs)
    {
        return dictionary.lookupObject (getMaxValuedIndexIn(fs));
    }

    /** Returns the value of the top-ranked entry. */
    public double getMaxValue ()
    {
        // A no-op when rank 0 is already known.
        setRankOrder (0);
        return values[rankOrder[0]];
    }

    /**
     * Returns the value of the best-ranked entry whose feature is contained in
     * {@code fs}.
     *
     * @param fs features to consider; {@code null} means all features
     * @throws ArrayIndexOutOfBoundsException if no entry of this vector is in {@code fs}
     */
    public double getMaxValueIn (final FeatureSelection fs)
    {
        if (fs == null) {
            return getMaxValue();
        }
        final int rank = firstRankIn (fs);
        return values[rankOrder[rank]];
    }

    /**
     * Walks the ranking from the top and returns the first rank whose feature
     * index is contained in {@code fs}, sorting only as far as needed.
     */
    private int firstRankIn (final FeatureSelection fs)
    {
        for (int rank = 0; rank < values.length; rank++) {
            // The selection is tested against the feature index, not against
            // the rank counter or the storage location.
            if (fs.contains (getIndexAtRank (rank))) {
                return rank;
            }
        }
        throw new ArrayIndexOutOfBoundsException (
            "No feature of this vector is contained in the given FeatureSelection");
    }

    /** Returns the feature index of the entry at the given rank (0 is the top rank). */
    public int getIndexAtRank (final int rank)
    {
        setRankOrder (rank);
        return indexAtLocation(rankOrder[rank]); // was return rankOrder[rank]
    }

    /** Returns the dictionary entry of the feature at the given rank. */
    public Object getObjectAtRank (final int rank)
    {
        return dictionary.lookupObject (getIndexAtRank(rank)); // was return dictionary.lookupObject (rankOrder[rank]);
    }

    /**
     * Returns the value of the entry at the given rank. A rank past the end is
     * reported on {@code System.err} and clamped to the last rank. Returns
     * {@code 1.0} when this vector has no {@code values} array.
     */
    public double getValueAtRank (int rank)
    {
        if (values == null) {
            return 1.0;
        }
        setRankOrder (rank);
        if (rank >= rankOrder.length) {
            // Report the rank that was actually requested, before clamping it.
            System.err.println("rank larger than rankOrder.length. rank = " + rank + ", rankOrder.length = " + rankOrder.length);
            rank = rankOrder.length -1;
        }
        // Defensive: rankOrder is built as a permutation of the value locations,
        // so this is not expected to trigger.
        if (rankOrder[rank] >= values.length) {
            System.err.println("rankOrder[rank] out of range.");
            return 1.0;
        }
        return values[rankOrder[rank]];
    }

    /**
     * Prints a human-readable version of this vector, with features listed in ranked order.
     * @param out Stream to write to
     */
    public void printByRank (final OutputStream out)
    {
        final PrintWriter writer = new PrintWriter (new OutputStreamWriter (out), true);
        printByRank (writer);
        // Auto-flush only fires on println/printf/format, and only print() is
        // used, so flush explicitly or the text may never reach the stream.
        writer.flush ();
    }

    /**
     * Prints a human-readable version of this vector, with features listed in ranked order.
     * The writer is not flushed; that is left to the caller.
     * @param out Writer to write to
     */
    public void printByRank (final PrintWriter out)
    {
        printRanks (out, 0, numLocations ());
    }

    //added by Limin Yao
    /**
     * Prints the {@code num} top-ranked entries, or all of them if there are fewer.
     * @param out Writer to write to
     * @param num Number of entries to print
     */
    public void printTopK (final PrintWriter out, int num)
    {
        printRanks (out, 0, Math.min (num, numLocations ()));
    }

    /**
     * Prints the {@code num} bottom-ranked entries, or all of them if there are fewer.
     * @param out Writer to write to
     * @param num Number of entries to print
     */
    public void printLowerK (final PrintWriter out, final int num)
    {
        final int length = numLocations();
        // Clamp like printTopK so that num >= length prints everything instead
        // of starting at a negative rank.
        printRanks (out, Math.max (0, length - num), length);
    }

    /** Prints entries of ranks {@code fromRank} (inclusive) to {@code toRank} (exclusive) as "object:value ". */
    private void printRanks (final PrintWriter out, final int fromRank, final int toRank)
    {
        for (int rank = fromRank; rank < toRank; rank++) {
            final int idx = getIndexAtRank (rank);
            final double val = getValueAtRank (rank);
            final Object obj = dictionary.lookupObject (idx);
            out.print (obj+":"+val + " ");
        }
    }

    /** Not implemented; always throws {@link UnsupportedOperationException}. */
    public int getRank (final Object o)
    {
        throw new UnsupportedOperationException ("Not yet implemented");
    }

    /** Not implemented; always throws {@link UnsupportedOperationException}. */
    public int getRank (final int index)
    {
        throw new UnsupportedOperationException ("Not yet implemented");
    }

    /** Always throws {@link UnsupportedOperationException}: values cannot be changed through this method. */
    public void set (final int i, final double v)
    {
        throw new UnsupportedOperationException (RankedFeatureVector.class.getName() + " is immutable");
    }

    /** Builds one ranked vector from an instance list. */
    public interface Factory
    {
        public RankedFeatureVector newRankedFeatureVector (InstanceList ilist);
    }

    /** Builds an array of ranked vectors from an instance list. */
    public interface PerLabelFactory
    {
        public RankedFeatureVector[] newRankedFeatureVectors (InstanceList ilist);
    }

    /** A value paired with the location it came from, ordered by descending value. */
    private static class EntryWithOriginalIndex implements Comparable<EntryWithOriginalIndex> {
        private final double _value;
        private final int _originalIndex;

        public EntryWithOriginalIndex(final double value, final int originalIndex) {
            _value = value;
            _originalIndex = originalIndex;
        }

        /**
         * Sort by value. Greater comes to the left of smaller.
         */
        public int compareTo(final EntryWithOriginalIndex other) {
            // Arguments are swapped on purpose to get descending order.
            return Double.compare(other._value, _value);
        }
    }
}