package org.shanbo.feluca.vectors;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.commons.lang3.text.StrBuilder;
import org.shanbo.feluca.data2.HashPartitioner;
import org.shanbo.feluca.data2.Vector;
import org.shanbo.feluca.data2.DataSetInfo.Statistic;
import org.shanbo.feluca.data2.util.BytesUtil;
import org.shanbo.feluca.data2.util.NumericTokenizer;
import org.shanbo.feluca.data2.util.NumericTokenizer.FeatureWeight;
import gnu.trove.list.array.TFloatArrayList;
import gnu.trove.list.array.TIntArrayList;
/**
* libsvm format except the label is real number <p></p> For regression
* @author lgn
*
*/
public class NumberVector extends GeneralVector{
/**
* used for averaging regression value
* @author lgn
*
*/
public static class VectorValueStatistic extends Statistic{
double valueSum = 0;
@Override
public void statAsOne() {
valueSum += ((NumberVector)current).getVectorNumber();
}
@Override
public void statOnFeature(int index) {
}
@Override
public Properties getStatInfo() {
Properties p = new Properties();
p.put(SUM_VEC_VALUE, this.valueSum);
return p;
}
}
public NumberVector(){
this.inputType = VectorType.NUMBER_FID_WEIGHT;
this.outputType = VectorType.NUMBER_FID_WEIGHT;
}
public float getVectorNumber(){
return BytesUtil.bytes2Float(head);
}
@Override
public boolean parseLine(String line) {
if (fids == null){ //don't know
fids = new TIntArrayList(1024);
weights = new TFloatArrayList(1024);
head = new byte[4];
}else{
fids.resetQuick();
weights.resetQuick();
}
NumericTokenizer nt = new NumericTokenizer();
nt.load(line);
BytesUtil.float2Bytes(nt.nextNumber().floatValue(), head);
while(nt.hasNext()){
FeatureWeight nextKeyWeight = nt.nextKeyWeight();
fids.add(nextKeyWeight.getId());
weights.add(nextKeyWeight.getWeight());
}
if (fids.size() == 0){
return false;
}
return true;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder(String.format("%.3f", BytesUtil.bytes2Float(head)));
for(int i = 0 ; i < fids.size(); i++){
sb.append(String.format(" %d:%.4f", fids.getQuick(i), weights.getQuick(i) ));
}
return sb.toString();
}
@Deprecated
public List<Vector> divideByFeature(HashPartitioner partitioner) {
List<Vector> vectors = new ArrayList<Vector>(partitioner.getMaxShards());
List<StrBuilder> lines = new ArrayList<StrBuilder>(partitioner.getMaxShards());
for(int i = 0 ; i < partitioner.getMaxShards(); i++){
vectors.add(new LabelVector());
lines.add(new StrBuilder().append(String.format("%.3f" ,getVectorNumber()))); //label
}
for(int i = 0 ; i < getSize(); i++){
int shardId = partitioner.decideShard(getFId(i));
lines.get(shardId).append(String.format(" %d:%.4f", fids.getQuick(i), weights.getQuick(i) ));
}
for(int i = 0; i < vectors.size(); i++){
vectors.get(i).parseLine(lines.get(i).toString());
}
return vectors;
}
@Override
public void swallow(Vector v) {
// TODO Auto-generated method stub
}
@Override
public List<Statistic> getStat() {
List<Statistic> stats = new ArrayList<Statistic>();
stats.add(new VectorValueStatistic());
stats.add(new BasicStatistic());
return stats;
}
}