package org.shanbo.feluca.data2;
import java.util.List;
import java.util.Locale;

import org.shanbo.feluca.data2.util.NumericTokenizer;
/**
 * A pair of integer columns read from one line of text, with an optional
 * payload string that is emitted next to the non-aligned column when a group
 * of tuples is rendered by {@link #toVectorFormat(List, AlignColumn)}.
 *
 * <p>The plain {@code Tuple} always reports the payload {@code "1"};
 * {@link WeightTuple} carries an explicit float weight instead.
 */
public class Tuple {

    /** Selects which column is written once as the leading id of a vector line. */
    public static enum AlignColumn {
        FIRST,
        SECOND,
    }

    /** Line layouts understood by {@link #convert(TupleType, String)}. */
    public static enum TupleType {
        NOT_TUPLE,
        ONLY_TWO_COLUMNS,
        WEIGHT_TYPE;
    }

    int firstColumn;
    int secondColumn;

    /**
     * Creates a tuple from two already-parsed column values.
     *
     * @param firstColumn  value of the first column
     * @param secondColumn value of the second column
     */
    public Tuple(int firstColumn, int secondColumn) {
        this.firstColumn = firstColumn;
        this.secondColumn = secondColumn;
    }

    /** @return the value of the first column */
    public int getFirstColumn() {
        return firstColumn;
    }

    /** @return the value of the second column */
    public int getSecondColumn() {
        return secondColumn;
    }

    /**
     * Text written after the {@code ':'} for this tuple in
     * {@link #toVectorFormat(List, AlignColumn)}.
     *
     * @return the constant {@code "1"}; subclasses may override
     */
    public String payload() {
        return "1";
    }

    /**
     * Renders a group of tuples as a single line of the form
     * {@code id c1:p1 c2:p2 ...}.
     *
     * <p>The leading id is taken from the aligned column of the <em>first</em>
     * tuple only; the method does not check that the remaining tuples share it.
     * Each following {@code c:p} pair is the other column of a tuple and that
     * tuple's {@link #payload()}.
     *
     * <p><b>NOT</b> for FID_ONLY format! Because tuple contains a primary ID.
     *
     * <p>This method reads no state of the instance it is invoked on.
     *
     * @param tuples      tuples to render; must contain at least one element
     * @param alignColumn column used as the leading id; any value other than
     *                    {@link AlignColumn#FIRST} (including {@code null}) is
     *                    treated as {@link AlignColumn#SECOND}
     * @return the rendered line, without a trailing line separator
     * @throws IndexOutOfBoundsException if {@code tuples} is empty
     * @throws NullPointerException      if {@code tuples} is {@code null}
     */
    public String toVectorFormat(List<Tuple> tuples, AlignColumn alignColumn) {
        boolean alignFirst = alignColumn == AlignColumn.FIRST;
        StringBuilder builder = new StringBuilder();

        Tuple head = tuples.get(0);
        builder.append(alignFirst ? head.getFirstColumn() : head.getSecondColumn());

        for (Tuple tuple : tuples) {
            int otherColumn = alignFirst ? tuple.getSecondColumn() : tuple.getFirstColumn();
            builder.append(' ').append(otherColumn).append(':').append(tuple.payload());
        }
        return builder.toString();
    }

    /**
     * Overwrites both columns with the first two numbers tokenized from
     * {@code line}.
     *
     * <p>NumericTokenizer is about 30% faster than Splitter-NumericParser.
     *
     * @param line text holding at least two numbers
     * @throws ClassCastException if a token is not returned as an
     *                            {@code Integer} by the tokenizer
     */
    public void parseLine(String line) {
        NumericTokenizer nt = new NumericTokenizer();
        nt.load(line);
        // NOTE(review): nextNumber() is declared elsewhere; the casts assume it
        // yields an Integer for integral tokens - confirm against NumericTokenizer.
        firstColumn = (Integer) nt.nextNumber();
        secondColumn = (Integer) nt.nextNumber();
    }

    /**
     * Parses {@code line} into the tuple flavour selected by {@code tupleType}.
     *
     * @param tupleType layout of {@code line}
     * @param line      text to parse
     * @return a {@code Tuple} for {@link TupleType#ONLY_TWO_COLUMNS}, a
     *         {@link WeightTuple} for {@link TupleType#WEIGHT_TYPE}, and
     *         {@code null} for every other value
     */
    public static Tuple convert(TupleType tupleType, String line) {
        if (tupleType == TupleType.ONLY_TWO_COLUMNS) {
            Tuple t = new Tuple(0, 0);
            t.parseLine(line);
            return t;
        } else if (tupleType == TupleType.WEIGHT_TYPE) {
            WeightTuple wt = new WeightTuple(0, 0, 0);
            wt.parseLine(line);
            return wt;
        } else {
            return null;
        }
    }

    /** A {@link Tuple} whose payload is an explicit float weight. */
    public static class WeightTuple extends Tuple {

        float weight;

        /**
         * @param firstColumn  value of the first column
         * @param secondColumn value of the second column
         * @param weight       weight reported by {@link #payload()}
         */
        public WeightTuple(int firstColumn, int secondColumn, float weight) {
            super(firstColumn, secondColumn);
            this.weight = weight;
        }

        /**
         * @return the weight with exactly five fraction digits, always using
         *         {@code '.'} as the decimal separator
         */
        @Override
        public String payload() {
            // Locale.ROOT keeps the separator a '.' even when the JVM default
            // locale would print a decimal comma, which would corrupt the
            // "column:weight" output.
            return String.format(Locale.ROOT, "%.5f", weight);
        }

        /**
         * Overwrites both columns and the weight with the first three numbers
         * tokenized from {@code line}.
         *
         * <p>NumericTokenizer is about 30% faster than Splitter-NumericParser.
         *
         * @param line text holding two integer columns followed by a weight
         * @throws ClassCastException if a column token is not returned as an
         *                            {@code Integer}, or the weight token is
         *                            not a {@code Number}
         */
        @Override
        public void parseLine(String line) {
            NumericTokenizer nt = new NumericTokenizer();
            nt.load(line);
            firstColumn = (Integer) nt.nextNumber();
            secondColumn = (Integer) nt.nextNumber();
            // Accept any numeric box (Integer, Float, Double, Long, ...) rather
            // than only Float/Integer; results for those two are unchanged.
            Object value = nt.nextNumber();
            weight = ((Number) value).floatValue();
        }
    }
}