package sensim;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.builtin.OutputSchema;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
/**
 * Pig UDF that maps an integer id to an integer index using a lookup table read from a text file.
 *
 * <p>The file is tokenized on whitespace and consumed as consecutive {@code id index} pairs of
 * integers. The table is loaded lazily on the first call to {@link #exec(Tuple)} instead of in
 * the constructor, because the file does not exist yet when the constructor runs
 * (see make_feature_vectors.pig).
 *
 * <p>NOTE(review): the declared output schema is {@code index:long} while {@code exec} returns
 * an {@code Integer} - confirm which type downstream scripts expect.
 */
@OutputSchema("index:long")
public class IndexToIndexMapper extends EvalFunc<Integer> {

    /** File holding the whitespace-separated id/index pairs. */
    private final File afile;

    /** Lazily loaded id -> index table; {@code null} until the first successful load. */
    private Map<Integer, Integer> idxMap;

    /**
     * Creates a mapper backed by the given lookup file. The file is not read here.
     *
     * @param pathToIdxMap path of the file containing the id/index pairs
     */
    public IndexToIndexMapper(String pathToIdxMap) {
        afile = new File(pathToIdxMap);
    }

    /**
     * Looks up the index for the id stored in the first field of {@code input}.
     *
     * @param input tuple whose first field is cast to {@code Integer} and used as the id
     * @return the mapped index, or {@code null} if the tuple is null/empty, its first field is
     *     null, or the id has no entry in the table
     * @throws IOException if the lookup file cannot be opened or the tuple cannot be read
     */
    @Override
    public Integer exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0 || input.get(0) == null) {
            return null;
        }
        if (idxMap == null) {
            // Publish the table only after a complete, successful parse so that a failed load
            // is retried on the next call instead of leaving a truncated table cached.
            idxMap = loadIndexMap();
        }
        int id = (Integer) input.get(0);
        // Map.get returns null for an unknown id; the table itself never stores null values.
        return idxMap.get(id);
    }

    /**
     * Reads the whole lookup file into a new map.
     *
     * @return a map from id to index built from consecutive integer token pairs
     * @throws IOException if the file cannot be opened
     */
    private Map<Integer, Integer> loadIndexMap() throws IOException {
        Map<Integer, Integer> loaded = new HashMap<Integer, Integer>();
        // A Scanner tokenizes its input by whitespace by default.
        Scanner sc = new Scanner(afile);
        try {
            while (sc.hasNext()) {
                int id = sc.nextInt();
                int idx = sc.nextInt();
                loaded.put(id, idx);
            }
        } finally {
            // Release the file handle even when a token is not an integer or a pair is incomplete.
            sc.close();
        }
        return loaded;
    }

    /**
     * Delegates to the default argument-to-function mapping of {@code EvalFunc}.
     *
     * @return whatever the superclass implementation returns
     * @throws FrontendException if the superclass implementation throws it
     */
    @Override
    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
        return super.getArgToFuncMapping();
    }
}