package edu.umd.hooka;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;
/**
 * A two-way mapping between words and dense integer ids that can be
 * serialized through Hadoop's {@link Writable} interface.
 *
 * <p>Ids are handed out in insertion order, starting at 0. A freshly
 * constructed vocabulary already contains the word {@code "NULL"} at id 0.
 *
 * <p>Serialized form: an {@code int} entry count followed by that many
 * {@link Text} records, written in id order.
 */
public class VocabularyWritable implements Writable, Vocab {
    private static final Logger sLogger = Logger.getLogger(VocabularyWritable.class);

    /** Words indexed by id. */
    ArrayList<String> strings;

    /** Reverse index: word to id. */
    HashMap<String, Integer> map;

    /** Creates a vocabulary holding only the {@code "NULL"} word at id 0. */
    public VocabularyWritable() {
        strings = new ArrayList<String>();
        strings.add("NULL");
        map = new HashMap<String, Integer>();
        map.put("NULL", Integer.valueOf(0));
    }

    /**
     * Returns the number of words stored, including the {@code "NULL"} entry
     * added by the constructor.
     */
    public int size() {
        return strings.size();
    }

    /**
     * Returns the id of {@code word}, registering it with the next free id
     * when it has not been seen before.
     *
     * @param word the word to look up or insert
     * @return the existing or newly assigned id
     */
    public int addOrGet(String word) {
        Integer id = map.get(word);
        if (id == null) {
            // The next free id is the current list length.
            id = Integer.valueOf(strings.size());
            strings.add(word);
            map.put(word, id);
        }
        return id.intValue();
    }

    /**
     * Looks up the id of {@code word} without modifying the vocabulary.
     *
     * @param word the word to look up
     * @return the id of the word, or {@code -1} when it is not present
     */
    public int get(String word) {
        Integer id = map.get(word);
        return (id == null) ? -1 : id.intValue();
    }

    /**
     * Returns the word stored under {@code index}.
     *
     * @param index a word id
     * @return the word with that id
     * @throws IndexOutOfBoundsException if {@code index} is not a valid id
     */
    public String get(int index) {
        return strings.get(index);
    }

    /**
     * Replaces the contents of this vocabulary with the entries read from
     * {@code in}. The current contents are only replaced once the whole
     * stream section has been read successfully.
     *
     * @param in the input to deserialize from
     * @throws IOException if reading fails or the stored entry count is negative
     */
    public void readFields(DataInput in) throws IOException {
        int count = in.readInt();
        sLogger.info("VOCAB SIZE "+count);
        if (count < 0) {
            // Report corrupt input as an I/O problem rather than letting the
            // list constructor throw an unchecked exception.
            throw new IOException("Negative vocabulary size: " + count);
        }

        ArrayList<String> words = new ArrayList<String>(count);
        HashMap<String, Integer> ids = new HashMap<String, Integer>();
        Text buffer = new Text();
        for (int i = 0; i < count; i++) {
            buffer.readFields(in);
            String word = buffer.toString();
            words.add(word);
            ids.put(word, Integer.valueOf(i));
        }

        // Swap in the new state only after a complete read.
        strings = words;
        map = ids;
    }

    /**
     * Writes the entry count followed by every word in id order.
     *
     * @param out the output to serialize to
     * @throws IOException if writing fails
     */
    public void write(DataOutput out) throws IOException {
        out.writeInt(strings.size());
        Text buffer = new Text();
        for (String word : strings) {
            buffer.set(word);
            buffer.write(out);
        }
    }

    /** Returns the string form of the word list, in id order. */
    @Override
    public String toString() {
        return strings.toString();
    }
}