package crosby.binary;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import com.google.protobuf.ByteString;
/**
 * Maps a set of strings to integers, giving frequently occurring strings
 * small integers.
 *
 * <p>Usage: call {@link #incr(String)} once per occurrence of every string,
 * then {@link #finish()} to freeze the table, then {@link #getIndex(String)}
 * and {@link #serialize()} to read it. {@link #clear()} resets the table so
 * it can be reused.
 *
 * <p>Index 0 is reserved for use as a delimiter; real strings are numbered
 * from 1.
 */
public class StringTable {
    /*
     * Array offsets at which the table index (offset + 1, because index 0 is
     * reserved) crosses a 7-bit boundary. The source comments describe these
     * as the points where an index "serializes to" one more byte -- presumably
     * the protobuf varint width; confirm against the encoder.
     */
    private static final int TWO_BYTE_START = (1 << 7) - 1;
    private static final int THREE_BYTE_START = (1 << 14) - 1;
    private static final int FOUR_BYTE_START = (1 << 21) - 1;

    /** Occurrence count per string; only valid between clear() and finish(). */
    private HashMap<String, Integer> counts;
    /** String to table index (1-based); only valid after finish(). */
    private HashMap<String, Integer> stringmap;
    /** Strings in table order; set[i] has index i + 1. Only valid after finish(). */
    private String[] set;

    public StringTable() {
        clear();
    }

    /**
     * Records one more occurrence of a string.
     *
     * <p>Must not be called after {@link #finish()} unless {@link #clear()}
     * has been called in between: finish() discards the counters, so the
     * call fails with a NullPointerException.
     *
     * @param s the string that occurred
     */
    public void incr(String s) {
        // Single lookup instead of containsKey() followed by get().
        Integer seen = counts.get(s);
        counts.put(s, seen == null ? 1 : seen + 1);
    }

    /**
     * After the string table has been built, returns the offset of a string
     * in it.
     *
     * <p>Note, value '0' is reserved for use as a delimiter and will not be
     * returned.
     *
     * @param s a string that was passed to {@link #incr(String)} before
     *          {@link #finish()} was called
     * @return the 1-based index of the string in the table
     * @throws NullPointerException if the table has not been finished, or if
     *         the string is not part of the table
     */
    public int getIndex(String s) {
        Integer index = stringmap.get(s);
        if (index == null) {
            // Same exception type as the unboxing failure this replaces, but
            // with a message that names the offending string.
            throw new NullPointerException(
                    "String is not in the string table: " + s);
        }
        return index.intValue();
    }

    /**
     * Freezes the table: orders the counted strings, assigns each one its
     * index and discards the counters.
     *
     * <p>The first group of strings (the ones with the smallest indexes) is
     * ordered by descending frequency so that frequent strings get small
     * numbers. Each following group, whose indexes share the same serialized
     * width, is ordered lexicographically to help deflate compression.
     */
    public void finish() {
        Comparator<String> byDescendingCount = new Comparator<String>() {
            @Override
            public int compare(final String s1, final String s2) {
                // compareTo instead of subtraction: cannot overflow.
                return counts.get(s2).compareTo(counts.get(s1));
            }
        };

        set = counts.keySet().toArray(new String[0]);

        // Sort based on the frequency, most frequent first.
        Arrays.sort(set, byDescendingCount);

        // The first group is deliberately left in frequency order: there is
        // not likely to be much benefit from re-sorting it, and frequent
        // values should stay small.
        sortLexicographically(set, TWO_BYTE_START, THREE_BYTE_START);
        sortLexicographically(set, THREE_BYTE_START, FOUR_BYTE_START);

        stringmap = new HashMap<String, Integer>(2 * set.length);
        for (int i = 0; i < set.length; i++) {
            stringmap.put(set[i], i + 1); // Index 0 is reserved for use as a delimiter.
        }
        counts = null;
    }

    /**
     * Sorts strings[from, to) into natural String order, clamping both bounds
     * to the array length. The upper bound is exclusive, so the last element
     * of the array is included whenever the range reaches the end.
     */
    private static void sortLexicographically(String[] strings, int from, int to) {
        int start = Math.min(from, strings.length);
        int end = Math.min(to, strings.length);
        if (start < end) {
            Arrays.sort(strings, start, end);
        }
    }

    /** Discards all counts and any finished table so the instance can be reused. */
    public void clear() {
        counts = new HashMap<String, Integer>(100);
        stringmap = null;
        set = null;
    }

    /**
     * Builds the serialized form of the finished table.
     *
     * <p>Entry 0 is an unused empty string (the reserved delimiter slot);
     * the table's strings follow in index order, UTF-8 encoded. Requires
     * {@link #finish()} to have been called first.
     *
     * @return a builder holding the delimiter entry followed by every string
     */
    public Osmformat.StringTable.Builder serialize() {
        Osmformat.StringTable.Builder builder = Osmformat.StringTable
                .newBuilder();
        builder.addS(ByteString.copyFromUtf8("")); // Add a unused string at offset 0 which is used as a delimiter.
        for (int i = 0; i < set.length; i++) {
            builder.addS(ByteString.copyFromUtf8(set[i]));
        }
        return builder;
    }
}