package water.parser;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicInteger;
import water.AutoBuffer;
import water.H2O;
import water.Iced;
import water.nbhm.NonBlockingHashMap;
/**
* Class for tracking enum columns.
*
* Basically a wrapper around a non-blocking hash map.
* In the first pass, we just collect the set of unique strings per column
* (as long as there are no more than H2O.DATA_MAX_FACTOR_LEVELS unique elements;
* once that limit is exceeded the enum is killed).
*
* After pass 1, the keys are sorted and indexed alphabetically.
* In the second pass, the map is used only for lookup and is never updated.
*
* Enum objects are shared among threads on the local node!
*
* @author tomasnykodym
*
*/
public final class Enum extends Iced implements Cloneable {
  /** Source of ids handed out by {@link #addKey(ValueString)}; incremented before each insertion attempt. */
  AtomicInteger _id = new AtomicInteger();
  /** Max id recorded by {@link #read(AutoBuffer)}; -1 means "not set, use the current value of _id". */
  int _maxId = -1;
  long _nElems;
  /** Key -> id map; set to null once the enum is killed. Always read it once into a local before use. */
  volatile NonBlockingHashMap<ValueString, Integer> _map;

  /** Creates an empty, live (not killed) enum. */
  public Enum() { _map = new NonBlockingHashMap<ValueString, Integer>(); }

  // Copy constructor used by clone(); takes ownership of the given map (may be null for a killed enum).
  private Enum(int id, int maxId, long nElems, NonBlockingHashMap<ValueString,Integer> map) {
    _id = new AtomicInteger(id);
    _maxId = maxId;
    _nElems = nElems;
    _map = map;
  }

  /**
   * Returns a copy of this enum with its own copy of the map (or a killed copy
   * if this enum is killed). The id counter, max id and element count are
   * carried over.
   */
  @SuppressWarnings("unchecked") // NonBlockingHashMap.clone() is declared to return Object
  @Override public Enum clone() {
    NonBlockingHashMap<ValueString,Integer> map = _map;
    if( map != null ) map = (NonBlockingHashMap<ValueString,Integer>)map.clone();
    // _maxId must be copied too, otherwise maxId() of the clone differs from the source.
    return new Enum(_id.get(), _maxId, _nElems, map);
  }

  /**
   * Adds the key to this map if it is not present yet.
   *
   * @param str key to add; its length must be below 65535 (that value is the
   *            end-of-map sentinel used by {@link #write(AutoBuffer)})
   * @return the id associated with the key, or Integer.MAX_VALUE if the enum
   *         is (or just became) killed
   */
  public int addKey(ValueString str) {
    // _map is shared and may be set to null (if the enum is killed) -> grab a local copy
    NonBlockingHashMap<ValueString, Integer> m = _map;
    if( m == null ) return Integer.MAX_VALUE;   // Nuked already
    Integer res = m.get(str);
    if( res != null ) return res;               // Recorded already
    assert str.get_length() < 65535;            // Length limit so 65535 can be used as a sentinel
    Integer newVal = Integer.valueOf(_id.incrementAndGet());
    // Insert a fresh ValueString built from the string form of the key.
    res = m.putIfAbsent(new ValueString(str.toString()), newVal);
    if( res != null ) return res;               // Lost the race, somebody else recorded it
    if( m.size() > H2O.DATA_MAX_FACTOR_LEVELS ) {
      kill();                                   // Too many levels for an enum column
      return Integer.MAX_VALUE;
    }
    return newVal;
  }

  /** Returns true if the key is recorded; a killed enum contains no keys. */
  public final boolean containsKey(Object key) {
    NonBlockingHashMap<ValueString, Integer> m = _map;
    return m != null && m.containsKey(key);
  }

  /** Convenience overload: wraps the string in a ValueString and adds it. */
  public void addKey(String str) {
    addKey(new ValueString(str));
  }

  /** Convenience overload of {@link #getTokenId(ValueString)}. */
  public int getTokenId(String str) {
    return getTokenId(new ValueString(str));
  }

  /** Renders the map as "{ key->id key->id }"; a killed enum renders as "{ killed }". */
  @Override public String toString() {
    NonBlockingHashMap<ValueString, Integer> m = _map;
    if( m == null ) return "{ killed }";        // Never throw from toString
    StringBuilder sb = new StringBuilder("{");
    for( Entry<ValueString, Integer> e : m.entrySet() )
      sb.append(' ').append(e.getKey()).append("->").append(e.getValue());
    sb.append(" }");
    return sb.toString();
  }

  public long addedElems() { return _nElems; }

  /**
   * Looks up the id of a key that is expected to be present.
   * Dereferences the map directly, so it must not be called on a killed enum.
   */
  public int getTokenId(ValueString str) {
    Integer I = _map.get(str);
    assert I != null : "missing value! " + str.toString();
    return I;
  }

  /**
   * Adds all keys of the other enum to this one (values are set to 1, i.e. the
   * map is treated as a set here). If the other enum is killed, or the merged
   * map grows beyond H2O.DATA_MAX_FACTOR_LEVELS, this enum is killed.
   */
  public void merge(Enum other) {
    if( this == other ) return;
    // Read each volatile map exactly once so a concurrent kill() cannot cause an NPE below.
    NonBlockingHashMap<ValueString, Integer> myMap = _map;
    if( myMap == null ) return;                 // Already killed, nothing to do
    NonBlockingHashMap<ValueString, Integer> otMap = other._map;
    if( otMap != null ) {                       // do the merge
      if( myMap == otMap ) return;
      for( ValueString str : otMap.keySet() )
        myMap.put(str, 1);
      if( myMap.size() <= H2O.DATA_MAX_FACTOR_LEVELS ) return;
    }
    kill(); // too many values, enum should be killed!
  }

  /** Returns the max id recorded by read() if set, otherwise the current value of the id counter. */
  public int maxId() { return _maxId == -1 ? _id.get() : _maxId; }
  /** Number of recorded keys; dereferences the map directly, so it must not be called on a killed enum. */
  public int size() { return _map.size(); }
  public boolean isKilled() { return _map == null; }
  public void kill() { _map = null; }

  /**
   * Sorts the keys and renumbers them in the map by their sorted position.
   * Assumes single-threaded use.
   *
   * @return the sorted keys, or null if the enum is killed
   */
  public ValueString [] computeColumnDomain() {
    NonBlockingHashMap<ValueString, Integer> m = _map;
    if( m == null ) return null;
    ValueString vs[] = m.keySet().toArray(new ValueString[m.size()]);
    Arrays.sort(vs);                            // Alpha sort to be nice
    for( int j = 0; j < vs.length; ++j )
      m.put(vs[j], j);                          // Renumber in the map
    return vs;
  }

  // Since this is a *concurrent* hashtable, writing it whilst it is being
  // updated is tricky.  If the table is NOT being updated, then all is written
  // as expected.  If the table IS being updated we only promise to write the
  // Keys that existed at the time the table write began.  If elements are
  // being deleted, they may be written anyways.  If the Values are changing, a
  // random Value is written.
  //
  // Format: 1 byte killed marker (1 = killed, nothing follows); otherwise
  // 4 bytes max id, then per entry {2 bytes key length, key bytes, 4 bytes id},
  // terminated by the 2-byte sentinel 65535.
  @Override public AutoBuffer write( AutoBuffer ab ) {
    // Single read of the volatile field: a concurrent kill() must not NPE us mid-stream.
    NonBlockingHashMap<ValueString, Integer> m = _map;
    if( m == null ) return ab.put1(1);          // Killed map marker
    ab.put1(0);                                 // Not killed
    ab.put4(maxId());
    // Iterate entries rather than keys + get(key): a key removed in between
    // would make get() return null and blow up on unboxing.
    for( Entry<ValueString, Integer> e : m.entrySet() ) {
      ValueString key = e.getKey();
      ab.put2((char)key.get_length()).putA1(key.get_buf(),key.get_length()).put4(e.getValue());
    }
    return ab.put2((char)65535);                // End of map marker
  }

  /** Inverse of {@link #write(AutoBuffer)}; expects this enum to be empty or killed beforehand. */
  @Override public Enum read( AutoBuffer ab ) {
    assert _map == null || _map.size()==0;
    _map = null;
    if( ab.get1() == 1 ) return this;           // Killed?
    _maxId = ab.get4();
    // Fill a local map and publish it only once it is complete.
    NonBlockingHashMap<ValueString, Integer> m = new NonBlockingHashMap<ValueString, Integer>();
    int len = 0;
    while( (len = ab.get2()) != 65535 )         // Read until end-of-map marker
      m.put(new ValueString(ab.getA1(len)),ab.get4());
    _map = m;
    return this;
  }
}