package water.fvec;
import water.*;
import water.util.Log;
import water.util.MathUtils;
import water.util.UnsafeUtils;
public abstract class FileVec extends ByteVec {
  long _len;      // File length in bytes; fixed at construction time
  final byte _be; // Backend/persistence tag passed to each lazily-created chunk Value -- NOTE(review): exact semantics defined by the Value ctor, confirm there
// Returns the filesystem path encoded in the given key: strips the
// system Vec/Chunk key prefix (if present) and a leading "nfs:/" or
// "nfs:\" scheme marker.
public static String getPathForKey(Key k) {
  final int off = (k._kb[0]==Key.CHK || k._kb[0]==Key.VEC) ? Vec.KEY_PREFIX_LEN : 0;
  String path = new String(k._kb, off, k._kb.length - off);
  // At most one scheme prefix is stripped, matching either slash flavor.
  for( String scheme : new String[]{"nfs:/", "nfs:\\"} )
    if( path.startsWith(scheme) )
      return path.substring(scheme.length());
  return path;
}
/** Log-2 of Chunk size. */
public static final int DFLT_LOG2_CHUNK_SIZE = 20/*1Meg*/+2/*4Meg*/;
/** Default Chunk size in bytes (4MB), useful when breaking up large arrays into
 * "bite-sized" chunks. Bigger increases batch sizes, lowers overhead
 * costs, lower increases fine-grained parallelism. */
public static final int DFLT_CHUNK_SIZE = 1 << DFLT_LOG2_CHUNK_SIZE;
// Bytes served per chunk from this file; mutable so parse setup can tune
// it after construction via setChunkSize().
public int _chunkSize = DFLT_CHUNK_SIZE;
// Explicit chunk-count override; -1 means "derive from _len / _chunkSize"
// (see nChunks()).
public int _nChunks = -1;
/** Build a file-backed Vec of {@code len} bytes.
 *  @param key vec key under which this vec is registered
 *  @param len file length in bytes
 *  @param be  backend tag forwarded to each chunk Value */
protected FileVec(Key key, long len, byte be) {
  super(key, -1/*no rowLayout*/);
  _be  = be;
  _len = len;
}
/**
 * Explicitly override the number of chunks and derive the matching chunk
 * size from the file length.
 * @param n desired number of chunks (must be &gt; 0, and small enough that
 *          length()/n stays within the 1G chunk-size limit)
 */
public void setNChunks(int n){
  _nChunks = n;
  // Cast AFTER the division: the previous code wrote (int)length()/n, which
  // truncated the long length to an int BEFORE dividing -- wrong (possibly
  // negative) chunk sizes for files of 2GB or more.
  setChunkSize((int)(length()/n));
}
/**
 * Sets the chunk size (in bytes) used when reading this file.
 * <p>
 * NOTE(review): earlier docs claimed non-power-of-two values are rounded
 * down to a power of two and mentioned a _log2ChkSize field; the actual
 * implementation only range-checks the value (0 &lt; size &le; 1G) and
 * stores it as-is -- see {@link #setChunkSize(Frame, int)}.
 * </p>
 * <p>
 * Since the optimal chunk size is not known during FileVec instantiation,
 * this setter is required to set it afterwards (and to flush any chunks
 * already cached with the default size).
 * </p>
 * @param chunkSize requested chunk size to be used when parsing
 * @return actual _chunkSize setting
 */
public int setChunkSize(int chunkSize) { return setChunkSize(null, chunkSize); }
/**
 * Sets the chunk size and republishes the updated vec (and optionally its
 * enclosing Frame) so every node sees the new value.
 *
 * @param fr        frame to reload/republish after the change, or null
 * @param chunkSize requested chunk size in bytes; must be in (0, 1G]
 * @return the new _chunkSize setting
 * @throws IllegalArgumentException if chunkSize is out of range
 */
public int setChunkSize(Frame fr, int chunkSize) {
  // Validate BEFORE any destructive work: the original code removed the
  // cached chunks first, so an illegal argument left the vec evicted from
  // the DKV with nothing re-published.
  if (chunkSize <= 0) throw new IllegalArgumentException("Chunk sizes must be > 0.");
  if (chunkSize > (1<<30) ) throw new IllegalArgumentException("Chunk sizes must be < 1G.");
  // Clear cached chunks first.
  // Peeking into a file before the chunkSize has been set will load chunks
  // of the file in DFLT_CHUNK_SIZE amounts.  If this side-effect is not
  // reversed when _chunkSize differs from the default value, parsing will
  // either double-read sections (_chunkSize < DFLT_CHUNK_SIZE) or skip data
  // (_chunkSize > DFLT_CHUNK_SIZE).  This reverses that side-effect.
  Futures fs = new Futures();
  Keyed.remove(_key, fs);
  fs.blockForPending();
  _chunkSize = chunkSize;
  // Now reset the chunk size on each node
  fs = new Futures();
  DKV.put(_key, this, fs);
  // Also update the Frame to invalidate local caches
  if (fr != null ) {
    fr.reloadVecs();
    DKV.put(fr._key, fr, fs);
  }
  fs.blockForPending();
  return _chunkSize;
}
/** @return length of the backing file in bytes */
@Override public long length() { return _len; }
/** Number of chunks covering the file: the explicit override if one was
 *  set via setNChunks, otherwise the file length divided into _chunkSize
 *  pieces rounded up, with a minimum of one chunk. */
@Override public int nChunks() {
  if( _nChunks != -1 ) return _nChunks; // explicit override wins
  long chunks = _len / _chunkSize;
  if( _len % _chunkSize != 0 ) chunks++; // partial trailing chunk
  return (int)Math.max(1, chunks);
}
/** File-backed vecs are read-only. */
@Override public boolean writable() { return false; }
/** Size of vector data; identical to the file length for 1-byte rows. */
@Override public long byteSize(){return length(); }
// Convert a row# to a chunk#.  Rows are single bytes here, so this is a
// plain divide by the constant chunk size; the last chunk absorbs any
// remainder, and an index equal to the length maps to the one-past-the-end
// chunk number.
@Override
public int elem2ChunkIdx(long i) {
  assert 0 <= i && i <= _len : " "+i+" < "+_len;
  final int nc = nChunks();
  if( i >= _len ) return nc;                    // one-past-the-end query
  final int cidx = Math.min((int)(i/_chunkSize), nc-1); // last chunk is larger
  assert 0 <= cidx && cidx < nc;
  return cidx;
}
// Convert a chunk-index into a starting row #. Constant sized chunks
// (except for the last, which might be a little larger), and size-1 rows so
// this is a little shift-n-add math.  The long cast on cidx keeps the
// multiply from overflowing int for files past 2GB.
@Override long chunk2StartElem( int cidx ) { return (long)cidx*_chunkSize; }
/** Convert a chunk-key to a file offset. Size 1-byte "rows", so this is a
 * direct conversion: chunk index times that vec's chunk size.
 * @return The file offset corresponding to this Chunk index */
public static long chunkOffset ( Key ckey ) {
  final FileVec fv = (FileVec)Vec.getVecKey(ckey).get();
  return (long)chunkIdx(ckey) * fv._chunkSize;
}
// Reverse: convert a chunk-key into a cidx.  The chunk index is stored as a
// 4-byte int inside the key bytes; the 1+1+4 offset skips the leading key
// header bytes -- NOTE(review): assumed to match the CHK key layout defined
// in Key, confirm there.
static int chunkIdx(Key ckey) { assert ckey._kb[0]==Key.CHK; return UnsafeUtils.get4(ckey._kb, 1 + 1 + 4); }
// Convert a chunk# into a chunk - does lazy-chunk creation. As chunks are
// asked-for the first time, we make the Key and an empty backing DVec.
// Touching the DVec will force the file load.
@Override public Value chunkIdx( int cidx ) {
  final long nchk = nChunks();
  assert 0 <= cidx && cidx < nchk;
  Key dkey = chunkKey(cidx);
  Value val1 = DKV.get(dkey);// Check for an existing one... will fetch data as needed
  if( val1 != null ) return val1; // Found an existing one?
  // Lazily create a DVec for this chunk: all chunks are _chunkSize bytes
  // except the last, which gets whatever remains of the file.
  int len = (int)(cidx < nchk-1 ? _chunkSize : (_len-chunk2StartElem(cidx)));
  // DVec is just the raw file data with a null-compression scheme
  Value val2 = new Value(dkey,len,null,TypeMap.C1NCHUNK,_be);
  val2.setDsk(); // It is already on disk.
  // If not-home, then block till the Key is everywhere. Most calls here are
  // from the parser loading a text file, and the parser splits the work such
  // that most puts here are on home - so this is a simple speed optimization:
  // do not make a Futures nor block on it on home.
  Futures fs = dkey.home() ? null : new Futures();
  // Atomically insert: fails on a race, but then return the old version
  Value val3 = DKV.DputIfMatch(dkey,val2,null,fs);
  // NOTE(review): fs != null is implied by !dkey.home() here (fs was built
  // from exactly that condition above); the extra null-check is redundant
  // belt-and-braces only.
  if( !dkey.home() && fs != null ) fs.blockForPending();
  // If we lost the insert race, some other caller published first; hand
  // back its Value so everyone agrees on one canonical chunk.
  return val3 == null ? val2 : val3;
}
/**
 * Calculates safe and hopefully optimal chunk sizes. Four cases
 * exist.
 * <p>
 * very small data < 64K per core - uses default chunk size and
 * all data will be in one chunk
 * <p>
 * small data - data is partitioned into chunks so that there are at least
 * 4 chunks per core to help keep all cores loaded
 * <p>
 * default - chunks are {@value #DFLT_CHUNK_SIZE}
 * <p>
 * large data - if the data would create more than 2M keys per
 * node, then chunk sizes larger than DFLT_CHUNK_SIZE are issued.
 * <p>
 * Too many keys can create enough overhead to blow out memory in
 * large data parsing. # keys = (parseSize / chunkSize) * numCols.
 * Key limit of 2M is a guessed "reasonable" number.
 *
 * @param totalSize - parse size in bytes (across all files to be parsed)
 * @param numCols - number of columns expected in dataset
 * @param maxLineLength - estimated length in bytes of the longest line
 * @param cores - number of processing cores per node
 * @param cloudsize - number of compute nodes
 * @param oldHeuristic - use the legacy power-of-two sizing heuristic
 * @param verbose - print the parse heuristics
 * @return - optimal chunk size in bytes (a power of 2 under the old
 *           heuristic; 512B-aligned under the new one).
 */
public static int calcOptimalChunkSize(long totalSize, int numCols, long maxLineLength, int cores, int cloudsize,
                                       boolean oldHeuristic, boolean verbose) {
  // NOTE(review): the (double) round-trip is a no-op for sizes below 2^53
  // bytes, and the casts bind before the divide -- so this is effectively
  // plain long division of totalSize by cloudsize.
  long localParseSize = (long) (double) totalSize / cloudsize;
  if (oldHeuristic) {
    // Aim for ~4 chunks per core on each node.
    long chunkSize = (localParseSize / (cores * 4));
    // Super small data check - less than 64K/thread
    if (chunkSize <= (1 << 16)) {
      return DFLT_CHUNK_SIZE;
    }
    // Small data check
    chunkSize = 1L << MathUtils.log2(chunkSize); //closest power of 2
    if (chunkSize < DFLT_CHUNK_SIZE
        && (localParseSize/chunkSize)*numCols < (1 << 21)) { // ignore if col cnt is high
      return (int)chunkSize;
    }
    // Big data check
    long tmp = (localParseSize * numCols / (1 << 21)); // ~ 2M keys per node
    if (tmp > (1 << 30)) return (1 << 30); // Max limit is 1G
    if (tmp > DFLT_CHUNK_SIZE) {
      chunkSize = 1 << MathUtils.log2((int) tmp); //closest power of 2
      return (int)chunkSize;
    } else return DFLT_CHUNK_SIZE;
  }
  else {
    // New Heuristic
    int minNumberRows = 10; // need at least 10 rows (lines) per chunk (core)
    int perNodeChunkCountLimit = 1<<21; // don't create more than 2M Chunk POJOs per node
    int minParseChunkSize = 1<<12; // don't read less than this many bytes
    int maxParseChunkSize = (1<<28)-1; // don't read more than this many bytes per map() thread (needs to fit into a Value object)
    // ~4 chunks per core, but never below the minimum read size.
    long chunkSize = Math.max((localParseSize / (4*cores))+1, minParseChunkSize); //lower hard limit
    if(chunkSize > 1024*1024)
      chunkSize = (chunkSize & 0xFFFFFE00) + 512; // align chunk size to 512B
    // Super small data check - file size is smaller than 64kB
    if (totalSize <= 1<<16) {
      chunkSize = Math.max(DFLT_CHUNK_SIZE, (int) (minNumberRows * maxLineLength));
    } else {
      //round down to closest power of 2
//      chunkSize = 1L << MathUtils.log2(chunkSize);
      // Small data check
      if (chunkSize < DFLT_CHUNK_SIZE && (localParseSize / chunkSize) * numCols < perNodeChunkCountLimit) {
        // Keep the small chunk size, but make sure minNumberRows lines fit.
        chunkSize = Math.max((int)chunkSize, (int) (minNumberRows * maxLineLength));
      } else {
        // Adjust chunkSize such that we don't create too many chunks
        int chunkCount = cores * 4 * numCols;
        if (chunkCount > perNodeChunkCountLimit) {
          double ratio = 1 << Math.max(2, MathUtils.log2((int) (double) chunkCount / perNodeChunkCountLimit)); //this times too many chunks globally on the cluster
          chunkSize *= ratio; //need to bite off larger chunks
        }
        chunkSize = Math.min(maxParseChunkSize, chunkSize); // hard upper limit
        // if we can read at least minNumberRows and we don't create too large Chunk POJOs, we're done
        // else, fix it with a catch-all heuristic
        if (chunkSize <= minNumberRows * maxLineLength) {
          // might be more than default, if the max line length needs it, but no more than the size limit(s)
          // also, don't ever create too large chunks
          chunkSize = (int) Math.max(
              DFLT_CHUNK_SIZE, //default chunk size is a good lower limit for big data
              Math.min(maxParseChunkSize, minNumberRows * maxLineLength) //don't read more than 1GB, but enough to read the minimum number of rows
          );
        }
      }
    }
    assert(chunkSize >= minParseChunkSize);
    assert(chunkSize <= maxParseChunkSize);
    if (verbose)
      Log.info("ParseSetup heuristic: "
          + "cloudSize: " + cloudsize
          + ", cores: " + cores
          + ", numCols: " + numCols
          + ", maxLineLength: " + maxLineLength
          + ", totalSize: " + totalSize
          + ", localParseSize: " + localParseSize
          + ", chunkSize: " + chunkSize
          + ", numChunks: " + Math.max(1,totalSize/chunkSize)
          + ", numChunks * cols: " + (Math.max(1,totalSize/chunkSize) * numCols)
      );
    return (int)chunkSize;
  }
}
}