package water.fvec;
import java.io.File;
import java.io.IOException;
import water.*;
import water.persist.PersistNFS;
import water.util.FileUtils;
/** A NFS distributed file-backed Vector
* <p>
* Vec will be lazily loaded from the NFS file on-demand. Each machine is
* expected to have the <b>same</b> filesystem view onto a file with the same
* byte contents. Each machine will lazily load only the sections of the file
* that are assigned to that machine. Basically, the file starts striped
* across some globally visible file system (e.g. NFS, or just replicated on
* local disk) and is loaded into memory - again striped across the machines -
* without any network traffic or data-motion.
* <p>
* Useful to "memory map" into RAM large datafiles, often pure text files.
*/
public class NFSFileVec extends FileVec {
/** Make a new NFSFileVec key which holds the filename implicitly. This name
* is used by the Chunks to load data on-demand. Blocking
* @return A NFSFileVec mapped to this file. */
public static NFSFileVec make(File f) {
Futures fs = new Futures();
NFSFileVec nfs = make(f, fs);
fs.blockForPending();
return nfs;
}
public static NFSFileVec make(String fname) throws IOException {
File f = FileUtils.getFile(fname);
return NFSFileVec.make(f);
}
/** Make a new NFSFileVec key which holds the filename implicitly. This name
* is used by the Chunks to load data on-demand.
* @return A NFSFileVec mapped to this file. */
public static NFSFileVec make(File f, Futures fs) {
if( !f.exists() ) throw new IllegalArgumentException("File not found: "+f.toString());
long size = f.length();
Key k = Vec.newKey(PersistNFS.decodeFile(f));
// Insert the top-level FileVec key into the store
NFSFileVec nfs = new NFSFileVec(k,size);
DKV.put(k,nfs,fs);
return nfs;
}
private NFSFileVec(Key key, long len) {super(key,len,Value.NFS);}
}