package water.util; import java.io.File; import java.util.ArrayList; import water.*; import water.fvec.*; import water.persist.PersistNFS; public class FileIntegrityChecker extends DRemoteTask<FileIntegrityChecker> { final String _root; // Root of directory final String[] _files; // File names found locally final long [] _sizes; // File sizes found locally int[][] _ok; // OUTPUT: files which are globally compatible @Override public void lcompute() { _ok = new int[_files.length][H2O.CLOUD.size()]; for (int i = 0; i < _files.length; ++i) { File f = new File(_files[i]); if (f.exists() && (f.length()==_sizes[i])) _ok[i][H2O.SELF.index()] = 1; } tryComplete(); } @Override public void reduce(FileIntegrityChecker o) { if( _ok == null ) _ok = o._ok; else Utils.add(_ok,o._ok); } @Override public byte priority() { return H2O.GUI_PRIORITY; } private void addFolder(File folder, ArrayList<File> filesInProgress ) { if( !folder.canRead() ) return; if (folder.isDirectory()) { for (File f: folder.listFiles()) { if( !f.canRead() ) continue; // Ignore unreadable files if( f.isHidden() && !folder.isHidden() ) continue; // Do not dive into hidden dirs unless asked if (f.isDirectory()) addFolder(f,filesInProgress); else filesInProgress.add(f); } } else { filesInProgress.add(folder); } } public static FileIntegrityChecker check(File r) { return new FileIntegrityChecker(r).invokeOnAllNodes(); } public FileIntegrityChecker(File root) { _root = PersistNFS.decodeFile(new File(root.getAbsolutePath())).toString(); ArrayList<File> filesInProgress = new ArrayList(); addFolder(root,filesInProgress); _files = new String[filesInProgress.size()]; _sizes = new long[filesInProgress.size()]; for (int i = 0; i < _files.length; ++i) { File f = filesInProgress.get(i); _files[i] = f.getAbsolutePath(); _sizes[i] = f.length(); } } public int size() { return _files.length; } public String getFileName(int i) { return _files[i]; } // Sync this directory with H2O. Record all files that appear to be visible // to the entire cloud, and give their Keys. List also all files which appear // on this H2O instance but are not consistent around the cluster, and Keys // which match the directory name but are not on disk. public Key syncDirectory(ArrayList<String> files, ArrayList<String> keys, ArrayList<String> fails, ArrayList<String> dels) { Futures fs = new Futures(); Key k = null; // Find all Keys which match ... for( int i = 0; i < _files.length; ++i ) { boolean failed = false; for (int j = 0; j < H2O.CLOUD.size(); ++j) { if (_ok[i][j] == 0) { failed = true; fails.add("missing file " + _files[i] + " at node " + H2O.CLOUD._memary[j]); } } if(!failed){ File f = new File(_files[i]); k = PersistNFS.decodeFile(f); if( files != null ) files.add(_files[i]); if( keys != null ) keys .add(k.toString()); if(DKV.get(k) != null)dels.add(k.toString()); new Frame(k).delete_and_lock(null); NFSFileVec nfs = DKV.get(NFSFileVec.make(f, fs)).get(); Frame fr = new Frame(k,new String[] { "0" }, new Vec[] { nfs }); fr.update(null); fr.unlock(null); } } fs.blockForPending(); return k; } }