package water.util; import java.io.File; import java.util.ArrayList; import water.*; import water.fvec.*; import water.persist.PersistNFS; public class FileIntegrityChecker extends MRTask<FileIntegrityChecker> { final String[] _files; // File names found locally final long [] _sizes; // File sizes found locally int[] _ok; // OUTPUT: files which are globally compatible @Override public void setupLocal() { _ok = new int[_files.length]; for( int i = 0; i < _files.length; ++i ) { File f = new File(_files[i]); if( f.exists() && (f.length()==_sizes[i]) ) _ok[i] = 1; } } @Override public void reduce( FileIntegrityChecker o ) { ArrayUtils.add(_ok,o._ok); } private void addFolder(File path, ArrayList<File> filesInProgress ) { if( !path.canRead() ) return; File[] files = path.listFiles(); if( files != null ) { //path is a dir, and these are the files for( File f : files ) { if( !f.canRead() ) continue; // Ignore unreadable files if( f.length() == 0 ) continue; // Ignore 0-byte files if( f.isHidden() && !path.isHidden() ) continue; // Do not dive into hidden dirs unless asked if (f.isDirectory()) addFolder(f,filesInProgress); else filesInProgress.add(f); } } else if (path.length() > 0) { //path is a non-zero byte file filesInProgress.add(path); } } public static FileIntegrityChecker check(File r) { return new FileIntegrityChecker(r).doAllNodes(); } public FileIntegrityChecker(File root) { super(H2O.GUI_PRIORITY); ArrayList<File> filesInProgress = new ArrayList<>(); addFolder(root,filesInProgress); _files = new String[filesInProgress.size()]; _sizes = new long[filesInProgress.size()]; for( int i = 0; i < _files.length; ++i ) { File f = filesInProgress.get(i); _files[i] = f.getAbsolutePath(); _sizes[i] = f.length(); } } public int size() { return _files.length; } // Sync this directory with H2O. Record all files that appear to be visible // to the entire cloud, and give their Keys. List also all files which appear // on this H2O instance but are not consistent around the cluster, and Keys // which match the directory name but are not on disk. public Key syncDirectory(ArrayList<String> files, ArrayList<String> keys, ArrayList<String> fails, ArrayList<String> dels) { Futures fs = new Futures(); Key k = null; // Find all Keys which match ... for( int i = 0; i < _files.length; ++i ) { if( _ok[i] < H2O.CLOUD.size() ) { if( fails != null ) fails.add(_files[i]); } else { File f = new File(_files[i]); // Do not call getCanonicalFile - which resolves symlinks - breaks test harness // try { f = f.getCanonicalFile(); _files[i] = f.getPath(); } // Attempt to canonicalize // catch( IOException ignore ) {} k = PersistNFS.decodeFile(f); if( files != null ) files.add(_files[i]); if( keys != null ) keys .add(k.toString()); if( DKV.get(k) != null ) dels.add(k.toString()); new Frame(k).delete_and_lock((Key)null); // Lock before making the NFS; avoids racing ImportFiles creating same Frame NFSFileVec nfs = NFSFileVec.make(f, fs); new Frame(k,new String[]{"C1"}, new Vec[]{nfs}).update((Key)null).unlock((Key)null); } } fs.blockForPending(); return k; } }