/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.catalog.utils; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryResult; import org.opencb.opencga.catalog.db.api.FileDBAdaptor; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.io.CatalogIOManager; import org.opencb.opencga.catalog.managers.CatalogFileUtils; import org.opencb.opencga.catalog.managers.CatalogManager; import org.opencb.opencga.catalog.managers.FileManager; import org.opencb.opencga.catalog.models.File; import org.opencb.opencga.catalog.models.Study; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.URI; import java.nio.file.Paths; import java.util.*; import java.util.function.Predicate; import java.util.stream.Stream; /** * @author Jacobo Coll <jacobo167@gmail.com> */ public class FileScanner { private static Logger logger = LoggerFactory.getLogger(FileScanner.class); protected final CatalogManager catalogManager; private CatalogFileUtils catalogFileUtils; public enum FileScannerPolicy { DELETE, //Delete file and file entry. Then create a new one REPLACE, //Delete the file, but not the file entry. Updates the file information. // DO_ERROR, // RENAME, } public FileScanner(CatalogManager catalogManager) { this.catalogManager = catalogManager; catalogFileUtils = new CatalogFileUtils(catalogManager); } /** * Check tracking from all files from a study. * * Set file status {@link File.FileStatus#MISSING} if the file (fileUri) is unreachable * Set file status to {@link File.FileStatus#READY} if was {@link File.FileStatus#MISSING} and file (fileUri) is reachable * * @param study The study to check * @param sessionId User sessionId * @param calculateChecksum Calculate checksum for "found files" * @throws CatalogException if a Catalog error occurs * @return found and lost files */ public List<File> checkStudyFiles(Study study, boolean calculateChecksum, String sessionId) throws CatalogException { Query query = new Query(); query.put(FileDBAdaptor.QueryParams.STATUS_NAME.key(), Arrays.asList( File.FileStatus.READY, File.FileStatus.MISSING, File.FileStatus.TRASHED)); QueryResult<File> files = catalogManager.getAllFiles(study.getId(), query, new QueryOptions(), sessionId); List<File> modifiedFiles = new LinkedList<>(); for (File file : files.getResult()) { File checkedFile = catalogFileUtils.checkFile(file, calculateChecksum, sessionId); if (checkedFile != file) { modifiedFiles.add(checkedFile); } } return modifiedFiles; } /** * Scan the study folder, add all untracked files and check tracking. * * @param study Study to resync * @param calculateChecksum Calculates checksum of all the files in the directory to scan * @param sessionId User sessionId * @return New, lost and found files * @throws CatalogException if a Catalog error occurs * @throws IOException if an I/O error occurs */ public List<File> reSync(Study study, boolean calculateChecksum, String sessionId) throws CatalogException, IOException { long studyId = study.getId(); // File root = catalogManager.getAllFiles(studyId, new QueryOptions("path", ""), sessionId).first(); Query query = new Query(); query.put(FileDBAdaptor.QueryParams.URI.key(), "~.*"); //Where URI exists query.put(FileDBAdaptor.QueryParams.TYPE.key(), File.Type.DIRECTORY); List<File> files = catalogManager.searchFile(studyId, query, sessionId).getResult(); List<File> scan = new LinkedList<>(); for (File file : files) { scan.addAll(scan(file, catalogManager.getFileUri(file), FileScannerPolicy.REPLACE, calculateChecksum, false, sessionId)); } // TODO: Scan per file scan.addAll(checkStudyFiles(study, calculateChecksum, sessionId)); return scan; } /** * Return all untracked files in a study folder. * * @param study Study to scan * @param sessionId User sessionId * @return Untracked files * @throws CatalogException if a Catalog error occurs */ public Map<String, URI> untrackedFiles(Study study, String sessionId) throws CatalogException { long studyId = study.getId(); URI studyUri = catalogManager.getStudyUri(studyId); CatalogIOManager ioManager = catalogManager.getCatalogIOManagerFactory().get(studyUri); Map<String, URI> linkedFolders = new HashMap<>(); linkedFolders.put("", studyUri); Query query = new Query(FileDBAdaptor.QueryParams.URI.key(), "~.*"); //Where URI exists) QueryOptions queryOptions = new QueryOptions("include", "projects.studies.files.path,projects.studies.files.uri"); catalogManager.getAllFiles(studyId, query, queryOptions, sessionId).getResult() .forEach(f -> linkedFolders.put(f.getPath(), f.getUri())); Map<String, URI> untrackedFiles = new HashMap<>(); for (Map.Entry<String, URI> entry : linkedFolders.entrySet()) { if (!ioManager.exists(entry.getValue())) { untrackedFiles.put(entry.getKey(), entry.getValue()); continue; } Stream<URI> files = ioManager.listFilesStream(entry.getValue()); Iterator<URI> iterator = files.iterator(); while (iterator.hasNext()) { URI uri = iterator.next(); String filePath = entry.getKey() + entry.getValue().relativize(uri).toString(); QueryResult<File> searchFile = catalogManager.searchFile(studyId, new Query("path", filePath), new QueryOptions("include", "projects.studies.files.id"), sessionId); if (searchFile.getResult().isEmpty()) { untrackedFiles.put(filePath, uri); } /*else { iterator.remove(); //Remove the ones that have an entry in Catalog }*/ } } return untrackedFiles; } /** * Scans the files inside the specified URI and adds to the provided directory. * * @param directory Directory where add found files * @param directoryToScan Directory to scan * @param policy What to do when there is a file in the target path. See {@link FileScannerPolicy} * @param calculateChecksum Calculates checksum of all the files in the directory to scan * @param deleteSource After moving, deletes the source file. If false, force copy. * @param sessionId User sessionId * @return found and new files. * @throws IOException if an I/O error occurs * @throws CatalogException if a Catalog error occurs */ public List<File> scan(File directory, URI directoryToScan, FileScannerPolicy policy, boolean calculateChecksum, boolean deleteSource, String sessionId) throws IOException, CatalogException { return scan(directory, directoryToScan, policy, calculateChecksum, deleteSource, uri -> true, -1, sessionId); } /** * Scans the files inside the specified URI and adds to the provided directory. * * @param directory Directory where add found files * @param directoryToScan Directory to scan * @param policy What to do when there is a file in the target path. See {@link FileScannerPolicy} * @param calculateChecksum Calculates checksum of all the files in the directory to scan * @param deleteSource After moving, deletes the source file. If false, force copy. * @param filter File filter. Excludes the file when this predicate returns false. * @param jobId If any, the job that has generated this files * @param sessionId User sessionId * @return found and new files. * @throws IOException if an I/O error occurs * @throws CatalogException if a Catalog error occurs */ public List<File> scan(File directory, URI directoryToScan, FileScannerPolicy policy, boolean calculateChecksum, boolean deleteSource, Predicate<URI> filter, long jobId, String sessionId) throws IOException, CatalogException { if (filter == null) { filter = uri -> true; } if (directoryToScan == null) { directoryToScan = catalogManager.getFileUri(directory); } if (!directoryToScan.getPath().endsWith("/")) { directoryToScan = URI.create(directoryToScan.toString() + "/"); } if (!directory.getType().equals(File.Type.DIRECTORY)) { throw new CatalogException("Expected folder where place the found files."); } long studyId = catalogManager.getStudyIdByFileId(directory.getId()); long createFilesTime = 0, uploadFilesTime = 0, metadataReadTime = 0; Stream<URI> uris = catalogManager.getCatalogIOManagerFactory().get(directoryToScan).listFilesStream(directoryToScan); List<File> files = new LinkedList<>(); FileMetadataReader fileMetadataReader = FileMetadataReader.get(catalogManager); Iterator<URI> iterator = uris.iterator(); while (iterator.hasNext()) { long fileScanStart = System.currentTimeMillis(); URI uri = iterator.next(); if (!filter.test(uri)) { continue; } URI generatedFile = directoryToScan.relativize(uri); String filePath = URI.create(directory.getPath()).resolve(generatedFile).toString(); // String filePath = Paths.get(directory.getPath(), generatedFile.toString()).toString(); if (generatedFile.getPath().endsWith("/") && !filePath.endsWith("/")) { filePath += "/"; } Query query = new Query(FileDBAdaptor.QueryParams.PATH.key(), filePath); QueryResult<File> searchFile = catalogManager.searchFile(studyId, query, sessionId); File file = null; boolean returnFile = false; if (searchFile.getNumResults() != 0) { File existingFile = searchFile.first(); logger.info("File already existing in target \"" + filePath + "\". FileScannerPolicy = " + policy); switch (policy) { case DELETE: logger.info("Deleting file { id:" + existingFile.getId() + ", path:\"" + existingFile.getPath() + "\" }"); // Delete completely the file/folder ! catalogManager.getFileManager().delete(Long.toString(existingFile.getId()), null, new QueryOptions(FileManager.SKIP_TRASH, true), sessionId); break; case REPLACE: file = existingFile; break; // case RENAME: // throw new UnsupportedOperationException("Unimplemented policy 'rename'"); // case DO_ERROR: // throw new UnsupportedOperationException("Unimplemented policy 'error'"); default: throw new UnsupportedOperationException("Unimplemented policy '" + policy + "'"); } } long createFileTime = 0, uploadFileTime = 0, metadataFileTime = 0; if (file == null) { long start, end; if (uri.getPath().endsWith("/")) { file = catalogManager.getFileManager().createFolder(Long.toString(studyId), Paths.get(filePath).toString(), null, true, null, QueryOptions.empty(), sessionId).first(); } else { start = System.currentTimeMillis(); File.Format format = FormatDetector.detect(uri); File.Bioformat bioformat = BioformatDetector.detect(uri); file = catalogManager.createFile(studyId, format, bioformat, filePath, "", true, jobId, sessionId).first(); end = System.currentTimeMillis(); createFileTime = end - start; createFilesTime += createFileTime; /** Moves the file to the read output **/ start = System.currentTimeMillis(); catalogFileUtils.upload(uri, file, null, sessionId, false, false, deleteSource, calculateChecksum); end = System.currentTimeMillis(); uploadFileTime = end - start; uploadFilesTime += uploadFileTime; returnFile = true; //Return file because is new } logger.debug("Created new file entry for " + uri + " { id:" + file.getId() + ", path:\"" + file.getPath() + "\" } "); } else { if (file.getType() == File.Type.FILE) { if (file.getStatus().getName().equals(File.FileStatus.MISSING)) { logger.info("File { id:" + file.getId() + ", path:\"" + file.getPath() + "\" } recover tracking from file " + uri); logger.debug("Set status to " + File.FileStatus.READY); returnFile = true; //Return file because was missing } long start = System.currentTimeMillis(); catalogFileUtils.upload(uri, file, null, sessionId, true, true, deleteSource, calculateChecksum); long end = System.currentTimeMillis(); uploadFilesTime += end - start; } } try { long start = System.currentTimeMillis(); fileMetadataReader.setMetadataInformation(file, null, null, sessionId, false); long end = System.currentTimeMillis(); metadataFileTime = end - start; metadataReadTime += metadataFileTime; } catch (Exception e) { logger.error("Unable to read metadata information from file " + "{ id:" + file.getId() + ", name: \"" + file.getName() + "\" }", e); } if (returnFile) { //Return only new and found files. files.add(catalogManager.getFile(file.getId(), sessionId).first()); } logger.info("Added file {}", filePath); logger.debug("{}s (create {}s, upload {}s, metadata {}s)", (System.currentTimeMillis() - fileScanStart) / 1000.0, createFileTime / 1000.0, uploadFileTime / 1000.0, metadataFileTime / 1000.0); } logger.debug("Create catalog file entries: " + createFilesTime / 1000.0 + "s"); logger.debug("Upload files: " + uploadFilesTime / 1000.0 + "s"); logger.debug("Read metadata information: " + metadataReadTime / 1000.0 + "s"); return files; } }