package com.constellio.model.services.contents; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.io.FileUtils; import org.apache.commons.io.filefilter.TrueFileFilter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.constellio.data.io.services.facades.IOServices; import com.constellio.data.utils.BigFileEntry; import com.constellio.data.utils.BigFileIterator; import com.constellio.data.utils.Factory; import com.constellio.data.utils.PropertyFileUtils; import com.constellio.data.utils.TimeProvider; import com.constellio.model.services.factories.ModelLayerFactory; import com.constellio.model.services.migrations.ConstellioEIMConfigs; public class ContentManagerImportThreadServices { private static final int DEFAULT_BATCH_SIZE = 1000; private static final int THREADS = 3; private static final Logger LOGGER = LoggerFactory.getLogger(ContentManagerImportThreadServices.class); private static final String READ_FILE_INPUTSTREAM = "ContentManagerImportThreadServices-ReadFileInputStream"; private static final String BIGFILE_EXTRACT_TEMP_FOLDER = "ContentManagerImportThreadServices-BigFileExtractTempFolder"; private ModelLayerFactory modelLayerFactory; private ContentManager contentManager; private File contentImportFolder; private File toImportFolder; private File errorsEmptyFolder; private File errorsUnparsableFolder; private File indexProperties; private File filesExceedingParsingSizeLimit; private File tempFolder; private IOServices ioServices; private int batchSize; private boolean deleteUnusedContentEnabled; public ContentManagerImportThreadServices(ModelLayerFactory modelLayerFactory) { this(modelLayerFactory, DEFAULT_BATCH_SIZE); } public ContentManagerImportThreadServices(ModelLayerFactory modelLayerFactory, int batchSize) { this.batchSize = batchSize; this.modelLayerFactory = modelLayerFactory; this.ioServices = modelLayerFactory.getIOServicesFactory().newIOServices(); this.contentManager = modelLayerFactory.getContentManager(); this.contentImportFolder = modelLayerFactory.getConfiguration().getContentImportThreadFolder(); this.deleteUnusedContentEnabled = modelLayerFactory.getConfiguration().isDeleteUnusedContentEnabled(); this.tempFolder = new File(contentImportFolder, "temp"); this.toImportFolder = new File(contentImportFolder, "toImport"); this.errorsEmptyFolder = new File(contentImportFolder, "errors-empty"); this.errorsUnparsableFolder = new File(contentImportFolder, "errors-unparsable"); this.indexProperties = new File(contentImportFolder, "filename-sha1-index.properties"); this.filesExceedingParsingSizeLimit = new File(contentImportFolder, "files-exceeding-parsing-size-limit.txt"); } public void importFiles() { createFolders(); if (deleteUnusedContentEnabled) { LOGGER.warn("Content import thread requires that configuration 'content.delete.unused.enabled' is set to false"); } else { List<File> files = getFilesReadyToImport(); if (!files.isEmpty()) { importFiles(files); ioServices.deleteEmptyDirectoriesExceptThisOneIn(toImportFolder); ioServices.deleteEmptyDirectoriesExceptThisOneIn(errorsEmptyFolder); ioServices.deleteEmptyDirectoriesExceptThisOneIn(errorsUnparsableFolder); ioServices.deleteEmptyDirectoriesExceptThisOneIn(tempFolder); } } } private void importFiles(List<File> files) { LOGGER.info("importing files " + files + ""); BulkUploader uploader = new BulkUploader(modelLayerFactory, THREADS); uploader.setHandleDeletionOfUnreferencedHashes(false); List<File> extractedBigFileFolders = new ArrayList<>(); Set<String> emptyFileKeys = new HashSet<>(); for (File file : files) { if (file.getName().endsWith(".bigf")) { File bigFileTempFolder = new File(tempFolder, toKey(file)); extractedBigFileFolders.add(bigFileTempFolder); } else { String key = toKey(file); if (file.length() > 0) { uploader.uploadAsync(key, ioServices.newInputStreamFactory(file, READ_FILE_INPUTSTREAM), key); } else { emptyFileKeys.add(key); File dest = new File(errorsEmptyFolder, key.replace("/", File.separator)); dest.getParentFile().mkdirs(); try { ioServices.moveFile(file, dest); } catch (Exception e) { LOGGER.warn("Failed to move empty file, deleting it...", e); FileUtils.deleteQuietly(file); } } } } for (File extractedBigFileFolder : extractedBigFileFolders) { for (File file : allFilesRecursivelyIn(extractedBigFileFolder)) { String key = toBigFileKey(extractedBigFileFolder, file); if (file.length() > 0) { uploader.uploadAsync(key, ioServices.newInputStreamFactory(file, READ_FILE_INPUTSTREAM), key); } else { emptyFileKeys.add(key); File dest = new File(errorsEmptyFolder, key.replace("/", File.separator)); dest.getParentFile().mkdirs(); try { ioServices.moveFile(file, dest); } catch (Exception e) { LOGGER.warn("Failed to move empty file, deleting it...", e); FileUtils.deleteQuietly(file); } } } } uploader.close(); Map<String, ContentVersionDataSummary> newEntriesInIndex = new HashMap<>(); try { for (File extractedBigFileFolder : extractedBigFileFolders) { for (File file : allFilesRecursivelyIn(extractedBigFileFolder)) { String key = toBigFileKey(extractedBigFileFolder, file); ContentVersionDataSummary dataSummary = uploader.get(key); newEntriesInIndex.put(key, dataSummary); if (contentManager.getParsedContent(dataSummary.getHash()).getParsedContent().isEmpty()) { if (fileNotExceedingParsingLimit(file)) { ioServices.moveFile(file, new File(errorsUnparsableFolder, key.replace("/", File.separator))); } else { try { ioServices.appendFileContent(filesExceedingParsingSizeLimit, key + "\n"); } catch (IOException e) { throw new RuntimeException(e); } ioServices.deleteQuietly(file); } } else { ioServices.deleteQuietly(file); } //uploader.uploadAsync(toKey(file), ioServices.newInputStreamFactory(file, READ_FILE_INPUTSTREAM)); } } for (File file : files) { if (!file.getName().endsWith(".bigf")) { String key = toKey(file); if (!emptyFileKeys.contains(key)) { ContentVersionDataSummary dataSummary = uploader.get(key); newEntriesInIndex.put(key, dataSummary); if (contentManager.getParsedContent(dataSummary.getHash()).getParsedContent().isEmpty()) { if (fileNotExceedingParsingLimit(file)) { ioServices.moveFile(file, new File(errorsUnparsableFolder, key.replace("/", File.separator))); } else { try { ioServices.appendFileContent(filesExceedingParsingSizeLimit, key + "\n"); } catch (IOException e) { throw new RuntimeException(e); } ioServices.deleteQuietly(file); } } else { ioServices.deleteQuietly(file); } } } else { ioServices.deleteQuietly(file); } } } finally { writeNewEntriesInIndex(newEntriesInIndex); } } private boolean fileNotExceedingParsingLimit(File file) { long limit = (int) modelLayerFactory.getSystemConfigurationsManager() .getValue(ConstellioEIMConfigs.CONTENT_MAX_LENGTH_FOR_PARSING_IN_MEGAOCTETS) * 1024 * 1024; return file.length() <= limit; } private void writeNewEntriesInIndex(Map<String, ContentVersionDataSummary> newEntriesInIndex) { Map<String, String> map = indexProperties.exists() ? new HashMap<>(PropertyFileUtils.loadKeyValues(indexProperties)) : new HashMap<String, String>(); for (Map.Entry<String, ContentVersionDataSummary> entry : newEntriesInIndex.entrySet()) { map.put(entry.getKey(), toStringValue(entry.getValue())); } PropertyFileUtils.writeMap(indexProperties, map); } private String toStringValue(ContentVersionDataSummary value) { return value.getHash() + ":" + value.getLength() + ":" + value.getMimetype(); } private String toBigFileKey(File extractedBigFileFolder, File file) { String bigFile = extractedBigFileFolder.getAbsolutePath() .replace(tempFolder.getAbsolutePath() + File.separator, ""); String entryPath = file.getAbsolutePath().replace(extractedBigFileFolder.getAbsolutePath() + File.separator, ""); return (bigFile + "/" + entryPath).replace("/", File.separator); } private String toKey(File file) { return file.getAbsolutePath().replace(toImportFolder.getAbsolutePath() + File.separator, "") .replace(File.separator, "/"); } private int extractBigFile(File bigFile) { File bigFileTempFolder = new File(tempFolder, toKey(bigFile)); bigFileTempFolder.mkdirs(); int entriesCount = 0; InputStream inputStream = ioServices.newBufferedFileInputStreamWithoutExpectableFileNotFoundException( bigFile, READ_FILE_INPUTSTREAM); try { BigFileIterator bigFileIterator = new BigFileIterator(inputStream); while (bigFileIterator.hasNext()) { BigFileEntry entry = bigFileIterator.next(); entriesCount++; File destFileForCopy = new File(bigFileTempFolder, entry.getFileName().replace("/", File.separator)); try { FileUtils.forceMkdir(destFileForCopy.getParentFile()); ioServices.replaceFileContent(destFileForCopy, entry.getBytes()); } catch (IOException e) { throw new RuntimeException(e); } } } finally { ioServices.closeQuietly(inputStream); } return entriesCount; } private void uploadBigFile(File file, BulkUploader uploader) { } private List<File> getFilesReadyToImport() { List<File> filesReadyToImport = new ArrayList<>(); int currentBatchSize = 0; for (File fileToImport : allFilesRecursivelyIn(toImportFolder)) { if (currentBatchSize < batchSize && TimeProvider.getLocalDateTime().minusSeconds(10).toDate().getTime() >= fileToImport.lastModified()) { if (fileToImport.getName().endsWith(".bigf")) { Integer size = extractBigFile(fileToImport); filesReadyToImport.add(fileToImport); currentBatchSize += size; } else { filesReadyToImport.add(fileToImport); currentBatchSize++; } } } if (filesReadyToImport.size() > batchSize) { filesReadyToImport = filesReadyToImport.subList(0, batchSize); } return filesReadyToImport; } private List<File> allFilesRecursivelyIn(File folder) { List<File> files = new ArrayList<>(FileUtils.listFiles(folder, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE)); Collections.sort(files, new Comparator<File>() { @Override public int compare(File o1, File o2) { return o1.getAbsolutePath().compareTo(o2.getAbsolutePath()); } }); return files; } private void createFolders() { tempFolder.mkdirs(); toImportFolder.mkdirs(); errorsEmptyFolder.mkdirs(); errorsUnparsableFolder.mkdirs(); try { FileUtils.touch(filesExceedingParsingSizeLimit); } catch (IOException e) { throw new RuntimeException(e); } } public Map<String, Factory<ContentVersionDataSummary>> readFileNameSHA1Index() { return readFileNameSHA1Index(indexProperties); } public Map<String, Factory<ContentVersionDataSummary>> readFileNameSHA1Index(File sha1Properties) { if (!sha1Properties.exists()) { return Collections.emptyMap(); } Map<String, Factory<ContentVersionDataSummary>> map = new HashMap<>(); for (Map.Entry<String, String> entry : PropertyFileUtils.loadKeyValues(sha1Properties).entrySet()) { final String value = entry.getValue(); map.put(entry.getKey(), new Factory<ContentVersionDataSummary>() { @Override public ContentVersionDataSummary get() { return toContentVersionDataSummary(value); } }); } return map; } public static Map<String, Factory<ContentVersionDataSummary>> buildSHA1Map(File file) { Map<String, Factory<ContentVersionDataSummary>> map = new HashMap<>(); for (Map.Entry<String, String> entry : PropertyFileUtils.loadKeyValues(file).entrySet()) { final String value = entry.getValue(); map.put(entry.getKey(), new Factory<ContentVersionDataSummary>() { @Override public ContentVersionDataSummary get() { return toContentVersionDataSummary(value); } }); } return map; } private static ContentVersionDataSummary toContentVersionDataSummary(String value) { String[] parts = value.split(":"); String mimetype = "null".equals(parts[2]) ? null : parts[2]; return new ContentVersionDataSummary(parts[0], mimetype, Integer.valueOf(parts[1])); } }