package de.is24.infrastructure.gridfs.http.gridfs;

import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.gridfs.GridFS;
import com.mongodb.gridfs.GridFSDBFile;
import com.mongodb.gridfs.GridFSFile;
import com.mongodb.gridfs.GridFSInputFile;
import com.mongodb.gridfs.GridFSUtil;
import de.is24.infrastructure.gridfs.http.exception.BadRangeRequestException;
import de.is24.infrastructure.gridfs.http.exception.GridFSFileAlreadyExistsException;
import de.is24.infrastructure.gridfs.http.exception.GridFSFileNotFoundException;
import de.is24.infrastructure.gridfs.http.storage.FileDescriptor;
import de.is24.infrastructure.gridfs.http.storage.FileStorageItem;
import de.is24.infrastructure.gridfs.http.storage.FileStorageService;
import de.is24.infrastructure.gridfs.http.storage.UploadResult;
import de.is24.util.monitoring.spring.TimeMeasurement;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.data.mongodb.gridfs.GridFsOperations;
import org.springframework.data.mongodb.tx.MongoTx;
import org.springframework.jmx.export.annotation.ManagedOperation;
import org.springframework.jmx.export.annotation.ManagedResource;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.security.DigestInputStream;
import java.security.DigestOutputStream;
import java.util.Date;
import java.util.List;

import static com.mongodb.gridfs.GridFSUtil.mergeMetaData;
import static com.mongodb.gridfs.GridFSUtil.remove;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.ARCH_KEY;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.ARCH_KEY_REPO_DATA;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.FILENAME_KEY;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.GRIDFS_FILES_COLLECTION;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.MARKED_AS_DELETED_KEY;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.METADATA_ARCH_KEY;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.METADATA_MARKED_AS_DELETED_KEY;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.METADATA_REPO_KEY;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.METADATA_UPLOAD_DATE_KEY;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.REPO_KEY;
import static de.is24.infrastructure.gridfs.http.mongo.DatabaseStructure.SHA256_KEY;
import static de.is24.infrastructure.gridfs.http.security.Permission.HAS_DESCRIPTOR_READ_PERMISSION;
import static de.is24.infrastructure.gridfs.http.web.MediaTypes.BZ2_CONTENT_TYPE;
import static de.is24.infrastructure.gridfs.http.web.MediaTypes.CONTENT_TYPE_APPLICATION_X_GPG;
import static de.is24.infrastructure.gridfs.http.web.MediaTypes.CONTENT_TYPE_APPLICATION_X_RPM;
import static java.lang.String.format;
import static java.util.regex.Pattern.quote;
import static java.util.stream.Collectors.toList;
import static org.apache.commons.codec.binary.Hex.encodeHexString;
import static org.apache.commons.codec.digest.DigestUtils.getSha256Digest;
import static org.apache.commons.io.IOUtils.closeQuietly;
import static org.apache.commons.io.IOUtils.copy;
import static org.springframework.data.mongodb.core.query.Query.query;
import static org.springframework.data.mongodb.core.query.Update.update;
import static org.springframework.data.mongodb.gridfs.GridFsCriteria.whereFilename;
import static org.springframework.data.mongodb.gridfs.GridFsCriteria.whereMetaData;
import static org.springframework.http.MediaType.APPLICATION_OCTET_STREAM_VALUE;
import static org.springframework.http.MediaType.APPLICATION_XML_VALUE;

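/**
 * GridFS-backed {@link FileStorageService}.
 *
 * Files are addressed by their {@link FileDescriptor} path and stored in MongoDB GridFS together with
 * repo, arch and SHA-256 metadata. Deletion happens in two steps: files are first marked as deleted via a
 * metadata flag and later physically removed (throttled) by {@link #removeFilesMarkedAsDeletedBefore(Date)}.
 *
 * A minimal, hypothetical usage sketch (the {@code service} instance is assumed to be Spring-injected;
 * descriptor values are examples only):
 *
 * <pre>{@code
 * try (InputStream in = new FileInputStream("/tmp/foo-1.0-1.noarch.rpm")) {
 *   service.storeFile(in, new FileDescriptor("myrepo", "noarch", "foo-1.0-1.noarch.rpm"));
 * }
 * }</pre>
 */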
@ManagedResource
@Service
public class GridFsFileStorageService implements FileStorageService {
  private static final Logger LOGGER = LoggerFactory.getLogger(GridFsFileStorageService.class);
  private static final String ENDS_WITH_RPM_REGEX = ".*\\.rpm$";
  private static final int MB = 1024 * 1024;
  private static final int FIVE_MB = 5 * MB;

  private final GridFS gridFs;
  private final GridFsOperations gridFsTemplate;
  private final MongoTemplate mongoTemplate;

  @Autowired
  public GridFsFileStorageService(GridFS gridFs, GridFsOperations gridFsTemplate, MongoTemplate mongoTemplate) {
    this.gridFs = gridFs;
    this.gridFsTemplate = gridFsTemplate;
    this.mongoTemplate = mongoTemplate;
    setupIndices();
  }

  @Override
  public FileStorageItem findById(Object id) {
    if (!(id instanceof ObjectId)) {
      throw new IllegalArgumentException("id must be of type ObjectId, but got: " + id.getClass());
    }

    GridFSDBFile gridFSDBFile = gridFs.find((ObjectId) id);
    return gridFSDBFile != null ? new GridFsFileStorageItem(gridFSDBFile) : null;
  }

  @TimeMeasurement
  @PreAuthorize(HAS_DESCRIPTOR_READ_PERMISSION)
  @Override
  public FileStorageItem findBy(FileDescriptor descriptor) {
    return insecureFindBy(descriptor);
  }

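  // Same lookup as findBy(FileDescriptor), but without the HAS_DESCRIPTOR_READ_PERMISSION check.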
  @TimeMeasurement
  private FileStorageItem insecureFindBy(FileDescriptor descriptor) {
    GridFSDBFile gridFSDBFile = gridFsTemplate.findOne(query(whereFilename().is(descriptor.getPath())));
    return gridFSDBFile != null ? new GridFsFileStorageItem(gridFSDBFile) : null;
  }

  @TimeMeasurement
  @PreAuthorize(HAS_DESCRIPTOR_READ_PERMISSION)
  @Override
  public FileStorageItem getFileBy(FileDescriptor descriptor) {
    FileStorageItem storageItem = findBy(descriptor);
    if (storageItem == null) {
      throw new GridFSFileNotFoundException("Could not find file in gridfs.", descriptor.getPath());
    }
    return storageItem;
  }

  @Override
  public void delete(FileStorageItem storageItem) {
    remove(((GridFsFileStorageItem) storageItem).getDbFile());
  }

  @Override
  public void delete(FileDescriptor descriptor) {
    FileStorageItem storageItem = findBy(descriptor);
    if (storageItem != null) {
      delete(storageItem);
    }
  }

  @Override
  public void moveTo(FileStorageItem storageItem, String repo) {
    GridFSDBFile dbFile = ((GridFsFileStorageItem) storageItem).getDbFile();
    FileDescriptor descriptor = new FileDescriptor(storageItem);
    descriptor.setRepo(repo);
    dbFile.put(FILENAME_KEY, descriptor.getPath());
    dbFile.getMetaData().put(REPO_KEY, repo);
    dbFile.save();
  }

  @Override
  public List<FileStorageItem> getAllRpms() {
    return convert(gridFs.find(query(
        whereFilename().regex(ENDS_WITH_RPM_REGEX)
            .and(METADATA_MARKED_AS_DELETED_KEY).is(null)).getQueryObject()));
  }

  @Override
  public List<FileStorageItem> getAllRpms(String repo) {
    return convert(gridFsTemplate.find(query(
        whereMetaData(REPO_KEY).is(repo)
            .and(METADATA_MARKED_AS_DELETED_KEY).is(null)
            .andOperator(whereFilename().regex(ENDS_WITH_RPM_REGEX)))));
  }

  @Override
  @MongoTx
  public FileStorageItem storeFile(InputStream inputStream, FileDescriptor descriptor) {
    return storeFile(inputStream, descriptor, false);
  }

  @Override
  @MongoTx
  public FileStorageItem storeFile(InputStream inputStream, FileDescriptor descriptor, boolean allowOverride) {
    List<GridFSDBFile> existingDbFiles = findAllBy(descriptor);
    if (!existingDbFiles.isEmpty() && !allowOverride) {
      throw new GridFSFileAlreadyExistsException("Reupload of rpm is not possible.", descriptor.getPath());
    }

    DigestInputStream digestInputStream = new DigestInputStream(inputStream, getSha256Digest());
    GridFSFile inputFile = gridFsTemplate.store(digestInputStream, descriptor.getPath(),
        getContentType(descriptor.getPath()));
    closeQuietly(digestInputStream);

    String sha256Hash = encodeHexString(digestInputStream.getMessageDigest().digest());
    DBObject metaData = createBasicMetaDataObject(descriptor, sha256Hash);
    mergeMetaData(inputFile, metaData);
    inputFile.save();

    if (!existingDbFiles.isEmpty()) {
      existingDbFiles.forEach(e -> remove(e));
    }

    return findById(inputFile.getId());
  }

  private String getContentType(String path) {
    if (path.endsWith(".rpm")) {
      return CONTENT_TYPE_APPLICATION_X_RPM;
    } else if (path.endsWith(".xml")) {
      return APPLICATION_XML_VALUE;
    } else if (path.endsWith(".asc")) {
      return CONTENT_TYPE_APPLICATION_X_GPG;
    }
    return APPLICATION_OCTET_STREAM_VALUE;
  }

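  /**
   * Streams the given SQLite metadata file bzip2-compressed into GridFS, computing SHA-256 checksums of both the
   * uncompressed and the compressed content on the fly, and stores the result under a filename that embeds the
   * compressed checksum (see {@link #createRepoMdLocation(String, String)}).
   */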
  @Override
  public UploadResult storeSqliteFileCompressedWithChecksumName(String reponame, File metadataFile, String name)
      throws IOException {
    FileDescriptor descriptor = new FileDescriptor(reponame, ARCH_KEY_REPO_DATA, name);
    GridFSInputFile inputFile = gridFs.createFile();
    inputFile.setContentType(BZ2_CONTENT_TYPE);

    InputStream fileInputStream = new BufferedInputStream(new FileInputStream(metadataFile));
    DigestInputStream uncompressedDigestInputStream = new DigestInputStream(fileInputStream, getSha256Digest());
    DigestOutputStream compressedDigestOutputStream;
    try {
      OutputStream gridFsOutputStream = inputFile.getOutputStream();
      compressedDigestOutputStream = new DigestOutputStream(gridFsOutputStream, getSha256Digest());

      BZip2CompressorOutputStream bzip2OutputStream = new BZip2CompressorOutputStream(compressedDigestOutputStream);
      copy(uncompressedDigestInputStream, bzip2OutputStream);
      bzip2OutputStream.close();
    } finally {
      uncompressedDigestInputStream.close();
    }

    String compressedChecksum = encodeHexString(compressedDigestOutputStream.getMessageDigest().digest());
    String uncompressedChecksum = encodeHexString(uncompressedDigestInputStream.getMessageDigest().digest());

    String finalFilename = reponame + "/" + createRepoMdLocation(name, compressedChecksum);
    gridFs.remove(finalFilename);

    DBObject metaData = createBasicMetaDataObject(descriptor, compressedChecksum);
    inputFile.setMetaData(metaData);
    inputFile.put(FILENAME_KEY, finalFilename);
    inputFile.getOutputStream().close();

    UploadResult uploadResult = new UploadResult();
    uploadResult.setLocation(finalFilename);
    uploadResult.setUploadDate(inputFile.getUploadDate());
    uploadResult.setCompressedSize(inputFile.getLength());
    uploadResult.setCompressedChecksum(compressedChecksum);
    uploadResult.setUncompressedSize(metadataFile.length());
    uploadResult.setUncompressedChecksum(uncompressedChecksum);
    return uploadResult;
  }

  @ManagedOperation
  public List<String> listFilesMarkedAsDeleted() {
    final List<GridFSDBFile> gridFSDBFiles = gridFsTemplate.find(query(whereMetaData(MARKED_AS_DELETED_KEY).ne(null)));
    return gridFSDBFiles.stream()
        .map(file -> file.getFilename() + " " + file.getMetaData().get(MARKED_AS_DELETED_KEY))
        .collect(toList());
  }

  @ManagedOperation
  @MongoTx(writeConcern = "FSYNCED")
  public void removeFilesMarkedAsDeletedBefore(final Date before) {
    LOGGER.info("removing files marked as deleted before {}", before);

    final List<GridFSDBFile> filesToDelete = gridFsTemplate.find(query(
        whereMetaData(MARKED_AS_DELETED_KEY).lt(before)));
    int counter = 0;
    for (GridFSDBFile file : filesToDelete) {
      final long lengthInBytes = file.getLength();
      final String filename = file.getFilename();
      LOGGER.info("removing file {}", filename);
      GridFSUtil.remove(file);

      // wait depending on the size/count of deleted files to let the mongo cluster do the sync
      // without 'dying' on io wait
      if (lengthInBytes > FIVE_MB) {
        waitAfterDeleteOfLargeFile(lengthInBytes, filename);
      }
      if (counter > 100) {
        LOGGER.info("waiting 2000 ms after removal of 100 files");
        try {
          Thread.sleep(2000);
        } catch (InterruptedException e) {
          // should only happen on server shutdown
        }
        counter = 0;
      } else {
        counter++;
      }
    }

    LOGGER.info("finished removing files marked as deleted before {}", before);
  }

  public void markForDeletionByPath(final String path) {
    markForDeletion(whereFilename().is(path));
  }

  public void markForDeletionByFilenameRegex(final String regex) {
    markForDeletion(whereFilename().regex(regex));
  }

  @Override
  public void deleteRepo(String reponame) {
    markForDeletion(whereMetaData(REPO_KEY).is(reponame));
  }

  @Override
  public List<FileStorageItem> findByPrefix(String prefix) {
    return convert(gridFsTemplate.find(query(whereFilename().regex("^" + quote(prefix)))));
  }

  @Override
  public void setUploadDate(FileStorageItem file, Date date) {
    Assert.notNull(file);
    Assert.isInstanceOf(GridFsFileStorageItem.class, file);

    GridFSDBFile dbFile = ((GridFsFileStorageItem) file).getDbFile();
    dbFile.getMetaData().put("uploadDate", date);
    dbFile.save();
  }

  @Override
  @PreAuthorize(HAS_DESCRIPTOR_READ_PERMISSION)
  public BoundedGridFsResource getResource(FileDescriptor descriptor) throws IOException {
    return getResource(descriptor, 0);
  }

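  /**
   * Returns a resource limited to {@code size} bytes starting at byte offset {@code startPos};
   * throws {@link BadRangeRequestException} if the start position lies at or beyond the end of the file.
   */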
  @Override
  @PreAuthorize(HAS_DESCRIPTOR_READ_PERMISSION)
  public BoundedGridFsResource getResource(FileDescriptor descriptor, long startPos, long size) throws IOException {
    return new BoundedGridFsResource(getFileStorageItemWithCheckedStartPos(descriptor, startPos), startPos, size);
  }

  @Override
  @MongoTx
  public List<FileStorageItem> getCorruptFiles() {
    return convert(gridFsTemplate.find(getCorruptFileQuery()));
  }

  @Override
  public void deleteCorruptFiles() {
    gridFsTemplate.delete(getCorruptFileQuery());
  }

  @Override
  @PreAuthorize(HAS_DESCRIPTOR_READ_PERMISSION)
  public BoundedGridFsResource getResource(FileDescriptor descriptor, long startPos) throws IOException {
    return new BoundedGridFsResource(getFileStorageItemWithCheckedStartPos(descriptor, startPos), startPos);
  }

  private List<GridFSDBFile> findAllBy(FileDescriptor descriptor) {
    return gridFsTemplate.find(query(whereFilename().is(descriptor.getPath())));
  }

  private List<FileStorageItem> convert(List<GridFSDBFile> rpms) {
    return rpms.stream().map(GridFsFileStorageItem::new).collect(toList());
  }

  private FileStorageItem getFileStorageItemWithCheckedStartPos(FileDescriptor descriptor, long startPos) {
    FileStorageItem storageItem = getFileBy(descriptor);
    if (startPos >= storageItem.getSize()) {
      throw new BadRangeRequestException(format(
          "Range start is bigger than file size.\n" +
          "\tpath: %s\n" +
          "\tstartPos: %s\n" +
          "\tlength: %s\n",
          descriptor.getPath(), startPos, storageItem.getSize()));
    }
    return storageItem;
  }

  private Query getCorruptFileQuery() {
    return query(new Criteria().orOperator(whereFilename().is(null), whereMetaData().is(null)));
  }

  private void markForDeletion(final Criteria criteria) {
    mongoTemplate.updateMulti(query(criteria.and(METADATA_MARKED_AS_DELETED_KEY).is(null)),
        update(METADATA_MARKED_AS_DELETED_KEY, new Date()),
        GRIDFS_FILES_COLLECTION);
  }

  private String createRepoMdLocation(String name, String checksum) {
    return ARCH_KEY_REPO_DATA + "/" + name + "-" + checksum + ".sqlite.bz2";
  }

  private DBObject createBasicMetaDataObject(FileDescriptor descriptor, String sha256Hash) {
    DBObject metaData = new BasicDBObject();
    metaData.put(REPO_KEY, descriptor.getRepo());
    metaData.put(ARCH_KEY, descriptor.getArch());
    metaData.put(SHA256_KEY, sha256Hash);
    return metaData;
  }

  private void waitAfterDeleteOfLargeFile(long lengthInBytes, String filename) {
    // 24MB -> 1600ms, 600MB -> 40sec
    final long lengthInMb = lengthInBytes / MB;
    final long millisToWait = (long) ((lengthInMb / 60f) * 4000);
    LOGGER.info("waiting {}ms after remove of large file {}({}MB)", millisToWait, filename, lengthInMb);
    try {
      Thread.sleep(millisToWait);
    } catch (InterruptedException e) {
      // should only happen on server shutdown
    }
  }

  private void setupIndices() {
    createIndex(METADATA_REPO_KEY);
    createIndex(METADATA_ARCH_KEY);
    createIndex(METADATA_UPLOAD_DATE_KEY);
    createIndex(METADATA_MARKED_AS_DELETED_KEY);
  }

  private void createIndex(String key) {
    DBCollection filesCollection = mongoTemplate.getCollection(GRIDFS_FILES_COLLECTION);
    filesCollection.createIndex(new BasicDBObject(key, 1));
  }
}