/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at * * http://www.dspace.org/license/ */ package org.dspace.storage.bitstore; import org.apache.log4j.Logger; import org.dspace.content.Bitstream; import org.dspace.core.Utils; import java.io.*; import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.Map; /** * Native DSpace (or "Directory Scatter" if you prefer) asset store. * Implements a directory 'scatter' algorithm to avoid OS limits on * files per directory. * * @author Peter Breton, Robert Tansley, Richard Rodgers, Peter Dietz */ public class DSBitStoreService implements BitStoreService { /** log4j log */ private static Logger log = Logger.getLogger(DSBitStoreService.class); // These settings control the way an identifier is hashed into // directory and file names // // With digitsPerLevel 2 and directoryLevels 3, an identifier // like 12345678901234567890 turns into the relative name // /12/34/56/12345678901234567890. // // You should not change these settings if you have data in the // asset store, as the BitstreamStorageManager will be unable // to find your existing data. private static final int digitsPerLevel = 2; private static final int directoryLevels = 3; // Checksum algorithm private static final String CSA = "MD5"; /** the asset directory */ private File baseDir; public DSBitStoreService() { } /** * Initialize the asset store * */ public void init() { // the config string contains just the asset store directory path //set baseDir? } /** * Return an identifier unique to this asset store instance * * @return a unique ID */ public String generateId() { return Utils.generateKey(); } /** * Retrieve the bits for the asset with ID. If the asset does not * exist, returns null. * * @param bitstream * The ID of the asset to retrieve * @throws java.io.IOException * If a problem occurs while retrieving the bits * * @return The stream of bits, or null */ public InputStream get(Bitstream bitstream) throws IOException { try { return new FileInputStream(getFile(bitstream)); } catch (Exception e) { log.error("get(" + bitstream.getInternalId() + ")", e); throw new IOException(e); } } /** * Store a stream of bits. * * <p> * If this method returns successfully, the bits have been stored. * If an exception is thrown, the bits have not been stored. * </p> * * @param in * The stream of bits to store * @throws java.io.IOException * If a problem occurs while storing the bits */ public void put(Bitstream bitstream, InputStream in) throws IOException { try { File file = getFile(bitstream); // Make the parent dirs if necessary File parent = file.getParentFile(); if (!parent.exists()) { parent.mkdirs(); } //Create the corresponding file and open it file.createNewFile(); FileOutputStream fos = new FileOutputStream(file); // Read through a digest input stream that will work out the MD5 DigestInputStream dis = null; try { dis = new DigestInputStream(in, MessageDigest.getInstance(CSA)); } // Should never happen catch (NoSuchAlgorithmException nsae) { log.warn("Caught NoSuchAlgorithmException", nsae); } Utils.bufferedCopy(dis, fos); fos.close(); in.close(); bitstream.setSizeBytes(file.length()); bitstream.setChecksum(Utils.toHex(dis.getMessageDigest().digest())); bitstream.setChecksumAlgorithm(CSA); } catch (Exception e) { log.error("put(" + bitstream.getInternalId() + ", inputstream)", e); throw new IOException(e); } } /** * Obtain technical metadata about an asset in the asset store. * * @param bitstream * The asset to describe * @param attrs * A Map whose keys consist of desired metadata fields * * @throws java.io.IOException * If a problem occurs while obtaining metadata * @return attrs * A Map with key/value pairs of desired metadata */ public Map about(Bitstream bitstream, Map attrs) throws IOException { try { // potentially expensive, since it may calculate the checksum File file = getFile(bitstream); if (file != null && file.exists()) { if (attrs.containsKey("size_bytes")) { attrs.put("size_bytes", file.length()); } if (attrs.containsKey("checksum")) { // generate checksum by reading the bytes DigestInputStream dis = null; try { FileInputStream fis = new FileInputStream(file); dis = new DigestInputStream(fis, MessageDigest.getInstance(CSA)); } catch (NoSuchAlgorithmException e) { log.warn("Caught NoSuchAlgorithmException", e); throw new IOException("Invalid checksum algorithm"); } final int BUFFER_SIZE = 1024 * 4; final byte[] buffer = new byte[BUFFER_SIZE]; while (true) { final int count = dis.read(buffer, 0, BUFFER_SIZE); if (count == -1) { break; } } attrs.put("checksum", Utils.toHex(dis.getMessageDigest().digest())); attrs.put("checksum_algorithm", CSA); dis.close(); } if (attrs.containsKey("modified")) { attrs.put("modified", String.valueOf(file.lastModified())); } return attrs; } return null; } catch (Exception e) { log.error("about(" + bitstream.getInternalId() + ")", e); throw new IOException(e); } } /** * Remove an asset from the asset store. An irreversible operation. * * @param bitstream * The asset to delete * @throws java.io.IOException * If a problem occurs while removing the asset */ public void remove(Bitstream bitstream) throws IOException { try { File file = getFile(bitstream); if (file != null) { if (file.delete()) { deleteParents(file); } } else { log.warn("Attempt to remove non-existent asset. ID: " + bitstream.getInternalId()); } } catch (Exception e) { log.error("remove(" + bitstream.getInternalId() + ")", e); throw new IOException(e); } } //////////////////////////////////////// // Internal methods //////////////////////////////////////// /** * Delete empty parent directories. * * @param file * The file with parent directories to delete */ private synchronized static void deleteParents(File file) { if (file == null) { return; } File tmp = file; for (int i = 0; i < directoryLevels; i++) { File directory = tmp.getParentFile(); File[] files = directory.listFiles(); // Only delete empty directories if (files.length != 0) { break; } directory.delete(); tmp = directory; } } /** * Return the file corresponding to a bitstream. It's safe to pass in * <code>null</code>. * * @param bitstream * the database table row for the bitstream. Can be * <code>null</code> * * @return The corresponding file in the file system, or <code>null</code> * * @throws IOException * If a problem occurs while determining the file */ protected File getFile(Bitstream bitstream) throws IOException { // Check that bitstream is not null if (bitstream == null) { return null; } // turn the internal_id into a file path relative to the assetstore // directory String sInternalId = bitstream.getInternalId(); // there are 4 cases: // -conventional bitstream, conventional storage // -conventional bitstream, srb storage // -registered bitstream, conventional storage // -registered bitstream, srb storage // conventional bitstream - dspace ingested, dspace random name/path // registered bitstream - registered to dspace, any name/path String sIntermediatePath = null; if (isRegisteredBitstream(sInternalId)) { sInternalId = sInternalId.substring(REGISTERED_FLAG.length()); sIntermediatePath = ""; } else { // Sanity Check: If the internal ID contains a // pathname separator, it's probably an attempt to // make a path traversal attack, so ignore the path // prefix. The internal-ID is supposed to be just a // filename, so this will not affect normal operation. if (sInternalId.contains(File.separator)) { sInternalId = sInternalId.substring(sInternalId.lastIndexOf(File.separator) + 1); } sIntermediatePath = getIntermediatePath(sInternalId); } StringBuilder bufFilename = new StringBuilder(); bufFilename.append(baseDir.getCanonicalFile()); bufFilename.append(File.separator); bufFilename.append(sIntermediatePath); bufFilename.append(sInternalId); if (log.isDebugEnabled()) { log.debug("Local filename for " + sInternalId + " is " + bufFilename.toString()); } return new File(bufFilename.toString()); } /** * Return the intermediate path derived from the internal_id. This method * splits the id into groups which become subdirectories. * * @param iInternalId * The internal_id * @return The path based on the id without leading or trailing separators */ protected String getIntermediatePath(String iInternalId) { StringBuilder buf = new StringBuilder(); for (int i = 0; i < directoryLevels; i++) { int digits = i * digitsPerLevel; if (i > 0) { buf.append(File.separator); } buf.append(iInternalId.substring(digits, digits + digitsPerLevel)); } buf.append(File.separator); return buf.toString(); } protected final String REGISTERED_FLAG = "-R"; public boolean isRegisteredBitstream(String internalId) { return internalId.startsWith(REGISTERED_FLAG); } public File getBaseDir() { return baseDir; } public void setBaseDir(File baseDir) { this.baseDir = baseDir; } }