// $Id$
package org.yajul.io;

import org.yajul.util.StringUtil;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.TimeZone;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import static org.yajul.juli.LogHelper.unexpected;

/**
 * Javabean that can store objects in the filesystem. The objects are stored in a hierarchy of directories
 * corresponding to the date value passed in. Callers can persist the file name passed back from storeObject()
 * so that the objects can easily be retrieved without knowing the date, etc.
 * When an object or document is stored with the same filename more than once, the archiver
 * will make a backup copy so the data is not lost. If this is not desired, set the overwrite property
 * to true.
 * <ul>
 * <li>Use storeObject() / retrieveObject() to store and retrieve serialized Java objects.</li>
 * <li>To store XML documents, use XMLDocumentArchiver.</li>
 * <li>For more control over what is stored, use getSource() / getSink() which provides the generated file names
 * and input / output streams.</li>
 * </ul>
 * See the setter / getter method javadoc for a description of the properties and the default values.
 * <br>
 * An example of a Spring initializer is listed here:
 * <pre>
 * &lt;bean id="documentArchiver" class="org.yajul.io.DocumentArchiver"
 *       init-method="init"&gt;
 *   &lt;property name="storeageDirectoryStr"&gt;&lt;value&gt;/archive/documents&lt;/value&gt;&lt;/property&gt;
 *   &lt;property name="retrieveDirectories"&gt;
 *     &lt;list&gt;
 *       &lt;value&gt;/archive/old/documents&lt;/value&gt;
 *     &lt;/list&gt;
 *   &lt;/property&gt;
 *   &lt;property name="extension"&gt;&lt;value&gt;.dat.gz&lt;/value&gt;&lt;/property&gt;
 * &lt;/bean&gt;
 * </pre>
 * User: jdavis<br>
 * Date: Mar 5, 2004<br>
 * Time: 6:07:28 PM<br>
 * @author josh May 6, 2004 11:41:01 PM
 */
public class DocumentArchiver {
    private static final Logger log = Logger.getLogger(DocumentArchiver.class.getName());

    /** The default filename extension for stored documents. */
    public static final String DEFAULT_EXTENSION = ".dat.gz";

    private File storeageDirectory;
    private String extension = DEFAULT_EXTENSION;
    private List retrieveDirectories;
    private boolean gzip = true;
    private boolean buffered = true;
    private boolean overwrite = false;

    /**
     * Returns the storage directory.
     * @return the storage directory.
     */
    public File getStoreageDirectory() {
        return storeageDirectory;
    }

    /**
     * String version of {@link #setStoreageDirectory(File)}.
     * @param storeageDirectory The path name of the document storeage directory.
     * @throws IllegalArgumentException if the argument is null, or names an existing non-directory.
     */
    public void setStoreageDirectoryStr(String storeageDirectory) {
        if (storeageDirectory == null) {
            throw new IllegalArgumentException("Storeage directory cannot be null!");
        }
        setStoreageDirectory(new File(storeageDirectory));
    }

    /**
     * Sets the directory where documents will be stored. This will also be the <i>first</i> directory where
     * documents are retrieved from. No default.
     * @param storeageDirectory The document storeage directory.
     * @throws IllegalArgumentException if the argument is null, or exists but is not a directory.
     */
    public void setStoreageDirectory(File storeageDirectory) {
        if (storeageDirectory == null) {
            throw new IllegalArgumentException("Storeage directory cannot be null!");
        }
        if (storeageDirectory.exists() && !storeageDirectory.isDirectory()) {
            throw new IllegalArgumentException(storeageDirectory.toString() + " is not a directory!");
        }
        this.storeageDirectory = storeageDirectory;
    }

    /**
     * Returns the filename extension that will be used for the stored documents.
     * @return the filename extension that will be used for the stored documents.
     */
    public String getExtension() {
        return extension;
    }

    /**
     * Sets the filename extension used for stored documents.
     * @param extension The filename extension, defaults to '.dat.gz'.
     * @throws IllegalArgumentException if the extension is null.
     */
    public void setExtension(String extension) {
        if (extension == null) {
            throw new IllegalArgumentException("Extension cannot be null!");
        }
        this.extension = extension;
    }

    /**
     * Returns the list of retrieval directories. Documents not found in the storeage directory will
     * be searched along this path of directories.
     * @return the list of retrieval directories (may be null if none were set).
     */
    public List getRetrieveDirectories() {
        return retrieveDirectories;
    }

    /**
     * Returns the number of retrieval directories.
     * @return the number of retrieval directories, zero if the list has not been set.
     */
    public int getRetrieveDirectoryCount() {
        return (retrieveDirectories == null) ? 0 : retrieveDirectories.size();
    }

    /**
     * Sets the retrieval directories, which will be scanned in order if a document is not found in the
     * storeage directory.
     *
     * @param retrieveDirectories A list of directory names (each element must be a String).
     */
    public void setRetrieveDirectories(List retrieveDirectories) {
        this.retrieveDirectories = retrieveDirectories;
    }

    /**
     * Returns true if documents are being GZIP compressed.
     * @return true if documents are being GZIP compressed.
     */
    public boolean isGzip() {
        return gzip;
    }

    /**
     * Enables/disables GZIP compression, the default is 'true'.
     * @param gzip True for gzip compression, false for uncompressed.
     */
    public void setGzip(boolean gzip) {
        this.gzip = gzip;
    }

    /**
     * Returns true if the streams will be buffered.
     * @return true if the streams will be buffered.
     */
    public boolean isBuffered() {
        return buffered;
    }

    /**
     * Enables/disables stream buffering, the default is 'true'. It is recommended that
     * this value be set to true (the default) as performance can degrade rapidly if
     * no buffering is used.
     * @param buffered If true, streams will be buffered.
     */
    public void setBuffered(boolean buffered) {
        this.buffered = buffered;
    }

    /**
     * Returns true if an existing file is deleted and replaced when a document is stored
     * under the same file name again; false if the existing file is renamed to a numbered backup.
     * @return true if existing files are overwritten.
     */
    public boolean isOverwrite() {
        return overwrite;
    }

    /**
     * Enables/disables overwriting of existing files, the default is 'false' (existing files
     * are renamed to '&lt;name&gt;.1', '&lt;name&gt;.2', ... before the new file is written).
     * @param overwrite If true, existing files are deleted instead of being backed up.
     */
    public void setOverwrite(boolean overwrite) {
        this.overwrite = overwrite;
    }

    /**
     * Initializes the bean: verifies that the storeage directory has been set and is a
     * directory, creating it (and any missing parents) if it does not exist yet.
     * @throws IOException if the storeage directory exists but is not a directory.
     * @throws IllegalStateException if the storeage directory has not been set.
     */
    public void init() throws IOException {
        log.info("init() : ENTER");
        try {
            if (storeageDirectory == null) {
                throw new IllegalStateException("Storeage directory has not been set!");
            }
            if (storeageDirectory.exists()) {
                if (!storeageDirectory.isDirectory()) {
                    throw new IOException("'" + storeageDirectory + "' is not a directory.");
                }
            } else if (!storeageDirectory.mkdirs() && !storeageDirectory.isDirectory()) {
                // Not fatal here: retrieval from the other directories can still work, and any
                // later attempt to write will fail with its own IOException.
                log.warning("init() : Unable to create directory '" + storeageDirectory + "'");
            }
        } catch (IOException e) {
            unexpected(log, e); // project helper; presumably logs the exception
            throw e;
        } finally {
            log.info("init() : LEAVE");
        }
    }

    /**
     * Stores an object, given the id, date and sub-directory.
     *
     * @param subDirectory The sub-directory of the storeage directory where documents of this type are stored.
     * @param id The id object that will be used to generate the file name.
     * @param date The date, which will be used to generate the directory name.
     * @param object The object that will be stored.
     * @return The name of the file that was used to store the object.
     * @throws IOException if something goes wrong.
     */
    public String storeObject(String subDirectory, Object id, Date date, Object object) throws IOException {
        if (log.isLoggable(Level.FINE)) {
            log.log(Level.FINE, "storeObject() : ENTER");
        }
        try {
            Sink docOut = getSink(subDirectory, id, date);
            OutputStream out = docOut.getStream();
            boolean stored = false;
            try {
                ObjectOutputStream oos = new ObjectOutputStream(out);
                oos.writeObject(object);
                oos.flush();
                oos.close(); // Also closes the underlying file / GZIP stream.
                stored = true;
            } finally {
                // Do not leak the file handle if serialization failed part way through.
                if (!stored) {
                    closeQuietly(out);
                }
            }
            if (log.isLoggable(Level.FINE)) {
                log.log(Level.FINE, "storeObject() : Object sucessfully stored.");
            }
            return docOut.getFilename(); // Return the relative file name.
        } catch (IOException e) {
            unexpected(log, e);
            throw e;
        } finally {
            if (log.isLoggable(Level.FINE)) {
                log.log(Level.FINE, "storeObject() : LEAVE");
            }
        }
    }

    /**
     * Retrieves an object given the sub-directory, the id, and the date.
     * @param subDirectory The sub-directory.
     * @param id The object id.
     * @param date The date.
     * @return The object.
     * @throws IOException if something goes wrong.
     */
    public Object retrieveObject(String subDirectory, Object id, Date date) throws IOException {
        return retrieveObject(subDirectory, generateFileName(id, date));
    }

    /**
     * Retrieves an object given the sub-directory and the file name.
     * <p>
     * NOTE: this uses Java native deserialization; the archive directories must only
     * contain trusted data.
     *
     * @param subDirectory The sub-directory.
     * @param fileName The name of the file, as returned by the storeObject() method.
     * @return The object.
     * @throws FileNotFoundException if the file does not exist in any of the directories.
     * @throws IOException if something goes wrong, including when the class of the stored
     * object cannot be found (the ClassNotFoundException is attached as the cause).
     */
    public Object retrieveObject(String subDirectory, String fileName) throws IOException {
        if (log.isLoggable(Level.FINE)) {
            log.log(Level.FINE, "retrieveObject() : ENTER");
        }
        try {
            Source source = getSource(subDirectory, fileName);
            InputStream in = source.getStream();
            try {
                if (log.isLoggable(Level.FINE)) {
                    log.log(Level.FINE, "retrieveObject() : " + source.getFilename());
                }
                ObjectInputStream ois = new ObjectInputStream(in);
                Object o = ois.readObject();
                if (log.isLoggable(Level.FINE)) {
                    log.log(Level.FINE, "retrieveObject() : Object sucessfully retrieved.");
                }
                return o;
            } finally {
                // The stream is never handed to the caller, so it must be released here.
                closeQuietly(in);
            }
        } catch (FileNotFoundException e) {
            // A missing document is reported to the caller as-is, without the unexpected() call.
            throw e;
        } catch (IOException e) {
            unexpected(log, e);
            throw e;
        } catch (ClassNotFoundException e) {
            unexpected(log, e);
            IOException ioe = new IOException("Class not found! " + e.getMessage());
            ioe.initCause(e); // Preserve the original stack trace for the caller.
            throw ioe;
        } finally {
            if (log.isLoggable(Level.FINE)) {
                log.log(Level.FINE, "retrieveObject() : LEAVE");
            }
        }
    }

    /**
     * Returns the Sink (information for writing a document) for a given id, date and sub-directory.
     * If the target file already exists it is either renamed to a numbered backup
     * (the default) or deleted (when the overwrite property is true) before the
     * output stream is opened. The caller is responsible for closing the stream.
     *
     * @param subDirectory The sub-directory of the storeage directory where documents of this type are stored.
     * @param id The id object that will be used to generate the file name.
     * @param date The date, which will be used to generate the directory name.
     * @return the Sink (information for writing a document) for a given id, date and sub-directory.
     * @throws IOException if the storeage directory is not set, the existing file could not be
     * backed up, or the output file could not be opened.
     */
    public Sink getSink(String subDirectory, Object id, Date date) throws IOException {
        if (storeageDirectory == null) {
            throw new IOException("Storeage directory cannot be null! (Did you forget to invoke setStoreageDirectory()?)");
        }
        String fileName = generateFileName(id, date);
        // If a storeage sub-directory was specified, use it.
        File dir = getSubDirectory(storeageDirectory, subDirectory);
        File f = new File(dir, fileName).getAbsoluteFile();
        if (f.exists()) {
            if (!overwrite) {
                File backup = findUnusedBackup(f.getAbsolutePath());
                log.info("getSink() : Renaming existing file to " + backup.getAbsolutePath());
                // Refuse to continue if the backup failed: writing now would destroy the
                // existing document, which is exactly what the backup is meant to prevent.
                if (!f.renameTo(backup)) {
                    throw new IOException("Unable to rename existing file " + f.getAbsolutePath()
                            + " to " + backup.getAbsolutePath());
                }
            } else {
                log.info("getSink() : deleting " + f.getAbsolutePath());
                // Not fatal: opening the output stream replaces the content anyway.
                if (!f.delete()) {
                    log.warning("getSink() : Unable to delete " + f.getAbsolutePath());
                }
            }
        }
        log.info("getSink() : " + f.getAbsolutePath());
        return new Sink(fileName, getOutputStream(f));
    }

    /**
     * Finds the first backup file name of the form '&lt;pathname&gt;.N' (N = 1, 2, ...) that
     * does not exist yet.
     * @param pathname The absolute path name of the file to be backed up.
     * @return a file that did not exist at the time of the check.
     */
    private File findUnusedBackup(String pathname) {
        // This might need to be optimized a bit. We could enumerate the directory
        // to find a good filename for the backup.
        int i = 1;
        File backup = new File(pathname + "." + i);
        while (backup.exists()) {
            i++;
            backup = new File(pathname + "." + i);
        }
        return backup;
    }

    /**
     * Generates a file name from an object id and a date and file extension. The result has the form
     * <tt>/yyyy/yyyy_MM/yyyy_MM_dd/&lt;id&gt;&lt;extension&gt;</tt> (using the platform file separator),
     * where the date is formatted in GMT and '/' and whitespace characters in the id are replaced.
     * @param id The object id, used to generate the unique file name (must not be null).
     * @param date The date, used to create the directory path (must not be null).
     * @return The filename.
     */
    public String generateFileName(Object id, Date date) {
        // Generate the file name; '/' becomes '-', blanks/tabs/line breaks become '_'.
        String baseName = id.toString()
                .replace('/', '-')
                .replace(' ', '_')
                .replace('\t', '_')
                .replace('\r', '_')
                .replace('\n', '_')
                + this.extension;
        // Get the year, month and day as a directory path, with the file separator in between.
        // A new formatter is created for each call because SimpleDateFormat is not thread-safe.
        SimpleDateFormat df = new SimpleDateFormat(
                File.separator + "yyyy" + File.separator + "yyyy_MM" + File.separator + "yyyy_MM_dd" + File.separator);
        df.setTimeZone(TimeZone.getTimeZone("GMT"));
        return df.format(date) + baseName;
    }

    /**
     * Returns a source (filename and input stream) given the sub-directory, the id, and the date.
     * @param subDirectory The sub-directory.
     * @param id The object id.
     * @param date The date.
     * @return The source : an input stream and a file name.
     * @throws IOException if something goes wrong.
     */
    public Source getSource(String subDirectory, Object id, Date date) throws IOException {
        return getSource(subDirectory, generateFileName(id, date));
    }

    /**
     * Retrieves a source given the sub-directory and the file name. The storeage directory is
     * checked first, then each of the retrieve directories in order. The caller is responsible
     * for closing the returned stream.
     * @param subDirectory The sub-directory.
     * @param fileName The name of the file, as returned by the storeObject() method.
     * @return The source : an input stream and a file name.
     * @throws FileNotFoundException if the file was not found in any directory.
     * @throws IOException if something goes wrong.
     */
    public Source getSource(String subDirectory, String fileName) throws IOException {
        if (storeageDirectory == null) {
            throw new IOException("Storeage directory cannot be null! (Did you forget to invoke setStoreageDirectory()?)");
        }
        // If a storeage sub-directory was specified, use it.
        File dir = getSubDirectory(storeageDirectory, subDirectory);
        File f = new File(dir, fileName);
        if (f.exists()) {
            return getSource(f);
        }

        // The file doesn't exist in the primary storeage directory, so check the other directories.
        if (log.isLoggable(Level.FINE)) {
            log.log(Level.FINE, "getSource() : " + f.getAbsolutePath() + " doesn't exist.");
        }
        if (getRetrieveDirectoryCount() == 0) {
            throw new FileNotFoundException("Unable to find " + fileName + " in the storeage directory.");
        }
        for (Iterator iterator = retrieveDirectories.iterator(); iterator.hasNext();) {
            String baseString = (String) iterator.next();
            File base = new File(baseString);
            if (!base.exists()) {
                if (log.isLoggable(Level.FINE)) {
                    log.log(Level.FINE, "getSource() : directory " + base + " does not exist, skipping.");
                }
                continue;
            }
            dir = getSubDirectory(base, subDirectory);
            f = new File(dir, fileName);
            if (f.exists()) {
                return getSource(f);
            }
        }
        throw new FileNotFoundException("Unable to find " + fileName + " in the any directories ("
                + (getRetrieveDirectoryCount() + 1) + " directories searched).");
    }

    /**
     * Returns a source for the given file.
     * @param f the file
     * @return a source for the file
     * @throws IOException if something goes wrong
     */
    private Source getSource(File f) throws IOException {
        return new Source(f.getAbsolutePath(), getInputStream(f));
    }

    /**
     * Returns a sub directory of the base directory, if a sub-directory
     * was specified.
     * @param base The base directory.
     * @param subDirectory The sub-directory (optional).
     * @return a sub directory of the base directory, or the base directory itself when
     * StringUtil.isEmpty() reports the sub-directory as empty (presumably null or "").
     */
    private File getSubDirectory(File base, String subDirectory) {
        return (StringUtil.isEmpty(subDirectory)) ? base : new File(base, subDirectory);
    }

    /**
     * Returns an output stream for the file, creating the parent directories if needed and
     * applying buffering and GZIP compression according to the bean properties.
     * @param f the file
     * @return an output stream for the file.
     * @throws IOException if something goes wrong
     */
    private OutputStream getOutputStream(File f) throws IOException {
        File parent = f.getParentFile();
        if (parent != null) {
            // If this fails, opening the file below reports the error.
            parent.mkdirs();
        }
        OutputStream os = new FileOutputStream(f);
        boolean opened = false;
        try {
            if (buffered) {
                os = new BufferedOutputStream(os);
            }
            if (gzip) {
                os = new GZIPOutputStream(os); // Writes the GZIP header, may throw.
            }
            opened = true;
            return os;
        } finally {
            // Release the file handle if a wrapper could not be created.
            if (!opened) {
                closeQuietly(os);
            }
        }
    }

    /**
     * Returns an input stream for the file, applying buffering and GZIP decompression
     * according to the bean properties.
     * @param f the file
     * @return an input stream for the file
     * @throws IOException if something goes wrong.
     */
    private InputStream getInputStream(File f) throws IOException {
        InputStream is = new FileInputStream(f);
        boolean opened = false;
        try {
            if (buffered) {
                is = new BufferedInputStream(is);
            }
            if (gzip) {
                is = new GZIPInputStream(is); // Reads the GZIP header, throws if the file is not GZIP.
            }
            opened = true;
            return is;
        } finally {
            // Release the file handle if a wrapper could not be created.
            if (!opened) {
                closeQuietly(is);
            }
        }
    }

    /**
     * Closes a stream, logging (at FINE level) instead of propagating any failure. Used on
     * clean-up paths where a close failure must not mask the primary result or exception.
     * @param stream the stream to close, may be null.
     */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            if (log.isLoggable(Level.FINE)) {
                log.log(Level.FINE, "closeQuietly() : ignoring failure to close stream", ignored);
            }
        }
    }

    /**
     * Provides the filename and output stream for a given document.
     */
    public static class Sink {
        private final String filename;
        private final OutputStream stream;

        /**
         * Creates a sink.
         * @param filename The filename that the output stream is pointing to.
         * @param out The output stream.
         */
        Sink(String filename, OutputStream out) {
            this.filename = filename;
            this.stream = out;
        }

        /**
         * Returns the name of the file that the output stream will write to.
         * @return the name of the file that the output stream will write to.
         */
        public String getFilename() {
            return filename;
        }

        /**
         * Returns the output stream, which will write to a file.
         * @return the output stream, which will write to a file.
         */
        public OutputStream getStream() {
            return stream;
        }
    } // class Sink

    /**
     * Provides the filename and input stream for a given document.
     */
    public static class Source {
        private final String filename;
        private final InputStream stream;

        /**
         * Creates a source.
         * @param filename The filename that the input stream is pointing to.
         * @param in The input stream.
         */
        Source(String filename, InputStream in) {
            this.filename = filename;
            this.stream = in;
        }

        /**
         * Returns the name of the file that the input stream will read from.
         * @return the name of the file that the input stream will read from.
         */
        public String getFilename() {
            return filename;
        }

        /**
         * Returns the input stream, which will read from a file.
         * @return the input stream, which will read from a file.
         */
        public InputStream getStream() {
            return stream;
        }
    } // class Source
}