/* * MultiFileOutputStream.java * * Copyright (c) 2011, The University of Sheffield. * * Valentin Tablan, 26 Apr 2011 * * $Id$ */ package gate.mimir.util; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.zip.CRC32; /** * An output stream that splits its data into a set of files inside a given * directory. When each file reaches a size limit, it is closed and a new file * is opened to write data to. */ public class MultiFileOutputStream extends OutputStream { /** * The directory where all the output files are written to. */ private File outputDirectory; /** * The prefix used for all output files. */ private String filePrefix; /** * The suffix used for all output files. */ private String fileSuffix; /** * The maximum size permitted for an output file. */ private long maximumFileSize; /** * The actual output stream for the current file. */ protected OutputStream currentOutputStream; /** * The file currently being written to. */ protected File currentOutputFile; /** * The number of bytes written to the current output stream so far. */ protected long currentBytes; /** * The current sequence number. */ protected int currentSeqNumber = -1; protected boolean closed = false; /** * The list of output files that were created and closed so far. */ protected List<File> outputFiles; protected CRC32 crc; /** * Creates a new multi-file output stream. Each output file will be created * inside the supplied output directory, and will have a name comprising the * filePrefix value (if one was given), followed by a sequence number, * followed by the fileSuffinx value (if one was given). * @param outputDirectory the directory in which all output files are created. * The {@link File} value supplied must point to an existing directory. * @param filePrefix the prefix used for creating output file names. * @param fileSuffix the suffix used for creating output file names. * @param maximumFileSize the maximum file size (number of bytes) for the * output files. When the current output file reaches this size, it is closed * and a new output file is created. * @throws IOException */ public MultiFileOutputStream(File outputDirectory, String filePrefix, String fileSuffix, long maximumFileSize) throws IOException { if(outputDirectory != null) { if(outputDirectory.canWrite()) { if(outputDirectory.isDirectory()) { this.outputDirectory = outputDirectory; } else { throw new IllegalArgumentException("Provided output directory (" + outputDirectory.getAbsolutePath() + ") is not a directory!"); } } else { throw new IllegalArgumentException("Provided output directory (" + outputDirectory.getAbsolutePath() + ") does not exist (or is not writeable)!"); } } else { throw new IllegalArgumentException("Output directory cannot be null!"); } this.filePrefix = filePrefix; this.fileSuffix = fileSuffix; if(maximumFileSize > 0) { this.maximumFileSize = maximumFileSize; } else throw new IllegalArgumentException( "Maximum file size must be positive!"); outputFiles = new LinkedList<File>(); crc = new CRC32(); try { // write out the signature byte[] signature = "MMFA".getBytes("UTF-8"); write(signature, 0, signature.length); } catch(UnsupportedEncodingException e) { throw new RuntimeException("This JVM does not support UTF-8!"); } } /** * Closes the current output file and opens the next one. * @throws IOException */ protected void nextFile() throws IOException { if(currentOutputStream != null) { currentOutputStream.flush(); currentOutputStream.close(); outputFiles.add(currentOutputFile); } currentSeqNumber++; currentBytes = 0; StringBuilder fileName = new StringBuilder(); if(filePrefix != null) fileName.append(filePrefix); fileName.append(String.format("%04d", currentSeqNumber)); if(fileSuffix != null) fileName.append(fileSuffix); currentOutputFile = new File(outputDirectory, fileName.toString()); currentOutputStream = new BufferedOutputStream( new FileOutputStream(currentOutputFile)); } /** * Gets the current value for the maximum file size. * @return */ public long getMaximumFileSize() { return maximumFileSize; } /** * Gets the directory in which output files are created. * @return */ public File getOutputDirectory() { return outputDirectory; } public String getFilePrefix() { return filePrefix; } public String getFileSuffix() { return fileSuffix; } /** * Gets the output files that were created, written to, and closed so far. * The returned array is only guaranteed to include all output files if this * method is called after the stream was closed. * @return */ public File[] getOutputFiles() { return outputFiles.toArray(new File[outputFiles.size()]); } /** * Gets the CRC32 sum calculated for the bytes written so far. It probably * makes sense to only call this after closing the steam, so that the returned * value refer to the entire data stream. * @return */ public long getCRC() { return crc.getValue(); } @Override public void write(int b) throws IOException { if(closed) throw new IOException("Stream already closed!"); // this may be the first write if(currentOutputStream == null) nextFile(); //we may have reached the limit if(currentBytes >= maximumFileSize) nextFile(); currentOutputStream.write(b); crc.update(b); currentBytes += 1; } @Override public void write(byte[] b, int off, int len) throws IOException { if(closed) throw new IOException("Stream already closed!"); // this may be the first write if(currentOutputStream == null) nextFile(); if(currentBytes + len <= maximumFileSize) { currentOutputStream.write(b, off, len); crc.update(b, off, len); currentBytes += len; } else { // we cannot write all the bytes int newLen = (int)(maximumFileSize - currentBytes); currentOutputStream.write(b, off, newLen); crc.update(b, off, newLen); currentBytes += newLen; nextFile(); // recursive all, in case len is really large and maximumFileSize is // really small write(b, off + newLen, len - newLen); } } @Override public void flush() throws IOException { if(closed) throw new IOException("Stream already closed!"); if(currentOutputStream != null) currentOutputStream.flush(); } @Override public void close() throws IOException { if(closed) return; if(currentOutputStream != null){ currentOutputStream.close(); outputFiles.add(currentOutputFile); } currentOutputStream = null; currentOutputFile = null; closed = true; } }