package edu.stanford.nlp.io; import edu.stanford.nlp.util.*; import edu.stanford.nlp.util.logging.Redwood; import java.io.*; import java.lang.reflect.InvocationTargetException; import java.net.InetAddress; import java.net.SocketTimeoutException; import java.net.URL; import java.net.URLConnection; import java.nio.channels.FileChannel; import java.util.*; import java.util.function.Consumer; import java.util.regex.Pattern; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; /** * Helper Class for various I/O related things. * * @author Kayur Patel * @author Teg Grenager * @author Christopher Manning */ public class IOUtils { private static final int SLURP_BUFFER_SIZE = 16384; public static final String eolChar = System.lineSeparator(); // todo: Inline public static final String defaultEncoding = "utf-8"; /** A logger for this class */ private static final Redwood.RedwoodChannels logger = Redwood.channels(IOUtils.class); // A class of static methods private IOUtils() { } /** * Write object to a file with the specified name. The file is silently gzipped if the filename ends with .gz. * * @param o Object to be written to file * @param filename Name of the temp file * @throws IOException If can't write file. * @return File containing the object */ public static File writeObjectToFile(Object o, String filename) throws IOException { return writeObjectToFile(o, new File(filename)); } /** * Write an object to a specified File. The file is silently gzipped if the filename ends with .gz. * * @param o Object to be written to file * @param file The temp File * @throws IOException If File cannot be written * @return File containing the object */ public static File writeObjectToFile(Object o, File file) throws IOException { return writeObjectToFile(o, file, false); } /** * Write an object to a specified File. The file is silently gzipped if the filename ends with .gz. 
* * @param o Object to be written to file * @param file The temp File * @param append If true, append to this file instead of overwriting it * @throws IOException If File cannot be written * @return File containing the object */ public static File writeObjectToFile(Object o, File file, boolean append) throws IOException { // file.createNewFile(); // cdm may 2005: does nothing needed OutputStream os = new FileOutputStream(file, append); if (file.getName().endsWith(".gz")) { os = new GZIPOutputStream(os); } os = new BufferedOutputStream(os); ObjectOutputStream oos = new ObjectOutputStream(os); oos.writeObject(o); oos.close(); return file; } /** * Write object to a file with the specified name. * * @param o Object to be written to file * @param filename Name of the temp file * @return File containing the object, or null if an exception was caught */ public static File writeObjectToFileNoExceptions(Object o, String filename) { File file = null; ObjectOutputStream oos = null; try { file = new File(filename); // file.createNewFile(); // cdm may 2005: does nothing needed oos = new ObjectOutputStream(new BufferedOutputStream( new GZIPOutputStream(new FileOutputStream(file)))); oos.writeObject(o); oos.close(); } catch (Exception e) { logger.err(throwableToStackTrace(e)); } finally { closeIgnoringExceptions(oos); } return file; } /** * Write object to temp file which is destroyed when the program exits. * * @param o Object to be written to file * @param filename Name of the temp file * @throws IOException If file cannot be written * @return File containing the object */ public static File writeObjectToTempFile(Object o, String filename) throws IOException { File file = File.createTempFile(filename, ".tmp"); file.deleteOnExit(); writeObjectToFile(o, file); return file; } /** * Write object to a temp file and ignore exceptions. 
* * @param o Object to be written to file * @param filename Name of the temp file * @return File containing the object */ public static File writeObjectToTempFileNoExceptions(Object o, String filename) { try { return writeObjectToTempFile(o, filename); } catch (Exception e) { logger.error("Error writing object to file " + filename); logger.err(throwableToStackTrace(e)); return null; } } private static OutputStream getBufferedOutputStream(String path) throws IOException { OutputStream os = new BufferedOutputStream(new FileOutputStream(path)); if (path.endsWith(".gz")) { os = new GZIPOutputStream(os); } return os; } //++ todo [cdm, Aug 2012]: Do we need the below methods? They're kind of weird in unnecessarily bypassing using a Writer. /** * Writes a string to a file. * * @param contents The string to write * @param path The file path * @param encoding The encoding to encode in * @throws IOException In case of failure */ public static void writeStringToFile(String contents, String path, String encoding) throws IOException { OutputStream writer = getBufferedOutputStream(path); writer.write(contents.getBytes(encoding)); writer.close(); } /** * Writes a string to a file, squashing exceptions * * @param contents The string to write * @param path The file path * @param encoding The encoding to encode in * */ public static void writeStringToFileNoExceptions(String contents, String path, String encoding) { OutputStream writer = null; try{ if (path.endsWith(".gz")) { writer = new GZIPOutputStream(new FileOutputStream(path)); } else { writer = new BufferedOutputStream(new FileOutputStream(path)); } writer.write(contents.getBytes(encoding)); } catch (Exception e) { logger.err(throwableToStackTrace(e)); } finally { closeIgnoringExceptions(writer); } } /** * Writes a string to a temporary file * * @param contents The string to write * @param path The file path * @param encoding The encoding to encode in * @throws IOException In case of failure * @return The File written to */ 
public static File writeStringToTempFile(String contents, String path, String encoding) throws IOException {
    File tmp = File.createTempFile(path, ".tmp");
    // The stream must be closed: otherwise buffered bytes are never flushed and a gzip
    // stream is never finished, leaving an empty or truncated file.
    try (OutputStream fos = new FileOutputStream(tmp);
         OutputStream writer = path.endsWith(".gz") ? new GZIPOutputStream(fos) : new BufferedOutputStream(fos)) {
      writer.write(contents.getBytes(encoding));
    }
    return tmp;
  }

  /**
   * Writes a string to a temporary file, as UTF-8
   *
   * @param contents The string to write
   * @param path Prefix for the temp file's name; if it ends with .gz the contents are gzipped
   * @throws IOException In case of failure
   */
  public static void writeStringToTempFile(String contents, String path) throws IOException {
    writeStringToTempFile(contents, path, "UTF-8");
  }

  /**
   * Writes a string to a temporary file, squashing exceptions (they are logged instead).
   *
   * @param contents The string to write
   * @param path Prefix for the temp file's name; if it ends with .gz the contents are gzipped
   * @param encoding The encoding to encode in
   * @return The temp File, or null if it could not even be created. If the file was created
   *         but writing failed, the (possibly incomplete) file is still returned.
   */
  public static File writeStringToTempFileNoExceptions(String contents, String path, String encoding) {
    File tmp = null;
    OutputStream writer = null;
    try {
      tmp = File.createTempFile(path, ".tmp");
      OutputStream fileOut = new FileOutputStream(tmp);
      writer = fileOut;  // so the file handle is closed even if wrapping it fails
      writer = path.endsWith(".gz") ? new GZIPOutputStream(fileOut) : new BufferedOutputStream(fileOut);
      writer.write(contents.getBytes(encoding));
    } catch (Exception e) {
      logger.err(throwableToStackTrace(e));
    } finally {
      closeIgnoringExceptions(writer);
    }
    return tmp;
  }

  /**
   * Writes a string to a temporary file with UTF-8 encoding, squashing exceptions
   *
   * @param contents The string to write
   * @param path Prefix for the temp file's name; if it ends with .gz the contents are gzipped
   */
  public static void writeStringToTempFileNoExceptions(String contents, String path) {
    writeStringToTempFileNoExceptions(contents, path, "UTF-8");
  }

  //-- todo [cdm, Aug 2012]: Do we need the below methods? They're kind of weird in unnecessarily bypassing using a Writer.

  // todo [cdm, Sep 2013]: Can we remove this next method and its friends? (Weird in silently gzipping, overlaps other functionality.)
/**
   * Read an object from a stored file. It is silently ungzipped, regardless of name:
   * the file is first read as gzip and, if that fails with a ZipException, re-read as
   * a plain serialized file.
   *
   * @param file The file pointing to the object to be retrieved
   * @throws IOException If file cannot be read
   * @throws ClassNotFoundException If reading serialized object fails
   * @return The object read from the file.
   */
  public static <T> T readObjectFromFile(File file) throws IOException, ClassNotFoundException {
    // The FileInputStream is its own resource so that it is closed even when the
    // GZIPInputStream constructor rejects the file (the fallback case below).
    try (InputStream fis = new FileInputStream(file);
         ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new GZIPInputStream(fis)))) {
      Object o = ois.readObject();
      return ErasureUtils.uncheckedCast(o);
    } catch (java.util.zip.ZipException e) {
      try (InputStream fis = new FileInputStream(file);
           ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(fis))) {
        Object o = ois.readObject();
        return ErasureUtils.uncheckedCast(o);
      }
    }
  }

  /**
   * Opens a DataInputStream on a URL, classpath resource or file, as resolved by
   * {@link #getInputStreamFromURLOrClasspathOrFileSystem(String)}.
   *
   * @param filenameUrlOrClassPath The URL/resource/file to read from
   * @throws IOException If the source cannot be opened
   */
  public static DataInputStream getDataInputStream(String filenameUrlOrClassPath) throws IOException {
    return new DataInputStream(getInputStreamFromURLOrClasspathOrFileSystem(filenameUrlOrClassPath));
  }

  /**
   * Opens a buffered DataOutputStream on a file; the output is gzipped if the
   * filename ends with .gz.
   *
   * @param filename The file to write to
   * @throws IOException If the file cannot be opened
   */
  public static DataOutputStream getDataOutputStream(String filename) throws IOException {
    return new DataOutputStream(getBufferedOutputStream((filename)));
  }

  /**
   * Read an object from a stored file. The file can be anything obtained
   * via a URL, the filesystem, or the classpath (eg in a jar file).
   *
   * @param filename The file pointing to the object to be retrieved
   * @throws IOException If file cannot be read
   * @throws ClassNotFoundException If reading serialized object fails
   * @return The object read from the file.
*/
  public static <T> T readObjectFromURLOrClasspathOrFileSystem(String filename) throws IOException, ClassNotFoundException {
    // Both streams are resources so the source is closed even if the serialization
    // header is invalid or readObject() throws.
    try (InputStream is = getInputStreamFromURLOrClasspathOrFileSystem(filename);
         ObjectInputStream ois = new ObjectInputStream(is)) {
      Object o = ois.readObject();
      return ErasureUtils.uncheckedCast(o);
    }
  }

  /**
   * Reads an object like {@link #readObjectFromURLOrClasspathOrFileSystem(String)} and logs
   * the given message, the path and the elapsed time at info level.
   *
   * @param log The channels to log the timing message to
   * @param msg Text that prefixes the logged path
   * @param path The URL/resource/file to read the object from
   * @return The object read
   * @throws RuntimeIOException If reading or deserializing fails
   */
  public static <T> T readObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(Redwood.RedwoodChannels log, String msg, String path) {
    T obj;
    try {
      Timing timing = new Timing();
      obj = IOUtils.readObjectFromURLOrClasspathOrFileSystem(path);
      log.info(msg + ' ' + path + " ... done [" + timing.toSecondsString() + " sec].");
    } catch (IOException | ClassNotFoundException e) {
      throw new RuntimeIOException(e);
    }
    return obj;
  }

  /**
   * Reads the next object from an already open stream. The stream is not closed.
   *
   * @param ois The stream to read from
   * @throws IOException If the stream cannot be read
   * @throws ClassNotFoundException If reading serialized object fails
   * @return The object read from the stream.
   */
  public static <T> T readObjectFromObjectStream(ObjectInputStream ois) throws IOException, ClassNotFoundException {
    Object o = ois.readObject();
    return ErasureUtils.uncheckedCast(o);
  }

  /**
   * Read an object from a stored file.
   *
   * @param filename The filename of the object to be retrieved
   * @throws IOException If file cannot be read
   * @throws ClassNotFoundException If reading serialized object fails
   * @return The object read from the file.
   */
  public static <T> T readObjectFromFile(String filename) throws IOException, ClassNotFoundException {
    return ErasureUtils.uncheckedCast(readObjectFromFile(new File(filename)));
  }

  /**
   * Read an object from a stored file without throwing exceptions.
   *
   * @param file The file pointing to the object to be retrieved
   * @return The object read from the file, or null if an exception occurred.
*/
  public static <T> T readObjectFromFileNoExceptions(File file) {
    Object o = null;
    // The file is always read as gzip here. Resources are closed on every path.
    try (InputStream fis = new FileInputStream(file);
         ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new GZIPInputStream(fis)))) {
      o = ois.readObject();
    } catch (IOException | ClassNotFoundException e) {
      logger.err(throwableToStackTrace(e));
    }
    return ErasureUtils.uncheckedCast(o);
  }

  /**
   * Counts the lines of a text file, classpath resource or URL (read as UTF-8).
   *
   * @param textFileOrUrl What to read from
   * @return The number of lines
   * @throws IOException If there is an I/O problem
   */
  public static int lineCount(String textFileOrUrl) throws IOException {
    int numLines = 0;
    try (BufferedReader r = readerFromString(textFileOrUrl)) {
      while (r.readLine() != null) {
        numLines++;
      }
    }
    return numLines;
  }

  /**
   * Opens a buffered ObjectOutputStream on a file; the output is gzipped if the path
   * ends with .gz. The caller is responsible for closing the returned stream.
   *
   * @param serializePath The file to write to
   * @return An ObjectOutputStream writing to the file
   * @throws IOException If the file cannot be opened
   */
  public static ObjectOutputStream writeStreamFromString(String serializePath) throws IOException {
    OutputStream os = new FileOutputStream(serializePath);
    try {
      if (serializePath.endsWith(".gz")) {
        os = new GZIPOutputStream(os);
      }
      return new ObjectOutputStream(new BufferedOutputStream(os));
    } catch (IOException e) {
      // Don't leak the file handle if a wrapper cannot be constructed
      closeIgnoringExceptions(os);
      throw e;
    }
  }

  /**
   * Returns an ObjectInputStream reading from any of a URL, a CLASSPATH resource, or a file.
   * The CLASSPATH takes priority over the file system.
   * This stream is buffered and, if necessary, gunzipped.
   * The caller is responsible for closing the returned stream.
   *
   * @param filenameOrUrl The String specifying the URL/resource/file to load
   * @return An ObjectInputStream for loading a resource
   * @throws IOException On any IO error
   * @throws NullPointerException Input parameter is null
   */
  public static ObjectInputStream readStreamFromString(String filenameOrUrl) throws IOException {
    InputStream is = getInputStreamFromURLOrClasspathOrFileSystem(filenameOrUrl);
    try {
      return new ObjectInputStream(is);
    } catch (IOException e) {
      // The constructor reads the stream header; close the source if that fails
      closeIgnoringExceptions(is);
      throw e;
    }
  }

  /**
   * Locates this file either in the CLASSPATH or in the file system. The CLASSPATH takes priority.
   * Note that this method uses the ClassLoader methods, so that classpath resources must be specified as
   * absolute resource paths without a leading "/".
* * @param name The file or resource name * @throws FileNotFoundException If the file does not exist * @return The InputStream of name, or null if not found */ private static InputStream findStreamInClasspathOrFileSystem(String name) throws FileNotFoundException { // ms 10-04-2010: // - even though this may look like a regular file, it may be a path inside a jar in the CLASSPATH // - check for this first. This takes precedence over the file system. InputStream is = IOUtils.class.getClassLoader().getResourceAsStream(name); // windows File.separator is \, but getting resources only works with / if (is == null) { is = IOUtils.class.getClassLoader().getResourceAsStream(name.replaceAll("\\\\", "/")); // Classpath doesn't like double slashes (e.g., /home/user//foo.txt) if (is == null) { is = IOUtils.class.getClassLoader().getResourceAsStream(name.replaceAll("\\\\", "/").replaceAll("/+", "/")); } } // if not found in the CLASSPATH, load from the file system if (is == null) { is = new FileInputStream(name); } return is; } /** * Check if this path exists either in the classpath or on the filesystem. * * @param name The file or resource name. * @return true if a call to {@link IOUtils#getBufferedReaderFromClasspathOrFileSystem(String)} would return a valid stream. */ public static boolean existsInClasspathOrFileSystem(String name) { InputStream is = IOUtils.class.getClassLoader().getResourceAsStream(name); if (is == null) { is = IOUtils.class.getClassLoader().getResourceAsStream(name.replaceAll("\\\\", "/")); if (is == null) { is = IOUtils.class.getClassLoader().getResourceAsStream(name.replaceAll("\\\\", "/").replaceAll("/+", "/")); } } return is != null || new File(name).exists(); } /** * Locates this file either using the given URL, or in the CLASSPATH, or in the file system * The CLASSPATH takes priority over the file system! * This stream is buffered and gunzipped (if necessary). 
* * @param textFileOrUrl The String specifying the URL/resource/file to load * @return An InputStream for loading a resource * @throws IOException On any IO error * @throws NullPointerException Input parameter is null */ public static InputStream getInputStreamFromURLOrClasspathOrFileSystem(String textFileOrUrl) throws IOException, NullPointerException { InputStream in; if (textFileOrUrl == null) { throw new NullPointerException("Attempt to open file with null name"); } else if (textFileOrUrl.matches("https?://.*")) { URL u = new URL(textFileOrUrl); URLConnection uc = u.openConnection(); in = uc.getInputStream(); } else { try { in = findStreamInClasspathOrFileSystem(textFileOrUrl); } catch (FileNotFoundException e) { try { // Maybe this happens to be some other format of URL? URL u = new URL(textFileOrUrl); URLConnection uc = u.openConnection(); in = uc.getInputStream(); } catch (IOException e2) { // Don't make the original exception a cause, since it is usually bogus throw new IOException("Unable to open \"" + textFileOrUrl + "\" as " + "class path, filename or URL"); // , e2); } } } // If it is a GZIP stream then ungzip it if (textFileOrUrl.endsWith(".gz")) { try { in = new GZIPInputStream(in); } catch (Exception e) { throw new RuntimeIOException("Resource or file looks like a gzip file, but is not: " + textFileOrUrl, e); } } // buffer this stream. even gzip streams benefit from buffering, // such as for the shift reduce parser [cdm 2016: I think this is only because default buffer is small; see below] in = new BufferedInputStream(in); return in; } // todo [cdm 2015]: I think GZIPInputStream has its own buffer and so we don't need to buffer in that case. // todo: Though it's default size is 512 bytes so need to make 8K in constructor. Or else buffering outside gzip is faster // todo: final InputStream is = new GZIPInputStream( new FileInputStream( file ), 65536 ); /** * Quietly opens a File. 
If the file ends with a ".gz" extension, * automatically opens a GZIPInputStream to wrap the constructed * FileInputStream. */ public static InputStream inputStreamFromFile(File file) throws RuntimeIOException { try { InputStream is = new BufferedInputStream(new FileInputStream(file)); if (file.getName().endsWith(".gz")) { is = new GZIPInputStream(is); } return is; } catch (IOException e) { throw new RuntimeIOException(e); } } /** * Open a BufferedReader to a File. If the file's getName() ends in .gz, * it is interpreted as a gzipped file (and uncompressed). The file is then * interpreted as a utf-8 text file. * * @param file What to read from * @return The BufferedReader * @throws RuntimeIOException If there is an I/O problem */ public static BufferedReader readerFromFile(File file) { InputStream is = null; try { is = inputStreamFromFile(file); return new BufferedReader(new InputStreamReader(is, "UTF-8")); } catch (IOException ioe) { IOUtils.closeIgnoringExceptions(is); throw new RuntimeIOException(ioe); } } // todo [cdm 2014]: get rid of this method, using other methods. This will change the semantics to null meaning UTF-8, but that seems better in 2015. /** * Open a BufferedReader to a File. If the file's getName() ends in .gz, * it is interpreted as a gzipped file (and uncompressed). The file is then * turned into a BufferedReader with the given encoding. * If the encoding passed in is null, then the system default encoding is used. * * @param file What to read from * @param encoding What charset to use. 
A null String is interpreted as platform default encoding * @return The BufferedReader * @throws RuntimeIOException If there is an I/O problem */ public static BufferedReader readerFromFile(File file, String encoding) { InputStream is = null; try { is = inputStreamFromFile(file); if (encoding == null) { return new BufferedReader(new InputStreamReader(is)); } else { return new BufferedReader(new InputStreamReader(is, encoding)); } } catch (IOException ioe) { IOUtils.closeIgnoringExceptions(is); throw new RuntimeIOException(ioe); } } /** * Open a BufferedReader on stdin. Use the user's default encoding. * * @return The BufferedReader */ public static BufferedReader readerFromStdin() { return new BufferedReader(new InputStreamReader(System.in)); } /** * Open a BufferedReader on stdin. Use the specified character encoding. * * @param encoding CharSet encoding. Maybe be null, in which case the * platform default encoding is used * @return The BufferedReader * @throws IOException If there is an I/O problem */ public static BufferedReader readerFromStdin(String encoding) throws IOException { if (encoding == null) { return new BufferedReader(new InputStreamReader(System.in)); } return new BufferedReader(new InputStreamReader(System.in, encoding)); } // TODO [cdm 2015]: Should we rename these methods. Sort of misleading: They really read files, resources, etc. specified by a String /** * Open a BufferedReader to a file, class path entry or URL specified by a String name. * If the String starts with https?://, then it is first tried as a URL. It * is next tried as a resource on the CLASSPATH, and then it is tried * as a local file. Finally, it is then tried again in case it is some network-available * file accessible by URL. If the String ends in .gz, it * is interpreted as a gzipped file (and uncompressed). The file is then * interpreted as a utf-8 text file. 
* Note that this method uses the ClassLoader methods, so that classpath resources must be specified as * absolute resource paths without a leading "/". * * @param textFileOrUrl What to read from * @return The BufferedReader * @throws IOException If there is an I/O problem */ public static BufferedReader readerFromString(String textFileOrUrl) throws IOException { return new BufferedReader(new InputStreamReader( getInputStreamFromURLOrClasspathOrFileSystem(textFileOrUrl), "UTF-8")); } /** * Open a BufferedReader to a file or URL specified by a String name. If the * String starts with https?://, then it is first tried as a URL, otherwise it * is next tried as a resource on the CLASSPATH, and then finally it is tried * as a local file or other network-available file . If the String ends in .gz, it * is interpreted as a gzipped file (and uncompressed), else it is interpreted as * a regular text file in the given encoding. * If the encoding passed in is null, then the system default encoding is used. * * @param textFileOrUrl What to read from * @param encoding CharSet encoding. Maybe be null, in which case the * platform default encoding is used * @return The BufferedReader * @throws IOException If there is an I/O problem */ public static BufferedReader readerFromString(String textFileOrUrl, String encoding) throws IOException { InputStream is = getInputStreamFromURLOrClasspathOrFileSystem(textFileOrUrl); if (encoding == null) { return new BufferedReader(new InputStreamReader(is)); } return new BufferedReader(new InputStreamReader(is, encoding)); } /** * Returns an Iterable of the lines in the file. * * The file reader will be closed when the iterator is exhausted. IO errors * will throw an (unchecked) RuntimeIOException * * @param path The file whose lines are to be read. * @return An Iterable containing the lines from the file. */ public static Iterable<String> readLines(String path) { return readLines(path, null); } /** * Returns an Iterable of the lines in the file. 
* * The file reader will be closed when the iterator is exhausted. IO errors * will throw an (unchecked) RuntimeIOException * * @param path The file whose lines are to be read. * @param encoding The encoding to use when reading lines. * @return An Iterable containing the lines from the file. */ public static Iterable<String> readLines(String path, String encoding) { return new GetLinesIterable(path, null, encoding); } /** * Returns an Iterable of the lines in the file. * * The file reader will be closed when the iterator is exhausted. * * @param file The file whose lines are to be read. * @return An Iterable containing the lines from the file. */ public static Iterable<String> readLines(final File file) { return readLines(file, null, null); } /** * Returns an Iterable of the lines in the file. * * The file reader will be closed when the iterator is exhausted. * * @param file The file whose lines are to be read. * @param fileInputStreamWrapper * The class to wrap the InputStream with, e.g. GZIPInputStream. Note * that the class must have a constructor that accepts an * InputStream. * @return An Iterable containing the lines from the file. */ public static Iterable<String> readLines(final File file, final Class<? extends InputStream> fileInputStreamWrapper) { return readLines(file, fileInputStreamWrapper, null); } /** * Returns an Iterable of the lines in the file, wrapping the generated * FileInputStream with an instance of the supplied class. IO errors will * throw an (unchecked) RuntimeIOException * * @param file The file whose lines are to be read. * @param fileInputStreamWrapper * The class to wrap the InputStream with, e.g. GZIPInputStream. Note * that the class must have a constructor that accepts an * InputStream. * @param encoding The encoding to use when reading lines. * @return An Iterable containing the lines from the file. */ public static Iterable<String> readLines(final File file, final Class<? 
extends InputStream> fileInputStreamWrapper, final String encoding) { return new GetLinesIterable(file, fileInputStreamWrapper, encoding); } static class GetLinesIterable implements Iterable<String> { final File file; final String path; final Class<? extends InputStream> fileInputStreamWrapper; final String encoding; // TODO: better programming style would be to make this two // separate classes, but we don't expect to make more versions of // this class anyway GetLinesIterable(final File file, final Class<? extends InputStream> fileInputStreamWrapper, final String encoding) { this.file = file; this.path = null; this.fileInputStreamWrapper = fileInputStreamWrapper; this.encoding = encoding; } GetLinesIterable(final String path, final Class<? extends InputStream> fileInputStreamWrapper, final String encoding) { this.file = null; this.path = path; this.fileInputStreamWrapper = fileInputStreamWrapper; this.encoding = encoding; } private InputStream getStream() throws IOException { if (file != null) { return inputStreamFromFile(file); } else if (path != null) { return getInputStreamFromURLOrClasspathOrFileSystem(path); } else { throw new AssertionError("No known path to read"); } } @Override public Iterator<String> iterator() { return new Iterator<String>() { protected final BufferedReader reader = this.getReader(); protected String line = this.getLine(); @Override public boolean hasNext() { return this.line != null; } @Override public String next() { String nextLine = this.line; if (nextLine == null) { throw new NoSuchElementException(); } line = getLine(); return nextLine; } protected String getLine() { try { String result = this.reader.readLine(); if (result == null) { this.reader.close(); } return result; } catch (IOException e) { throw new RuntimeIOException(e); } } protected BufferedReader getReader() { try { InputStream stream = getStream(); if (fileInputStreamWrapper != null) { stream = 
fileInputStreamWrapper.getConstructor(InputStream.class).newInstance(stream); } if (encoding == null) { return new BufferedReader(new InputStreamReader(stream)); } else { return new BufferedReader(new InputStreamReader(stream, encoding)); } } catch (Exception e) { throw new RuntimeIOException(e); } } @Override public void remove() { throw new UnsupportedOperationException(); } }; } } // end static class GetLinesIterable /** * Given a reader, returns the lines from the reader as an Iterable. * * @param r input reader * @param includeEol whether to keep eol-characters in the returned strings * @return iterable of lines (as strings) */ public static Iterable<String> getLineIterable( Reader r, boolean includeEol) { if (includeEol) { return new EolPreservingLineReaderIterable(r); } else { return new LineReaderIterable( (r instanceof BufferedReader)? (BufferedReader) r:new BufferedReader(r) ); } } public static Iterable<String> getLineIterable( Reader r, int bufferSize, boolean includeEol) { if (includeEol) { return new EolPreservingLineReaderIterable(r, bufferSize); } else { return new LineReaderIterable( (r instanceof BufferedReader)? 
(BufferedReader) r:new BufferedReader(r, bufferSize) ); } } /** * Line iterator that uses BufferedReader.readLine() * EOL-characters are automatically discarded and not included in the strings returns */ private static final class LineReaderIterable implements Iterable<String> { private final BufferedReader reader; private LineReaderIterable( BufferedReader reader ) { this.reader = reader; } @Override public Iterator<String> iterator() { return new Iterator<String>() { private String next = getNext(); private String getNext() { try { return reader.readLine(); } catch (IOException ex) { throw new RuntimeIOException(ex); } } @Override public boolean hasNext() { return this.next != null; } @Override public String next() { String nextLine = this.next; if (nextLine == null) { throw new NoSuchElementException(); } next = getNext(); return nextLine; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } } /** * Line iterator that preserves the eol-character exactly as read from reader. * Line endings are: \r\n,\n,\r * Lines returns by this iterator will include the eol-characters **/ private static final class EolPreservingLineReaderIterable implements Iterable<String> { private final Reader reader; private final int bufferSize; private EolPreservingLineReaderIterable( Reader reader ) { this(reader, SLURP_BUFFER_SIZE); } private EolPreservingLineReaderIterable( Reader reader, int bufferSize ) { this.reader = reader; this.bufferSize = bufferSize; } @Override public Iterator<String> iterator() { return new Iterator<String>() { private String next; private boolean done = false; private StringBuilder sb = new StringBuilder(80); private char[] charBuffer = new char[bufferSize]; private int charBufferPos = -1; private int charsInBuffer = 0; boolean lastWasLF = false; private String getNext() { try { while (true) { if (charBufferPos < 0) { charsInBuffer = reader.read(charBuffer); if (charsInBuffer < 0) { // No more!!! 
if (sb.length() > 0) { String line = sb.toString(); // resets the buffer sb.setLength(0); return line; } else { return null; } } charBufferPos = 0; } boolean eolReached = copyUntilEol(); if (eolReached) { // eol reached String line = sb.toString(); // resets the buffer sb.setLength(0); return line; } } } catch (IOException ex) { throw new RuntimeIOException(ex); } } private boolean copyUntilEol() { for (int i = charBufferPos; i < charsInBuffer; i++) { if (charBuffer[i] == '\n') { // line end // copy into our string builder sb.append(charBuffer, charBufferPos, i - charBufferPos + 1); // advance character buffer pos charBufferPos = i+1; lastWasLF = false; return true; // end of line reached } else if (lastWasLF) { // not a '\n' here - still need to terminate line (but don't include current character) if (i > charBufferPos) { sb.append(charBuffer, charBufferPos, i - charBufferPos); // advance character buffer pos charBufferPos = i; lastWasLF = false; return true; // end of line reached } } lastWasLF = (charBuffer[i] == '\r'); } sb.append(charBuffer, charBufferPos, charsInBuffer - charBufferPos); // reset character buffer pos charBufferPos = -1; return false; } @Override public boolean hasNext() { if (done) return false; if (next == null) { next = getNext(); } if (next == null) { done = true; } return !done; } @Override public String next() { if (!hasNext()) { throw new NoSuchElementException(); } String res = next; next = null; return res; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } // end iterator() } // end static class EolPreservingLineReaderIterable /** * Provides an implementation of closing a file for use in a finally block so * you can correctly close a file without even more exception handling stuff. * From a suggestion in a talk by Josh Bloch. Calling close() will flush(). 
* * @param c The IO resource to close (e.g., a Stream/Reader) */ public static void closeIgnoringExceptions(Closeable c) { if (c != null) { try { c.close(); } catch (IOException ioe) { // ignore } } } /** * Iterate over all the files in the directory, recursively. * * @param dir The root directory. * @return All files within the directory. */ public static Iterable<File> iterFilesRecursive(final File dir) { return iterFilesRecursive(dir, (Pattern) null); } /** * Iterate over all the files in the directory, recursively. * * @param dir The root directory. * @param ext A string that must be at the end of all files (e.g. ".txt") * @return All files within the directory ending in the given extension. */ public static Iterable<File> iterFilesRecursive(final File dir, final String ext) { return iterFilesRecursive(dir, Pattern.compile(Pattern.quote(ext) + "$")); } /** * Iterate over all the files in the directory, recursively. * * @param dir The root directory. * @param pattern A regular expression that the file path must match. This uses * Matcher.find(), so use ^ and $ to specify endpoints. * @return All files within the directory. 
*/ public static Iterable<File> iterFilesRecursive(final File dir, final Pattern pattern) { return new Iterable<File>() { public Iterator<File> iterator() { return new AbstractIterator<File>() { private final Queue<File> files = new LinkedList<>(Collections .singleton(dir)); private File file = this.findNext(); @Override public boolean hasNext() { return this.file != null; } @Override public File next() { File result = this.file; if (result == null) { throw new NoSuchElementException(); } this.file = this.findNext(); return result; } private File findNext() { File next = null; while (!this.files.isEmpty() && next == null) { next = this.files.remove(); if (next.isDirectory()) { files.addAll(Arrays.asList(next.listFiles())); next = null; } else if (pattern != null) { if (!pattern.matcher(next.getPath()).find()) { next = null; } } } return next; } }; } }; } /** * Returns all the text in the given File as a single String. * If the file's name ends in .gz, it is assumed to be gzipped and is silently uncompressed. */ public static String slurpFile(File file) throws IOException { return slurpFile(file, null); } /** * Returns all the text in the given File as a single String. * If the file's name ends in .gz, it is assumed to be gzipped and is silently uncompressed. * * @param file The file to read from * @param encoding The character encoding to assume. This may be null, and * the platform default character encoding is used. */ public static String slurpFile(File file, String encoding) throws IOException { return IOUtils.slurpReader(IOUtils.encodedInputStreamReader( inputStreamFromFile(file), encoding)); } /** * Returns all the text in the given File as a single String. */ public static String slurpGZippedFile(String filename) throws IOException { Reader r = encodedInputStreamReader(new GZIPInputStream(new FileInputStream( filename)), null); return IOUtils.slurpReader(r); } /** * Returns all the text in the given File as a single String. 
*/
  public static String slurpGZippedFile(File file) throws IOException {
    InputStream gunzipped = new GZIPInputStream(new FileInputStream(file));
    return IOUtils.slurpReader(encodedInputStreamReader(gunzipped, null));
  }


  /**
   * Reads the whole of the named file, decoding it with the given encoding.
   * An empty file yields an empty string.
   *
   * @param filename Name of the file to read
   * @param encoding Character encoding of the file
   * @return The text in the file
   * @throws IOException If the file cannot be opened
   */
  public static String slurpFile(String filename, String encoding) throws IOException {
    return IOUtils.slurpReader(readerFromString(filename, encoding));
  }


  /**
   * Reads the whole of the named file with the given encoding, converting any
   * IOException (non-existent file, etc.) into an unchecked RuntimeIOException.
   * Callers that can tolerate a missing file should catch that exception.
   *
   * @param filename Name of the file to read
   * @param encoding Character encoding of the file
   * @return The text in the file
   */
  public static String slurpFileNoExceptions(String filename, String encoding) {
    try {
      return slurpFile(filename, encoding);
    } catch (IOException ioe) {
      throw new RuntimeIOException("slurpFile IO problem", ioe);
    }
  }


  /**
   * Reads the whole of the named file using the class default encoding.
   *
   * @param filename Name of the file to read
   * @return The text in the file.
   * @throws IOException If the file cannot be opened
   */
  public static String slurpFile(String filename) throws IOException {
    return slurpFile(filename, defaultEncoding);
  }


  /**
   * Fetches all the text at the given URL. Any exception is logged and
   * reported to the caller as a {@code null} result.
   *
   * @param u The URL to read
   * @param encoding Character encoding of the remote content
   * @return The text at the URL, or null if it could not be read
   */
  public static String slurpURLNoExceptions(URL u, String encoding) {
    String text = null;
    try {
      text = IOUtils.slurpURL(u, encoding);
    } catch (Exception e) {
      logger.err(throwableToStackTrace(e));
    }
    return text;
  }


  /**
   * Returns all the text at the given URL.
*/ public static String slurpURL(URL u, String encoding) throws IOException { String lineSeparator = System.lineSeparator(); URLConnection uc = u.openConnection(); uc.setReadTimeout(30000); InputStream is; try { is = uc.getInputStream(); } catch (SocketTimeoutException e) { logger.error("Socket time out; returning empty string."); logger.err(throwableToStackTrace(e)); return ""; } BufferedReader br = new BufferedReader(new InputStreamReader(is, encoding)); StringBuilder buff = new StringBuilder(SLURP_BUFFER_SIZE); // make biggish for (String temp; (temp = br.readLine()) != null; ) { buff.append(temp); buff.append(lineSeparator); } br.close(); return buff.toString(); } public static String getUrlEncoding(URLConnection connection) { String contentType = connection.getContentType(); String[] values = contentType.split(";"); String charset = defaultEncoding; // might or might not be right.... for (String value : values) { value = value.trim(); if (value.toLowerCase(Locale.ENGLISH).startsWith("charset=")) { charset = value.substring("charset=".length()); } } return charset; } /** * Returns all the text at the given URL. */ public static String slurpURL(URL u) throws IOException { String lineSeparator = System.getProperty("line.separator"); URLConnection uc = u.openConnection(); String encoding = getUrlEncoding(uc); InputStream is = uc.getInputStream(); BufferedReader br = new BufferedReader(new InputStreamReader(is, encoding)); StringBuilder buff = new StringBuilder(SLURP_BUFFER_SIZE); // make biggish for (String temp; (temp = br.readLine()) != null; ) { buff.append(temp); buff.append(lineSeparator); } br.close(); return buff.toString(); } /** * Returns all the text at the given URL. */ public static String slurpURLNoExceptions(URL u) { try { return slurpURL(u); } catch (Exception e) { logger.err(throwableToStackTrace(e)); return null; } } /** * Returns all the text at the given URL. 
*/ public static String slurpURL(String path) throws Exception { return slurpURL(new URL(path)); } /** * Returns all the text at the given URL. If the file cannot be read * (non-existent, etc.), then and only then the method returns * {@code null}. */ public static String slurpURLNoExceptions(String path) { try { return slurpURL(path); } catch (Exception e) { logger.err(throwableToStackTrace(e)); return null; } } /** * Returns all the text in the given file with the given * encoding. If the file cannot be read (non-existent, etc.), then * the method throws an unchecked RuntimeIOException. If the caller * is willing to tolerate missing files, they should catch that * exception. */ public static String slurpFileNoExceptions(File file) { try { return IOUtils.slurpReader(encodedInputStreamReader(new FileInputStream(file), null)); } catch (IOException e) { throw new RuntimeIOException(e); } } /** * Returns all the text in the given file with the given * encoding. If the file cannot be read (non-existent, etc.), then * the method throws an unchecked RuntimeIOException. If the caller * is willing to tolerate missing files, they should catch that * exception. */ public static String slurpFileNoExceptions(String filename) { try { return slurpFile(filename); } catch (IOException e) { throw new RuntimeIOException(e); } } /** * Returns all the text from the given Reader. * Closes the Reader when done. * * @return The text in the file. */ public static String slurpReader(Reader reader) { StringBuilder buff = new StringBuilder(); try { char[] chars = new char[SLURP_BUFFER_SIZE]; BufferedReader r = new BufferedReader(reader); while (true) { int amountRead = r.read(chars, 0, SLURP_BUFFER_SIZE); if (amountRead < 0) { break; } buff.append(chars, 0, amountRead); } r.close(); } catch (Exception e) { throw new RuntimeIOException("slurpReader IO problem", e); } return buff.toString(); } /** * Read the contents of an input stream, decoding it according to the given character encoding. 
* @param input The input stream to read from * @return The String representation of that input stream */ public static String slurpInputStream(InputStream input, String encoding) throws IOException { return slurpReader(encodedInputStreamReader(input, encoding)); } /** * Send all bytes from the input stream to the output stream. * * @param input The input bytes. * @param output Where the bytes should be written. */ public static void writeStreamToStream(InputStream input, OutputStream output) throws IOException { byte[] buffer = new byte[4096]; while (true) { int len = input.read(buffer); if (len == -1) { break; } output.write(buffer, 0, len); } } /** * Read in a CSV formatted file with a header row. * * @param path - path to CSV file * @param quoteChar - character for enclosing strings, defaults to " * @param escapeChar - character for escaping quotes appearing in quoted strings; defaults to " (i.e. "" is used for " inside quotes, consistent with Excel) * @return a list of maps representing the rows of the csv. The maps' keys are the header strings and their values are the row contents * @throws IOException If any IO problem */ public static List<Map<String,String>> readCSVWithHeader(String path, char quoteChar, char escapeChar) throws IOException { String[] labels = null; List<Map<String,String>> rows = Generics.newArrayList(); for (String line : IOUtils.readLines(path)) { // logger.info("Splitting "+line); if (labels == null) { labels = StringUtils.splitOnCharWithQuoting(line,',','"',escapeChar); } else { String[] cells = StringUtils.splitOnCharWithQuoting(line,',',quoteChar,escapeChar); assert(cells.length == labels.length); Map<String,String> cellMap = Generics.newHashMap(); for (int i=0; i<labels.length; i++) cellMap.put(labels[i],cells[i]); rows.add(cellMap); } } return rows; } public static List<Map<String,String>> readCSVWithHeader(String path) throws IOException { return readCSVWithHeader(path, '"', '"'); } /** * Read a CSV file character by character. 
Allows for multi-line CSV files (in quotes), but * is less flexible and likely slower than readCSVWithHeader() * @param csvContents The char[] array corresponding to the contents of the file * @param numColumns The number of columns in the file (for verification, primarily) * @return A list of lines in the file */ public static LinkedList<String[]> readCSVStrictly(char[] csvContents, int numColumns){ //--Variables StringBuilder[] buffer = new StringBuilder[numColumns]; buffer[0] = new StringBuilder(); LinkedList<String[]> lines = new LinkedList<>(); //--State boolean inQuotes = false; boolean nextIsEscaped = false; int columnI = 0; //--Read for(int offset=0; offset<csvContents.length; offset++){ if(nextIsEscaped){ buffer[columnI].append(csvContents[offset]); nextIsEscaped = false; } else { switch(csvContents[offset]){ case '"': //(case: quotes) inQuotes = !inQuotes; break; case ',': //(case: field separator) if(inQuotes){ buffer[columnI].append(','); } else { columnI += 1; if(columnI >= numColumns){ throw new IllegalArgumentException("Too many columns: "+columnI+"/"+numColumns+" (offset: " + offset + ")"); } buffer[columnI] = new StringBuilder(); } break; case '\n': //(case: newline) if(inQuotes){ buffer[columnI].append('\n'); } else { //((error checks)) if(columnI != numColumns-1){ throw new IllegalArgumentException("Too few columns: "+columnI+"/"+numColumns+" (offset: " + offset + ")"); } //((create line)) String[] rtn = new String[buffer.length]; for(int i=0; i<buffer.length; i++){ rtn[i] = buffer[i].toString(); } lines.add(rtn); //((update state)) columnI = 0; buffer[columnI] = new StringBuilder(); } break; case '\\': nextIsEscaped = true; break; default: buffer[columnI].append(csvContents[offset]); } } } //--Return return lines; } public static LinkedList<String[]> readCSVStrictly(String filename, int numColumns) throws IOException { return readCSVStrictly(slurpFile(filename).toCharArray(), numColumns); } /** * Get a input file stream (automatically 
gunzip/bunzip2 depending on file extension) * @param filename Name of file to open * @return Input stream that can be used to read from the file * @throws IOException if there are exceptions opening the file */ public static InputStream getFileInputStream(String filename) throws IOException { InputStream in = new FileInputStream(filename); if (filename.endsWith(".gz")) { in = new GZIPInputStream(in); } else if (filename.endsWith(".bz2")) { //in = new CBZip2InputStream(in); in = getBZip2PipedInputStream(filename); } return in; } /** * Get a output file stream (automatically gzip/bzip2 depending on file extension) * @param filename Name of file to open * @return Output stream that can be used to write to the file * @throws IOException if there are exceptions opening the file */ public static OutputStream getFileOutputStream(String filename) throws IOException { OutputStream out = new FileOutputStream(filename); if (filename.endsWith(".gz")) { out = new GZIPOutputStream(out); } else if (filename.endsWith(".bz2")) { //out = new CBZip2OutputStream(out); out = getBZip2PipedOutputStream(filename); } return out; } public static OutputStream getFileOutputStream(String filename, boolean append) throws IOException { OutputStream out = new FileOutputStream(filename, append); if (filename.endsWith(".gz")) { out = new GZIPOutputStream(out); } else if (filename.endsWith(".bz2")) { //out = new CBZip2OutputStream(out); out = getBZip2PipedOutputStream(filename); } return out; } /** @deprecated Just call readerFromString(filename) */ @Deprecated public static BufferedReader getBufferedFileReader(String filename) throws IOException { return readerFromString(filename, defaultEncoding); } /** @deprecated Just call readerFromString(filename) */ @Deprecated public static BufferedReader getBufferedReaderFromClasspathOrFileSystem(String filename) throws IOException { return readerFromString(filename, defaultEncoding); } public static PrintWriter getPrintWriter(File textFile) throws 
IOException { return getPrintWriter(textFile, null); } public static PrintWriter getPrintWriter(File textFile, String encoding) throws IOException { File f = textFile.getAbsoluteFile(); if (encoding == null) { encoding = defaultEncoding; } return new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), encoding)), true); } public static PrintWriter getPrintWriter(String filename) throws IOException { return getPrintWriter(filename, defaultEncoding); } public static PrintWriter getPrintWriterIgnoringExceptions(String filename) { try { return getPrintWriter(filename, defaultEncoding); } catch (IOException ioe) { return null; } } public static PrintWriter getPrintWriterOrDie(String filename) { try { return getPrintWriter(filename, defaultEncoding); } catch (IOException ioe) { throw new RuntimeIOException(ioe); } } public static PrintWriter getPrintWriter(String filename, String encoding) throws IOException { OutputStream out = getFileOutputStream(filename); if (encoding == null) { encoding = defaultEncoding; } return new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, encoding)), true); } public static InputStream getBZip2PipedInputStream(String filename) throws IOException { String bzcat = System.getProperty("bzcat", "bzcat"); Runtime rt = Runtime.getRuntime(); String cmd = bzcat + " " + filename; //log.info("getBZip2PipedInputStream: Running command: "+cmd); Process p = rt.exec(cmd); Writer errWriter = new BufferedWriter(new OutputStreamWriter(System.err)); StreamGobbler errGobbler = new StreamGobbler(p.getErrorStream(), errWriter); errGobbler.start(); return p.getInputStream(); } public static OutputStream getBZip2PipedOutputStream(String filename) throws IOException { return new BZip2PipedOutputStream(filename); } private static final Pattern tab = Pattern.compile("\t"); /** * Read column as set * @param infile - filename * @param field index of field to read * @return a set of the entries in column field * @throws IOException 
*/ public static Set<String> readColumnSet(String infile, int field) throws IOException { BufferedReader br = IOUtils.getBufferedFileReader(infile); Set<String> set = Generics.newHashSet(); for (String line; (line = br.readLine()) != null; ) { line = line.trim(); if (line.length() > 0) { if (field < 0) { set.add(line); } else { String[] fields = tab.split(line); if (field < fields.length) { set.add(fields[field]); } } } } br.close(); return set; } public static <C> List<C> readObjectFromColumns(Class objClass, String filename, String[] fieldNames, String delimiter) throws IOException, InstantiationException, IllegalAccessException, NoSuchFieldException, NoSuchMethodException, InvocationTargetException { Pattern delimiterPattern = Pattern.compile(delimiter); List<C> list = new ArrayList<>(); BufferedReader br = IOUtils.getBufferedFileReader(filename); for (String line; (line = br.readLine()) != null; ) { line = line.trim(); if (line.length() > 0) { C item = StringUtils.columnStringToObject(objClass, line, delimiterPattern, fieldNames); list.add(item); } } br.close(); return list; } public static Map<String,String> readMap(String filename) throws IOException { Map<String,String> map = Generics.newHashMap(); try { BufferedReader br = IOUtils.getBufferedFileReader(filename); for (String line; (line = br.readLine()) != null; ) { String[] fields = tab.split(line,2); map.put(fields[0], fields[1]); } br.close(); } catch (IOException ex) { throw new RuntimeException(ex); } return map; } /** * Returns the contents of a file as a single string. The string may be * empty, if the file is empty. If there is an IOException, it is caught * and null is returned. */ public static String stringFromFile(String filename) { return stringFromFile(filename, defaultEncoding); } /** * Returns the contents of a file as a single string. The string may be * empty, if the file is empty. If there is an IOException, it is caught * and null is returned. Encoding can also be specified. 
*/ // todo: This is same as slurpFile (!) public static String stringFromFile(String filename, String encoding) { try { StringBuilder sb = new StringBuilder(); BufferedReader in = new BufferedReader(new EncodingFileReader(filename,encoding)); String line; while ((line = in.readLine()) != null) { sb.append(line); sb.append(eolChar); } in.close(); return sb.toString(); } catch (IOException e) { logger.err(throwableToStackTrace(e)); return null; } } /** * Returns the contents of a file as a list of strings. The list may be * empty, if the file is empty. If there is an IOException, it is caught * and null is returned. */ public static List<String> linesFromFile(String filename) { return linesFromFile(filename, defaultEncoding); } /** * Returns the contents of a file as a list of strings. The list may be * empty, if the file is empty. If there is an IOException, it is caught * and null is returned. Encoding can also be specified */ public static List<String> linesFromFile(String filename,String encoding) { return linesFromFile(filename, encoding, false); } public static List<String> linesFromFile(String filename,String encoding, boolean ignoreHeader) { try { List<String> lines = new ArrayList<>(); BufferedReader in = readerFromString(filename, encoding); String line; int i = 0; while ((line = in.readLine()) != null) { i++; if(ignoreHeader && i == 1) continue; lines.add(line); } in.close(); return lines; } catch (IOException e) { logger.err(throwableToStackTrace(e)); return null; } } /** * A JavaNLP specific convenience routine for obtaining the current * scratch directory for the machine you're currently running on. 
*/ public static File getJNLPLocalScratch() { try { String machineName = InetAddress.getLocalHost().getHostName().split("\\.")[0]; String username = System.getProperty("user.name"); return new File("/"+machineName+"/scr1/"+username); } catch (Exception e) { return new File("./scr/"); // default scratch } } /** * Given a filepath, makes sure a directory exists there. If not, creates and returns it. * Same as ENSURE-DIRECTORY in CL. * * @param tgtDir The directory that you wish to ensure exists * @throws IOException If directory can't be created, is an existing file, or for other reasons */ public static File ensureDir(File tgtDir) throws IOException { if (tgtDir.exists()) { if (tgtDir.isDirectory()) { return tgtDir; } else { throw new IOException("Could not create directory "+tgtDir.getAbsolutePath()+", as a file already exists at that path."); } } else { if ( ! tgtDir.mkdirs()) { throw new IOException("Could not create directory "+tgtDir.getAbsolutePath()); } return tgtDir; } } /** * Given a filepath, delete all files in the directory recursively * @param dir Directory from which to delete files * @return {@code true} if the deletion is successful, {@code false} otherwise */ public static boolean deleteDirRecursively(File dir) { if (dir.isDirectory()) { for (File f : dir.listFiles()) { boolean success = deleteDirRecursively(f); if (!success) return false; } } return dir.delete(); } public static String getExtension(String fileName) { if(!fileName.contains(".")) return null; int idx = fileName.lastIndexOf('.'); return fileName.substring(idx+1); } /** Create a Reader with an explicit encoding around an InputStream. * This static method will treat null as meaning to use the platform default, * unlike the Java library methods that disallow a null encoding. 
* * @param stream An InputStream * @param encoding A charset encoding * @return A Reader * @throws IOException If any IO problem */ public static Reader encodedInputStreamReader(InputStream stream, String encoding) throws IOException { // InputStreamReader doesn't allow encoding to be null; if (encoding == null) { return new InputStreamReader(stream); } else { return new InputStreamReader(stream, encoding); } } /** Create a Reader with an explicit encoding around an InputStream. * This static method will treat null as meaning to use the platform default, * unlike the Java library methods that disallow a null encoding. * * @param stream An InputStream * @param encoding A charset encoding * @return A Reader * @throws IOException If any IO problem */ public static Writer encodedOutputStreamWriter(OutputStream stream, String encoding) throws IOException { // OutputStreamWriter doesn't allow encoding to be null; if (encoding == null) { return new OutputStreamWriter(stream); } else { return new OutputStreamWriter(stream, encoding); } } /** Create a Reader with an explicit encoding around an InputStream. * This static method will treat null as meaning to use the platform default, * unlike the Java library methods that disallow a null encoding. * * @param stream An InputStream * @param encoding A charset encoding * @param autoFlush Whether to make an autoflushing Writer * @return A Reader * @throws IOException If any IO problem */ public static PrintWriter encodedOutputStreamPrintWriter(OutputStream stream, String encoding, boolean autoFlush) throws IOException { // PrintWriter doesn't allow encoding to be null; or to have charset and flush if (encoding == null) { return new PrintWriter(stream, autoFlush); } else { return new PrintWriter(new OutputStreamWriter(stream, encoding), autoFlush); } } /** * A raw file copy function -- this is not public since no error checks are made as to the * consistency of the file being copied. 
Use instead: * @see IOUtils#cp(java.io.File, java.io.File, boolean) * @param source The source file. This is guaranteed to exist, and is guaranteed to be a file. * @param target The target file. * @throws IOException Throws an exception if the copy fails. */ private static void copyFile(File source, File target) throws IOException { FileChannel sourceChannel = new FileInputStream( source ).getChannel(); FileChannel targetChannel = new FileOutputStream( target ).getChannel(); // allow for the case that it doesn't all transfer in one go (though it probably does for a file cp) long pos = 0; long toCopy = sourceChannel.size(); while (toCopy > 0) { long bytes = sourceChannel.transferTo(pos, toCopy, targetChannel); pos += bytes; toCopy -= bytes; } sourceChannel.close(); targetChannel.close(); } /** * <p>An implementation of cp, as close to the Unix command as possible. * Both directories and files are valid for either the source or the target; * if the target exists, the semantics of Unix cp are [intended to be] obeyed.</p> * * @param source The source file or directory. * @param target The target to write this file or directory to. 
* @param recursive If true, recursively copy directory contents * @throws IOException If either the copy fails (standard IO Exception), or the command is invalid * (e.g., copying a directory without the recursive flag) */ public static void cp(File source, File target, boolean recursive) throws IOException { // Error checks if (source.isDirectory() && !recursive) { // cp a b -- a is a directory throw new IOException("cp: omitting directory: " + source); } if (!target.getParentFile().exists()) { // cp a b/c/d/e -- b/c/d doesn't exist throw new IOException("cp: cannot copy to directory: " + recursive + " (parent doesn't exist)"); } if (!target.getParentFile().isDirectory()) { // cp a b/c/d/e -- b/c/d is a regular file throw new IOException("cp: cannot copy to directory: " + recursive + " (parent isn't a directory)"); } // Get true target File trueTarget; if (target.exists() && target.isDirectory()) { trueTarget = new File(target.getPath() + File.separator + source.getName()); } else { trueTarget = target; } // Copy if (source.isFile()) { // Case: copying a file copyFile(source, trueTarget); } else if (source.isDirectory()) { // Case: copying a directory File[] children = source.listFiles(); if (children == null) { throw new IOException("cp: could not list files in source: " + source); } if (target.exists()) { // Case: cp -r a b -- b exists if (!target.isDirectory()) { // cp -r a b -- b is a regular file throw new IOException("cp: cannot copy directory into regular file: " + target); } if (trueTarget.exists() && !trueTarget.isDirectory()) { // cp -r a b -- b/a is not a directory throw new IOException("cp: overwriting a file with a directory: " + trueTarget); } if (!trueTarget.exists() && !trueTarget.mkdir()) { // cp -r a b -- b/a cannot be created throw new IOException("cp: could not create directory: " + trueTarget); } } else { // Case: cp -r a b -- b does not exist assert trueTarget == target; if (!trueTarget.mkdir()) { // cp -r a b -- cannot create b as a directory 
throw new IOException("cp: could not create target directory: " + trueTarget); } } // Actually do the copy for (File child : children) { File childTarget = new File(trueTarget.getPath() + File.separator + child.getName()); cp(child, childTarget, recursive); } } else { throw new IOException("cp: unknown file type: " + source); } } /** * @see IOUtils#cp(java.io.File, java.io.File, boolean) */ public static void cp(File source, File target) throws IOException { cp(source, target, false); } /** * A Java implementation of the Unix tail functionality. * That is, read the last n lines of the input file f. * @param f The file to read the last n lines from * @param n The number of lines to read from the end of the file. * @param encoding The encoding to read the file in. * @return The read lines, one String per line. * @throws IOException if the file could not be read. */ public static String[] tail(File f, int n, String encoding) throws IOException { if (n == 0) { return new String[0]; } // Variables RandomAccessFile raf = new RandomAccessFile(f, "r"); int linesRead = 0; List<Byte> bytes = new ArrayList<>(); List<String> linesReversed = new ArrayList<>(); // Seek to end of file long length = raf.length() - 1; raf.seek(length); // Read backwards for(long seek = length; seek >= 0; --seek){ // Seek back raf.seek(seek); // Read the next character byte c = raf.readByte(); if(c == '\n'){ // If it's a newline, handle adding the line byte[] str = new byte[bytes.size()]; for (int i = 0; i < str.length; ++i) { str[i] = bytes.get(str.length - i - 1); } linesReversed.add(new String(str, encoding)); bytes = new ArrayList<>(); linesRead += 1; if (linesRead == n){ break; } } else { // Else, register the character for later bytes.add(c); } } // Add any remaining lines if (linesRead < n && bytes.size() > 0) { byte[] str = new byte[bytes.size()]; for (int i = 0; i < str.length; ++i) { str[i] = bytes.get(str.length - i - 1); } linesReversed.add(new String(str, encoding)); } // Create output 
String[] rtn = new String[linesReversed.size()]; for (int i = 0; i < rtn.length; ++i) { rtn[i] = linesReversed.get(rtn.length - i - 1); } raf.close(); return rtn; } /** @see edu.stanford.nlp.io.IOUtils#tail(java.io.File, int, String) */ public static String[] tail(File f, int n) throws IOException { return tail(f, n, "utf-8"); } /** Bare minimum sanity checks */ private static Set<String> blacklistedPathsToRemove = new HashSet<String>(){{ add("/"); add("/u"); add("/u/"); add("/u/nlp"); add("/u/nlp/"); add("/u/nlp/data"); add("/u/nlp/data/"); add("/scr"); add("/scr/"); add("/scr/nlp/data"); add("/scr/nlp/data/"); }}; /** * Delete this file; or, if it is a directory, delete this directory and all its contents. * This is a somewhat dangerous function to call from code, and so a few safety features have been * implemented (though you should not rely on these!): * * <ul> * <li>Certain directories are prohibited from being removed.</li> * <li>More than 100 files cannot be removed with this function.</li> * <li>More than 10GB cannot be removed with this function.</li> * </ul> * * @param file The file or directory to delete. */ public static void deleteRecursively(File file) { // Sanity checks if (blacklistedPathsToRemove.contains(file.getPath())) { throw new IllegalArgumentException("You're trying to delete " + file + "! I _really_ don't think you want to do that..."); } int count = 0; long size = 0; for (File f : iterFilesRecursive(file)) { count += 1; size += f.length(); } if (count > 100) { throw new IllegalArgumentException("Deleting more than 100 files; you should do this manually"); } if (size > 10000000000L) { // 10 GB throw new IllegalArgumentException("Deleting more than 10GB; you should do this manually"); } // Do delete if (file.isDirectory()) { File[] children = file.listFiles(); if (children != null) { for (File child : children) { deleteRecursively(child); } } } //noinspection ResultOfMethodCallIgnored file.delete(); } /** * Start a simple console. 
Read lines from stdin, and pass each line to the callback.
   * Returns on typing "exit", "quit" or "q" (in any letter case), or when
   * stdin is exhausted. Empty lines are not passed to the callback.
   *
   * @param prompt The prompt printed before every line of input.
   * @param callback The function to run for every line of input.
   * @throws IOException Thrown from the underlying input stream.
   */
  public static void console(String prompt, Consumer<String> callback) throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
    String line;
    System.out.print(prompt);
    while ( (line = reader.readLine()) != null) {
      // Fixed locale: the command words must be recognised whatever the
      // default locale's case mapping is (e.g. Turkish dotless i in "QUIT").
      switch (line.toLowerCase(Locale.ENGLISH)) {
        case "":
          break;
        case "exit":
        case "quit":
        case "q":
          return;
        default:
          callback.accept(line);
          break;
      }
      System.out.print(prompt);
    }
  }

  /**
   * Create a prompt, and read a single line of response.
   * @param prompt An optional prompt to show the user; "> " is used if absent.
   * @return The line read from stdin, or null if stdin is exhausted.
   * @throws IOException Throw from the underlying reader.
   */
  public static String promptUserInput(Optional<String> prompt) throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
    System.out.print(prompt.orElse("> "));
    return reader.readLine();
  }

  /** @see IOUtils#console(String, Consumer) */
  public static void console(Consumer<String> callback) throws IOException {
    console("> ", callback);
  }

  /**
   * Renders a throwable and the frames of its own stack trace as text.
   * Causes of the throwable are not included.
   *
   * @param t The throwable to render
   * @return t.toString() followed by one "at ..." line per stack frame
   */
  public static String throwableToStackTrace(Throwable t) {
    StringBuilder sb = new StringBuilder();
    sb.append(t).append(eolChar);
    for (StackTraceElement e : t.getStackTrace()) {
      sb.append("\t at ").append(e).append(eolChar);
    }
    return sb.toString();
  }

}