package edu.stanford.nlp.io;
import edu.stanford.nlp.util.StreamGobbler;
import edu.stanford.nlp.util.StringUtils;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import java.io.*;
import java.net.InetAddress;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
public class FileUtils {
/** Platform line separator, read once from the {@code line.separator} system property. */
public static final String eolChar = System.getProperty("line.separator");
/** Character encoding used by the overloads in this class that take no explicit encoding. */
private static final String defaultEnc = "utf-8";
/** Private constructor: the class is not meant to be instantiated. */
private FileUtils() {} // just static methods
/**
 * Opens a file for reading, picking a decompressing stream from the file extension.
 * <ul>
 * <li>{@code .gz}: the file stream is wrapped in a {@link GZIPInputStream};</li>
 * <li>{@code .bz2}: the stream returned by {@code getBZip2PipedInputStream(filename)};</li>
 * <li>anything else: the plain file stream.</li>
 * </ul>
 *
 * @param filename Name of file to open
 * @return Input stream that can be used to read from the file
 * @throws IOException if there are exceptions opening the file
 */
public static InputStream getFileInputStream(String filename) throws IOException {
  // Opened for every extension so that a missing or unreadable file fails right here.
  InputStream raw = new FileInputStream(filename);
  try {
    if (filename.endsWith(".gz")) {
      return new GZIPInputStream(raw);
    }
    if (filename.endsWith(".bz2")) {
      //in = new CBZip2InputStream(in);
      // The bzip2 path is given the file name, not this stream, so release the
      // handle instead of abandoning it open.
      raw.close();
      return getBZip2PipedInputStream(filename);
    }
    return raw;
  } catch (IOException e) {
    // E.g. the gzip header could not be read: do not leave the file open.
    try {
      raw.close();
    } catch (IOException ignored) {
      // the original failure is the one worth reporting
    }
    throw e;
  }
}
/**
 * Opens a file for writing, picking a compressing stream from the file extension.
 * <ul>
 * <li>{@code .gz}: the file stream is wrapped in a {@link GZIPOutputStream};</li>
 * <li>{@code .bz2}: the stream returned by {@code getBZip2PipedOutputStream(filename)};</li>
 * <li>anything else: the plain file stream.</li>
 * </ul>
 * The file is created (or truncated) by a {@link FileOutputStream} in every case.
 *
 * @param filename Name of file to open
 * @return Output stream that can be used to write to the file
 * @throws IOException if there are exceptions opening the file
 */
public static OutputStream getFileOutputStream(String filename) throws IOException {
  OutputStream raw = new FileOutputStream(filename);
  try {
    if (filename.endsWith(".gz")) {
      return new GZIPOutputStream(raw);
    }
    if (filename.endsWith(".bz2")) {
      //out = new CBZip2OutputStream(out);
      // The bzip2 path is given the file name, not this stream, so release the
      // handle instead of abandoning it open.
      raw.close();
      return getBZip2PipedOutputStream(filename);
    }
    return raw;
  } catch (IOException e) {
    // E.g. the gzip header could not be written: do not leave the file open.
    try {
      raw.close();
    } catch (IOException ignored) {
      // the original failure is the one worth reporting
    }
    throw e;
  }
}
/**
 * Opens a file as a buffered character reader using this class's default encoding.
 * Delegates to {@code getBufferedFileReader(filename, encoding)}.
 *
 * @param filename Name of file to open
 * @return Buffered reader over the file contents
 * @throws IOException if the file cannot be opened
 */
public static BufferedReader getBufferedFileReader(String filename) throws IOException {
  final String encoding = defaultEnc;
  return getBufferedFileReader(filename, encoding);
}
/**
 * Opens a file as a buffered character reader in the given encoding. The byte
 * stream comes from {@code getFileInputStream(filename)}.
 *
 * @param filename Name of file to open
 * @param encoding Charset name used to decode the bytes
 * @return Buffered reader over the file contents
 * @throws IOException if the file cannot be opened or the encoding is not supported
 */
public static BufferedReader getBufferedFileReader(String filename, String encoding) throws IOException {
  InputStream in = getFileInputStream(filename);
  try {
    return new BufferedReader(new InputStreamReader(in, encoding));
  } catch (IOException e) {
    // Unsupported encoding: the stream is already open and would otherwise leak.
    try {
      in.close();
    } catch (IOException ignored) {
      // the original failure is the one worth reporting
    }
    throw e;
  }
}
/**
 * Opens a file as a {@link PrintWriter} using this class's default encoding.
 * Delegates to {@code getPrintWriter(filename, encoding)}.
 *
 * @param filename Name of file to open
 * @return Writer that encodes text into the file
 * @throws IOException if the file cannot be opened
 */
public static PrintWriter getPrintWriter(String filename) throws IOException {
  final String encoding = defaultEnc;
  return getPrintWriter(filename, encoding);
}
/**
 * Opens a file as a buffered {@link PrintWriter} in the given encoding. The byte
 * stream comes from {@code getFileOutputStream(filename)}.
 *
 * @param filename Name of file to open
 * @param encoding Charset name used to encode the text
 * @return Writer that encodes text into the file
 * @throws IOException if the file cannot be opened or the encoding is not supported
 */
public static PrintWriter getPrintWriter(String filename, String encoding) throws IOException {
  OutputStream out = getFileOutputStream(filename);
  try {
    return new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, encoding)));
  } catch (IOException e) {
    // Unsupported encoding: the stream is already open and would otherwise leak.
    try {
      out.close();
    } catch (IOException ignored) {
      // the original failure is the one worth reporting
    }
    throw e;
  }
}
/**
 * Starts an external decompressor on a file and returns the process's standard
 * output as the stream to read from.
 * <p>
 * The command defaults to {@code bzcat} and can be overridden with the
 * {@code bzcat} system property. The property value is split on whitespace, so
 * it may carry options. The file name is always passed as one single argument,
 * so paths containing spaces work.
 * <p>
 * The process's standard error is handed to a {@code StreamGobbler} that writes
 * to {@code System.err}.
 *
 * @param filename Name of the compressed file to read
 * @return Standard output of the decompressor process
 * @throws IOException if the process cannot be started
 */
public static InputStream getBZip2PipedInputStream(String filename) throws IOException
{
  String bzcat = System.getProperty("bzcat", "bzcat");
  // Tokenize only the configured command. Runtime.exec(String) would also split
  // the file name on whitespace and break paths that contain spaces.
  List<String> cmd = new ArrayList<String>();
  StringTokenizer tokens = new StringTokenizer(bzcat);
  while (tokens.hasMoreTokens()) {
    cmd.add(tokens.nextToken());
  }
  cmd.add(filename);
  //System.err.println("getBZip2PipedInputStream: Running command: "+cmd);
  Process p = Runtime.getRuntime().exec(cmd.toArray(new String[cmd.size()]));
  Writer errWriter = new BufferedWriter(new OutputStreamWriter(System.err));
  StreamGobbler errGobbler = new StreamGobbler(p.getErrorStream(), errWriter);
  errGobbler.start();
  return p.getInputStream();
}
/**
 * Creates a {@code BZip2PipedOutputStream} for the given file name.
 *
 * @param filename Name of the file, passed to the {@code BZip2PipedOutputStream} constructor
 * @return The newly created stream
 * @throws IOException if the stream cannot be created
 */
public static OutputStream getBZip2PipedOutputStream(String filename) throws IOException {
  OutputStream piped = new BZip2PipedOutputStream(filename);
  return piped;
}
/** Precompiled single-tab delimiter used when splitting lines of tab-separated files. */
private static final Pattern tab = Pattern.compile("\t");
/**
 * Reads one column of a tab-separated file into a set.
 * <p>
 * Each line is trimmed first and skipped if empty. Because of the trim, leading
 * or trailing tabs on a line do not produce empty fields. Lines with fewer than
 * {@code field + 1} columns contribute nothing.
 *
 * @param infile - filename
 * @param field zero-based index of the field to read; a negative value collects
 *              the whole (trimmed) line instead
 * @return a set of the entries in column field
 * @throws IOException if the file cannot be opened or read
 */
public static Set<String> readColumnSet(String infile, int field) throws IOException
{
  Set<String> set = new HashSet<String>();
  BufferedReader br = FileUtils.getBufferedFileReader(infile);
  try {
    String line;
    while ((line = br.readLine()) != null) {
      line = line.trim();
      if (line.length() == 0) {
        continue;
      }
      if (field < 0) {
        set.add(line);
      } else {
        String[] fields = tab.split(line);
        if (field < fields.length) {
          set.add(fields[field]);
        }
      }
    }
  } finally {
    // Close even when reading fails part-way through.
    br.close();
  }
  return set;
}
/**
 * Reads a delimited text file and converts every non-empty line into an object.
 * <p>
 * Each line is trimmed and, unless empty, passed together with {@code objClass},
 * the compiled delimiter and {@code fieldNames} to
 * {@code StringUtils.columnStringToObject}. The results are collected in file order.
 *
 * @param objClass   class handed to {@code columnStringToObject}
 * @param filename   name of the file to read
 * @param fieldNames field names handed to {@code columnStringToObject}
 * @param delimiter  regular expression separating the columns
 * @return one converted object per non-empty line
 * @throws IOException if the file cannot be opened or read
 */
public static <C> List<C> readObjectFromColumns(Class objClass, String filename, String[] fieldNames, String delimiter)
  throws IOException, InstantiationException, IllegalAccessException,
  NoSuchFieldException, NoSuchMethodException, InvocationTargetException
{
  Pattern delimiterPattern = Pattern.compile(delimiter);
  List<C> list = new ArrayList<C>();
  BufferedReader br = FileUtils.getBufferedFileReader(filename);
  try {
    String line;
    while ((line = br.readLine()) != null) {
      line = line.trim();
      if (line.length() > 0) {
        C item = StringUtils.<C>columnStringToObject(objClass, line, delimiterPattern, fieldNames);
        list.add(item);
      }
    }
  } finally {
    // The conversion can throw several reflective exceptions; do not leak the reader.
    br.close();
  }
  return list;
}
/**
 * Reads a two-column, tab-separated file into a map.
 * <p>
 * Each line is split at its first tab: the text before it is the key and
 * everything after it (further tabs included) is the value. A later line with
 * the same key overwrites the earlier one. Blank lines are skipped.
 * <p>
 * Despite the {@code throws} clause, I/O failures are rethrown wrapped in a
 * {@link RuntimeException}.
 *
 * @param filename name of the file to read
 * @return map from first column to the remainder of the line
 * @throws IOException declared for callers, but see the note on wrapping above
 * @throws RuntimeException if reading fails or a non-blank line contains no tab
 */
public static Map<String,String> readMap(String filename) throws IOException
{
  Map<String,String> map = new HashMap<String,String>();
  try {
    BufferedReader br = FileUtils.getBufferedFileReader(filename);
    try {
      String line;
      while ((line = br.readLine()) != null) {
        String[] fields = tab.split(line,2);
        if (fields.length < 2) {
          if (line.trim().length() == 0) {
            // e.g. a trailing empty line
            continue;
          }
          throw new RuntimeException("Expected key<TAB>value in " + filename + " but found: " + line);
        }
        map.put(fields[0], fields[1]);
      }
    } finally {
      br.close();
    }
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
  return map;
}
/**
 * Reads a whole file into one string using this class's default encoding.
 * Delegates to {@code stringFromFile(filename, encoding)}: an empty file
 * gives an empty string, and {@code null} is returned if an IOException
 * occurs while reading.
 */
public static String stringFromFile(String filename) {
  final String encoding = defaultEnc;
  return stringFromFile(filename, encoding);
}
/**
 * Returns the contents of a file as a single string, decoded with the given
 * encoding. The string may be empty, if the file is empty. If there is an
 * IOException, its stack trace is printed and null is returned.
 * <p>
 * Every line read is followed by {@code eolChar}, so line terminators are
 * normalized to the platform separator and the result ends with one whenever
 * the file has at least one line.
 *
 * @param filename name of the file to read
 * @param encoding charset name passed to {@code EncodingFileReader}
 * @return the file contents, or null on an IOException
 */
public static String stringFromFile(String filename, String encoding) {
  try {
    StringBuilder sb = new StringBuilder();
    BufferedReader in = new BufferedReader(new EncodingFileReader(filename,encoding));
    try {
      String line;
      while ((line = in.readLine()) != null) {
        sb.append(line);
        sb.append(eolChar);
      }
    } finally {
      // Close even when reading fails part-way through.
      in.close();
    }
    return sb.toString();
  }
  catch (IOException e) {
    e.printStackTrace();
    return null;
  }
}
/**
 * Reads a file into a list of its lines using this class's default encoding.
 * Delegates to {@code linesFromFile(filename, encoding)}: an empty file
 * gives an empty list, and {@code null} is returned if an IOException
 * occurs while reading.
 */
public static List<String> linesFromFile(String filename) {
  final String encoding = defaultEnc;
  return linesFromFile(filename, encoding);
}
/**
 * Returns the contents of a file as a list of strings, one per line and
 * without line terminators, decoded with the given encoding. The list may be
 * empty, if the file is empty. If there is an IOException, its stack trace is
 * printed and null is returned.
 *
 * @param filename name of the file to read
 * @param encoding charset name passed to {@code EncodingFileReader}
 * @return the lines of the file in order, or null on an IOException
 */
public static List<String> linesFromFile(String filename,String encoding) {
  try {
    List<String> lines = new ArrayList<String>();
    BufferedReader in = new BufferedReader(new EncodingFileReader(filename,encoding));
    try {
      String line;
      while ((line = in.readLine()) != null) {
        lines.add(line);
      }
    } finally {
      // Close even when reading fails part-way through.
      in.close();
    }
    return lines;
  }
  catch (IOException e) {
    e.printStackTrace();
    return null;
  }
}
/**
 * String form of {@code backupFile(new File(filename))}.
 *
 * @param filename name of the file to find a backup name for
 * @return the backup file name, or null when {@code backupFile} finds no
 *         unused name
 */
public static String backupName(String filename) {
  File backup = backupFile(new File(filename));
  // backupFile returns null once every candidate name is taken.
  return (backup == null) ? null : backup.toString();
}
/**
 * Finds a backup path for a file that does not exist yet.
 * <p>
 * The first candidate is the path with {@code ~} appended. If that exists,
 * the numbered candidates {@code .~1.~} through {@code .~1000.~} are tried
 * in order.
 *
 * @param file the file to find a backup path for
 * @return the first candidate that does not exist, or null if all are taken
 */
public static File backupFile(File file) {
  final int highestSuffix = 1000;
  final String base = file.toString();
  File candidate = new File(base + "~");
  int suffix = 0;
  while (candidate.exists()) {
    suffix++;
    if (suffix > highestSuffix) {
      return null;
    }
    candidate = new File(base + ".~" + suffix + ".~");
  }
  return candidate;
}
/**
 * Renames a file to the path chosen by {@code backupFile(file)}.
 *
 * @param file the file to rename
 * @return the result of {@link File#renameTo(File)}, or false when
 *         {@code backupFile} finds no unused backup name
 */
public static boolean renameToBackupName(File file) {
  File backup = backupFile(file);
  if (backup == null) {
    // File.renameTo(null) would throw a NullPointerException.
    return false;
  }
  return file.renameTo(backup);
}
/**
 * A JavaNLP specific convenience routine that builds the scratch directory
 * path for the current machine and user.
 * <p>
 * The path is {@code /<host>/scr1/<user>}, where {@code <host>} is the local
 * host name up to its first dot and {@code <user>} is the {@code user.name}
 * system property. If anything goes wrong while building it, {@code ./scr/}
 * is returned instead. The directory is not created.
 */
public static File getJNLPLocalScratch() {
  File scratch;
  try {
    String hostName = InetAddress.getLocalHost().getHostName();
    String[] labels = hostName.split("\\.");
    String shortHost = labels[0];
    String user = System.getProperty("user.name");
    scratch = new File("/" + shortHost + "/scr1/" + user);
  } catch (Exception e) {
    scratch = new File("./scr/"); // default scratch
  }
  return scratch;
}
/**
 * Given a filepath, makes sure a directory exists there. If not, creates it
 * (including missing parents) and returns it.
 * Same as ENSURE-DIRECTORY in CL.
 *
 * @param tgtDir the directory that must exist
 * @return {@code tgtDir} itself
 * @throws Exception if a non-directory already exists at that path, or if the
 *                   directory could not be created
 */
public static File ensureDir(File tgtDir) throws Exception {
  if (tgtDir.isDirectory()) {
    return tgtDir;
  }
  if (tgtDir.exists()) {
    throw new Exception("Could not create directory "+tgtDir.getAbsolutePath()+", as a file already exists at that path.");
  }
  // mkdirs() also returns false when someone else created the directory in the
  // meantime, so only fail if it is really not there afterwards.
  if (!tgtDir.mkdirs() && !tgtDir.isDirectory()) {
    throw new Exception("Could not create directory "+tgtDir.getAbsolutePath());
  }
  return tgtDir;
}
/**
 * Prints the result of {@code backupName} for the file name given as the
 * first argument.
 *
 * @param args command line; {@code args[0]} is the file name
 */
public static void main(String[] args) {
  if (args.length < 1) {
    // Explain the expected argument instead of failing on args[0].
    System.err.println("Usage: java edu.stanford.nlp.io.FileUtils <filename>");
    return;
  }
  System.out.println(backupName(args[0]));
}
/**
 * Returns the extension of a file name: the text after its last dot.
 * <p>
 * Only the last path component is considered, so a dot in a directory name
 * (as in {@code /home/user.name/notes}) is not mistaken for the start of an
 * extension. A name ending in a dot has the empty string as its extension.
 *
 * @param fileName file name or path; must not be null
 * @return the extension without the dot, or null if the last path component
 *         contains no dot
 */
public static String getExtension(String fileName) {
  int dot = fileName.lastIndexOf('.');
  if (dot < 0) {
    return null;
  }
  int separator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf(File.separatorChar));
  if (dot < separator) {
    // The last dot belongs to a directory name, not to the file itself.
    return null;
  }
  return fileName.substring(dot + 1);
}
}