package me.osm.gazetter.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
/**
* File reading and writing utilities
* */
public class FileUtils {
/**
* Filter for readLines routine.
*
* Allows to filter lines during the iteration over
* a list of lines from file.
*
* @see me.osm.gazetter.utils.FileUtils.readLines(File, LineFilter)
* */
public static interface LineFilter {
/**
* If line isSuitable - it will be passed to LineHandler
*
* @param s - readed line
* */
public boolean isSuitable(String s);
}
/**
* Handles lines in file during the iteration over it.
*
* @see me.osm.gazetter.utils.FileUtils.handleLines(InputStream, LineHandler)
* */
public static interface LineHandler {
/**
* @param s - readed line
* */
public void handle(String s);
}
/**
* Read file line by line into list of strings
*
* Lines will be decoded as UTF-8 Strings.
* If file ends with .gz or .bz2 - it will be red with decompression
*
* @param f - File to be red.
* @returns List of Strings
* */
public static List<String> readLines(File f) throws IOException {
return readLines(f, null);
}
/**
* Read InputStream line by line, and pass lines without storing
* to the LineHandler.
*
* @param f - input stream to read from
* @param handler - lines handler callback
* */
public static void handleLines(InputStream f, LineHandler handler) {
BufferedReader bufferedReader = null;
try {
bufferedReader = new BufferedReader(new InputStreamReader(f, "UTF8"));
String line = bufferedReader.readLine();
do {
if(line != null) {
handler.handle(line);
}
line = bufferedReader.readLine();
} while (line != null);
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
if (bufferedReader != null) {
try {
bufferedReader.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
}
/**
* Read file line by line, and pass lines without storing
* to the LineHandler.
* <p>
* If file ends with .gz or .bz2 - it will be readed with decompression
*
* @param f - file to read
* @param handler - callback interface
* */
public static void handleLines(File f, LineHandler handler) throws IOException {
try {
handleLines(getFileIS(f), handler);
} catch (FileNotFoundException e) {
throw new RuntimeException("Failed to read file " + f.getName(), e);
}
}
/**
* Read all lines in file into the List, with lines filtration.
*
* If file ends with .gz or .bz2 - it will be readed with decompression
*
* @param f - file to read
* @param filter - lines filter (will be ignored if it is null)
* */
public static List<String> readLines(File f, final LineFilter filter) throws IOException {
final List<String> result = new ArrayList<>();
handleLines(f, new LineHandler() {
@Override
public void handle(String s) {
if (filter == null || filter.isSuitable(s)) {
result.add(s);
}
}
});
return result;
}
/**
* Returns InputStream for file.
* <p>
* If file name ends with .gz or bz2 stream will be wrapped into
* GZIPInputStream or BZip2CompressorInputStream accordingly.
*
* @param osmFilePath - file to read
* */
public static InputStream getFileIS(File osmFilePath) throws IOException,
FileNotFoundException {
if (osmFilePath.getName().endsWith(".gz")) {
return new GZIPInputStream(new FileInputStream(osmFilePath));
}
if (osmFilePath.getName().endsWith(".bz2")) {
return new BZip2CompressorInputStream(new FileInputStream(
osmFilePath));
}
return new FileInputStream(osmFilePath);
}
/**
* Return print writer for file with UTF8 encoding.
* <p>
* If filename ends with .gz - file will be compressed
*
* @param file - file to write into
* @param append - append or overwrite file content
* */
public static PrintWriter getPrintWriter(File file, boolean append) throws IOException {
if(file.getName().endsWith(".gz") && file.exists() && append) {
throw new IllegalArgumentException("Can't append to gzipped file");
}
OutputStream os = new FileOutputStream(file, append);
if(file.getName().endsWith(".gz")) {
os = new GZIPOutputStream(os);
}
return new PrintWriter(new OutputStreamWriter(os, "UTF8"));
}
/**
* Try to find exists file with or without .gz name suffix.
* <p>
* If none of them doesn't exists, returns original file.
*
* @param file to try
* */
public static File withGz(File file) {
if(file.exists()) {
return file;
}
File newF = null;
if(file.getName().endsWith(".gz")) {
newF = new File(file.getPath().replace(".gz", ""));
}
else {
newF = new File(file.getPath() + ".gz");
}
if(newF.exists()) {
return newF;
}
return file;
}
/**
* Write lines into file.
* If file name ends with .gz - file will be compressed
*
* @param stripeF - file to write to
* @param lines - lines to be written
* */
public static void writeLines(File stripeF, List<String> lines) throws IOException {
writeLines(stripeF, lines, false);
}
/**
* Write lines into file.
* If file name ends with .gz - file will be compressed
*
* @param stripeF - file to write to
* @param lines - lines to be written
* @param append - append or overwrite exists file
* */
public static void writeLines(File stripeF, List<String> lines, boolean append) throws IOException {
PrintWriter printwriter = null;
try {
printwriter = getPrintWriter(stripeF, append);
for(String line : lines) {
printwriter.println(line);
}
}
finally {
if(printwriter != null) {
printwriter.flush();
printwriter.close();
}
}
}
/**
* Creates PrintWriter for file.
*
* Wrap file with GZipOutput stream if file name ends with .gz
*
* In case of append is true, and file is *.gz
* rewrites data into file to append via GZipOutputStream correctly.
*
* @param file file to write to. New file will be created if provided doesn't exists.
* @param append append to exists file or override it if append is false
*
* */
public static PrintWriter getPrintWriterWithGZAppendTrick(File file, boolean append) throws IOException {
/*
* There are JZlib library, which theoretically allows to append into
* exists gzip file. But I haven't give it a try yet.
*
* So code down below isn't an optimal solution,
* especially for large files.
*/
if(!file.getName().endsWith(".gz") || !append || !file.exists()) {
return getPrintWriter(file, append);
}
//rename old
File tmp = new File(file.getAbsolutePath() + ".t.gz");
file.renameTo(tmp);
//create new
file.createNewFile();
//rewrite ( Damn, Java why can't you just append to exist gzip archive)
final PrintWriter writer = getPrintWriter(file, false);
handleLines(tmp, new LineHandler() {
@Override
public void handle(String s) {
writer.println(s);
}
});
//delete temp file
tmp.delete();
return writer;
}
}