//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.resources; import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.util.LinkedList; import java.util.List; import org.apache.commons.lang.StringUtils; import org.apache.uima.util.FileUtils; import uk.gov.dstl.baleen.uima.BaleenResource; /** * A shared resource that can be used to read any file. This primarily exists as * a shared resource rather than a helper so that it can be passed into * IGazetteers, but could implement some shared logic such as Tika extraction in * the future. * * */ public class SharedFileResource extends BaleenResource { /** Read an entire file into a string, ignoring any leading or trailing whitespace. * * @param file to load * @return the content of the file as a string. * @throws IOException on error reading or accessing the file. */ public static String readFile(File file) throws IOException { String contents = FileUtils.file2String(file); contents = contents.replaceAll("\r\n", "\n"); return StringUtils.strip(contents); } /** * Read the file and return all the lines, with any leading or trailing 'empty' lines omitted. * Lines that consist solely of whitespace are assumed to be empty. * Lines are trimmed of any leading or trailing whitespace as they are read. * * Implemented as per BufferedReader. * * @param the file to load * @return non-null, but potentially empty, array of string (one line per string) * @throws IOException on error accessing or reading from the file. */ public static String[] readFileLines(File file) throws IOException { List<String> lines = new LinkedList<>(); Files.lines(file.toPath()).forEach(l -> lines.add(StringUtils.strip(l.replaceAll("\r\n","\n")))); while(StringUtils.strip(lines.get(0)).isEmpty()){ lines.remove(0); } while(StringUtils.strip(lines.get(lines.size() - 1)).isEmpty()){ lines.remove(lines.size() - 1); } return lines.toArray(new String[lines.size()]); } }