/* * ImportHelper.java * * (c) 2002-2005 BEAST Development Core Team * * This package may be distributed under the * Lesser Gnu Public Licence (LGPL) */ package es.uvigo.darwin.prottest.util.fileio; import es.uvigo.darwin.prottest.util.exception.ImportException; import java.io.EOFException; import java.io.IOException; import java.io.LineNumberReader; import java.io.Reader; import java.io.Writer; /** * A helper class for phylogenetic file format importers * * @author Andrew Rambaut * @author Alexei Drummond * * @version $Id: ImportHelper.java 699 2007-04-28 07:07:49Z matthew_cheung $ */ public class ImportHelper { private long totalCharactersRead = 0; // Expected length of input in bytes, or 0 if unknown private long expectedInputLength = 0; /** * ATTENTION: The ImportHelper never closes the reader passed to the constructor. * If the reader holds resources (e.g. a FileReader, which holds an open file), * then it is the client class' responsibility to close the reader when it has * finished using it. * @param reader */ public ImportHelper(Reader reader) { this.reader = new LineNumberReader(reader); this.commentWriter = null; } public void setExpectedInputLength(long l) { this.expectedInputLength = l; } public ImportHelper(Reader reader, Writer commentWriter) { this.reader = new LineNumberReader(reader); this.commentWriter = commentWriter; } /** * @return If the length of the input is known (because a file was * passed to the constructor), this reports a value between 0.0 and 1.0 * indicating the relative read position in the file. Otherwise, this * always returns 0.0. * * This method assumes that all characters in the input are one byte * long (to get its estimate, it divides the number of *characters* read * by the number of *bytes* in the file). If there is an efficient way * to fix this, we should do so :) */ public double getProgress() { if (expectedInputLength == 0) { return 0.0; } else { return (double) totalCharactersRead / expectedInputLength; } } public void closeReader() throws IOException { reader.close(); } public void setCommentDelimiters(char line) { hasComments = true; this.lineComment = line; } public void setCommentDelimiters(char start, char stop) { hasComments = true; this.startComment = start; this.stopComment = stop; } public void setCommentDelimiters(char start, char stop, char line) { hasComments = true; this.startComment = start; this.stopComment = stop; this.lineComment = line; } public void setCommentDelimiters(char start, char stop, char line, char write, char meta) { hasComments = true; this.startComment = start; this.stopComment = stop; this.lineComment = line; this.writeComment = write; this.metaComment = meta; } public void setCommentWriter(Writer commentWriter) { this.commentWriter = commentWriter; } public int getLineNumber() { return reader.getLineNumber(); } public int getLastDelimiter() { return lastDelimiter; } public char nextCharacter() throws IOException { if (lastChar == '\0') { lastChar = readCharacter(); } return (char)lastChar; } public char readCharacter() throws IOException { skipSpace(); char ch = read(); while (hasComments && (ch == startComment || ch == lineComment)) { skipComments(ch); skipSpace(); ch = read(); } return ch; } public void unreadCharacter(char ch) { lastChar = ch; } public char next() throws IOException { if (lastChar == '\0') { lastChar = read(); } return (char)lastChar; } /** * All read attempts pass through this function. * * @return the next char * @throws IOException */ public char read() throws IOException { int ch; if (lastChar == '\0') { // this is the only point where anything is read from the reader ch = reader.read(); if (ch != -1) { totalCharactersRead++; } else { throw new EOFException(); } } else { ch = lastChar; lastChar = '\0'; } return (char)ch; } /** * Reads a line, skipping over any comments. */ public String readLine() throws IOException { StringBuffer line = new StringBuffer(); char ch = read(); try { while (ch != '\n' && ch != '\r') { if (hasComments) { if (ch == lineComment) { skipComments(ch); break; } if (ch == startComment) { skipComments(ch); ch = read(); } } line.append(ch); ch = read(); } // accommodate DOS line endings.. if (ch == '\r') { if (next() == '\n') read(); } lastDelimiter = ch; } catch (EOFException e) { // We catch an EOF and return the line we have so far // encounteredEndOfFile(); } return line.toString(); } /** * Attempts to read and parse an integer delimited by whitespace. */ public int readInteger() throws IOException, ImportException { String token = readToken(); try { return Integer.parseInt(token); } catch (NumberFormatException nfe) { throw new ImportException("Number format error: " + nfe.getMessage()); } } /** * Attempts to read and parse an integer delimited by whitespace or by * any character in delimiters. */ public int readInteger(String delimiters) throws IOException, ImportException { String token = readToken(delimiters); try { return Integer.parseInt(token); } catch (NumberFormatException nfe) { throw new ImportException("Number format error: " + nfe.getMessage()); } } /** * Attempts to read and parse a double delimited by whitespace. */ public double readDouble() throws IOException, ImportException { String token = readToken(); try { return Double.parseDouble(token); } catch (NumberFormatException nfe) { throw new ImportException("Number format error: " + nfe.getMessage()); } } /** * Attempts to read and parse a double delimited by whitespace or by * any character in delimiters. */ public double readDouble(String delimiters) throws IOException, ImportException { String token = readToken(delimiters); try { return Double.parseDouble(token); } catch (NumberFormatException nfe) { throw new ImportException("Number format error: " + nfe.getMessage()); } } /** * Reads a token stopping when any whitespace or a comment is found. * If the token begins with a quote char then all characters will be * included in token until a matching quote is found (including whitespace or comments). */ public String readToken() throws IOException { return readToken(""); } /** * Reads a token stopping when any whitespace, a comment or when any character * in delimiters is found. If the token begins with a quote char * then all characters will be included in token until a matching * quote is found (including whitespace or comments). */ public String readToken(String delimiters) throws IOException { int space = 0; char ch, ch2, quoteChar = '\0'; boolean done = false, first = true, quoted = false, isSpace; nextCharacter(); StringBuffer token = new StringBuffer(); while (!done) { ch = read(); try { isSpace = Character.isWhitespace(ch); if (quoted && ch == quoteChar) { // Found the closing quote ch2 = read(); if (ch == ch2) { // A repeated quote character so add this to the token token.append(ch); } else { // otherwise it terminates the token lastDelimiter = ' '; if (hasComments && (ch2 == startComment || ch2 == lineComment)) { skipComments(ch2, startComment!= '\"' && startComment != '\''); } else { unreadCharacter(ch2); } done = true; quoted = false; } } else if (first && (ch == '\'' || ch == '"')) { // if the opening character is a quote // read everything up to the closing quote quoted = true; quoteChar = ch; first = false; space = 0; } else if (!quoted && (ch == startComment || ch == lineComment) ) { // comment markers don't count if we are quoted skipComments(ch, startComment!= '\"' && startComment != '\''); lastDelimiter = ' '; done = true; } else { if (quoted) { token.append(ch); } else if (isSpace) { lastDelimiter = ' '; done = true; } else if (delimiters.indexOf(ch) != -1) { done = true; lastDelimiter = ch; } else { token.append(ch); first = false; } } } catch (EOFException e) { // We catch an EOF and return the token we have so far done = true; } } if (Character.isWhitespace((char)lastDelimiter)) { ch = nextCharacter(); while (Character.isWhitespace(ch)) { read(); ch = nextCharacter(); } if (delimiters.indexOf(ch) != -1) { lastDelimiter = readCharacter(); } } return token.toString(); } /** * Skips over any comments. The opening comment delimiter is passed. * @param delimiter * @throws java.io.IOException */ protected void skipComments(char delimiter) throws IOException { skipComments(delimiter, false); } /** * Skips over any comments. The opening comment delimiter is passed. * @param delimiter * @param gobbleStrings * @throws java.io.IOException */ protected void skipComments(char delimiter, boolean gobbleStrings) throws IOException { char ch; int n=1; boolean write = true; StringBuffer meta = null; if (lastComment == null) { lastComment = new StringBuffer(); } if (nextCharacter() == writeComment) { read(); // if (commentWriter != null) { // commentWriter.write(writeComment); // } } else if (nextCharacter() == metaComment) { read(); meta = new StringBuffer(); write = false; } lastMetaComment = null; if (delimiter == lineComment) { String line = readLine(); if (write && commentWriter != null) { commentWriter.write(line, 0, line.length()); commentWriter.write('\n');//.newLine(); } else if (meta != null) { meta.append(line); } } else { Character inString = null; do { ch = read(); if( ch == '\"' || ch == '\'' ) { if( gobbleStrings ) { if( inString == null ) { inString = ch; } else if( inString == ch ) { inString = null; } } } if( inString == null ) { if (ch == startComment) { lastComment = new StringBuffer(); n++; continue; } else if (ch == stopComment) { if (write && commentWriter != null) { if (lastComment.toString().contains("ID")) { commentWriter.write(lastComment.toString()); } lastComment = new StringBuffer(); } // if (write && commentWriter != null) { // commentWriter.write('\n');//.newLine(); // } n--; continue; } } if (write && commentWriter != null) { lastComment.append(ch); // commentWriter.write(ch); } else if (meta != null) { meta.append(ch); } } while (n > 0); } if (meta != null) { lastMetaComment = meta.toString(); } } /** * Skips to the end of the line. If a comment is found then this is read. */ public void skipToEndOfLine() throws IOException { char ch; do { ch = read(); if (hasComments) { if (ch == lineComment) { skipComments(ch); break; } if (ch == startComment) { skipComments(ch); ch = read(); } } } while (ch != '\n' && ch != '\r'); if (ch == '\r') { if (nextCharacter() == '\n') read(); } } /** * Skips char any contiguous characters in skip. Will also skip * comments. */ public void skipWhile(String skip) throws IOException { char ch; do { ch = read(); } while ( skip.indexOf(ch) > -1 ); unreadCharacter(ch); } /** * Skips over any space (plus tabs and returns) in the file. Will also skip * comments. */ public void skipSpace() throws IOException { skipWhile(" \t\r\n"); } /** * Skips over any contiguous characters in skip. Will also skip * comments and space. */ public void skipCharacters(String skip) throws IOException { skipWhile(skip + " \t\r\n"); } /** * Skips over the file until a character from delimiters is found. Returns * the delimiter found. Will skip comments and will ignore delimiters within * comments. */ public char skipUntil(String skip) throws IOException { char ch; do { ch = readCharacter(); } while ( skip.indexOf(ch) == -1 ); return ch; } public String getLastMetaComment() { return lastMetaComment; } public void clearLastMetaComment() { lastMetaComment = null; } static String safeName(String name) { if( ! name.matches("[a-zA-Z0-9_.]+") ) { name = "\"" + name + "\""; } return name; } /** * Convert control (unprintable) characters to something printable * @param token * @return token printable version */ static String convertControlsChars(String token) { if( ! token.matches("[^\\p{Cntrl}]+") ) { StringBuilder b = new StringBuilder(); for( char c : token.toCharArray() ) { if( c < 0x20 || c >= 0xfe ) { b.append("#").append(Integer.toHexString(c)); } else { b.append(c); } } return b.toString(); } return token; } // Private stuff private LineNumberReader reader; private Writer commentWriter = null; private int lastChar = '\0'; private int lastDelimiter = '\0'; private boolean hasComments = false; private char startComment = (char)-1; private char stopComment = (char)-1; private char lineComment = (char)-1; private char writeComment = (char)-1; private char metaComment = (char)-1; private StringBuffer lastComment; private String lastMetaComment = null; }