/********************************************************************************* * TotalCross Software Development Kit * * Copyright (C) 2000-2012 SuperWaba Ltda. * * All Rights Reserved * * * * This library and virtual machine is distributed in the hope that it will * * be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * * * This file is covered by the GNU LESSER GENERAL PUBLIC LICENSE VERSION 3.0 * * A copy of this license is located in file license.txt at the root of this * * SDK or can be downloaded here: * * http://www.gnu.org/licenses/lgpl-3.0.txt * * * *********************************************************************************/ package totalcross.io; import totalcross.util.*; /** * Used to read an array of tokens in a line ending with \r\n (enter/linefeed) from a stream. Consecutive newlines are skipped. * This class does not work well with multi-byte characters when the second byte contains the delimiter or enter/linefeed. * <br><br>The usual way to parse a CSV file is: * <pre> * do * line = readline * Convert.tokenizeString(line) * </pre> * Using this class takes less memory, because the line is read in tokens. For example, suppose a line contains 200 chars, * and splitting them contains 10 tokens of 20 chars each. Using the first approach (readline/tokenizeString), the readline will * create a string with 200 chars, then that String will be tokenized into 10 smaller strings of 20 chars each.<br><br> * Using this class, it will read the 10 tokens of 20 chars each directly, no longer having to create the temporary string of 200 chars. * <br><br> * The delimiter can be any character except for \r and \n. Note that two consecutive delimiters are considered a single token. So * <code>;a;;</code> is returned as <code>{"","a","",""}</code>. * <br><br> * Here's a sample that parses the input from a file: * * <pre> * TokenReader reader = new TokenReader(new File("text.csv",File.READ_WRITE), ','); * String[] tokens; * while ((tokens = reader.readTokens()) != null) * { * ... do whatever you want with the tokens. * } * </pre> * And here's another sample that parses from a string: * <pre> * String lines = "a;;;a;\na;;;a; \nb;;b;b;b \nb;;b;b;b;\nb\nb;\n;b\n;\n b ;\n b \n b \n b \nb \n b"; * String ll[] = Convert.tokenizeString(lines,'\n'); * TokenReader tk = new TokenReader(new ByteArrayStream(lines.getBytes()),';'); * tk.doTrim = true; * String []line; * for (int j =0; ((line = tk.readTokens()) != null); j++) * { * Vm.debug('"'+ll[j]+'"'); * for (int i =0; i < line.length; i++) * Vm.debug(i+": '"+line[i]+"'"); * Vm.debug(""); * } * </pre> * The output is: * <pre> * "a;;;a;" * 0: 'a' * 1: '' * 2: '' * 3: 'a' * 4: '' * * "a;;;a; " * 0: 'a' * 1: '' * 2: '' * 3: 'a' * 4: '' * * "b;;b;b;b " * 0: 'b' * 1: '' * 2: 'b' * 3: 'b' * 4: 'b' * * "b;;b;b;b;" * 0: 'b' * 1: '' * 2: 'b' * 3: 'b' * 4: 'b' * 5: '' * * "b" * 0: 'b' * * "b;" * 0: 'b' * 1: '' * * ";b" * 0: '' * 1: 'b' * * ";" * 0: '' * 1: '' * * " b ;" * 0: 'b' * 1: '' * * " b " * 0: 'b' * * " b " * 0: 'b' * * " b " * 0: 'b' * * "b " * 0: 'b' * * " b" * 0: 'b' * </pre> * Note that this class already uses a buffer for faster detection of the newline and delimiters. * Don't use TokenReader with a BufferedStream, it's nonsense and it will throw a warning on the desktop. * * @author Guilherme Campos Hazan (guich) * @since TotalCross 1.23 */ public class TokenReader extends LineReader { protected char delimiter; private Vector lines = new Vector(10); /** * Constructs a new TokenReader and sets maxTries accordingly to the type of * class: 10 if its a Socket or a PortConnector; 0, otherwise. * * @throws totalcross.io.IOException */ public TokenReader(Stream f, char delimiter) throws totalcross.io.IOException { super(f); this.delimiter = delimiter; } /** * Constructs a new TokenReader and sets maxTries accordingly to the type of * class: 10 if its a Socket or a PortConnector, 0 otherwise. * The given buffer contents are added to the internal buffer to start reading from them. * * @throws totalcross.io.IOException * @since TotalCross 1.25 */ public TokenReader(Stream f, char delimiter, byte[] buffer, int start, int len) throws totalcross.io.IOException // guich@tc125_16 { super(f, buffer, start, len); this.delimiter = delimiter; } /** Cannot be used; throws a RuntimeException. * @see #readTokens */ public String readLine() { throw new RuntimeException("Use readTokens instead of readLine!"); } /** * Returns the next tokens available in this stream or null if none. Empty * lines are skipped. * * @throws totalcross.io.IOException */ public String[] readTokens() throws totalcross.io.IOException { byte[] buf = readBuf.getBuffer(); int size = readBuf.getPos(); byte delimiter = (byte)this.delimiter; // skip starting control chars while (ofs < size && (buf[ofs] == '\n' || buf[ofs] == '\r')) ofs++; lines.removeAllElements(); while (true) { int i; for (i = ofs; i < size; i++) { if (buf[i] == delimiter || buf[i] == '\n') // found a token or a linefeed? { int len = i - ofs; if (i > 0 && buf[i-1] == '\r') // guich@tc123_47: is the previous character a \r? len--; int ii = ofs+len; if (doTrim && len > 0 && (buf[ofs] <= ' ' || buf[ii-1] <= ' ')) // guich@tc123_37 { while (ofs < ii && buf[ofs] <= ' ') ofs++; while (ii > ofs && buf[ii-1] <= ' ') ii--; len = ii - ofs; } // allocate the new String and return String s = new String(buf, ofs, len); ofs = i; lines.addElement(s); if (buf[i] == '\n') return (String[])lines.toObjectArray(); ofs++; // strip the cr/lf from the string } } // no enter found; fetch more data int lastOfs = ofs; reuse(); boolean foundMore = readMore(); size = readBuf.getPos(); // size had changed buf = readBuf.getBuffer(); // buffer may have changed if (!foundMore) { int len = i - lastOfs; if (len == 0 && lines.size() == 0) return null; ofs = len; lastOfs = 0; if (doTrim && len > 0 && (buf[0] <= ' ' || buf[len-1] <= ' ')) // guich@tc123_37 { while (lastOfs < len && buf[lastOfs] <= ' ') lastOfs++; while (len > lastOfs && buf[len-1] <= ' ') len--; } String s = new String(buf, lastOfs, len-lastOfs); lines.addElement(s); return (String[])lines.toObjectArray(); } } } }