/* * JEF - Copyright 2009-2010 Jiyi (mr.jiyi@gmail.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package jef.tools.string; import java.io.IOException; import java.io.Reader; import jef.tools.ArrayUtils; /** * 在Java类StringReader基础上进行了扩展,增加了若干方法,比较适合文本语义的解析操作。 * @author Administrator */ public class JefStringReader extends Reader { private String str; private int length; private int next = 0; private int mark = 0; private char[] ignoreChars; /** * Creates a new string reader. * * @param s * String providing the character stream. */ public JefStringReader(String s) { this.str = s; this.length = s.length(); } /** Check to make sure that the stream has not been closed */ private void ensureOpen() throws IOException { if (str == null) throw new IOException("Stream closed"); } /** * Reads a single character. * * @return The character read, or -1 if the end of the stream has been * reached * * @exception IOException * If an I/O error occurs */ public int read() throws IOException { synchronized (lock) { ensureOpen(); if (next >= length) return -1; return str.charAt(next++); } } @Override public String toString() { return str.substring(next); } /** * Reads characters into a portion of an array. * * @param cbuf * Destination buffer * @param off * Offset at which to start writing characters * @param len * Maximum number of characters to read * * @return The number of characters read, or -1 if the end of the stream has * been reached * * @exception IOException * If an I/O error occurs */ public int read(char cbuf[], int off, int len) throws IOException { synchronized (lock) { ensureOpen(); if ((off < 0) || (off > cbuf.length) || (len < 0) || ((off + len) > cbuf.length) || ((off + len) < 0)) { throw new IndexOutOfBoundsException(); } else if (len == 0) { return 0; } if (next >= length) return -1; int n = Math.min(length - next, len); str.getChars(next, next + n, cbuf, off); next += n; return n; } } /** * Skips the specified number of characters in the stream. Returns the * number of characters that were skipped. * * <p> * The <code>ns</code> parameter may be negative, even though the * <code>skip</code> method of the {@link Reader} superclass throws an * exception in this case. Negative values of <code>ns</code> cause the * stream to skip backwards. Negative return values indicate a skip * backwards. It is not possible to skip backwards past the beginning of the * string. * * <p> * If the entire string has been read or skipped, then this method has no * effect and always returns 0. * * @exception IOException * If an I/O error occurs */ public long skip(long ns) throws IOException { synchronized (lock) { ensureOpen(); if (next >= length) return 0; // Bound skip by beginning and end of the source long n = Math.min(length - next, ns); n = Math.max(-next, n); next += n; return n; } } /** * Tells whether this stream is ready to be read. * * @return True if the next read() is guaranteed not to block for input * * @exception IOException * If the stream is closed */ public boolean ready() throws IOException { synchronized (lock) { ensureOpen(); return true; } } /** * Tells whether this stream supports the mark() operation, which it does. */ public boolean markSupported() { return true; } /** * Marks the present position in the stream. Subsequent calls to reset() * will reposition the stream to this point. * * @param readAheadLimit * Limit on the number of characters that may be read while still * preserving the mark. Because the stream's input comes from a * string, there is no actual limit, so this argument must not be * negative, but is otherwise ignored. * * @exception IllegalArgumentException * If readAheadLimit is < 0 * @exception IOException * If an I/O error occurs */ public void mark(int readAheadLimit) throws IOException { if (readAheadLimit < 0) { throw new IllegalArgumentException("Read-ahead limit < 0"); } synchronized (lock) { ensureOpen(); mark = next; } } /** * Resets the stream to the most recent mark, or to the beginning of the * string if it has never been marked. * * @exception IOException * If an I/O error occurs */ public void reset() throws IOException { synchronized (lock) { ensureOpen(); next = mark; } } /** * Closes the stream and releases any system resources associated with it. * Once the stream has been closed, further read(), ready(), mark(), or * reset() invocations will throw an IOException. Closing a previously * closed stream has no effect. */ public void close() { str = null; } /** * 得到接下来的一个字符,如果已经到末尾则返回-1 * 整体游标不向前滚动 * @return 下一个字符 */ public int nextChar() { if (next >= length) return -1; return str.charAt(next); } /** * 得到接下来的一个字符,如果已经到末尾则返回-1 * 整体游标向前滚动 * @return 下一个字符 */ public int readChar() { if (next >= length) return -1; return str.charAt(next++); } /** * * @return true if the reader is end; */ public boolean eof(){ return (next >= length); } /** * 得到接下来的若干字符,如果剩余长度不足返回null; * * @param n * @return 读到的文字 */ public String nextString(int n) { if (next+n > length) return null; return str.substring(next, next+n); } /** * 读取指定数量的字符形成String,如果长度不足返回null * @return 读到的字符 */ public String readString(int n){ if (next+n > length) return null; String result=str.substring(next, next+n); next+=n; return result; } /** * 跳过指定个数的字符 * @param n */ public int omit(int n){ next+=n; if (next > length){ n=n-(next-length); next=length; } return n; } /** * @deprecated 请使用{@link #readUntilCharIs(char[])} * @param keyChars * @return */ public char[] readUntil(char[] keyChars) { return readUntilCharIs(keyChars); } /** * 读取Char直到出现指定的匹配(或者结束)为止。指向匹配的char * @return 读到的字符 * @throws IOException */ public char[] readUntilCharIs(char... keyChars){ int n=0; char result[]=new char[length-next]; for(int nc=nextChar();nc!=-1 && !ArrayUtils.contains(keyChars,(char)nc);nc=nextChar()){ result[n]=(char) readChar(); n++; } return ArrayUtils.subarray(result, 0, n); } /** * 忽略字符,直到出现指定的字符位置,指向匹配的char * @return */ public int omitUntillChar(char... keyChars){ int n=next; for(int nc=nextChar();nc!=-1 && !ArrayUtils.contains(keyChars,(char)nc);nc=nextChar()){ next++; } return next-n; } /** * @deprecated 使用{@link #readChars(char...)} * @param keyChars * @return */ public char[] readWhileCharIs(char[] keyChars){ return readChars(keyChars); } /** * 读取指定范围内char序列。结束后指向第一个不在指定范围内的char * @param keyChars * @return 读到的字符 */ public char[] readChars(char... keyChars){ int n=0; char result[]=new char[length-next]; for(int nc=nextChar();nc!=-1 && ArrayUtils.contains(keyChars,(char)nc);nc=nextChar()){ result[n]=(char) readChar(); n++; } return ArrayUtils.subarray(result, 0, n); } /** * 跳过这些字符 * @param keyChars * @return * @throws IOException */ public int omitChars(char... keyChars){ int offset=next; if(keyChars.length==0){ keyChars=this.ignoreChars; } for(int nc=nextChar();nc!=-1 && ArrayUtils.contains(keyChars,(char)nc);nc=nextChar()){ next++; } return next-offset; } /** * 检测后续字符是否符合指定的字符串 * @param key 要查找的关键字 * @param ignoreChars 如果遇到ignoreChars中的字符则会自动略过(即便在关键字匹配开始后,出现此类字符也会忽略,比较典型的使用场景是: * <pre> * a = 'hello world!' * 上例中,如果以a=作为key,那么中间的空格会影响搜索结果,此时设置空格为ignorChar,那么就可以排除这种影响。 * </pre> * 如果通过{@link #setIgnoreChars(char...)}设置的忽略表,不会忽略关键字当中的特殊字符,因此两者有细微的差别,使用者需要掌握使用这个差别来达到解析的目的。 * 备注:ignoreChars 一般可以是空格, \r\n\t等字符,目的是让被这些字符隔断的文字形成完整的语义。 * * @return end char offset from current offset * 返回偏移量,match完成后的位置距离当前位置的偏移量。 */ public int matchNext(String key,char... ignoreChars) { int offset=0; int match=0; boolean matched=false; while(!matched && next+offset<length){ char c=str.charAt(next+offset); offset++; if(c==key.charAt(match)){//先匹配 match++; if(match==key.length())matched=true; continue; } //匹配失败 if(match==0){//在匹配开始前允许忽略的字符 if(ArrayUtils.contains(this.ignoreChars, c)){ continue; } } //匹配失败 if(ArrayUtils.contains(ignoreChars, c)){ continue; } //匹配失败且字符不可忽略 return -1; } return matched?offset:-1; } /** * 检测后续字符是否符合指定的字符串(忽略大小写) * @param key 要查找的关键字 * @param ignoreChars 如果遇到ignoreChars中的字符则会自动略过(即便在关键字匹配开始后,出现此类字符也会忽略,比较典型的使用场景是: * <pre> * a = 'hello world!' * 上例中,如果以a=作为key,那么中间的空格会影响搜索结果,此时设置空格为ignorChar,那么就可以排除这种影响。 * </pre> * 如果通过{@link #setIgnoreChars(char...)}设置的忽略表,不会忽略关键字当中的特殊字符,因此两者有细微的差别,使用者需要掌握使用这个差别来达到解析的目的。 * 备注:ignoreChars 一般可以是空格, \r\n\t等字符,目的是让被这些字符隔断的文字形成完整的语义。 * * @return end char offset from current offset * 返回偏移量,match完成后的位置距离当前位置的偏移量。 */ public int matchNextIgnoreCase(String key,char... ignoreChars) { int offset=0; int match=0; boolean matched=false; while(!matched && next+offset<length){ char c=str.charAt(next+offset); offset++; if(Character.toLowerCase(c)==Character.toLowerCase(key.charAt(match))){//先匹配 match++; if(match==key.length())matched=true; continue; } //匹配失败 if(match==0){//在匹配开始前允许忽略的字符 if(ArrayUtils.contains(this.ignoreChars, c)){ continue; } } //匹配失败 if(ArrayUtils.contains(ignoreChars, c)){ continue; } //匹配失败且字符不可忽略 return -1; } return matched?offset:-1; } public char[] getIgnoreChars() { return ignoreChars; } /** * 设置通用的字符忽略表 * @see #matchNext(String, char...) * @see #matchNextIgnoreCase(String, char...) * @param ignoreChars */ public void setIgnoreChars(char... ignoreChars) { this.ignoreChars = ignoreChars; } private static final char[] lb={'\n'}; /** * 读取一行数据,到\n为止 * @return 读到的文字 * @throws IOException */ public String readLine() throws IOException { if(eof())return null; char[] line=readUntilCharIs(lb); omit(1); if(line[line.length-1]=='\r'){ return new String(line,0,line.length-1); }else{ return new String(line); } } /** * 返回当前的哦啊第几个字符 * @return */ public int getOffset(){ return next; } /** * 查找直到指定的key出现,next指向key的第一个字符。如果匹配始终没找到,next指向不会后移 * @param key * @param ignorchars * @return -1,if the key was not found. */ public int omitUntillKey(String key,char...ignorchars){ int offset=next;//保留初始状态 int n; while((n=matchNext(key,ignorchars))==-1 && next<length){ next++; } if(n<0){//始终没有匹配成功 next=offset;//回滚 return -1; } return next-offset; } /** * 查找直到指定的key出现,next指向key的第一个字符。如果匹配始终没找到,next指向不会后移 * @param key * @param ignorchars * @return -1,if the key was not found. */ public int omitUntillKeyIgnoreCase(String key,char...ignorchars){ int offset=next;//保留初始状态 int n; while((n=matchNextIgnoreCase(key,ignorchars))==-1 && next<length){ next++; } if(n<0){//始终没有匹配成功 next=offset;//回滚 return -1; } return next-offset; } /** * 查找直到指定的key出现并完成,next指向key后面的第一个字符。如果匹配始终没找到,next指向不会后移 * @param key * @param ignorchars * @return */ public int omitAfterKey(String key,char...ignorchars){ int offset=next;//保留初始状态 int n; while((n=matchNext(key,ignorchars))==-1 && next<length){ next++; } if(n==-1){//始终没有匹配成功 next=offset;//回滚 }else{ next+=n; } return next-offset; } /** * 查找直到指定的key出现并完成,next指向key后面的第一个字符。如果匹配始终没找到,next指向不会后移 * @param key * @param ignorchars * @return */ public int omitAfterKeyIgnoreCase(String key,char...ignorchars){ int offset=next;//保留初始状态 int n; while((n=matchNextIgnoreCase(key,ignorchars))==-1 && next<length){ next++; } if(n==-1){//始终没有匹配成功 next=offset;//回滚 }else{ next+=n; } return next-offset; } /** *读取文本,知道出现指定的文本为止。指定的文本不含 * @param key * @return 读到的文字 * @throws IOException */ public String readUntillKey(String key,char... ignorchars){ StringBuilder sb = new StringBuilder(); while(matchNext(key,ignorchars)==-1 && next<length){ sb.append((char)readChar()); } return sb.toString(); } /** * 读到指定的字符串出现位置 * @param key * @return * @throws IOException */ public String readUntillKeyIgnoreCase(String key,char... ignorchars) { StringBuilder sb = new StringBuilder(); while(matchNextIgnoreCase(key,ignorchars)==-1 && next<length){ sb.append((char)readChar()); } return sb.toString(); } /** * 读取直到出现指定的字符串。指定的字符串作为结束符,被消耗掉 * @param endChars token结束的字符 * @return */ public String readToken(char... endChars){ StringBuilder sb = new StringBuilder(); char c; while(!eof()){ c=(char)readChar(); if(ArrayUtils.contains(endChars, c)){ return sb.toString(); } if(ArrayUtils.contains(this.ignoreChars, c)){ continue; } sb.append(c); } return sb.toString(); } /** * 消费字符串,在知道下一个词的情况下,读取下一个词 * @param key * @param ignoreChars 参见{@link #matchNext(String, char...)} */ public void consume(String key,char... ignoreChars) { int x=matchNext(key,ignoreChars); if(x>-1){ omit(x); }else{ throw new IllegalArgumentException("not expected chars:"+ key); } } /** * 消费字符串 * @param key * @param ignoreChars */ public void consumeIgnoreCase(String key,char... ignoreChars) { int x=matchNextIgnoreCase(key,ignoreChars); if(x>-1){ omit(x); }else{ throw new IllegalArgumentException("not expected chars:"+ key); } } /** * 跳过这些字符 * 效果和@{link {@link #omit(int)}相同 * @param chars */ public void consumeChars(char... chars){ omitChars(chars); } }