/* Copyright (c) 2006 Ola Bini
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/**
 * $Id: ScannerImpl.java,v 1.5 2006/09/23 21:43:30 olabini Exp $
 */
package org.jvyaml;

import java.io.ByteArrayOutputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;

import java.nio.CharBuffer;

import java.nio.channels.Channels;

import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jvyaml.tokens.*;

/**
 * <p>A Java implementation of the RbYAML scanner.</p>
 *
 * <p>The scanner reads characters either from a {@link Reader} or from a
 * string, keeps them in an internal buffer and produces tokens on demand
 * through {@link #checkToken}, {@link #peekToken} and {@link #getToken}.</p>
 *
 * @author <a href="mailto:ola.bini@ki.se">Ola Bini</a>
 * @version $Revision: 1.5 $
 */
public class ScannerImpl implements Scanner {
    // Character classes, stored as strings and queried with indexOf().
    private final static String LINEBR = "\n\u0085\u2028\u2029";
    private final static String NULL_BL_LINEBR = "\0 \r\n\u0085";
    private final static String NULL_BL_T_LINEBR = "\0 \t\r\n\u0085";
    private final static String NULL_OR_OTHER = NULL_BL_T_LINEBR;
    private final static String NULL_OR_LINEBR = "\0\r\n\u0085";
    private final static String FULL_LINEBR = "\r\n\u0085";
    private final static String BLANK_OR_LINEBR = " \r\n\u0085";
    // NOTE(review): this set (and R_FLOWZERO below) contains U+0028 '(' where the
    // other sets contain line-break characters; possibly a typo - confirm.
    private final static String S4 = "\0 \t\r\n\u0028[]{}";
    private final static String ALPHA = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ-_";
    private final static String STRANGE_CHAR = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789][-';/?:@&=+$,.!~*()%";
    private final static String RN = "\r\n";
    private final static String BLANK_T = " \t";
    private final static String SPACES_AND_STUFF = "'\"\\\0 \t\r\n\u0085";
    private final static String DOUBLE_ESC = "\"\\";
    private final static String NON_ALPHA_OR_NUM = "\0 \t\r\n\u0085?:,]}%@`";

    // Pre-compiled patterns.
    private final static Pattern NON_PRINTABLE = Pattern.compile("[^\u0009\n\r\u0020-\u007E\u0085\u00A0-\u00FF]");
    private final static Pattern NOT_HEXA = Pattern.compile("[^0-9A-Fa-f]");
    private final static Pattern NON_ALPHA = Pattern.compile("[^-0-9A-Za-z_]");
    private final static Pattern R_FLOWZERO = Pattern.compile("[\0 \t\r\n\u0085]|(:[\0 \t\r\n\u0028])");
    private final static Pattern R_FLOWNONZERO = Pattern.compile("[\0 \t\r\n\u0085\\[\\]{},:?]");
    private final static Pattern LINE_BR_REG = Pattern.compile("[\n\u0085]|(?:\r[^\n])");
    private final static Pattern END_OR_START = Pattern.compile("^(---|\\.\\.\\.)[\0 \t\r\n\u0085]$");
    // ENDING matches the "---" marker and START matches the "..." marker.
    private final static Pattern ENDING = Pattern.compile("^---[\0 \t\r\n\u0085]$");
    private final static Pattern START = Pattern.compile("^\\.\\.\\.[\0 \t\r\n\u0085]$");
    private final static Pattern BEG = Pattern.compile("^([^\0 \t\r\n\u0085\\-?:,\\[\\]{}#&*!|>'\"%@]|([\\-?:][^\0 \t\r\n\u0085]))");

    /** Single-character escapes of double-quoted scalars: Character to replacement String. */
    private final static Map ESCAPE_REPLACEMENTS = new HashMap();
    /** Numeric escapes of double-quoted scalars: Character to number of hex digits (Integer). */
    private final static Map ESCAPE_CODES = new HashMap();

    static {
        ESCAPE_REPLACEMENTS.put(new Character('0'),"\0");
        ESCAPE_REPLACEMENTS.put(new Character('a'),"\u0007");
        ESCAPE_REPLACEMENTS.put(new Character('b'),"\u0008");
        ESCAPE_REPLACEMENTS.put(new Character('t'),"\u0009");
        ESCAPE_REPLACEMENTS.put(new Character('\t'),"\u0009");
        ESCAPE_REPLACEMENTS.put(new Character('n'),"\n");
        ESCAPE_REPLACEMENTS.put(new Character('v'),"\u000B");
        ESCAPE_REPLACEMENTS.put(new Character('f'),"\u000C");
        ESCAPE_REPLACEMENTS.put(new Character('r'),"\r");
        ESCAPE_REPLACEMENTS.put(new Character('e'),"\u001B");
        ESCAPE_REPLACEMENTS.put(new Character(' '),"\u0020");
        ESCAPE_REPLACEMENTS.put(new Character('"'),"\"");
        ESCAPE_REPLACEMENTS.put(new Character('\\'),"\\");
        ESCAPE_REPLACEMENTS.put(new Character('N'),"\u0085");
        ESCAPE_REPLACEMENTS.put(new Character('_'),"\u00A0");
        ESCAPE_REPLACEMENTS.put(new Character('L'),"\u2028");
        ESCAPE_REPLACEMENTS.put(new Character('P'),"\u2029");

        ESCAPE_CODES.put(new Character('x'),new Integer(2));
        ESCAPE_CODES.put(new Character('u'),new Integer(4));
        ESCAPE_CODES.put(new Character('U'),new Integer(8));
    }

    /** Set once the stream-end token has been produced. */
    private boolean done = false;
    /** Nesting depth of flow collections; 0 means block context. */
    private int flowLevel = 0;
    /** Number of tokens handed out through getToken(). */
    private int tokensTaken = 0;
    /** Current block indentation level; -1 before any block collection. */
    private int indent = -1;
    private boolean allowSimpleKey = true;
    /** True when no more characters can be read; stays true for string input. */
    private boolean eof = true;
    private int column = 0;
    /** Index into buffer of the next character to scan. */
    private int pointer = 0;
    private StringBuffer buffer;
    private Reader stream;
    /** Tokens scanned but not yet taken. */
    private List tokens;
    /** Stack of enclosing indentation levels (Integer), innermost first. */
    private List indents;
    /** Flow level (Integer) to the SimpleKey candidate saved at that level. */
    private Map possibleSimpleKeys;
    private boolean docStart = false;

    /**
     * Creates a scanner that reads its input incrementally from a reader.
     *
     * @param stream the character source
     */
    public ScannerImpl(final Reader stream) {
        this.stream = stream;
        this.eof = false;
        this.buffer = new StringBuffer();
        this.tokens = new LinkedList();
        this.indents = new LinkedList();
        this.possibleSimpleKeys = new HashMap();
        fetchStreamStart();
    }

    /**
     * Creates a scanner over a complete in-memory document.
     *
     * @param stream the text to scan
     */
    public ScannerImpl(final String stream) {
        this.buffer = new StringBuffer(stream);
        this.stream = null;
        this.tokens = new LinkedList();
        this.indents = new LinkedList();
        this.possibleSimpleKeys = new HashMap();
        fetchStreamStart();
    }

    /**
     * Checks whether the next token is an instance of one of the given classes.
     *
     * @param choices token classes to test against; an empty array matches any token
     * @return true if a next token exists and matches
     */
    public boolean checkToken(final Class[] choices) {
        while(needMoreTokens()) {
            fetchMoreTokens();
        }
        if(!this.tokens.isEmpty()) {
            if(choices.length == 0) {
                return true;
            }
            final Object first = this.tokens.get(0);
            for(int i=0,j=choices.length;i<j;i++) {
                if(choices[i].isInstance(first)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns the next token without consuming it.
     *
     * @return the next token, or null if there are no tokens left
     */
    public Token peekToken() {
        while(needMoreTokens()) {
            fetchMoreTokens();
        }
        return (Token)(this.tokens.isEmpty() ? null : this.tokens.get(0));
    }

    /**
     * Removes and returns the next token.
     *
     * @return the next token, or null if there are no tokens left
     */
    public Token getToken() {
        while(needMoreTokens()) {
            fetchMoreTokens();
        }
        if(!this.tokens.isEmpty()) {
            this.tokensTaken++;
            return (Token)this.tokens.remove(0);
        }
        return null;
    }

    /** Iterator view over the token stream, backed by peekToken() and getToken(). */
    private class TokenIterator implements Iterator {
        public boolean hasNext() {
            return null != peekToken();
        }

        public Object next() {
            return getToken();
        }

        /** Does nothing; removal is not implemented. */
        public void remove() {
        }
    }

    /**
     * @return an iterator that consumes the tokens of this scanner
     */
    public Iterator eachToken() {
        return new TokenIterator();
    }

    /**
     * @return the same kind of iterator as {@link #eachToken()}
     */
    public Iterator iterator() {
        return eachToken();
    }

    /** Returns the character at the current position, refilling the buffer if needed. */
    private char peek() {
        if(this.pointer + 1 > this.buffer.length()) {
            update(1);
        }
        return this.buffer.charAt(this.pointer);
    }

    /** Returns the character index positions after the current one, refilling the buffer if needed. */
    private char peek(final int index) {
        if(this.pointer + index + 1 > this.buffer.length()) {
            update(index+1);
        }
        return this.buffer.charAt(this.pointer + index);
    }

    /** Returns the next character as a string without consuming it. */
    private String prefix() {
        return prefix(1);
    }

    /**
     * Returns up to length upcoming characters without consuming them. The result
     * is shorter when the buffer holds fewer characters.
     */
    private String prefix(final int length) {
        if(this.pointer + length >= this.buffer.length()) {
            update(length);
        }
        if(this.pointer+length > this.buffer.length()) {
            return this.buffer.substring(this.pointer,this.buffer.length());
        } else {
            return this.buffer.substring(this.pointer,this.pointer+length);
        }
    }

    /**
     * Returns up to length upcoming characters and consumes them, keeping the
     * column counter up to date.
     */
    private String prefixForward(final int length) {
        if(this.pointer + length + 1 >= this.buffer.length()) {
            update(length+1);
        }
        String buff = null;
        if(this.pointer+length > this.buffer.length()) {
            buff = this.buffer.substring(this.pointer,this.buffer.length());
        } else {
            buff = this.buffer.substring(this.pointer,this.pointer+length);
        }
        char ch = 0;
        for(int i=0,j=buff.length();i<j;i++) {
            ch = buff.charAt(i);
            this.pointer++;
            // Look at the buffer (not the chunk) for the character after a CR, so
            // a chunk ending in CR cannot index past the end of the chunk.
            final boolean loneCr = ch == '\r'
                && (this.pointer >= this.buffer.length() || this.buffer.charAt(this.pointer) != '\n');
            if(LINEBR.indexOf(ch) != -1 || loneCr) {
                this.column = 0;
            } else if(ch != '\uFEFF') {
                this.column++;
            }
        }
        return buff;
    }

    /** Consumes one character, resetting the column after a line break. */
    private void forward() {
        if(this.pointer + 2 >= this.buffer.length()) {
            update(2);
        }
        final char ch1 = this.buffer.charAt(this.pointer);
        this.pointer++;
        if(ch1 == '\n' || ch1 == '\u0085' || (ch1 == '\r' && this.buffer.charAt(this.pointer) != '\n')) {
            this.column = 0;
        } else {
            this.column++;
        }
    }

    /** Consumes length characters, resetting the column after each line break. */
    private void forward(final int length) {
        if(this.pointer + length + 1 >= this.buffer.length()) {
            update(length+1);
        }
        char ch = 0;
        for(int i=0;i<length;i++) {
            ch = this.buffer.charAt(this.pointer);
            this.pointer++;
            if(LINEBR.indexOf(ch) != -1 || (ch == '\r' && this.buffer.charAt(this.pointer) != '\n')) {
                this.column = 0;
            } else if(ch != '\uFEFF') {
                this.column++;
            }
        }
    }

    /**
     * Rejects input containing characters outside the NON_PRINTABLE whitelist.
     *
     * @throws YAMLException if such a character is found
     */
    private void checkPrintable(final CharSequence data) {
        final Matcher em = NON_PRINTABLE.matcher(data);
        if(em.find()) {
            final int position = this.buffer.length() - this.pointer + em.start();
            throw new YAMLException("At " + position + " we found: " + em.group() + ". Special characters are not allowed");
        }
    }

    /**
     * Drops the consumed part of the buffer and reads from the stream until at
     * least length characters are available. At end of input a NUL character is
     * appended as terminator.
     *
     * @throws YAMLException if reading fails or non-printable input is found
     */
    private void update(final int length) {
        this.buffer.delete(0,this.pointer);
        this.pointer = 0;
        while(this.buffer.length() < length) {
            String rawData = "";
            if(!this.eof) {
                char[] data = new char[1024];
                int converted = -2;
                try {
                    converted = this.stream.read(data);
                } catch(final IOException ioe) {
                    throw new YAMLException(ioe);
                }
                if(converted == -1) {
                    this.eof = true;
                } else {
                    rawData = String.valueOf(data,0,converted);
                }
            }
            checkPrintable(rawData);
            this.buffer.append(rawData);
            if(this.eof) {
                this.buffer.append('\0');
                break;
            }
        }
    }

    /**
     * Tells whether more tokens must be scanned before the head of the queue can
     * be handed out: the queue is empty, or the head may still turn out to be a
     * simple key.
     */
    private boolean needMoreTokens() {
        if(this.done) {
            return false;
        }
        return this.tokens.isEmpty() || nextPossibleSimpleKey() == this.tokensTaken;
    }

    /**
     * Skips to the next token, closes finished block levels and dispatches on the
     * first character to the matching fetch method.
     *
     * @return the token that was fetched
     * @throws ScannerException if the character cannot start any token
     */
    private Token fetchMoreTokens() {
        scanToNextToken();
        unwindIndent(this.column);
        final char ch = peek();
        final boolean colz = this.column == 0;
        switch(ch) {
        case '\0':
            return fetchStreamEnd();
        case '\'':
            return fetchSingle();
        case '"':
            return fetchDouble();
        case '?':
            if(this.flowLevel != 0 || NULL_OR_OTHER.indexOf(peek(1)) != -1) {
                return fetchKey();
            }
            break;
        case ':':
            if(this.flowLevel != 0 || NULL_OR_OTHER.indexOf(peek(1)) != -1) {
                return fetchValue();
            }
            break;
        case '%':
            if(colz) {
                return fetchDirective();
            }
            break;
        case '-':
            if((colz || docStart) && ENDING.matcher(prefix(4)).matches()) {
                return fetchDocumentStart();
            } else if(NULL_OR_OTHER.indexOf(peek(1)) != -1) {
                return fetchBlockEntry();
            }
            break;
        case '.':
            if(colz && START.matcher(prefix(4)).matches()) {
                return fetchDocumentEnd();
            }
            break;
        case '[':
            return fetchFlowSequenceStart();
        case '{':
            return fetchFlowMappingStart();
        case ']':
            return fetchFlowSequenceEnd();
        case '}':
            return fetchFlowMappingEnd();
        case ',':
            return fetchFlowEntry();
        case '*':
            return fetchAlias();
        case '&':
            return fetchAnchor();
        case '!':
            return fetchTag();
        case '|':
            if(this.flowLevel == 0) {
                return fetchLiteral();
            }
            break;
        case '>':
            if(this.flowLevel == 0) {
                return fetchFolded();
            }
            break;
        }
        if(BEG.matcher(prefix(2)).find()) {
            return fetchPlain();
        }
        throw new ScannerException("while scanning for the next token","found character " + ch + "(" + ((int)ch) + ") that cannot start any token",null);
    }

    /**
     * Returns the token number of the first saved simple-key candidate whose
     * token number is positive, or -1 if there is none.
     */
    private int nextPossibleSimpleKey() {
        for(final Iterator iter = this.possibleSimpleKeys.values().iterator();iter.hasNext();) {
            final SimpleKey key = (SimpleKey)iter.next();
            if(key.getTokenNumber() > 0) {
                return key.getTokenNumber();
            }
        }
        return -1;
    }

    /** Records the upcoming token as a simple-key candidate for the current flow level, if allowed. */
    private void savePossibleSimpleKey() {
        if(this.allowSimpleKey) {
            this.possibleSimpleKeys.put(new Integer(this.flowLevel),new SimpleKey(this.tokensTaken+this.tokens.size(),(this.flowLevel == 0) && this.indent == this.column,-1,-1,this.column));
        }
    }

    /** In block context, pops indentation levels deeper than col, emitting a BLOCK_END for each. */
    private void unwindIndent(final int col) {
        if(this.flowLevel != 0) {
            return;
        }

        while(this.indent > col) {
            this.indent = ((Integer)(this.indents.remove(0))).intValue();
            this.tokens.add(Token.BLOCK_END);
        }
    }

    /**
     * Pushes a new indentation level if col is deeper than the current one.
     *
     * @return true if a level was pushed
     */
    private boolean addIndent(final int col) {
        if(this.indent < col) {
            this.indents.add(0,new Integer(this.indent));
            this.indent = col;
            return true;
        }
        return false;
    }

    private Token fetchStreamStart() {
        this.docStart = true;
        this.tokens.add(Token.STREAM_START);
        return Token.STREAM_START;
    }

    /** Closes all open block levels, emits STREAM_END and marks the scanner as done. */
    private Token fetchStreamEnd() {
        unwindIndent(-1);
        this.allowSimpleKey = false;
        this.possibleSimpleKeys = new HashMap();
        this.tokens.add(Token.STREAM_END);
        this.done = true;
        return Token.STREAM_END;
    }

    private Token fetchDirective() {
        unwindIndent(-1);
        this.allowSimpleKey = false;
        final Token tok = scanDirective();
        this.tokens.add(tok);
        return tok;
    }

    private Token fetchDocumentStart() {
        this.docStart = false;
        return fetchDocumentIndicator(Token.DOCUMENT_START);
    }

    private Token fetchDocumentEnd() {
        return fetchDocumentIndicator(Token.DOCUMENT_END);
    }

    /** Closes all block levels, consumes the three-character marker and emits tok. */
    private Token fetchDocumentIndicator(final Token tok) {
        unwindIndent(-1);
        this.allowSimpleKey = false;
        forward(3);
        this.tokens.add(tok);
        return tok;
    }

    private Token fetchFlowSequenceStart() {
        return fetchFlowCollectionStart(Token.FLOW_SEQUENCE_START);
    }

    private Token fetchFlowMappingStart() {
        return fetchFlowCollectionStart(Token.FLOW_MAPPING_START);
    }

    /** Enters a flow collection: the opening bracket itself may start a simple key. */
    private Token fetchFlowCollectionStart(final Token tok) {
        savePossibleSimpleKey();
        this.flowLevel++;
        this.allowSimpleKey = true;
        forward(1);
        this.tokens.add(tok);
        return tok;
    }

    private Token fetchFlowSequenceEnd() {
        return fetchFlowCollectionEnd(Token.FLOW_SEQUENCE_END);
    }

    private Token fetchFlowMappingEnd() {
        return fetchFlowCollectionEnd(Token.FLOW_MAPPING_END);
    }

    /** Leaves a flow collection and emits tok. */
    private Token fetchFlowCollectionEnd(final Token tok) {
        this.flowLevel--;
        this.allowSimpleKey = false;
        forward(1);
        this.tokens.add(tok);
        return tok;
    }

    private Token fetchFlowEntry() {
        this.allowSimpleKey = true;
        forward(1);
        this.tokens.add(Token.FLOW_ENTRY);
        return Token.FLOW_ENTRY;
    }

    /**
     * Emits BLOCK_ENTRY, preceded by BLOCK_SEQUENCE_START when a new indentation
     * level is opened.
     *
     * @throws ScannerException if a sequence entry is not allowed at this position
     */
    private Token fetchBlockEntry() {
        if(this.flowLevel == 0) {
            if(!this.allowSimpleKey) {
                throw new ScannerException(null,"sequence entries are not allowed here",null);
            }
            if(addIndent(this.column)) {
                this.tokens.add(Token.BLOCK_SEQUENCE_START);
            }
        }
        this.allowSimpleKey = true;
        forward();
        this.tokens.add(Token.BLOCK_ENTRY);
        return Token.BLOCK_ENTRY;
    }

    /**
     * Emits KEY, preceded by BLOCK_MAPPING_START when a new indentation level is
     * opened.
     *
     * @throws ScannerException if a mapping key is not allowed at this position
     */
    private Token fetchKey() {
        if(this.flowLevel == 0) {
            if(!this.allowSimpleKey) {
                throw new ScannerException(null,"mapping keys are not allowed here",null);
            }
            if(addIndent(this.column)) {
                this.tokens.add(Token.BLOCK_MAPPING_START);
            }
        }
        this.allowSimpleKey = this.flowLevel == 0;
        forward();
        this.tokens.add(Token.KEY);
        return Token.KEY;
    }

    /**
     * Emits VALUE. If a simple-key candidate exists for the current flow level, a
     * KEY token (and BLOCK_MAPPING_START if a new level opens) is inserted
     * retroactively at the candidate's position.
     *
     * @throws ScannerException if a mapping value is not allowed at this position
     */
    private Token fetchValue() {
        final SimpleKey key = (SimpleKey)this.possibleSimpleKeys.get(new Integer(this.flowLevel));
        if(null == key) {
            if(this.flowLevel == 0 && !this.allowSimpleKey) {
                throw new ScannerException(null,"mapping values are not allowed here",null);
            }
        } else {
            this.possibleSimpleKeys.remove(new Integer(this.flowLevel));
            this.tokens.add(key.getTokenNumber()-this.tokensTaken,Token.KEY);
            if(this.flowLevel == 0 && addIndent(key.getColumn())) {
                this.tokens.add(key.getTokenNumber()-this.tokensTaken,Token.BLOCK_MAPPING_START);
            }
            this.allowSimpleKey = false;
        }
        forward();
        this.tokens.add(Token.VALUE);
        return Token.VALUE;
    }

    private Token fetchAlias() {
        savePossibleSimpleKey();
        this.allowSimpleKey = false;
        final Token tok = scanAnchor(new AliasToken());
        this.tokens.add(tok);
        return tok;
    }

    private Token fetchAnchor() {
        savePossibleSimpleKey();
        this.allowSimpleKey = false;
        final Token tok = scanAnchor(new AnchorToken());
        this.tokens.add(tok);
        return tok;
    }

    private Token fetchTag() {
        savePossibleSimpleKey();
        this.allowSimpleKey = false;
        final Token tok = scanTag();
        this.tokens.add(tok);
        return tok;
    }

    private Token fetchLiteral() {
        return fetchBlockScalar('|');
    }

    private Token fetchFolded() {
        return fetchBlockScalar('>');
    }

    private Token fetchBlockScalar(final char style) {
        this.allowSimpleKey = true;
        final Token tok = scanBlockScalar(style);
        this.tokens.add(tok);
        return tok;
    }

    private Token fetchSingle() {
        return fetchFlowScalar('\'');
    }

    private Token fetchDouble() {
        return fetchFlowScalar('"');
    }

    private Token fetchFlowScalar(final char style) {
        savePossibleSimpleKey();
        this.allowSimpleKey = false;
        final Token tok = scanFlowScalar(style);
        this.tokens.add(tok);
        return tok;
    }

    private Token fetchPlain() {
        savePossibleSimpleKey();
        this.allowSimpleKey = false;
        final Token tok = scanPlain();
        this.tokens.add(tok);
        return tok;
    }

    /**
     * Skips spaces, comments and line breaks. In block context every line break
     * re-enables simple keys.
     */
    private void scanToNextToken() {
        for(;;) {
            while(peek() == ' ') {
                forward();
            }
            if(peek() == '#') {
                while(NULL_OR_LINEBR.indexOf(peek()) == -1) {
                    forward();
                }
            }
            if(scanLineBreak().length() != 0 ) {
                if(this.flowLevel == 0) {
                    this.allowSimpleKey = true;
                }
            } else {
                break;
            }
        }
    }

    /**
     * Scans a directive line. YAML and TAG directives get their values parsed;
     * the rest of any other directive is skipped.
     */
    private Token scanDirective() {
        forward();
        final String name = scanDirectiveName();
        String[] value = null;
        if(name.equals("YAML")) {
            value = scanYamlDirectiveValue();
        } else if(name.equals("TAG")) {
            value = scanTagDirectiveValue();
        } else {
            while(NULL_OR_LINEBR.indexOf(peek()) == -1) {
                forward();
            }
        }
        scanDirectiveIgnoredLine();
        return new DirectiveToken(name,value);
    }

    /**
     * Scans the directive name, a non-empty run of ALPHA characters.
     *
     * @throws ScannerException if the name is empty or followed by an unexpected character
     */
    private String scanDirectiveName() {
        int length = 0;
        char ch = peek(length);
        boolean zlen = true;
        while(ALPHA.indexOf(ch) != -1) {
            zlen = false;
            length++;
            ch = peek(length);
        }
        if(zlen) {
            throw new ScannerException("while scanning a directive","expected alphabetic or numeric character, but found " + ch + "(" + ((int)ch) + ")",null);
        }
        final String value = prefixForward(length);
        if(NULL_BL_LINEBR.indexOf(peek()) == -1) {
            throw new ScannerException("while scanning a directive","expected alphabetic or numeric character, but found " + ch + "(" + ((int)ch) + ")",null);
        }
        return value;
    }

    /**
     * Scans the value of a YAML directive.
     *
     * @return a two-element array holding the major and minor version strings
     */
    private String[] scanYamlDirectiveValue() {
        while(peek() == ' ') {
            forward();
        }
        final String major = scanYamlDirectiveNumber();
        if(peek() != '.') {
            throw new ScannerException("while scanning a directive","expected a digit or '.', but found " + peek() + "(" + ((int)peek()) + ")",null);
        }
        forward();
        final String minor = scanYamlDirectiveNumber();
        if(NULL_BL_LINEBR.indexOf(peek()) == -1) {
            throw new ScannerException("while scanning a directive","expected a digit or ' ', but found " + peek() + "(" + ((int)peek()) + ")",null);
        }
        return new String[] {major,minor};
    }

    /** Scans a non-empty run of digits. */
    private String scanYamlDirectiveNumber() {
        final char ch = peek();
        if(!Character.isDigit(ch)) {
            throw new ScannerException("while scanning a directive","expected a digit, but found " + ch + "(" + ((int)ch) + ")",null);
        }
        int length = 0;
        while(Character.isDigit(peek(length))) {
            length++;
        }
        final String value = prefixForward(length);
        return value;
    }

    /**
     * Scans the value of a TAG directive.
     *
     * @return a two-element array holding the handle and the prefix
     */
    private String[] scanTagDirectiveValue() {
        while(peek() == ' ') {
            forward();
        }
        final String handle = scanTagDirectiveHandle();
        while(peek() == ' ') {
            forward();
        }
        final String prefix = scanTagDirectivePrefix();
        return new String[] {handle,prefix};
    }

    private String scanTagDirectiveHandle() {
        final String value = scanTagHandle("directive");
        if(peek() != ' ') {
            throw new ScannerException("while scanning a directive","expected ' ', but found " + peek() + "(" + ((int)peek()) + ")",null);
        }
        return value;
    }

    private String scanTagDirectivePrefix() {
        final String value = scanTagUri("directive");
        if(NULL_BL_LINEBR.indexOf(peek()) == -1) {
            throw new ScannerException("while scanning a directive","expected ' ', but found " + peek() + "(" + ((int)peek()) + ")",null);
        }
        return value;
    }

    /**
     * Consumes the rest of a directive line: optional spaces, an optional comment
     * and the line break.
     *
     * @throws ScannerException if anything else follows the directive
     */
    private String scanDirectiveIgnoredLine() {
        while(peek() == ' ') {
            forward();
        }
        // A comment starts with '#', as in scanBlockScalarIgnoredLine().
        if(peek() == '#') {
            while(NULL_OR_LINEBR.indexOf(peek()) == -1) {
                forward();
            }
        }
        final char ch = peek();
        if(NULL_OR_LINEBR.indexOf(ch) == -1) {
            throw new ScannerException("while scanning a directive","expected a comment or a line break, but found " + peek() + "(" + ((int)peek()) + ")",null);
        }
        return scanLineBreak();
    }

    /**
     * Scans an anchor or alias name and stores it in the given token.
     *
     * @param tok the token to fill in
     * @return tok, with its value set
     * @throws ScannerException if the name is empty or followed by an unexpected character
     */
    private Token scanAnchor(final Token tok) {
        final char indicator = peek();
        final String name = indicator == '*' ? "alias" : "anchor";
        forward();
        int length = 0;
        int chunk_size = 16;
        Matcher m = null;
        // Grow the look-ahead window until a non-name character is inside it.
        for(;;) {
            final String chunk = prefix(chunk_size);
            if((m = NON_ALPHA.matcher(chunk)).find()) {
                break;
            }
            chunk_size+=16;
        }
        length = m.start();
        if(length == 0) {
            throw new ScannerException("while scanning an " + name,"expected alphabetic or numeric character, but found something else...",null);
        }
        final String value = prefixForward(length);
        if(NON_ALPHA_OR_NUM.indexOf(peek()) == -1) {
            throw new ScannerException("while scanning an " + name,"expected alphabetic or numeric character, but found "+ peek() + "(" + ((int)peek()) + ")",null);
        }
        tok.setValue(value);
        return tok;
    }

    /**
     * Scans a tag in one of the forms "!&lt;uri&gt;", "!", "!suffix" or
     * "!handle!suffix".
     *
     * @return a TagToken holding the handle (possibly null) and the suffix
     */
    private Token scanTag() {
        char ch = peek(1);
        String handle = null;
        String suffix = null;
        if(ch == '<') {
            forward(2);
            suffix = scanTagUri("tag");
            if(peek() != '>') {
                throw new ScannerException("while scanning a tag","expected '>', but found "+ peek() + "(" + ((int)peek()) + ")",null);
            }
            forward();
        } else if(NULL_BL_T_LINEBR.indexOf(ch) != -1) {
            suffix = "!";
            forward();
        } else {
            int length = 1;
            boolean useHandle = false;
            // A second '!' before the next blank means the tag has a named handle.
            while(NULL_BL_T_LINEBR.indexOf(ch) == -1) {
                if(ch == '!') {
                    useHandle = true;
                    break;
                }
                length++;
                ch = peek(length);
            }
            if(useHandle) {
                handle = scanTagHandle("tag");
            } else {
                handle = "!";
                forward();
            }
            suffix = scanTagUri("tag");
        }
        if(NULL_BL_LINEBR.indexOf(peek()) == -1) {
            throw new ScannerException("while scanning a tag","expected ' ', but found " + peek() + "(" + ((int)peek()) + ")",null);
        }
        return new TagToken(new String[] {handle,suffix});
    }

    /**
     * Scans a literal ('|') or folded ('>') block scalar.
     *
     * <p>Trailing line breaks follow the chomping indicator: "+" keeps the last
     * line break and all trailing blank lines, "-" drops them all, and without an
     * indicator only the last line break is kept.</p>
     *
     * @param style '|' or '>'
     */
    private Token scanBlockScalar(final char style) {
        final boolean folded = style == '>';
        final StringBuffer chunks = new StringBuffer();
        forward();
        final Object[] chompi = scanBlockScalarIndicators();
        // null = no indicator (clip), TRUE = '+' (keep), FALSE = '-' (strip)
        final Boolean chomping = (Boolean)chompi[0];
        final int increment = ((Integer)chompi[1]).intValue();
        scanBlockScalarIgnoredLine();
        int minIndent = this.indent+1;
        if(minIndent < 1) {
            minIndent = 1;
        }
        String breaks = null;
        int maxIndent = 0;
        int ind = 0;
        if(increment == -1) {
            // No explicit indentation indicator: detect it from the content.
            final Object[] brme = scanBlockScalarIndentation();
            breaks = (String)brme[0];
            maxIndent = ((Integer)brme[1]).intValue();
            if(minIndent > maxIndent) {
                ind = minIndent;
            } else {
                ind = maxIndent;
            }
        } else {
            ind = minIndent + increment - 1;
            breaks = scanBlockScalarBreaks(ind);
        }

        String lineBreak = "";
        while(this.column == ind && peek() != '\0') {
            chunks.append(breaks);
            final boolean leadingNonSpace = BLANK_T.indexOf(peek()) == -1;
            int length = 0;
            while(NULL_OR_LINEBR.indexOf(peek(length))==-1) {
                length++;
            }
            chunks.append(prefixForward(length));
            lineBreak = scanLineBreak();
            breaks = scanBlockScalarBreaks(ind);
            if(this.column == ind && peek() != '\0') {
                if(folded && lineBreak.equals("\n") && leadingNonSpace && BLANK_T.indexOf(peek()) == -1) {
                    // Folding: a single break between two text lines becomes a space.
                    if(breaks.length() == 0) {
                        chunks.append(" ");
                    }
                } else {
                    chunks.append(lineBreak);
                }
            } else {
                break;
            }
        }

        final boolean keep = chomping != null && chomping.booleanValue();
        final boolean strip = chomping != null && !chomping.booleanValue();
        if(!strip) {
            chunks.append(lineBreak);
        }
        if(keep) {
            chunks.append(breaks);
        }

        return new ScalarToken(chunks.toString(),false,style);
    }

    /**
     * Scans the optional chomping ('+' or '-') and indentation (1-9) indicators
     * of a block scalar header, in either order.
     *
     * @return a two-element array: a Boolean chomping flag (TRUE for '+', FALSE
     *         for '-', null when absent) and an Integer indentation increment
     *         (-1 when absent)
     * @throws ScannerException on a zero indentation indicator or an unexpected character
     */
    private Object[] scanBlockScalarIndicators() {
        Boolean chomping = null;
        int increment = -1;
        char ch = peek();
        if(ch == '-' || ch == '+') {
            chomping = Boolean.valueOf(ch == '+');
            forward();
            ch = peek();
            if(Character.isDigit(ch)) {
                increment = Integer.parseInt((""+ch));
                if(increment == 0) {
                    throw new ScannerException("while scanning a block scalar","expected indentation indicator in the range 1-9, but found 0",null);
                }
                forward();
            }
        } else if(Character.isDigit(ch)) {
            increment = Integer.parseInt((""+ch));
            if(increment == 0) {
                throw new ScannerException("while scanning a block scalar","expected indentation indicator in the range 1-9, but found 0",null);
            }
            forward();
            ch = peek();
            if(ch == '-' || ch == '+') {
                chomping = Boolean.valueOf(ch == '+');
                forward();
            }
        }
        if(NULL_BL_LINEBR.indexOf(peek()) == -1) {
            throw new ScannerException("while scanning a block scalar","expected chomping or indentation indicators, but found " + peek() + "(" + ((int)peek()) + ")",null);
        }
        return new Object[] {chomping,new Integer(increment)};
    }

    /**
     * Consumes the rest of a block scalar header line: optional spaces, an
     * optional comment and the line break.
     *
     * @throws ScannerException if anything else follows the header
     */
    private String scanBlockScalarIgnoredLine() {
        while(peek() == ' ') {
            forward();
        }
        if(peek() == '#') {
            while(NULL_OR_LINEBR.indexOf(peek()) == -1) {
                forward();
            }
        }
        if(NULL_OR_LINEBR.indexOf(peek()) == -1) {
            throw new ScannerException("while scanning a block scalar","expected a comment or a line break, but found " + peek() + "(" + ((int)peek()) + ")",null);
        }
        return scanLineBreak();
    }

    /**
     * Skips leading spaces and line breaks of a block scalar.
     *
     * @return a two-element array: the collected line breaks (String) and the
     *         largest column reached while skipping spaces (Integer)
     */
    private Object[] scanBlockScalarIndentation() {
        final StringBuffer chunks = new StringBuffer();
        int maxIndent = 0;
        while(BLANK_OR_LINEBR.indexOf(peek()) != -1) {
            if(peek() != ' ') {
                chunks.append(scanLineBreak());
            } else {
                forward();
                if(this.column > maxIndent) {
                    maxIndent = column;
                }
            }
        }
        return new Object[] {chunks.toString(),new Integer(maxIndent)};
    }

    /**
     * Skips indentation spaces up to the given column and collects the line
     * breaks of any blank lines.
     */
    private String scanBlockScalarBreaks(final int indent) {
        final StringBuffer chunks = new StringBuffer();
        while(this.column < indent && peek() == ' ') {
            forward();
        }
        while(FULL_LINEBR.indexOf(peek()) != -1) {
            chunks.append(scanLineBreak());
            while(this.column < indent && peek() == ' ') {
                forward();
            }
        }
        return chunks.toString();
    }

    /**
     * Scans a single- or double-quoted scalar, including both quotes.
     *
     * @param style the quote character, '\'' or '"'
     */
    private Token scanFlowScalar(final char style) {
        final boolean dbl = style == '"';
        final StringBuffer chunks = new StringBuffer();
        final char quote = peek();
        forward();
        chunks.append(scanFlowScalarNonSpaces(dbl));
        while(peek() != quote) {
            chunks.append(scanFlowScalarSpaces());
            chunks.append(scanFlowScalarNonSpaces(dbl));
        }
        forward();
        return new ScalarToken(chunks.toString(),false,style);
    }

    /**
     * Scans the non-blank part of a quoted scalar, resolving doubled single
     * quotes and, in double-quoted scalars, backslash escapes.
     *
     * @param dbl true when scanning a double-quoted scalar
     * @throws ScannerException on a malformed or unknown escape sequence
     */
    private String scanFlowScalarNonSpaces(final boolean dbl) {
        final StringBuffer chunks = new StringBuffer();
        for(;;) {
            int length = 0;
            while(SPACES_AND_STUFF.indexOf(peek(length)) == -1) {
                length++;
            }
            if(length != 0) {
                chunks.append(prefixForward(length));
            }
            char ch = peek();
            if(!dbl && ch == '\'' && peek(1) == '\'') {
                chunks.append("'");
                forward(2);
            } else if((dbl && ch == '\'') || (!dbl && DOUBLE_ESC.indexOf(ch) != -1)) {
                chunks.append(ch);
                forward();
            } else if(dbl && ch == '\\') {
                forward();
                ch = peek();
                if(ESCAPE_REPLACEMENTS.containsKey(new Character(ch))) {
                    chunks.append(ESCAPE_REPLACEMENTS.get(new Character(ch)));
                    forward();
                } else if(ESCAPE_CODES.containsKey(new Character(ch))) {
                    length = ((Integer)ESCAPE_CODES.get(new Character(ch))).intValue();
                    forward();
                    final String val = prefix(length);
                    if(val.length() != length || NOT_HEXA.matcher(val).find()) {
                        throw new ScannerException("while scanning a double-quoted scalar","expected escape sequence of " + length + " hexadecimal numbers, but found something else: " + val,null);
                    }
                    // Append the character itself, not the decimal text of its code.
                    // Parsed as long because 8 hex digits can exceed the int range.
                    appendCodePoint(chunks,Long.parseLong(val,16));
                    forward(length);
                } else if(FULL_LINEBR.indexOf(ch) != -1) {
                    // Escaped line break: the break itself is dropped.
                    scanLineBreak();
                    chunks.append(scanFlowScalarBreaks());
                } else {
                    throw new ScannerException("while scanning a double-quoted scalar","found unknown escape character " + ch + "(" + ((int)ch) + ")",null);
                }
            } else {
                return chunks.toString();
            }
        }
    }

    /**
     * Appends the character with the given Unicode code point, using a surrogate
     * pair for code points above the 16-bit range.
     *
     * @throws ScannerException if the code point is above 0x10FFFF
     */
    private static void appendCodePoint(final StringBuffer target, final long code) {
        if(code <= 0xFFFFL) {
            target.append((char)code);
        } else if(code <= 0x10FFFFL) {
            final int offset = (int)(code - 0x10000L);
            target.append((char)(0xD800 + (offset >> 10)));
            target.append((char)(0xDC00 + (offset & 0x3FF)));
        } else {
            throw new ScannerException("while scanning a double-quoted scalar","found escape sequence for invalid Unicode code point 0x" + Long.toHexString(code),null);
        }
    }

    /**
     * Scans blanks and line breaks inside a quoted scalar. A single line break
     * folds into a space; further breaks are kept.
     *
     * @throws ScannerException if the stream ends inside the scalar
     */
    private String scanFlowScalarSpaces() {
        final StringBuffer chunks = new StringBuffer();
        int length = 0;
        while(BLANK_T.indexOf(peek(length)) != -1) {
            length++;
        }
        final String whitespaces = prefixForward(length);
        char ch = peek();
        if(ch == '\0') {
            throw new ScannerException("while scanning a quoted scalar","found unexpected end of stream",null);
        } else if(FULL_LINEBR.indexOf(ch) != -1) {
            final String lineBreak = scanLineBreak();
            final String breaks = scanFlowScalarBreaks();
            if(!lineBreak.equals("\n")) {
                chunks.append(lineBreak);
            } else if(breaks.length() == 0) {
                chunks.append(" ");
            }
            chunks.append(breaks);
        } else {
            chunks.append(whitespaces);
        }
        return chunks.toString();
    }

    /**
     * Collects consecutive line breaks inside a quoted scalar, skipping blanks at
     * the start of each line.
     *
     * @throws ScannerException if a document separator is found
     */
    private String scanFlowScalarBreaks() {
        final StringBuffer chunks = new StringBuffer();
        String pre = null;
        for(;;) {
            pre = prefix(3);
            if((pre.equals("---") || pre.equals("...")) && NULL_BL_T_LINEBR.indexOf(peek(3)) != -1) {
                throw new ScannerException("while scanning a quoted scalar","found unexpected document separator",null);
            }
            while(BLANK_T.indexOf(peek()) != -1) {
                forward();
            }
            if(FULL_LINEBR.indexOf(peek()) != -1) {
                chunks.append(scanLineBreak());
            } else {
                return chunks.toString();
            }
        }
    }

    /**
     * Scans a plain (unquoted) scalar.
     *
     * <p>See the specification for details. We add an additional restriction for
     * the flow context: plain scalars in the flow context cannot contain ',',
     * ':' and '?'. We also keep track of the allowSimpleKey flag here.
     * Indentation rules are loosened for the flow context.</p>
     *
     * @throws ScannerException on an unexpected ':' in flow context
     */
    private Token scanPlain() {
        final StringBuffer chunks = new StringBuffer();
        final int ind = this.indent+1;
        String spaces = "";
        boolean f_nzero = true;
        Pattern r_check = R_FLOWNONZERO;
        if(this.flowLevel == 0) {
            f_nzero = false;
            r_check = R_FLOWZERO;
        }
        while(peek() != '#') {
            int length = 0;
            int chunkSize = 32;
            Matcher m = null;
            // Grow the look-ahead window until a terminating character is inside it.
            while(!(m = r_check.matcher(prefix(chunkSize))).find()) {
                chunkSize += 32;
            }
            length = m.start();
            final char ch = peek(length);
            if(f_nzero && ch == ':' && S4.indexOf(peek(length+1)) == -1) {
                forward(length);
                throw new ScannerException("while scanning a plain scalar","found unexpected ':'","Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.");
            }
            if(length == 0) {
                break;
            }
            this.allowSimpleKey = false;
            chunks.append(spaces);
            chunks.append(prefixForward(length));
            spaces = scanPlainSpaces(ind);
            if(spaces == null || (this.flowLevel == 0 && this.column < ind)) {
                break;
            }
        }
        return new ScalarToken(chunks.toString(),true);
    }

    /**
     * Scans the spaces and line breaks that follow a chunk of a plain scalar.
     *
     * @return the text to insert before the next chunk; an empty string when a
     *         document marker follows a line break
     */
    private String scanPlainSpaces(final int indent) {
        final StringBuffer chunks = new StringBuffer();
        int length = 0;
        while(peek(length) == ' ') {
            length++;
        }
        final String whitespaces = prefixForward(length);
        char ch = peek();
        if(FULL_LINEBR.indexOf(ch) != -1) {
            final String lineBreak = scanLineBreak();
            this.allowSimpleKey = true;
            if(END_OR_START.matcher(prefix(4)).matches()) {
                return "";
            }
            final StringBuffer breaks = new StringBuffer();
            while(BLANK_OR_LINEBR.indexOf(peek()) != -1) {
                if(' ' == peek()) {
                    forward();
                } else {
                    breaks.append(scanLineBreak());
                    if(END_OR_START.matcher(prefix(4)).matches()) {
                        return "";
                    }
                }
            }
            if(!lineBreak.equals("\n")) {
                chunks.append(lineBreak);
            } else if(breaks.length() == 0) {
                chunks.append(" ");
            }
            chunks.append(breaks);
        } else {
            chunks.append(whitespaces);
        }
        return chunks.toString();
    }

    /**
     * Scans a tag handle: "!", or "!" followed by ALPHA characters and a closing "!".
     *
     * @param name what is being scanned, used in error messages
     * @throws ScannerException if the handle is malformed
     */
    private String scanTagHandle(final String name) {
        char ch = peek();
        if(ch != '!') {
            throw new ScannerException("while scanning a " + name,"expected '!', but found " + ch + "(" + ((int)ch) + ")",null);
        }
        int length = 1;
        ch = peek(length);
        if(ch != ' ') {
            while(ALPHA.indexOf(ch) != -1) {
                length++;
                ch = peek(length);
            }
            if('!' != ch) {
                forward(length);
                throw new ScannerException("while scanning a " + name,"expected '!', but found " + ch + "(" + ((int)ch) + ")",null);
            }
            length++;
        }
        final String value = prefixForward(length);
        return value;
    }

    /**
     * Scans a tag URI, decoding percent escapes.
     *
     * @param name what is being scanned, used in error messages
     * @throws ScannerException if no URI characters are found
     */
    private String scanTagUri(final String name) {
        final StringBuffer chunks = new StringBuffer();
        int length = 0;
        char ch = peek(length);
        while(STRANGE_CHAR.indexOf(ch) != -1) {
            if('%' == ch) {
                chunks.append(prefixForward(length));
                length = 0;
                chunks.append(scanUriEscapes(name));
            } else {
                length++;
            }
            ch = peek(length);
        }
        if(length != 0) {
            chunks.append(prefixForward(length));
        }

        if(chunks.length() == 0) {
            throw new ScannerException("while scanning a " + name,"expected URI, but found " + ch + "(" + ((int)ch) + ")",null);
        }
        return chunks.toString();
    }

    /**
     * Scans a run of percent escapes and decodes the resulting bytes as UTF-8.
     *
     * @param name what is being scanned, used in error messages
     * @throws ScannerException if a '%' is not followed by two hexadecimal digits
     */
    private String scanUriEscapes(final String name) {
        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        while(peek() == '%') {
            forward();
            final String hex = prefix(2);
            // Validate explicitly: Integer.parseInt would also accept a sign.
            if(hex.length() != 2 || NOT_HEXA.matcher(hex).find()) {
                throw new ScannerException("while scanning a " + name,"expected URI escape sequence of 2 hexadecimal numbers, but found " + peek() + "(" + ((int)peek()) + ") and "+ peek(1) + "(" + ((int)peek(1)) + ")",null);
            }
            bytes.write(Integer.parseInt(hex,16));
            forward(2);
        }
        try {
            return new String(bytes.toByteArray(),"UTF-8");
        } catch(final UnsupportedEncodingException uee) {
            throw new YAMLException(uee);
        }
    }

    /**
     * Consumes one line break, if present, and normalizes it.
     *
     * Transforms:
     *   '\r\n'      :   '\n'
     *   '\r'        :   '\n'
     *   '\n'        :   '\n'
     *   '\x85'      :   '\n'
     *   default     :   ''
     */
    private String scanLineBreak() {
        final char val = peek();
        if(FULL_LINEBR.indexOf(val) != -1) {
            if(RN.equals(prefix(2))) {
                forward(2);
            } else {
                forward();
            }
            return "\n";
        } else {
            return "";
        }
    }

    /**
     * Reads the file named by the first argument, prints every token found in it
     * and reports how long the scan took.
     *
     * @param args args[0] is the path of the file to scan
     * @throws Exception if the file cannot be read
     */
    public static void main(final String[] args) throws Exception {
        final String filename = args[0];
        System.out.println("Reading of file: \"" + filename + "\"");

        final StringBuffer input = new StringBuffer();
        final Reader reader = new FileReader(filename);
        try {
            final char[] buff = new char[1024];
            int read;
            // Read until end of stream; a short read does not mean the file is done.
            while((read = reader.read(buff)) != -1) {
                input.append(buff,0,read);
            }
        } finally {
            reader.close();
        }
        final String str = input.toString();

        final long before = System.currentTimeMillis();
        final Scanner sce2 = new ScannerImpl(str);
        for(final Iterator iter = sce2.eachToken();iter.hasNext();) {
            System.out.println(iter.next());
        }
        final long after = System.currentTimeMillis();

        final long time = after-before;
        final double timeS = (after-before)/1000.0;
        System.out.println("Walking through the tokens for the file: " + filename + " took " + time + "ms, or " + timeS + " seconds");
    }
}// Scanner