/** * Copyright (c) 2008-2011, http://www.snakeyaml.org * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.pyyaml; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.yaml.snakeyaml.error.Mark; import org.yaml.snakeyaml.nodes.Tag; import org.yaml.snakeyaml.scanner.Scanner; import org.yaml.snakeyaml.scanner.ScannerImpl; import org.yaml.snakeyaml.tokens.AliasToken; import org.yaml.snakeyaml.tokens.AnchorToken; import org.yaml.snakeyaml.tokens.DirectiveToken; import org.yaml.snakeyaml.tokens.DocumentStartToken; import org.yaml.snakeyaml.tokens.FlowEntryToken; import org.yaml.snakeyaml.tokens.FlowMappingEndToken; import org.yaml.snakeyaml.tokens.FlowMappingStartToken; import org.yaml.snakeyaml.tokens.FlowSequenceEndToken; import org.yaml.snakeyaml.tokens.FlowSequenceStartToken; import org.yaml.snakeyaml.tokens.KeyToken; import org.yaml.snakeyaml.tokens.ScalarToken; import org.yaml.snakeyaml.tokens.StreamEndToken; import org.yaml.snakeyaml.tokens.StreamStartToken; import org.yaml.snakeyaml.tokens.TagToken; import org.yaml.snakeyaml.tokens.TagTuple; import org.yaml.snakeyaml.tokens.Token; import org.yaml.snakeyaml.tokens.ValueToken; public class CanonicalScanner implements Scanner { private static final String DIRECTIVE = "%YAML 1.1"; private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES; private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS; private String data; private int index; public ArrayList<Token> tokens; private boolean scanned; private Mark mark; public CanonicalScanner(String data) { this.data = data + "\0"; this.index = 0; this.tokens = new ArrayList<Token>(); this.scanned = false; this.mark = new Mark("test", 0, 0, 0, data, 0); } public boolean checkToken(Token.ID... choices) { if (!scanned) { scan(); } if (!tokens.isEmpty()) { if (choices.length == 0) { return true; } Token first = this.tokens.get(0); for (Token.ID choice : choices) { if (first.getTokenId() == choice) { return true; } } } return false; } public Token peekToken() { if (!scanned) { scan(); } if (!tokens.isEmpty()) { return this.tokens.get(0); } return null; } public Token getToken() { if (!scanned) { scan(); } return this.tokens.remove(0); } public Token getToken(Token.ID choice) { Token token = getToken(); if (choice != null && token.getTokenId() != choice) { throw new CanonicalException("unexpected token " + token); } return token; } private void scan() { this.tokens.add(new StreamStartToken(mark, mark)); boolean stop = false; while (!stop) { findToken(); char ch = data.charAt(index); switch (ch) { case '\0': tokens.add(new StreamEndToken(mark, mark)); stop = true; break; case '%': tokens.add(scanDirective()); break; case '-': if ("---".equals(data.substring(index, index + 3))) { index += 3; tokens.add(new DocumentStartToken(mark, mark)); } break; case '[': index++; tokens.add(new FlowSequenceStartToken(mark, mark)); break; case '{': index++; tokens.add(new FlowMappingStartToken(mark, mark)); break; case ']': index++; tokens.add(new FlowSequenceEndToken(mark, mark)); break; case '}': index++; tokens.add(new FlowMappingEndToken(mark, mark)); break; case '?': index++; tokens.add(new KeyToken(mark, mark)); break; case ':': index++; tokens.add(new ValueToken(mark, mark)); break; case ',': index++; tokens.add(new FlowEntryToken(mark, mark)); break; case '*': tokens.add(scanAlias()); break; case '&': tokens.add(scanAlias()); break; case '!': tokens.add(scanTag()); break; case '"': tokens.add(scanScalar()); break; default: throw new CanonicalException("invalid token"); } } scanned = true; } private Token scanDirective() { String chunk1 = data.substring(index, index + DIRECTIVE.length()); char chunk2 = data.charAt(index + DIRECTIVE.length()); if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) { index += DIRECTIVE.length(); List<Integer> implicit = new ArrayList<Integer>(2); implicit.add(new Integer(1)); implicit.add(new Integer(1)); return new DirectiveToken<Integer>("YAML", implicit, mark, mark); } else { throw new CanonicalException("invalid directive"); } } private Token scanAlias() { boolean isTokenClassAlias; if (data.charAt(index) == '*') { isTokenClassAlias = true; } else { isTokenClassAlias = false; } index++; int start = index; while (", \n\0".indexOf(data.charAt(index)) == -1) { index++; } String value = data.substring(start, index); Token token; if (isTokenClassAlias) { token = new AliasToken(value, mark, mark); } else { token = new AnchorToken(value, mark, mark); } return token; } private Token scanTag() { index++; int start = index; while (" \n\0".indexOf(data.charAt(index)) == -1) { index++; } String value = data.substring(start, index); if (value.length() == 0) { value = "!"; } else if (value.charAt(0) == '!') { value = Tag.PREFIX + value.substring(1); } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') { value = value.substring(1, value.length() - 1); } else { value = "!" + value; } return new TagToken(new TagTuple("", value), mark, mark); } private Token scanScalar() { index++; StringBuilder chunks = new StringBuilder(); int start = index; boolean ignoreSpaces = false; while (data.charAt(index) != '"') { if (data.charAt(index) == '\\') { ignoreSpaces = false; chunks.append(data.substring(start, index)); index++; char ch = data.charAt(index); index++; if (ch == '\n') { ignoreSpaces = true; } else if (QUOTE_CODES.keySet().contains(ch)) { int length = QUOTE_CODES.get(ch); int code = Integer.parseInt(data.substring(index, index + length), 16); chunks.append(String.valueOf((char) code)); index += length; } else { if (!QUOTE_REPLACES.keySet().contains(ch)) { throw new CanonicalException("invalid escape code"); } chunks.append(QUOTE_REPLACES.get(ch)); } start = index; } else if (data.charAt(index) == '\n') { chunks.append(data.substring(start, index)); chunks.append(" "); index++; start = index; ignoreSpaces = true; } else if (ignoreSpaces && data.charAt(index) == ' ') { index++; start = index; } else { ignoreSpaces = false; index++; } } chunks.append(data.substring(start, index)); index++; return new ScalarToken(chunks.toString(), mark, mark, false); } private void findToken() { boolean found = false; while (!found) { while (" \t".indexOf(data.charAt(index)) != -1) { index++; } if (data.charAt(index) == '#') { while (data.charAt(index) != '\n') { index++; } } if (data.charAt(index) == '\n') { index++; } else { found = true; } } } }