/*
* JBoss, Home of Professional Open Source.
* Copyright 2012, Red Hat, Inc., and individual contributors
* as indicated by the @author tags. See the copyright.txt file in the
* distribution for a full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.exoplatform.commons.xml;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.xml.sax.SAXException;
/**
 * Parses XML 1.0 and XML 1.1 declarations. This class can be used in a situation where the actual
 * encoding of an XML document is known but the encoding stated in the XML declaration of the given
 * XML file needs to be determined, e.g. if it is necessary to find out if the declared encoding is
 * the same as the actual encoding.
 *
 * <p>Usage example:
 *
 * <pre>{@code
 * new XMLDeclarationParser("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
 *         .parse()
 *         .get(XMLDeclarationParser.ENCODING)
 * }</pre>
 *
 * returns {@code "UTF-8"}.
 *
 * <p>The parser is lenient: attribute names are not checked against a fixed list and their order
 * is not enforced. An instance keeps parsing state (current character, position, collected
 * attributes) in its fields, so each instance is meant for a single {@link #parse()} call.
 *
 * @author ppalaga@redhat.com
 *
 */
public class XMLDeclarationParser {

    /* Characters with a syntactic role in an XML declaration. */
    public static final char APOS = '\'';
    public static final char CR = '\r';
    public static final char EQ = '=';
    public static final char GT = '>';
    public static final char LF = '\n';
    public static final char LT = '<';
    public static final char QUESTION_MARK = '?';
    public static final char QUOT = '"';
    public static final char SPACE = ' ';
    public static final char TAB = '\t';

    /* Well-known attribute names usable as keys of the map returned by parse(). */
    public static final String ENCODING = "encoding";
    public static final String STANDALONE = "standalone";
    public static final String VERSION = "version";

    /** The processing instruction target that follows the opening {@code <?}. */
    public static final String XML = "xml";

    /** Marker stored in {@link #currentChar} when no valid character is available. */
    private static final int INVALID = -1;

    /** Set once the underlying reader has returned a negative value. */
    private boolean atEndOfInput = false;

    /** Attribute name to attribute value, filled by {@link #keyVal()}. */
    private final Map<String, String> attributes = new HashMap<String, String>(4);

    /** Scratch buffer shared by {@link #key()} and {@link #value()}. */
    private final StringBuilder charBuffer = new StringBuilder(16);

    /** The character most recently read from {@link #in}, or a negative value. */
    private int currentChar = INVALID;

    private final Reader in;

    /** Number of {@code read()} calls made so far, i.e. the 1-based position of the current character. */
    private int pos = 0;

    /**
     * Creates a parser reading the declaration from the given reader.
     *
     * @param in the source of characters; it is neither buffered nor closed by this class
     */
    public XMLDeclarationParser(Reader in) {
        this.in = in;
    }

    /**
     * Creates a parser reading the declaration from the given string.
     *
     * @param xml text starting with an XML declaration
     */
    public XMLDeclarationParser(String xml) {
        this(new StringReader(xml));
    }

    /**
     * Tells whether the given character is one of space, tab, carriage return or line feed.
     */
    private static boolean isWhiteSpace(int ch) {
        return ch == SPACE || ch == TAB || ch == CR || ch == LF;
    }

    /**
     * Skips zero or more whitespace characters; stops at the first non-whitespace character or at
     * the end of input.
     */
    private void consumeOptionalWhiteSpace() throws IOException {
        while (!atEndOfInput && isWhiteSpace(current())) {
            next();
        }
    }

    /**
     * Returns the current character, reading one from the input first if none is available yet.
     *
     * @return the current character or a negative value at the end of input
     */
    private int current() throws IOException {
        if (currentChar < 0) {
            next();
        }
        return currentChar;
    }

    /**
     * @throws SAXException if there is no current character because the input is exhausted
     */
    private void ensureNotEndOfInput() throws SAXException, IOException {
        if (current() < 0) {
            throw new SAXException("Unexpected end of input.");
        }
    }

    /**
     * Reads an attribute name: all characters up to (not including) the next {@code '='},
     * whitespace character or the end of input.
     *
     * @return the non-empty attribute name
     * @throws SAXException if no name character is found at the current position
     */
    private String key() throws IOException, SAXException {
        charBuffer.setLength(0);
        while (!atEndOfInput) {
            int ch = current();
            if (ch < 0 || ch == EQ || isWhiteSpace(ch)) {
                break;
            }
            charBuffer.append((char) ch);
            next();
        }
        if (charBuffer.length() == 0) {
            // Without this check an input such as <?xml ="1.0"?> would register an attribute
            // with an empty name.
            throw new SAXException("Attribute name expected at position " + pos + " of an XML declaration.");
        }
        return charBuffer.toString();
    }

    /**
     * Reads one {@code name = "value"} pair, stores it in {@link #attributes} and skips any
     * whitespace that follows. A repeated name overwrites the previously stored value.
     */
    private void keyVal() throws IOException, SAXException {
        String key = key();
        consumeOptionalWhiteSpace();
        match(EQ);
        consumeOptionalWhiteSpace();
        String value = value();
        attributes.put(key, value);
        consumeOptionalWhiteSpace();
    }

    /**
     * Checks that the current character equals {@code toMatch} and advances to the next one.
     *
     * @throws SAXException on end of input or if the current character differs from {@code toMatch}
     */
    private void match(char toMatch) throws SAXException, IOException {
        ensureNotEndOfInput();
        int ch = current();
        if (ch != toMatch) {
            throw new SAXException("Unexpected character '" + (char) ch + "' at position " + pos + "; expected '" + toMatch
                    + "'.");
        }
        next();
    }

    /**
     * Matches every character of {@code toMatch} in sequence.
     */
    private void match(String toMatch) throws SAXException, IOException {
        for (int i = 0; i < toMatch.length(); i++) {
            match(toMatch.charAt(i));
        }
    }

    /**
     * Requires at least one whitespace character at the current position and skips all whitespace
     * that follows.
     *
     * @throws SAXException on end of input or if the current character is not whitespace
     */
    private void matchWhiteSpace() throws IOException, SAXException {
        ensureNotEndOfInput();
        if (!isWhiteSpace(current())) {
            throw new SAXException("Whitespace expected at position " + pos + " of an XML declaration.");
        }
        next();
        consumeOptionalWhiteSpace();
    }

    /**
     * Advances to the next character of the input unless the end of input was already reached.
     *
     * @return the new current character or a negative value at the end of input
     */
    private int next() throws IOException {
        if (!atEndOfInput) {
            currentChar = in.read();
            pos++;
            if (currentChar < 0) {
                atEndOfInput = true;
            }
        }
        return currentChar;
    }

    /**
     * Parses the declaration: {@code <?xml}, mandatory whitespace, any number of
     * {@code name="value"} or {@code name='value'} pairs and the closing {@code ?>}.
     *
     * <p>Note that after the closing {@code '>'} has been matched, one more character is read
     * from the underlying reader.
     *
     * @return the map of attribute names to attribute values; this is the parser's internal map,
     *         not a copy
     * @throws SAXException if the input does not have the expected form or ends prematurely
     * @throws IOException if reading from the underlying reader fails
     */
    public Map<String, String> parse() throws SAXException, IOException {
        match(LT);
        match(QUESTION_MARK);
        match(XML);
        matchWhiteSpace();
        while (current() != QUESTION_MARK && !atEndOfInput) {
            keyVal();
        }
        match(QUESTION_MARK);
        match(GT);
        return attributes;
    }

    /**
     * Reads a quoted attribute value. The value must start with {@code '"'} or {@code '\''} and
     * extends to the next occurrence of the same quote character, which is consumed.
     *
     * @return the text between the quotes, possibly empty
     * @throws SAXException if the current character is not a quote or the input ends before the
     *         closing quote
     */
    private String value() throws IOException, SAXException {
        ensureNotEndOfInput();
        int quote = current();
        if (quote != QUOT && quote != APOS) {
            throw new SAXException("Unexpected character '" + (char) quote + "' at position " + pos + "; expected '" + QUOT
                    + "' or '" + APOS + "'.");
        }
        next();
        charBuffer.setLength(0);
        while (true) {
            int ch = current();
            if (ch < 0) {
                // Report the unterminated literal here rather than returning a truncated value
                // and leaving the caller to fail with a less specific message.
                throw new SAXException("Unexpected end of input; expected closing '" + (char) quote + "'.");
            }
            if (ch == quote) {
                next();
                break;
            }
            charBuffer.append((char) ch);
            next();
        }
        return charBuffer.toString();
    }
}