/*******************************************************************************
* Copyright (c) 2006-2010 eBay Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*******************************************************************************/
package org.ebayopensource.turmeric.runtime.binding.impl.parser.nv;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Map;
import javax.xml.stream.XMLStreamException;
import org.ebayopensource.turmeric.runtime.binding.impl.parser.BaseXMLStreamReader;
import org.ebayopensource.turmeric.runtime.binding.impl.parser.NamespaceConvention;
import org.ebayopensource.turmeric.runtime.binding.utils.URLDecoderInputStream;
/**
* @author ichernyshev
*/
public final class NVStreamParser {
/** Sentinel character ('\uFFFF') used to signal "no character", e.g. end of input or an empty push-back slot. */
private final static char NO_CHAR = (char)-1;
/** Lookup table indexed by {@code c & 0x7F}; an entry equals {@code c} only when {@code c} is a path delimiter. */
private final static char[] DELIMITERS = new char[128];
/** Lookup table indexed by {@code c & 0x7F}; an entry equals {@code c} only when {@code c} is allowed in a name. */
private final static char[] VALID_CHARS = new char[128];
/** Characters of the prefix that marks a namespace-definition line (taken from NVConstants). */
private final static char[] NSPREFIX_DEF;
/** Checksum of {@link #NSPREFIX_DEF}, compared first as a cheap pre-check before the full text comparison. */
private final static int NSPREFIX_DEF_CHECKSUM;
/** Source of characters; created in the constructor from the supplied input stream. */
private final Reader m_reader;
/** Read buffer filled from {@link #m_reader}. */
private final char[] m_buf;
/** Index of the next unread character in {@link #m_buf}; -1 once the input is exhausted. */
private int m_position;
/** Number of valid characters in {@link #m_buf}; -1 once the input is exhausted. */
private int m_size;
/** Namespace convention used to resolve prefixes and to record prefix definitions found in the input. */
private final NamespaceConvention m_convention;
/** Number of path elements of the last parsed line; -1 means no line is currently available. */
private int m_pathLen = -1;
/** Reusable holders for the path elements of the current line; grown on demand. */
private NVElementHolder[] m_path = new NVElementHolder[8];
/** Backing storage for prefixes, element names and index digits of the current line; grown on demand. */
private char[] m_pathDataBuffer = new char[256];
/** Raw value text of the current line, including surrounding quotes if any. */
private StringBuilder m_value = new StringBuilder();
/** Number of namespace prefix definitions seen so far. */
private int m_nsCount;
/** The namespace URI when exactly one prefix definition has been seen, otherwise null. */
private String m_singleNamespace;
/** Start offset (inclusive) of the effective value inside {@link #m_value}. */
private int m_valueStart;
/** End offset (exclusive) of the effective value inside {@link #m_value}. */
private int m_valueEnd;
/** Two-slot push-back buffer used when a '%' is not followed by a recognized escape code. */
private char[] m_unreadBuffer = {NO_CHAR, NO_CHAR};
/** Set when the last character returned by read() was produced by decoding "%26" or "%25". */
private boolean m_decoded = false;
/** Whether values that start with a double quote are treated as quote-delimited. */
private boolean m_doubleQuoteDelimited = true;
/**
 * Creates a parser that reads NV data from the given stream.
 *
 * @param is         stream with the NV payload; it is wrapped in a URLDecoderInputStream
 * @param charset    charset used to turn the bytes into characters
 * @param convention namespace convention used for prefix lookup and registration
 */
public NVStreamParser(InputStream is, Charset charset, NamespaceConvention convention) {
    m_convention = convention;
    m_reader = new InputStreamReader(new URLDecoderInputStream(is), charset);
    m_buf = new char[10 * 1024];
}

/**
 * Creates a parser with an explicit setting for quote-delimited values.
 *
 * @param is                   stream with the NV payload
 * @param charset              charset used to turn the bytes into characters
 * @param convention           namespace convention used for prefix lookup and registration
 * @param doubleQuoteDelimited whether values starting with a double quote are quote-delimited
 */
public NVStreamParser(InputStream is, Charset charset, NamespaceConvention convention,
        boolean doubleQuoteDelimited) {
    this(is, charset, convention);
    this.m_doubleQuoteDelimited = doubleQuoteDelimited;
}

/**
 * Creates a parser configured from an option map.
 *
 * @param is         stream with the NV payload
 * @param charset    charset used to turn the bytes into characters
 * @param convention namespace convention used for prefix lookup and registration
 * @param options    optional settings; may be null, in which case defaults are kept
 */
public NVStreamParser(InputStream is, Charset charset, NamespaceConvention convention,
        Map<String, String> options) {
    this(is, charset, convention);
    this.setupOptions(options);
}
/**
 * Applies the supported entries of the option map to this parser.
 * A null map, or a map without the double-quote key, leaves the defaults untouched.
 *
 * @param options option map, may be null
 */
private void setupOptions(Map<String, String> options) {
    if (options != null) {
        final String quoteFlag = options.get(BaseXMLStreamReader.KEY_DOUBLE_QUOTE_DELIMITED);
        if (quoteFlag != null) {
            m_doubleQuoteDelimited = Boolean.parseBoolean(quoteFlag);
        }
    }
}
/**
 * Refills the read buffer from the underlying reader.
 * When nothing could be read (no reader, end of stream, or zero characters),
 * both the position and the size are set to -1 to mark the input as exhausted.
 *
 * @throws IOException if the underlying reader fails
 */
private void fillBuffer() throws IOException {
    int charsRead = -1;
    if (m_reader != null) {
        m_position = 0;
        charsRead = m_reader.read(m_buf);
    }
    final boolean exhausted = (charsRead == -1 || charsRead == 0);
    if (exhausted) {
        m_position = -1;
        m_size = -1;
    } else {
        m_size = charsRead;
    }
}
/**
 * Returns the next raw character, without any escape decoding.
 * Characters that were pushed back are served first; otherwise the character
 * comes from the read buffer, which is refilled when it has been consumed.
 *
 * @return the next character, or NO_CHAR when the input is exhausted
 * @throws IOException if refilling the buffer fails
 */
private char readPlainCharacter() throws IOException {
    final char pushedBack = m_unreadBuffer[0];
    if (pushedBack == NO_CHAR) {
        // nothing pushed back, take the character from the buffer
        if (m_position == m_size) {
            fillBuffer();
            if (m_size == -1) {
                return NO_CHAR;
            }
        }
        return m_buf[m_position++];
    }
    // shift the second push-back slot into the first one
    m_unreadBuffer[0] = m_unreadBuffer[1];
    m_unreadBuffer[1] = NO_CHAR;
    return pushedBack;
}
/**
 * Returns the next character, decoding the escapes "%26" (to '&amp;') and "%25" (to '%').
 * When an escape was decoded, m_decoded is set so the caller can tell a literal
 * character from a delimiter. Any other '%' sequence is returned unchanged: the '%'
 * is returned now and the two characters that followed it are pushed back.
 *
 * @return the next character, or NO_CHAR when the input is exhausted
 * @throws IOException if reading fails
 */
private char read() throws IOException {
    final char first = readPlainCharacter();
    if (first == '%') {
        final char hi = readPlainCharacter();
        final char lo = readPlainCharacter();
        if (hi == '2' && (lo == '6' || lo == '5')) {
            m_decoded = true;
            return (lo == '6') ? '&' : '%';
        }
        // not an escape we handle here, replay both characters on the next reads
        m_unreadBuffer[0] = hi;
        m_unreadBuffer[1] = lo;
    }
    return first;
}
/**
 * Parses the next NV entry ("line") of the form {@code path=value}, where entries
 * are separated by '&amp;' and the path consists of '.'-separated elements, each
 * optionally carrying a {@code prefix:} and an index in '[...]' or '(...)'.
 *
 * On success the parsed path is available through m_path / m_pathLen and the value
 * boundaries are stored by updateValuePos(). Entries whose first path element uses
 * the namespace-definition prefix are not returned: they are registered through
 * addNsPrefixDef() and parsing continues with the following entry. Empty entries
 * (an '&amp;' with no preceding name data) are skipped.
 *
 * @return true when a data entry was parsed, false when the end of input was
 *         reached before any name data of a new entry
 * @throws XMLStreamException when the entry is malformed, or wrapping an
 *         IOException raised while reading
 */
public boolean parseLine() throws XMLStreamException {
// mark "no line available" until an entry has been parsed completely
m_pathLen = -1;
// parse line
// if it's end of file, return false
// if it's namespace prefix, remember that prefix and parse next line
// if it's normal line, parse it completely
try {
// number of completed path elements of the current entry
int pathLen = 0;
// write position inside the path data buffer
int pathDataBuffPos = 0;
// parser state: exactly one of name / index / value mode is normally active
boolean isInName = true;
boolean isInIndex = false;
boolean isInValue = false;
// opening bracket of the index being read, used to match the closing one
char indexLeftBracket = 0;
// start of the token (prefix, name or index) currently being collected
int lastDataStart = 0;
// XOR of the characters of the token currently being collected
int lastDataChecksum = 0;
// true once any non-delimiter character was seen outside of the value
boolean hasElemData = false;
NVElementHolder currElem = getPathElem(0);
// local copies of the buffer reference and its length, refreshed after each expansion
char[] pathDataBuffer = m_pathDataBuffer;
int pathDataBufferLen = pathDataBuffer.length;
m_value.setLength(0);
while (true) {
char c = (char) read();
// keep this most-executed block close to the beginning to shorten jumps
if (isInValue) {
// inside a value only an undecoded '&' or the end of input terminates it
if ( (m_decoded || c != '&') && c != NO_CHAR) {
m_decoded = false;
// this is not a delimiter, add to buffer
m_value.append(c);
continue;
}
} else {
// outside of a value a decoded character is treated like any other character
m_decoded = false;
// NO_CHAR also passes this test as a delimiter, because the table slot 0x7F holds NO_CHAR
if (DELIMITERS[c & 0x7F] != c) {
// this is not a delimiter, add to buffer
if (pathDataBuffPos >= pathDataBufferLen) {
expandPathDataBuffer();
pathDataBuffer = m_pathDataBuffer;
pathDataBufferLen = pathDataBuffer.length;
}
pathDataBuffer[pathDataBuffPos++] = c;
lastDataChecksum ^= c;
hasElemData = true;
continue;
}
}
if (isInValue) {
// this is the end of the value
NVElementHolder firstElem = m_path[0];
// cheap checksum comparison first, full text comparison only on a match
if (firstElem.m_prefixChecksum == NSPREFIX_DEF_CHECKSUM &&
isSameText(firstElem.m_prefix, firstElem.m_prefixStart,
firstElem.m_prefixLength, NSPREFIX_DEF))
{
// this was NS prefix definition
if (pathLen != 1) {
throw new XMLStreamException(
"NS Prefix cannot have multple names in the path");
}
/* Now that, extra NV pair for header and other usage can get in front of the real NV payload
* because we don't honor the REST_PAYLOAD delimiter anymore. We should not throw this exception.
* if (m_hadDataLines) {
throw new XMLStreamException(
"NS Prefix cannot be added after data elements");
}
*/
updateValuePos();
addNsPrefixDef(firstElem);
} else {
// finished reading a line
updateValuePos();
// m_hadDataLines = true;
m_pathLen = pathLen;
validateLine();
return true;
}
// continue as if there was no this line
pathLen = 0;
pathDataBuffPos = 0;
lastDataStart = 0;
lastDataChecksum = 0;
isInValue = false;
isInName = true;
hasElemData = false;
currElem = getPathElem(0);
m_value.setLength(0);
continue;
}
// '.' ends a path element, '=' ends the whole path and starts the value
if (c == '=' || c == '.') {
if (isInIndex) {
throw new XMLStreamException("Unexpected symbol '" + c +
"' found inside element indexing data");
}
// NOTE(review): after a closing bracket isInName is false, so characters that follow
// the bracket before this '.' or '=' are not rejected here and lastDataStart is not
// advanced past them - confirm whether such input should raise an error
if (isInName) {
// end of path element name
if (lastDataStart >= pathDataBuffPos) {
throw new XMLStreamException("Unexpected empty element name");
}
currElem.m_elemName = pathDataBuffer;
currElem.m_elemNameStart = lastDataStart;
currElem.m_elemNameLength = pathDataBuffPos - lastDataStart;
currElem.m_elemNameChecksum = lastDataChecksum;
lastDataStart = pathDataBuffPos;
lastDataChecksum = 0;
adjustAttributeName(currElem);
}
pathLen++;
if (c == '.') {
// next path element follows, take (and reset) its holder
isInName = true;
currElem = getPathElem(pathLen);
continue;
}
// move into the value mode
isInName = false;
isInValue = true;
continue;
}
// end of input or entry separator reached before any '=' was seen
if (c == NO_CHAR || c == '&') {
if (isInIndex) {
throw new XMLStreamException(
"Indexing data has terminated unexpectedly with the end of stream");
}
if (hasElemData) {
throw new XMLStreamException(
"Name data has terminated unexpectedly with the end of line");
}
if (c == NO_CHAR) {
return false;
}
// continue as if there was no this line
pathLen = 0;
pathDataBuffPos = 0;
lastDataStart = 0;
lastDataChecksum = 0;
isInValue = false;
isInName = true;
hasElemData = false;
currElem = getPathElem(0);
m_value.setLength(0);
continue;
}
// ':' ends the namespace prefix of the current path element
if (c == ':') {
if (isInIndex) {
throw new XMLStreamException("Unexpected symbol '" + c +
"' found inside element indexing data");
}
if (!isInName) {
throw new XMLStreamException("Namespace prefix is not expected after element name");
}
if (lastDataStart >= pathDataBuffPos) {
throw new XMLStreamException("Unexpected empty namespace prefix data");
}
currElem.m_prefix = pathDataBuffer;
currElem.m_prefixStart = lastDataStart;
currElem.m_prefixLength = pathDataBuffPos - lastDataStart;
currElem.m_prefixChecksum = lastDataChecksum;
lastDataStart = pathDataBuffPos;
lastDataChecksum = 0;
continue;
}
// an opening bracket ends the element name and starts its index
if (c == '[' || c == '(') {
if (isInIndex) {
throw new XMLStreamException("Unexpected symbol '" + c +
"' found inside element indexing data");
}
if (!isInName) {
throw new XMLStreamException("Unexpected symbol '" + c +
"' found outside of element name");
}
if (lastDataStart >= pathDataBuffPos) {
throw new XMLStreamException("Unexpected empty element name");
}
currElem.m_elemName = pathDataBuffer;
currElem.m_elemNameStart = lastDataStart;
currElem.m_elemNameLength = pathDataBuffPos - lastDataStart;
currElem.m_elemNameChecksum = lastDataChecksum;
lastDataStart = pathDataBuffPos;
lastDataChecksum = 0;
adjustAttributeName(currElem);
isInName = false;
isInIndex = true;
indexLeftBracket = c;
continue;
}
// a closing bracket must match the opening one and ends the index
if (c == ']' || c == ')') {
if (!isInIndex ||
(c == ']' && indexLeftBracket != '[') ||
(c == ')' && indexLeftBracket != '('))
{
throw new XMLStreamException("Unexpected symbol '" + c +
"' found outside element indexing data");
}
if (isInName) {
throw new XMLStreamException("Unexpected symbol '" + c +
"' found inside of element name");
}
if (lastDataStart >= pathDataBuffPos) {
throw new XMLStreamException("Unexpected empty indexing data");
}
isInIndex = false;
currElem.m_index = parseIndex(lastDataStart, pathDataBuffPos);
lastDataStart = pathDataBuffPos;
lastDataChecksum = 0;
continue;
}
// every delimiter in the table is handled above, so this is a safety net
throw new XMLStreamException("Unexpected delimiter '" + c + "' found");
}
} catch (IOException ioe) {
throw new XMLStreamException(ioe);
}
}
/**
 * Looks up the namespace URI registered for a prefix in the namespace convention.
 *
 * @param prefix namespace prefix to resolve
 * @return whatever the namespace convention returns for that prefix
 */
public String getNsUriByPrefix(String prefix) {
    final String nsUri = m_convention.getNamespaceURI(prefix);
    return nsUri;
}
/**
 * Determines the namespace URI of a parsed path element.
 *
 * An element with a prefix is resolved through the namespace convention. An element
 * without a prefix gets the empty namespace, unless defaults are allowed; in that case
 * the first non-null of the following is used: the single namespace defined in the
 * input, the single namespace of the convention, the implied root namespace.
 *
 * @param holder          parsed path element
 * @param allowNsDefaults whether default namespaces may be applied to unprefixed elements
 * @param impliedRootNs   fallback namespace for unprefixed elements, may be null
 * @return the namespace URI, or an empty string when none applies
 * @throws XMLStreamException when the element uses a prefix that is not defined
 */
public String getNsUriForElementHolder(NVStreamParser.NVElementHolder holder,
        boolean allowNsDefaults, String impliedRootNs)
        throws XMLStreamException
{
    if (holder.m_prefixLength != 0) {
        // explicit prefix: it has to be known
        final String prefixText = new String(holder.m_prefix,
                holder.m_prefixStart, holder.m_prefixLength);
        final String resolved = getNsUriByPrefix(prefixText);
        if (resolved == null) {
            throw new XMLStreamException("Undefined namespace prefix " + prefixText);
        }
        return resolved;
    }

    if (!allowNsDefaults) {
        return "";
    }
    if (m_singleNamespace != null) {
        return m_singleNamespace;
    }
    final String conventionDefault = m_convention.getSingleNamespace();
    if (conventionDefault != null) {
        return conventionDefault;
    }
    return (impliedRootNs != null) ? impliedRootNs : "";
}
/**
 * Converts a parsed path element into an NVPathPart.
 *
 * @param holder          parsed path element
 * @param allowNsDefaults whether default namespaces may be applied to unprefixed elements
 * @param impliedRootNs   fallback namespace for unprefixed elements, may be null
 * @return path part built from the namespace URI, element name, index, attribute flag
 *         and name checksum of the holder
 * @throws XMLStreamException when the element uses a prefix that is not defined
 */
public NVPathPart buildPathPart(NVStreamParser.NVElementHolder holder,
        boolean allowNsDefaults, String impliedRootNs)
        throws XMLStreamException
{
    // resolve the namespace first, it is the only step that can fail with a parse error
    final String namespaceUri = getNsUriForElementHolder(holder, allowNsDefaults, impliedRootNs);
    final int nameOffset = holder.m_elemNameStart;
    final int nameLength = holder.m_elemNameLength;
    final String localName = new String(holder.m_elemName, nameOffset, nameLength);
    return new NVPathPart(namespaceUri, localName, holder.m_index,
            holder.m_isAttribute, holder.m_elemNameChecksum);
}
/**
 * Returns a blank holder for the given path position.
 * The holder array is doubled when the position is beyond its end; an existing
 * holder is reused after all of its fields have been reset.
 *
 * @param pathPos zero-based position of the path element
 * @return holder with all fields at their initial values
 */
private NVElementHolder getPathElem(int pathPos) {
    final int capacity = m_path.length;
    if (capacity <= pathPos) {
        final NVElementHolder[] grown = new NVElementHolder[capacity * 2];
        System.arraycopy(m_path, 0, grown, 0, capacity);
        m_path = grown;
    }

    NVElementHolder slot = m_path[pathPos];
    if (slot == null) {
        // first use of this position
        slot = new NVElementHolder();
        m_path[pathPos] = slot;
        return slot;
    }

    // reuse: wipe whatever the previous line left behind
    slot.m_prefix = null;
    slot.m_prefixStart = 0;
    slot.m_prefixLength = 0;
    slot.m_prefixChecksum = 0;
    slot.m_elemName = null;
    slot.m_elemNameStart = 0;
    slot.m_elemNameLength = 0;
    slot.m_elemNameChecksum = 0;
    slot.m_index = 0;
    slot.m_isAttribute = false;
    return slot;
}
/**
 * Detects the attribute marker '@' at the start of an element name.
 * When present, the marker is excluded from the name range and the element is
 * flagged as an attribute.
 * NOTE(review): m_elemNameChecksum is not modified here - confirm that consumers
 * expect a checksum that was computed before the marker was stripped.
 *
 * @param elem path element whose name range has already been set
 * @throws XMLStreamException when nothing is left of the name after the marker
 */
private void adjustAttributeName(NVElementHolder elem) throws XMLStreamException {
    final char leading = elem.m_elemName[elem.m_elemNameStart];
    if (leading == '@') {
        elem.m_isAttribute = true;
        elem.m_elemNameStart += 1;
        elem.m_elemNameLength -= 1;
        if (elem.m_elemNameLength == 0) {
            throw new XMLStreamException("Unexpected empty element attribute name");
        }
    }
}
/**
 * Parses the decimal index stored in the path data buffer.
 *
 * @param start  offset of the first digit (inclusive)
 * @param endPos offset after the last digit (exclusive)
 * @return the parsed non-negative index; 0 when the range is empty
 * @throws XMLStreamException when the range contains a non-digit character or
 *         the number does not fit into an int
 */
private int parseIndex(int start, int endPos) throws XMLStreamException {
    int result = 0;
    for (int i = start; i < endPos; i++) {
        char c = m_pathDataBuffer[i];
        if (c < '0' || c > '9') {
            throw new XMLStreamException("Non-numeric index contains symbol '" + c + "'");
        }
        int currDigit = c - '0';
        // reject the digit before multiplying, otherwise the value would silently
        // wrap around and could even produce a negative index
        if (result > (Integer.MAX_VALUE - currDigit) / 10) {
            throw new XMLStreamException("Index value '" +
                new String(m_pathDataBuffer, start, endPos - start) + "' is too large");
        }
        result = result * 10 + currDigit;
    }
    return result;
}
/**
 * Doubles the path data buffer, keeping its content.
 * Holders that already point into the old buffer are switched to the new one,
 * so the old array is no longer referenced from the path.
 */
private void expandPathDataBuffer() {
    final char[] previous = m_pathDataBuffer;
    final char[] enlarged = new char[previous.length * 2];
    System.arraycopy(previous, 0, enlarged, 0, previous.length);
    m_pathDataBuffer = enlarged;

    // re-point every holder that references buffer data
    for (NVElementHolder holder : m_path) {
        if (holder != null) {
            if (holder.m_prefix != null) {
                holder.m_prefix = enlarged;
            }
            if (holder.m_elemName != null) {
                holder.m_elemName = enlarged;
            }
        }
    }
}
/**
 * Validates the path of the line that was just parsed.
 * An attribute must be the last element of the path, and every prefix and
 * element name may only contain valid name characters.
 *
 * @throws XMLStreamException when an attribute is followed by another element
 *         or a name contains an invalid character
 */
private void validateLine() throws XMLStreamException {
    boolean previousWasAttribute = false;
    int pos = 0;
    while (pos < m_pathLen) {
        if (previousWasAttribute) {
            throw new XMLStreamException("Attribute cannot contain child element");
        }
        final NVElementHolder part = m_path[pos];
        previousWasAttribute = part.m_isAttribute;
        if (part.m_prefixLength > 0) {
            validateName(part.m_prefix, part.m_prefixStart, part.m_prefixLength);
        }
        validateName(part.m_elemName, part.m_elemNameStart, part.m_elemNameLength);
        pos++;
    }
}
/**
 * Checks that every character of a name is listed in the VALID_CHARS table.
 *
 * @param data  array holding the name
 * @param start offset of the first character of the name
 * @param len   number of characters of the name
 * @throws XMLStreamException on the first character that is not valid
 */
private void validateName(char[] data, int start, int len) throws XMLStreamException {
    final int limit = start + len;
    int pos = start;
    while (pos < limit) {
        final char symbol = data[pos];
        // the table is indexed by the low 7 bits and stores the character itself when valid
        final boolean valid = (VALID_CHARS[symbol & 0x7F] == symbol);
        if (!valid) {
            throw new XMLStreamException("Invalid symbol '" + symbol + "' found for element in NV line");
        }
        pos++;
    }
}
/**
 * Registers a namespace prefix definition found in the input.
 * The element name is the prefix and the current value is the namespace URI.
 * The URI is also remembered as the single namespace while it is the only
 * definition seen; any further definition clears it.
 *
 * @param elem first path element of the definition line
 * @throws XMLStreamException declared for callers; not raised by the statements of this method itself
 */
private void addNsPrefixDef(NVElementHolder elem) throws XMLStreamException {
    final String definedPrefix = new String(elem.m_elemName,
            elem.m_elemNameStart, elem.m_elemNameLength);
    final String definedUri = getValueInternal();
    m_convention.addMapping(definedPrefix, definedUri);

    m_nsCount += 1;
    m_singleNamespace = (m_nsCount == 1) ? definedUri : null;
}
/**
 * Computes the boundaries of the effective value inside the raw value text.
 * When quote delimiting is enabled and the raw text starts with a double quote,
 * the text must also end with a double quote and both quotes are excluded.
 * This includes the quoted empty value (two quotes), which yields an empty string.
 * A raw value consisting of a single quote character is kept as is.
 *
 * @throws XMLStreamException when the value starts with a double quote but does
 *         not end with one
 */
private void updateValuePos() throws XMLStreamException {
    int len = m_value.length();
    // two characters are enough for a quoted (empty) value; requiring more would
    // return "" with its quotes and would let an unterminated two-character value pass
    if (m_doubleQuoteDelimited && len >= 2 && m_value.charAt(0) == '"') {
        if (m_value.charAt(len - 1) != '"') {
            throw new XMLStreamException(
                "Value starts with quotation mark, but ends without the same");
        }
        m_valueStart = 1;
        m_valueEnd = len - 1;
    } else {
        m_valueStart = 0;
        m_valueEnd = len;
    }
}
/**
 * Extracts the effective value, i.e. the part of the raw value text between
 * the boundaries computed by updateValuePos().
 *
 * @return the effective value of the current line
 */
private String getValueInternal() {
    final int from = m_valueStart;
    final int to = m_valueEnd;
    return m_value.substring(from, to);
}
/**
 * Compares a range of a character array with a complete character array.
 *
 * @param data   array holding the range to compare
 * @param start  offset of the range inside data
 * @param length length of the range
 * @param other  text to compare against
 * @return true when the range has the same length and characters as other
 */
private boolean isSameText(char[] data, int start, int length, char[] other) {
    if (other.length == length) {
        int offset = 0;
        while (offset < length) {
            if (other[offset] != data[start + offset]) {
                return false;
            }
            offset++;
        }
        return true;
    }
    return false;
}
/**
 * Returns the number of path elements of the last parsed line.
 *
 * @return number of valid entries in the element path
 * @throws IllegalStateException when no parsed line is available
 */
public int getElementPathLen() {
    final int parsedLen = m_pathLen;
    if (parsedLen != -1) {
        return parsedLen;
    }
    throw new IllegalStateException("No NV line parsed");
}
/**
 * Returns the element path of the last parsed line.
 * The returned array is the parser's internal one; it may be longer than the
 * path, and only the first getElementPathLen() entries belong to the line.
 *
 * @return internal array of path element holders
 * @throws IllegalStateException when no parsed line is available
 */
public NVElementHolder[] getElementPath() {
    final boolean lineAvailable = (m_pathLen != -1);
    if (lineAvailable) {
        return m_path;
    }
    throw new IllegalStateException("No NV line parsed");
}
/**
 * Returns the value of the last parsed line.
 *
 * @return the effective value text
 * @throws IllegalStateException when no parsed line is available
 */
public String getValue() {
    final boolean lineAvailable = (m_pathLen != -1);
    if (!lineAvailable) {
        throw new IllegalStateException("No NV line parsed");
    }
    final String value = getValueInternal();
    return value;
}
static {
NSPREFIX_DEF = NVConstants.NV_NAMESPACE_DEF_PREFIX.toCharArray();
NSPREFIX_DEF_CHECKSUM = NVPathPart.calcChecksum(NVConstants.NV_NAMESPACE_DEF_PREFIX);
for (int i=0; i<DELIMITERS.length; i++) {
DELIMITERS[i] = NO_CHAR;
}
DELIMITERS[':'] = ':';
DELIMITERS['.'] = '.';
DELIMITERS['('] = '(';
DELIMITERS[')'] = ')';
DELIMITERS['['] = '[';
DELIMITERS[']'] = ']';
DELIMITERS['='] = '=';
DELIMITERS['&'] = '&';
for (int i=0; i<VALID_CHARS.length; i++) {
VALID_CHARS[i] = NO_CHAR;
}
for (char i='0'; i<='9'; i++) {
VALID_CHARS[i] = i;
}
for (char i='a'; i<='z'; i++) {
VALID_CHARS[i] = i;
}
for (char i='A'; i<='Z'; i++) {
VALID_CHARS[i] = i;
}
VALID_CHARS['_'] = '_';
VALID_CHARS['-'] = '-';
}
/**
 * Mutable holder describing one element of a parsed NV path.
 * Name and prefix are stored as ranges inside character arrays rather than as strings.
 */
public static class NVElementHolder {
    /** Array containing the element name. */
    public char[] m_elemName;
    /** Offset of the element name inside m_elemName. */
    public int m_elemNameStart;
    /** Length of the element name. */
    public int m_elemNameLength;
    /** Checksum associated with the element name. */
    public int m_elemNameChecksum;
    /** Array containing the namespace prefix; unused when m_prefixLength is 0. */
    public char[] m_prefix;
    /** Offset of the prefix inside m_prefix. */
    public int m_prefixStart;
    /** Length of the prefix; 0 means the element has no prefix. */
    public int m_prefixLength;
    /** Checksum associated with the prefix. */
    public int m_prefixChecksum;
    /** Whether the element denotes an attribute. */
    public boolean m_isAttribute;
    /** Index of the element. */
    public int m_index;

    /**
     * Builds the prefix as a string.
     *
     * @return the prefix text, or null when the element has no prefix
     */
    public String buildPrefixText() {
        final boolean hasPrefix = (m_prefixLength != 0);
        return hasPrefix ? new String(m_prefix, m_prefixStart, m_prefixLength) : null;
    }

    /**
     * Builds the element name as a string.
     *
     * @return the element name text
     */
    public String buildElemNameText() {
        final String nameText = new String(m_elemName, m_elemNameStart, m_elemNameLength);
        return nameText;
    }
}
}