/*
* Copyright (C) 2002-2011 XimpleWare, info@ximpleware.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/*
*
* This class was created to update VTDNav's implementation with
* a more thread-safe version.
*/
package com.ximpleware;
/**
* The VTD Navigator allows one to navigate an XML document represented as VTD
* records and location caches. There is one and only one cursor, which can be
* moved to any part of the tree. If a method operating on a node doesn't
* accept the node as input, by default it refers to the cursor element. The
* hierarchy consists entirely of elements.
*/
public class VTDNavEx {
// Navigation directions accepted by toElement(int)
public final static int ROOT = 0;
public final static int PARENT = 1;
public final static int FIRST_CHILD = 2;
public final static int LAST_CHILD = 3;
public final static int NEXT_SIBLING = 4;
public final static int PREV_SIBLING = 5;
// Short aliases for the navigation directions above (same values)
public final static int R = 0;
public final static int P = 1;
public final static int FC = 2;
public final static int LC = 3;
public final static int NS = 4;
public final static int PS = 5;
// Token type definitions: the values stored in the type field of a VTD
// record, as returned by getTokenType(int)
public final static int TOKEN_STARTING_TAG = 0;
public final static int TOKEN_ENDING_TAG = 1;
public final static int TOKEN_ATTR_NAME = 2;
public final static int TOKEN_ATTR_NS = 3;
public final static int TOKEN_ATTR_VAL = 4;
public final static int TOKEN_CHARACTER_DATA = 5;
public final static int TOKEN_COMMENT = 6;
public final static int TOKEN_PI_NAME = 7;
public final static int TOKEN_PI_VAL = 8;
public final static int TOKEN_DEC_ATTR_NAME = 9;
public final static int TOKEN_DEC_ATTR_VAL = 10;
public final static int TOKEN_CDATA_VAL = 11;
public final static int TOKEN_DTD_VAL = 12;
public final static int TOKEN_DOCUMENT = 13;
// Encoding format definitions; note that the values are not in declaration
// order (ASCII = 0, ISO-8859-1 = 1, UTF-8 = 2)
public final static int FORMAT_UTF8 = 2;
public final static int FORMAT_ASCII = 0;
public final static int FORMAT_ISO_8859_1 = 1;
public final static int FORMAT_ISO_8859_2 = 3;
public final static int FORMAT_ISO_8859_3 = 4;
public final static int FORMAT_ISO_8859_4 = 5;
public final static int FORMAT_ISO_8859_5 = 6;
public final static int FORMAT_ISO_8859_6 = 7;
public final static int FORMAT_ISO_8859_7 = 8;
public final static int FORMAT_ISO_8859_8 = 9;
public final static int FORMAT_ISO_8859_9 = 10;
public final static int FORMAT_ISO_8859_10 = 11;
public final static int FORMAT_ISO_8859_11 = 12;
public final static int FORMAT_ISO_8859_12 = 13;
public final static int FORMAT_ISO_8859_13 = 14;
public final static int FORMAT_ISO_8859_14 = 15;
public final static int FORMAT_ISO_8859_15 = 16;
public final static int FORMAT_ISO_8859_16 = 17;
public final static int FORMAT_WIN_1250 = 18;
public final static int FORMAT_WIN_1251 = 19;
public final static int FORMAT_WIN_1252 = 20;
public final static int FORMAT_WIN_1253 = 21;
public final static int FORMAT_WIN_1254 = 22;
public final static int FORMAT_WIN_1255 = 23;
public final static int FORMAT_WIN_1256 = 24;
public final static int FORMAT_WIN_1257 = 25;
public final static int FORMAT_WIN_1258 = 26;
public final static int FORMAT_UTF_16LE = 64;
public final static int FORMAT_UTF_16BE = 63;
// String style
public final static int STRING_RAW = 0;
public final static int STRING_REGULAR = 1;
public final static int STRING_NORMALIZED = 2;
// Masks for obtaining various fields from a 64-bit VTD token
// full token length: bits 32-51 (20 bits)
protected final static long MASK_TOKEN_FULL_LEN = 0x000fffff00000000L;
// prefix length of a name token: bits 43-51 (upper 9 bits of the length field)
protected final static long MASK_TOKEN_PRE_LEN = 0x000ff80000000000L;
// qualified-name length of a name token: bits 32-42 (lower 11 bits of the
// length field)
protected final static long MASK_TOKEN_QN_LEN = 0x000007ff00000000L;
// starting offset: bits 0-29 by default.
// NOTE(review): this field is static but NOT final; the public constructor
// reassigns it on every instantiation (31 bits when namespace awareness is
// off, 30 bits when it is on), so the value is shared by all instances and
// reflects whichever instance was constructed last.
protected static long MASK_TOKEN_OFFSET = 0x000000003fffffffL;
// token type: bits 60-63
protected final static long MASK_TOKEN_TYPE = 0xf000000000000000L;
// nesting depth: bits 52-59
protected final static long MASK_TOKEN_DEPTH = 0x0ff0000000000000L;
// tri-state variable for namespace lookup: bits 30-31
protected final static long MASK_TOKEN_NS_MARK = 0x00000000c0000000L;
// NOTE(review): not read anywhere in this class; presumably the number of
// levels served by the location caches (l1..l3) plus one - confirm
protected short maxLCDepthPlusOne = 4;
protected int rootIndex; // VTD index of the root element
protected int nestingLevel; // set by the constructor to (max depth + 1)
// main navigation tracker aka context object: context[0] holds the current
// depth (-1 means above the root element), context[d] for d >= 1 holds the
// VTD index of the element on the cursor path at depth d
protected int[] context;
protected boolean atTerminal; // this variable is to make vn compatible with
// xpath's data model
// location cache part: current positions in l1Buffer/l2Buffer/l3Buffer and
// the inclusive [lower, upper] index ranges that bound the current sibling
// group in l2Buffer and l3Buffer
protected int l2upper;
protected int l2lower;
protected int l3upper;
protected int l3lower;
protected int l2index;
protected int l3index;
protected int l1index;
// containers
protected FastLongBuffer vtdBuffer; // the VTD records
protected FastLongBuffer l1Buffer; // upper 32 bits: VTD index; lower 32 bits: first l2Buffer index or -1
protected FastLongBuffer l2Buffer; // upper 32 bits: VTD index; lower 32 bits: first l3Buffer index or -1
protected FastIntBuffer l3Buffer; // VTD indices of depth-3 elements
protected char[] XMLDoc; // the document content that token offsets refer to
// private int recentNS; // most recently visited NS node, experiment for
// now
// Hierarchical representation is an array of integers addressing elements
// tokens
protected ContextBuffer contextStack; // backing store for push()/pop()
protected ContextBuffer contextStack2;// this is reserved for XPath
protected int LN; // records the text or attribute token for XPath evaluation; returned by getCurrentIndex() while atTerminal is set
// the document encoding
protected int encoding;
// protected boolean writeOffsetAdjustment;
// for string to token comparison
// protected int currentOffset;
// protected int currentOffset2;
// whether the navigation is namespace enabled or not.
protected boolean ns;
// intermediate buffer for push and pop purposes (nestingLevel + 9 ints)
protected int[] stackTemp;
// starting offset of the document, as passed to the constructor
protected int docOffset;
// length of the document
protected int docLen;
protected int vtdSize; // vtd record count, captured from the VTD buffer at construction
// NOTE(review): the four fields below are reset to null / -1 by the
// constructor and not otherwise used in this class; presumably name-lookup
// caches - confirm against callers
protected String name;
protected int nameIndex;
protected String localName;
protected int localNameIndex;
protected FastIntBuffer fib;// used to store string values
// NOTE(review): set to true by the constructor and never read in this class
protected boolean shallowDepth;
// NOTE(review): never assigned or read in this class
protected String URIName;
/**
* No-argument constructor. It performs no initialization: every buffer and
* the context array are left null, so the instance is not usable for
* navigation until those fields are assigned (presumably by a subclass -
* verify against callers).
*/
protected VTDNavEx() {
}
/**
 * Initializes the VTD navigation object over an already parsed document.
 *
 * @param RootIndex
 *            VTD index of the root element; must be non-negative
 * @param enc
 *            encoding of the document (stored as-is, not validated here)
 * @param NS
 *            whether navigation is namespace aware
 * @param depth
 *            maximum nesting depth of the document; must be non-negative
 * @param x
 *            the XML document content
 * @param vtd
 *            buffer holding the VTD records
 * @param l1
 *            level-1 location cache
 * @param l2
 *            level-2 location cache
 * @param l3
 *            level-3 location cache
 * @param so
 *            starting offset of the document; must be non-negative
 * @param length
 *            length of the document; must be non-negative
 * @throws IllegalArgumentException
 *             if any buffer or the document is null, or if depth,
 *             RootIndex, so or length is negative
 */
protected VTDNavEx(final int RootIndex, final int enc, final boolean NS, final int depth, final char[] x,
        final FastLongBuffer vtd, final FastLongBuffer l1, final FastLongBuffer l2, final FastIntBuffer l3,
        final int so, final int length) {
    // validate the arguments; the encoding is deliberately not range-checked
    if (vtd == null || l1 == null || l2 == null || l3 == null || x == null) {
        throw new IllegalArgumentException("vtd, l1, l2, l3 and x must all be non-null");
    }
    if (depth < 0 || RootIndex < 0 || so < 0 || length < 0) {
        throw new IllegalArgumentException("depth, RootIndex, so and length must be non-negative (depth="
                + depth + ", RootIndex=" + RootIndex + ", so=" + so + ", length=" + length + ")");
    }

    // containers
    l1Buffer = l1;
    l2Buffer = l2;
    l3Buffer = l3;
    vtdBuffer = vtd;
    XMLDoc = x;

    encoding = enc;
    rootIndex = RootIndex;
    nestingLevel = depth + 1;
    ns = NS; // namespace aware or not

    // With namespace awareness on, bits 30-31 of a record carry the
    // namespace mark, leaving 30 bits for the offset; with it off, the
    // offset gets 31 bits, which allows the document size to reach 2GB.
    // NOTE(review): MASK_TOKEN_OFFSET is a static field, so this assignment
    // affects every instance of the class, not only the one being built.
    MASK_TOKEN_OFFSET = ns ? 0x000000003fffffffL : 0x000000007fffffffL;

    atTerminal = false; // only changes value during XPath evaluation

    // context[0] is the current depth (0: the cursor starts at the root);
    // the remaining slots have no element assigned yet
    context = new int[nestingLevel];
    context[0] = 0;
    for (int level = 1; level < nestingLevel; level++) {
        context[level] = -1;
    }

    // one saved state is nestingLevel context slots plus the nine scalars
    // written by push()
    contextStack = new ContextBuffer(10, nestingLevel + 9);
    contextStack2 = new ContextBuffer(10, nestingLevel + 9);
    stackTemp = new int[nestingLevel + 9];

    // initial state of the location cache variables
    l1index = l2index = l3index = -1;
    l2lower = l3lower = -1;
    l2upper = l3upper = -1;

    docOffset = so;
    docLen = length;
    vtdSize = vtd.size;

    name = null;
    nameIndex = -1;
    localName = null;
    localNameIndex = -1;
    fib = new FastIntBuffer(5); // page size is 32 ints
    shallowDepth = true;
}
/**
 * Returns the depth of the cursor, which is kept in the first slot of the
 * navigation context. The value is 0 at the root element and -1 after
 * moving to the parent of the root element.
 *
 * @return the current depth
 */
public final int getCurrentDepth() {
    final int cursorDepth = this.context[0];
    return cursorDepth;
}
/**
 * Returns the VTD index the cursor currently refers to: the recorded
 * terminal token (LN) while the cursor is at a terminal, otherwise the
 * index of the current element.
 *
 * @return the VTD index at the cursor
 */
public final int getCurrentIndex() {
    return atTerminal ? LN : getCurrentIndex2();
}
// Element-only variant of getCurrentIndex(): ignores atTerminal/LN.
// This one is used in iterAttr() in autoPilot.
protected final int getCurrentIndex2() {
    final int depth = context[0];
    if (depth == -1) {
        // above the root element
        return 0;
    }
    if (depth == 0) {
        return rootIndex;
    }
    return context[depth];
}
/**
 * Returns the encoding value this navigator was constructed with.
 *
 * @return the document encoding
 */
public final int getEncoding() {
    return this.encoding;
}
/**
 * Returns the nesting level of the XML document (&gt; 0), which is the
 * maximum depth plus one.
 *
 * @return the nesting level
 */
public final int getNestingLevel() {
    return this.nestingLevel;
}
/**
 * Returns the VTD index of the root element.
 *
 * @return the root element's index
 */
public final int getRootIndex() {
    return this.rootIndex;
}
/**
 * Returns the total number of VTD tokens of the document, as captured when
 * this navigator was constructed.
 *
 * @return the VTD record count
 */
public final int getTokenCount() {
    return this.vtdSize;
}
/**
 * Returns the depth stored in the VTD record at the given index.
 *
 * @param index
 *            index of the VTD record
 * @return the depth field of the record, or -1 when that field holds 255
 */
public final int getTokenDepth(final int index) {
    final long depthBits = vtdBuffer.longAt(index) & MASK_TOKEN_DEPTH;
    final int depth = (int) (depthBits >> 52);
    // 255 (all depth bits set) is reported as -1
    return (depth == 255) ? -1 : depth;
}
/**
 * Returns the length of the token at the given index.
 * <ul>
 * <li>Starting tags, attribute names and namespace attributes: the
 * qualified-name length when namespace awareness is off; otherwise an int
 * with the prefix length in the upper 16 bits and the qualified-name
 * length in the lower 16 bits.</li>
 * <li>Character data, CDATA and comments: the summed length of the record
 * at the index and of every directly following record that has the same
 * depth and type and starts exactly where the previous one ends.</li>
 * <li>Any other token type: the full length field of the record.</li>
 * </ul>
 *
 * @param index
 *            index of the VTD record
 * @return the token length as described above
 */
public int getTokenLength(int index) {
    final int type = getTokenType(index);
    switch (type) {
    case TOKEN_ATTR_NAME:
    case TOKEN_ATTR_NS:
    case TOKEN_STARTING_TAG: {
        final long record = vtdBuffer.longAt(index);
        final int qnLen = (int) ((record & MASK_TOKEN_QN_LEN) >> 32);
        if (!ns) {
            return qnLen;
        }
        // the prefix length sits in bits 11-19 after the shift by 32;
        // five more bits move it into the upper 16 bits of the result
        final int preLen = (int) ((record & MASK_TOKEN_PRE_LEN) >> 32);
        return qnLen | (preLen << 5);
    }
    case TOKEN_CHARACTER_DATA:
    case TOKEN_CDATA_VAL:
    case TOKEN_COMMENT: { // make sure this is the total length
        final int depth = getTokenDepth(index);
        int total = 0;
        int expectedOffset;
        do {
            final int segLen = (int) ((vtdBuffer.longAt(index) & MASK_TOKEN_FULL_LEN) >> 32);
            total += segLen;
            // a continuation record must begin right where this one ends
            expectedOffset = getTokenOffset(index) + segLen;
            index++;
        } while (index < vtdSize
                && depth == getTokenDepth(index)
                && type == getTokenType(index)
                && expectedOffset == getTokenOffset(index));
        return total;
    }
    default:
        return (int) ((vtdBuffer.longAt(index) & MASK_TOKEN_FULL_LEN) >> 32);
    }
}
/**
 * Returns the starting offset stored in the VTD record at the given index,
 * extracted with the current value of MASK_TOKEN_OFFSET.
 *
 * @param index
 *            index of the VTD record
 * @return the starting offset of the token
 */
public final int getTokenOffset(final int index) {
    final long record = vtdBuffer.longAt(index);
    return (int) (record & MASK_TOKEN_OFFSET);
}
/**
 * Returns the token type stored in the top four bits of the VTD record at
 * the given index.
 *
 * @param index
 *            index of the VTD record
 * @return the token type, in the range 0-15
 */
public final int getTokenType(final int index) {
    final long typeBits = (vtdBuffer.longAt(index) & MASK_TOKEN_TYPE) >> 60;
    // the shift is arithmetic, so strip the sign extension afterwards
    return ((int) typeBits) & 0xf;
}
/**
 * Restores the most recently saved navigation state from the context
 * stack: the context array, the location cache indices and bounds, the
 * terminal flag and LN.
 *
 * @return true if a saved state was loaded and restored; false if the
 *         context stack had nothing to load, in which case the navigator
 *         is left unchanged
 */
public boolean pop() {
    if (!contextStack.load(stackTemp)) {
        return false;
    }
    System.arraycopy(stackTemp, 0, context, 0, nestingLevel);
    // the nine scalars follow the context slots, in the order push() wrote them
    int slot = nestingLevel;
    l1index = stackTemp[slot++];
    l2index = stackTemp[slot++];
    l3index = stackTemp[slot++];
    l2lower = stackTemp[slot++];
    l2upper = stackTemp[slot++];
    l3lower = stackTemp[slot++];
    l3upper = stackTemp[slot++];
    atTerminal = (stackTemp[slot++] == 1);
    LN = stackTemp[slot];
    return true;
}
/**
 * Saves the current navigation state onto the context stack: the context
 * array followed by the location cache indices and bounds, the terminal
 * flag (1 or 0) and LN.
 */
public void push() {
    System.arraycopy(context, 0, stackTemp, 0, nestingLevel);
    // nine scalars are appended after the context slots; pop() reads them
    // back in the same order
    int slot = nestingLevel;
    stackTemp[slot++] = l1index;
    stackTemp[slot++] = l2index;
    stackTemp[slot++] = l3index;
    stackTemp[slot++] = l2lower;
    stackTemp[slot++] = l2upper;
    stackTemp[slot++] = l3lower;
    stackTemp[slot++] = l3upper;
    stackTemp[slot++] = atTerminal ? 1 : 0;
    stackTemp[slot] = LN;
    contextStack.store(stackTemp);
}
/**
* A generic navigation method. Moves the cursor to the element selected by
* the direction constant. If there is no such element, the context array is
* left unchanged and false is returned. Legal direction constants are
*
* <pre>
* ROOT         0
* PARENT       1
* FIRST_CHILD  2
* LAST_CHILD   3
* NEXT_SIBLING 4
* PREV_SIBLING 5
* </pre>
*
* Depths 1 to 3 are resolved through the location caches (l1Buffer,
* l2Buffer, l3Buffer); deeper levels are resolved by scanning the VTD
* records directly. While the cursor is at a terminal (atTerminal is set),
* only ROOT and PARENT succeed; PARENT then merely clears the flag.
*
* @return boolean true if the cursor was moved (or, for ROOT, reset)
* @param direction
* int one of the direction constants listed above
* @exception com.ximpleware.NavException
* When direction value is illegal.
*/
public boolean toElement(final int direction) throws NavException {
int size;
switch (direction) {
case ROOT: // to document element!
if (context[0] != 0) {
/*
* for (int i = 1; i <= context[0]; i++) { context[i] =
* 0xffffffff; }
*/
context[0] = 0;
}
atTerminal = false;
// the location cache positions are no longer meaningful at the root
l1index = l2index = l3index = -1;
return true;
case PARENT:
if (atTerminal == true) {
// leaving a terminal: the enclosing element is already the context
atTerminal = false;
return true;
}
if (context[0] > 0) {
// context[context[0]] = context[context[0] + 1] =
// 0xffffffff;
// clear the slot of the level being left, then step up one level
context[context[0]] = -1;
context[0]--;
return true;
} else if (context[0] == 0) {
context[0] = -1; // to be compatible with XPath Data model
return true;
} else {
// already above the root element
return false;
}
case FIRST_CHILD:
case LAST_CHILD:
if (atTerminal) {
return false;
}
switch (context[0]) {
case -1:
// from above the root, the only child is the root element
context[0] = 0;
return true;
case 0:
// children of the root are the entries of l1Buffer
if (l1Buffer.size > 0) {
context[0] = 1;
l1index = (direction == FIRST_CHILD) ? 0 : (l1Buffer.size - 1);
context[1] = l1Buffer.upper32At(l1index);
// (int) (vtdToken >> 32);
return true;
} else {
return false;
}
case 1:
// lower 32 bits of the l1 entry: index of the first child in
// l2Buffer, or -1 when there are no children
// (note: l2lower is overwritten even when false is returned)
l2lower = l1Buffer.lower32At(l1index);
if (l2lower == -1) {
return false;
}
context[0] = 2;
// the children end just before the first l2 index recorded by a
// following l1 entry, or at the end of l2Buffer if there is none
l2upper = l2Buffer.size - 1;
size = l1Buffer.size;
for (int i = l1index + 1; i < size; i++) {
final int temp = l1Buffer.lower32At(i);
if (temp != 0xffffffff) {
l2upper = temp - 1;
break;
}
}
// System.out.println(" l2 upper: " + l2upper + " l2
// lower : " + l2lower);
l2index = (direction == FIRST_CHILD) ? l2lower : l2upper;
context[2] = l2Buffer.upper32At(l2index);
return true;
case 2:
// same scheme one level down: l2 entry -> range in l3Buffer
// (note: l3lower is overwritten even when false is returned)
l3lower = l2Buffer.lower32At(l2index);
if (l3lower == -1) {
return false;
}
context[0] = 3;
l3upper = l3Buffer.size - 1;
size = l2Buffer.size;
for (int i = l2index + 1; i < size; i++) {
final int temp = l2Buffer.lower32At(i);
if (temp != 0xffffffff) {
l3upper = temp - 1;
break;
}
}
// System.out.println(" l3 upper : " + l3upper + " l3
// lower : " + l3lower);
l3index = (direction == FIRST_CHILD) ? l3lower : l3upper;
context[3] = l3Buffer.intAt(l3index);
return true;
default:
// depth >= 3: no location cache, scan the VTD records forward
if (direction == FIRST_CHILD) {
size = vtdBuffer.size;
int index = context[context[0]] + 1;
while (index < size) {
final long temp = vtdBuffer.longAt(index);
final int token_type = (int) ((MASK_TOKEN_TYPE & temp) >> 60) & 0xf;
if (token_type == TOKEN_STARTING_TAG) {
final int depth = (int) ((MASK_TOKEN_DEPTH & temp) >> 52);
if (depth <= context[0]) {
// reached an element outside the current subtree
return false;
} else if (depth == (context[0] + 1)) {
// first starting tag exactly one level deeper
context[0] += 1;
context[context[0]] = index;
return true;
}
}
index++;
} // ran off the end of the VTD records without finding a child
return false;
} else {
// LAST_CHILD: remember the latest direct child seen before the
// scan leaves the current subtree
int index = context[context[0]] + 1;
int last_index = -1;
size = vtdBuffer.size;
while (index < size) {
final long temp = vtdBuffer.longAt(index);
final int depth = (int) ((MASK_TOKEN_DEPTH & temp) >> 52);
final int token_type = (int) ((MASK_TOKEN_TYPE & temp) >> 60) & 0xf;
if (token_type == TOKEN_STARTING_TAG) {
if (depth <= context[0]) {
break;
} else if (depth == (context[0] + 1)) {
last_index = index;
}
}
index++;
}
if (last_index == -1) {
return false;
} else {
context[0] += 1;
context[context[0]] = last_index;
return true;
}
}
}
case NEXT_SIBLING:
case PREV_SIBLING:
if (atTerminal) {
return false;
}
switch (context[0]) {
case -1:
case 0:
// neither the position above the root nor the root has siblings
return false;
case 1:
// depth-1 siblings are adjacent entries of l1Buffer
if (direction == NEXT_SIBLING) {
if (l1index + 1 >= l1Buffer.size) {
return false;
}
l1index++; // global incremental
} else {
if (l1index - 1 < 0) {
return false;
}
l1index--; // global decrement
}
context[1] = l1Buffer.upper32At(l1index);
return true;
case 2:
// depth-2 siblings lie within [l2lower, l2upper] of l2Buffer
if (direction == NEXT_SIBLING) {
if (l2index + 1 > l2upper) {
return false;
}
l2index++;
} else {
if (l2index - 1 < l2lower) {
return false;
}
l2index--;
}
context[2] = l2Buffer.upper32At(l2index);
return true;
case 3:
// depth-3 siblings lie within [l3lower, l3upper] of l3Buffer
if (direction == NEXT_SIBLING) {
if (l3index + 1 > l3upper) {
return false;
}
l3index++;
} else {
if (l3index - 1 < l3lower) {
return false;
}
l3index--;
}
context[3] = l3Buffer.intAt(l3index);
return true;
default:
// depth > 3: scan the VTD records for a starting tag at the same depth
// int index = context[context[0]] + 1;
if (direction == NEXT_SIBLING) {
int index = context[context[0]] + 1;
size = vtdBuffer.size;
while (index < size) {
final long temp = vtdBuffer.longAt(index);
final int token_type = (int) ((MASK_TOKEN_TYPE & temp) >> 60) & 0xf;
if (token_type == TOKEN_STARTING_TAG) {
final int depth = (int) ((MASK_TOKEN_DEPTH & temp) >> 52);
if (depth < context[0]) {
// a shallower element means the parent's subtree has ended
return false;
} else if (depth == (context[0])) {
context[context[0]] = index;
return true;
}
}
index++;
}
return false;
} else {
// PREV_SIBLING: scan backward, stopping at the parent's record
int index = context[context[0]] - 1;
while (index > context[context[0] - 1]) {
// scan backward
final long temp = vtdBuffer.longAt(index);
final int token_type = (int) ((MASK_TOKEN_TYPE & temp) >> 60) & 0xf;
if (token_type == TOKEN_STARTING_TAG) {
final int depth = (int) ((MASK_TOKEN_DEPTH & temp) >> 52);
/*
* if (depth < context[0]) { return false; }
* else
*/
if (depth == (context[0])) {
context[context[0]] = index;
return true;
}
}
index--;
} // reached the parent's record without finding an earlier sibling
return false;
}
}
default:
throw new NavException("illegal navigation options");
}
}
/**
 * Locates the raw text of the token at the given index without copying it.
 * The token's starting offset is written to range[0] and its length to
 * range[1]; for starting tags, attribute names and namespace attributes
 * only the lower 16 bits of the token length are kept.
 *
 * @param index
 *            index of the VTD record
 * @param range
 *            output array receiving the offset (slot 0) and the length
 *            (slot 1); must hold at least two ints
 * @return the document's backing char array (not a copy)
 * @throws NavException
 *             declared for callers; not thrown by the code in this method
 */
public final char[] toRawString(final int index, final int[] range) throws NavException {
    final int type = getTokenType(index);
    range[0] = getTokenOffset(index);
    final int length = getTokenLength(index);
    switch (type) {
    case TOKEN_STARTING_TAG:
    case TOKEN_ATTR_NAME:
    case TOKEN_ATTR_NS:
        // drop the prefix length carried in the upper 16 bits
        range[1] = length & 0xffff;
        break;
    default:
        range[1] = length;
        break;
    }
    return XMLDoc;
}
}