package lux.index; import static javax.xml.stream.XMLStreamConstants.CDATA; import static javax.xml.stream.XMLStreamConstants.CHARACTERS; import static javax.xml.stream.XMLStreamConstants.END_ELEMENT; import static javax.xml.stream.XMLStreamConstants.ENTITY_REFERENCE; import static javax.xml.stream.XMLStreamConstants.SPACE; import static javax.xml.stream.XMLStreamConstants.START_DOCUMENT; import static javax.xml.stream.XMLStreamConstants.START_ELEMENT; import java.util.ArrayList; import java.util.Arrays; import javax.xml.stream.XMLStreamReader; /** * * Each path-value is a string of path components, as defined in {@link XmlPathMapper}, followed by * a value token. Element and attribute string values are represented by 8-character hashes of their first N characters. * If the value fits in 8 characters, it is padded with nulls (unicode 0). The hash algorithm is analogous to that used by * java.lang.String, but arithmetic is done with shorts rather than ints, and we keep more of them so that * the likelihood of collisions is very small. * */ public class XPathValueMapper extends XmlPathMapper { public static final int HASH_SIZE = 8; int depth; int[] valueOffsets = new int[16]; char[][] values = new char[16][HASH_SIZE]; char[] attValue = new char[HASH_SIZE]; private MutableString charBuffer = new MutableString(); private ArrayList<char[]> pathValues = new ArrayList<char[]>(); public ArrayList<char[]> getPathValues() { return pathValues; } @Override public void reset () { super.reset(); pathValues.clear(); } @Override public void handleEvent(XMLStreamReader reader, int eventType) { switch (eventType) { case START_DOCUMENT: depth = -1; super.handleEvent(reader, eventType); break; case START_ELEMENT: super.handleEvent(reader, eventType); ++depth; if (depth >= values.length) { growValues(); } valueOffsets[depth] = 0; Arrays.fill(values[depth], '\u0000'); { // append to the currentPath buffer int l = currentPath.length(); for (int i = 0; i < reader.getAttributeCount(); i++) { getEventAttQName (charBuffer, reader, i); currentPath.append(" @").append(charBuffer).append(' '); hashString (reader.getAttributeValue(i).toCharArray(), attValue); currentPath.append(attValue); addValue(); // rewind currentPath currentPath.setLength(l); } } break; case END_ELEMENT: { int l = currentPath.length(); currentPath.append(' '); currentPath.append(values[depth]); addValue(); --depth; currentPath.setLength(l); super.handleEvent(reader, eventType); } break; case CDATA: case SPACE: case CHARACTERS: hashText (reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength()); super.handleEvent(reader, eventType); break; case ENTITY_REFERENCE: hashText (reader.getText()); default: super.handleEvent(reader, eventType); break; } } private void addValue() { char[] value = new char[currentPath.length()]; currentPath.toString().getChars(0, currentPath.length(), value, 0); pathValues.add(value); } public static char[] hashString(char[] value, char[] buf) { Arrays.fill(buf, '\u0000'); for (int i = 0; i < value.length && i < HASH_SIZE; i++) { buf[i % HASH_SIZE] = value[i]; } for (int i = HASH_SIZE; i < value.length; i++) { buf[i % HASH_SIZE] = (char)(buf[i % HASH_SIZE] * 15 + value[i]); } return buf; } private void hashText(final char[] textCharacters, final int textStart, final int textLength) { for (int j = 0; j <= depth; j++) { int k = valueOffsets[j]; for (int i = textStart; i < textLength + textStart; i++) { values[j][k] = (char)(values[j][k] * 15 + textCharacters[i]); if (k == HASH_SIZE - 1) { k = 0; } else { k = (k + 1); } } valueOffsets[j] = k; } } private void hashText(final String text) { hashText (text.toCharArray(), 0, text.length()); } private void growValues () { values = Arrays.copyOf(values, values.length + 16); for (int i = 0; i < 16; i++) { values[i + values.length] = new char[HASH_SIZE]; } valueOffsets = Arrays.copyOf(valueOffsets, valueOffsets.length + 16); } } /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */