package lux.index;
import static javax.xml.stream.XMLStreamConstants.*;
import java.util.HashMap;
import java.util.Map;
import javax.xml.stream.XMLStreamReader;
import lux.xml.StAXHandler;
import org.apache.commons.lang.StringUtils;
/**
* Accumulate counts of QNames and QName paths.
*
* Each path is a string of path components, separated by single space characters.
* The first path component is always <code>{}</code>. The others are element
* QNames of the form <code>local-name{namespace}</code>, where <code>{namespace}</code> is omitted when the namespace
* is empty. The sequence of element QNames may be followed by a single attribute QName of the form:
* <code>@local-name{namespace}</code>. Namespaces are encoded using URL-encoding so they will not
* contain unexpected characters (such as space and {}).
*
* TODO: a bunch of optimizations are possible here; there is a duplication of work in the subclasses,
* unneeded String creation, etc. Come back and fix that once we've settled on a definite implementation!
*/
public class XmlPathMapper implements StAXHandler {
protected MutableString currentPath = new MutableString(2048);
private MutableString currentQName = new MutableString();
private HashMap<CharSequence, Integer> eltQNameCounts = new HashMap<CharSequence, Integer>();
private HashMap<CharSequence, Integer> attQNameCounts = new HashMap<CharSequence, Integer>();
private HashMap<CharSequence, Integer> pathCounts = new HashMap<CharSequence, Integer>();
private HashMap<CharSequence, CharSequence> names = new HashMap<CharSequence, CharSequence>();
public Map<CharSequence,Integer> getEltQNameCounts () {
return eltQNameCounts;
}
public Map<CharSequence,Integer> getAttQNameCounts () {
return attQNameCounts;
}
public Map<CharSequence,Integer> getPathCounts () {
return pathCounts;
}
public int getEltQNameCount (String s) {
Integer i = eltQNameCounts.get(new MutableString (s));
if (i == null) {
return 0;
}
return i;
}
public int getAttQNameCount (String s) {
Integer i = attQNameCounts.get(new MutableString (s));
if (i == null) {
return 0;
}
return i;
}
public int getPathCount (String s) {
Integer i = pathCounts.get(new MutableString (s));
if (i == null) {
return 0;
}
return i;
}
public CharSequence getCurrentQName () {
return names.get(currentQName);
}
private boolean namespaceAware = true;
/*
* If false, the default, QNames are represented using prefix:localName without regard to
* any prefix to namespace mapping. Otherwise, XML namespaces are handled in the usual way.
*/
public boolean isNamespaceAware() {
return namespaceAware;
}
public void setNamespaceAware(boolean namespaceAware) {
this.namespaceAware = namespaceAware;
}
@Override
public void handleEvent(XMLStreamReader reader, int eventType) {
if (eventType == START_ELEMENT) {
getEventQName(currentQName, reader);
// qnameStack.add(qname);
currentPath.append (' ');
currentPath.append(currentQName);
incrCount(eltQNameCounts, currentQName);
incrCount(pathCounts, currentPath);
int len = currentPath.length();
for (int i = 0; i < reader.getAttributeCount(); i++) {
getEventAttQName (currentQName, reader, i);
incrCount (attQNameCounts, currentQName);
currentPath.append(" @").append(currentQName);
incrCount (pathCounts, currentPath);
currentPath.setLength(len);
}
}
else if (eventType == END_ELEMENT) {
getEventQName(currentQName, reader);
// snip off the last path step, including its '/' separator char
currentPath.setLength(currentPath.length() - currentQName.length() - 1);
}
else if (eventType == START_DOCUMENT) {
currentPath.append("{}");
}
}
protected void getEventAttQName(MutableString buf, XMLStreamReader reader, int i) {
encodeQName (buf, reader.getAttributeLocalName(i), reader.getAttributePrefix(i), reader.getAttributeNamespace(i));
}
private void getEventQName(MutableString buf, XMLStreamReader reader) {
encodeQName (buf, reader.getLocalName(), reader.getPrefix(), reader.getNamespaceURI());
}
private void encodeQName (MutableString buf, String localName, String prefix, String namespace) {
buf.setLength(0);
if (namespaceAware) {
buf.append(localName);
if (!StringUtils.isEmpty(namespace)) {
buf.append ('{').append(namespace).append("}");
}
}
else if (! prefix.isEmpty()) {
buf.append(prefix).append(':').append(localName);
}
else {
buf.append (localName);
}
}
private void incrCount(HashMap<CharSequence, Integer> map, MutableString o) {
if (map.containsKey(o))
map.put(o, map.get(o) + 1);
else {
MutableString copy = new MutableString(o);
map.put(copy, 1);
names.put(copy, copy);
}
}
@Override
public void reset() {
eltQNameCounts.clear();
attQNameCounts.clear();
pathCounts.clear();
names.clear();
currentPath.setLength(0);
currentQName.setLength(0);
}
}
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */