/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.xml.ast;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import net.sourceforge.pmd.lang.ast.SourceCodePositioner;
/**
*
*/
class DOMLineNumbers {
private final Document document;
private String xmlString;
private SourceCodePositioner sourceCodePositioner;
DOMLineNumbers(Document document, String xmlString) {
this.document = document;
this.xmlString = xmlString;
this.sourceCodePositioner = new SourceCodePositioner(xmlString);
}
public void determine() {
determineLocation(document, 0);
}
private int determineLocation(Node n, int index) {
int nextIndex = index;
int nodeLength = 0;
int textLength = 0;
if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
nextIndex = xmlString.indexOf("<!DOCTYPE", nextIndex);
nodeLength = "<!DOCTYPE".length();
} else if (n.getNodeType() == Node.COMMENT_NODE) {
nextIndex = xmlString.indexOf("<!--", nextIndex);
} else if (n.getNodeType() == Node.ELEMENT_NODE) {
nextIndex = xmlString.indexOf("<" + n.getNodeName(), nextIndex);
nodeLength = xmlString.indexOf(">", nextIndex) - nextIndex + 1;
} else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
nextIndex = xmlString.indexOf("<![CDATA[", nextIndex);
} else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
ProcessingInstruction pi = (ProcessingInstruction) n;
nextIndex = xmlString.indexOf("<?" + pi.getTarget(), nextIndex);
} else if (n.getNodeType() == Node.TEXT_NODE) {
String te = unexpandEntities(n, n.getNodeValue(), true);
int newIndex = xmlString.indexOf(te, nextIndex);
if (newIndex == -1) {
// try again without escaping the quotes
te = unexpandEntities(n, n.getNodeValue(), false);
newIndex = xmlString.indexOf(te, nextIndex);
}
if (newIndex > 0) {
textLength = te.length();
nextIndex = newIndex;
}
} else if (n.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
nextIndex = xmlString.indexOf("&" + n.getNodeName() + ";", nextIndex);
}
setBeginLocation(n, nextIndex);
if (n.hasChildNodes()) {
// next nodes begin after the current start tag
nextIndex += nodeLength;
NodeList childs = n.getChildNodes();
for (int i = 0; i < childs.getLength(); i++) {
nextIndex = determineLocation(childs.item(i), nextIndex);
}
}
if (n.getNodeType() == Node.ELEMENT_NODE) {
nextIndex += 2 + n.getNodeName().length() + 1; // </nodename>
} else if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
Node nextSibling = n.getNextSibling();
if (nextSibling.getNodeType() == Node.ELEMENT_NODE) {
nextIndex = xmlString.indexOf("<" + nextSibling.getNodeName(), nextIndex) - 1;
} else if (nextSibling.getNodeType() == Node.COMMENT_NODE) {
nextIndex = xmlString.indexOf("<!--", nextIndex);
} else {
nextIndex = xmlString.indexOf(">", nextIndex);
}
} else if (n.getNodeType() == Node.COMMENT_NODE) {
nextIndex += 4 + 3; // <!-- and -->
nextIndex += n.getNodeValue().length();
} else if (n.getNodeType() == Node.TEXT_NODE) {
nextIndex += textLength;
} else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
nextIndex += "<![CDATA[".length() + n.getNodeValue().length() + "]]>".length();
} else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
ProcessingInstruction pi = (ProcessingInstruction) n;
nextIndex += "<?".length() + pi.getTarget().length() + "?>".length() + pi.getData().length();
}
setEndLocation(n, nextIndex - 1);
return nextIndex;
}
private String unexpandEntities(Node n, String te, boolean withQuotes) {
String result = te;
DocumentType doctype = n.getOwnerDocument().getDoctype();
// implicit entities
result = result.replaceAll(Matcher.quoteReplacement("&"), "&");
result = result.replaceAll(Matcher.quoteReplacement("<"), "<");
result = result.replaceAll(Matcher.quoteReplacement(">"), ">");
if (withQuotes) {
result = result.replaceAll(Matcher.quoteReplacement("\""), """);
result = result.replaceAll(Matcher.quoteReplacement("'"), "'");
}
if (doctype != null) {
NamedNodeMap entities = doctype.getEntities();
String internalSubset = doctype.getInternalSubset();
if (internalSubset == null) {
internalSubset = "";
}
for (int i = 0; i < entities.getLength(); i++) {
Node item = entities.item(i);
String entityName = item.getNodeName();
Node firstChild = item.getFirstChild();
if (firstChild != null) {
result = result.replaceAll(Matcher.quoteReplacement(firstChild.getNodeValue()),
"&" + entityName + ";");
} else {
Matcher m = Pattern
.compile(Matcher.quoteReplacement("<!ENTITY " + entityName + " ") + "[']([^']*)[']>")
.matcher(internalSubset);
if (m.find()) {
result = result.replaceAll(Matcher.quoteReplacement(m.group(1)), "&" + entityName + ";");
}
}
}
}
return result;
}
private void setBeginLocation(Node n, int index) {
if (n != null) {
int line = sourceCodePositioner.lineNumberFromOffset(index);
int column = sourceCodePositioner.columnFromOffset(line, index);
n.setUserData(XmlNode.BEGIN_LINE, line, null);
n.setUserData(XmlNode.BEGIN_COLUMN, column, null);
}
}
private void setEndLocation(Node n, int index) {
if (n != null) {
int line = sourceCodePositioner.lineNumberFromOffset(index);
int column = sourceCodePositioner.columnFromOffset(line, index);
n.setUserData(XmlNode.END_LINE, line, null);
n.setUserData(XmlNode.END_COLUMN, column, null);
}
}
}