/**
* Copyright (C) 2010 Orbeon, Inc.
*
* This program is free software; you can redistribute it and/or modify it under the terms of the
* GNU Lesser General Public License as published by the Free Software Foundation; either version
* 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* The full text of the license is available at http://www.gnu.org/copyleft/lesser.html
*/
package org.orbeon.oxf.xml;
import org.orbeon.oxf.common.OXFException;
import org.orbeon.oxf.util.SecureUtils;
import org.w3c.dom.Node;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import javax.xml.transform.Source;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.security.MessageDigest;
/**
* This digester is based on some existing public document (not sure which). There are some
* changes though. It is not clear anymore why we used that document as a base, as this is
* purely internal.
*
* The bottom line is that the digest should change whenever the infoset of the source XML
* document changes.
*/
public class DigestContentHandler implements XMLReceiver {
private static final int ELEMENT_CODE = Node.ELEMENT_NODE;
private static final int ATTRIBUTE_CODE = Node.ATTRIBUTE_NODE;
private static final int TEXT_CODE = Node.TEXT_NODE;
private static final int PROCESSING_INSTRUCTION_CODE = Node.PROCESSING_INSTRUCTION_NODE;
private static final int NAMESPACE_CODE = 0XAA01; // some code that is none of the above
private static final int COMMENT_CODE = 0XAA02; // some code that is none of the above
/**
* 4/6/2005 d : Previously we were using String.getBytes( "UnicodeBigUnmarked" ). ( Believe
* the code was copied from RFC 2803 ). This first tries to get a java.nio.Charset with
* the name if this fails it uses a sun.io.CharToByteConverter.
* Now in the case of "UnicodeBigUnmarked" there is no such Charset so a
* CharToByteConverter, utf-16be, is used. Unfortunately this negative lookup is expensive.
* ( Costing us a full second in the 50thread/512MB test. )
* The solution, of course, is just to use get the appropriate Charset and hold on to it.
*/
private static final Charset utf16BECharset = Charset.forName("UTF-16BE");
/**
* Encoder has state and therefore cannot be shared across threads.
*/
private final CharsetEncoder charEncoder = utf16BECharset.newEncoder();
private java.nio.CharBuffer charBuff = java.nio.CharBuffer.allocate(64);
private java.nio.ByteBuffer byteBuff = java.nio.ByteBuffer.allocate(128);
private final MessageDigest digest = SecureUtils.defaultMessageDigest();
/**
* Compute a digest for a SAX source.
*/
public static byte[] getDigest(Source source) {
final DigestContentHandler digester = new DigestContentHandler();
TransformerUtils.sourceToSAX(source, digester);
return digester.getResult();
}
private void ensureCharBuffRemaining(final int size) {
if (charBuff.remaining() < size) {
final int cpcty = (charBuff.capacity() + size) * 2;
final java.nio.CharBuffer newChBuf = java.nio.CharBuffer.allocate(cpcty);
newChBuf.put(charBuff);
charBuff = newChBuf;
}
}
private void updateWithCharBuf() {
final int reqSize = (int) charEncoder.maxBytesPerChar() * charBuff.position();
if (byteBuff.capacity() < reqSize) {
byteBuff = java.nio.ByteBuffer.allocate(2 * reqSize);
}
// Make ready for read
charBuff.flip();
final CoderResult cr = charEncoder.encode(charBuff, byteBuff, true);
try {
if (cr.isError()) cr.throwException();
// Make ready for read
byteBuff.flip();
final byte[] byts = byteBuff.array();
final int len = byteBuff.remaining();
final int strt = byteBuff.arrayOffset();
digest.update(byts, strt, len);
} catch (final CharacterCodingException e) {
throw new OXFException(e);
} catch (java.nio.BufferOverflowException e) {
throw new OXFException(e);
} catch (java.nio.BufferUnderflowException e) {
throw new OXFException(e);
} finally {
// Make ready for write
charBuff.clear();
byteBuff.clear();
}
}
private void updateWith(final String s) {
addToCharBuff(s);
updateWithCharBuf();
}
private void updateWith(final char[] chArr, final int ofst, final int len) {
ensureCharBuffRemaining(len);
charBuff.put(chArr, ofst, len);
updateWithCharBuf();
}
private void addToCharBuff(final char c) {
ensureCharBuffRemaining(1);
charBuff.put(c);
}
private void addToCharBuff(final String s) {
final int size = s.length();
ensureCharBuffRemaining(size);
charBuff.put(s);
}
public byte[] getResult() {
return digest.digest();
}
public void setDocumentLocator(Locator locator) {
}
public void startDocument() throws SAXException {
charBuff.clear();
byteBuff.clear();
charEncoder.reset();
}
public void endDocument() throws SAXException {
}
public void startPrefixMapping(String prefix, String uri) throws SAXException {
digest.update((byte) ((NAMESPACE_CODE >> 24) & 0xff));
digest.update((byte) ((NAMESPACE_CODE >> 16) & 0xff));
digest.update((byte) ((NAMESPACE_CODE >> 8) & 0xff));
digest.update((byte) (NAMESPACE_CODE & 0xff));
updateWith(prefix);
digest.update((byte) 0);
digest.update((byte) 0);
updateWith(uri);
digest.update((byte) 0);
digest.update((byte) 0);
}
public void endPrefixMapping(String prefix)
throws SAXException {
}
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
digest.update((byte) ((ELEMENT_CODE >> 24) & 0xff));
digest.update((byte) ((ELEMENT_CODE >> 16) & 0xff));
digest.update((byte) ((ELEMENT_CODE >> 8) & 0xff));
digest.update((byte) (ELEMENT_CODE & 0xff));
addToCharBuff('{');
addToCharBuff(namespaceURI);
addToCharBuff('}');
addToCharBuff(localName);
updateWithCharBuf();
digest.update((byte) 0);
digest.update((byte) 0);
int attCount = atts.getLength();
digest.update((byte) ((attCount >> 24) & 0xff));
digest.update((byte) ((attCount >> 16) & 0xff));
digest.update((byte) ((attCount >> 8) & 0xff));
digest.update((byte) (attCount & 0xff));
for (int i = 0; i < attCount; i++) {
digest.update((byte) ((ATTRIBUTE_CODE >> 24) & 0xff));
digest.update((byte) ((ATTRIBUTE_CODE >> 16) & 0xff));
digest.update((byte) ((ATTRIBUTE_CODE >> 8) & 0xff));
digest.update((byte) (ATTRIBUTE_CODE & 0xff));
final String attURI = atts.getURI(i);
final String attNam = atts.getLocalName(i);
addToCharBuff('{');
addToCharBuff(attURI);
addToCharBuff('}');
addToCharBuff(attNam);
updateWithCharBuf();
digest.update((byte) 0);
digest.update((byte) 0);
final String val = atts.getValue(i);
updateWith(val);
}
}
public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
}
public void characters(char ch[], int start, int length) throws SAXException {
digest.update((byte) ((TEXT_CODE >> 24) & 0xff));
digest.update((byte) ((TEXT_CODE >> 16) & 0xff));
digest.update((byte) ((TEXT_CODE >> 8) & 0xff));
digest.update((byte) (TEXT_CODE & 0xff));
updateWith(ch, start, length);
digest.update((byte) 0);
digest.update((byte) 0);
}
public void ignorableWhitespace(char ch[], int start, int length)
throws SAXException {
}
public void processingInstruction(String target, String data) throws SAXException {
digest.update((byte) ((PROCESSING_INSTRUCTION_CODE >> 24) & 0xff));
digest.update((byte) ((PROCESSING_INSTRUCTION_CODE >> 16) & 0xff));
digest.update((byte) ((PROCESSING_INSTRUCTION_CODE >> 8) & 0xff));
digest.update((byte) (PROCESSING_INSTRUCTION_CODE & 0xff));
updateWith(target);
digest.update((byte) 0);
digest.update((byte) 0);
updateWith(data);
digest.update((byte) 0);
digest.update((byte) 0);
}
public void skippedEntity(String name) throws SAXException {
}
public void startDTD(String name, String publicId, String systemId) throws SAXException {
}
public void endDTD() throws SAXException {
}
public void startEntity(String name) throws SAXException {
}
public void endEntity(String name) throws SAXException {
}
public void startCDATA() throws SAXException {
}
public void endCDATA() throws SAXException {
}
public void comment(char[] ch, int start, int length) throws SAXException {
// We do consider comments significant for the purpose of digesting. But should this be an option?
digest.update((byte) ((COMMENT_CODE >> 24) & 0xff));
digest.update((byte) ((COMMENT_CODE >> 16) & 0xff));
digest.update((byte) ((COMMENT_CODE >> 8) & 0xff));
digest.update((byte) (COMMENT_CODE & 0xff));
updateWith(ch, start, length);
digest.update((byte) 0);
digest.update((byte) 0);
}
}