/** Copyright (c) 2011 Delcyon, Inc. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package com.delcyon.capo.util.diff; import java.io.ByteArrayInputStream; import java.util.ArrayList; import java.util.Arrays; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.w3c.dom.Text; import com.delcyon.capo.util.XMLSerializer; import com.delcyon.capo.util.diff.Diff.Side; import com.delcyon.capo.util.diff.InputStreamTokenizer.TokenList; import com.delcyon.capo.xml.XMLDiff; import com.delcyon.capo.xml.XPath; /** * @author jeremiah * THIS IS ONLY FOR THE TEXT CONTENT OF ELEMENTS. * If you want to compare two XML trees Use XMLDiff * Used to format a DiffEntry stream into a difference Element. * Used to get one side or another of a difference element. * Used to get one side or another of an element given that element, and it's corresponding difference element. */ public class XMLTextDiff { public static final String DIFF_ELEMENT_NAME = XMLDiff.XDIFF_PREFIX+":text"; public static final String DIFF_ENTRY_ELEMENT_NAME = XMLDiff.XDIFF_PREFIX+":diff"; public static final String SIDE_ATTRIBUTE_NAME = XMLDiff.XDIFF_PREFIX+":diff"; private transient DocumentBuilder documentBuilder; private ArrayList<ArrayList<Integer>> tokenLists = null; private TokenList tokenList = null; private transient XMLSerializer xmlSerializer; public XMLTextDiff() throws Exception { DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); documentBuilderFactory.setNamespaceAware(true); this.documentBuilder = documentBuilderFactory.newDocumentBuilder(); xmlSerializer = new XMLSerializer(); xmlSerializer.setNamespace(XMLDiff.XDIFF_PREFIX,XMLDiff.XDIFF_NAMESPACE_URI); } /** * Given a difference element that includes the non-differences, it will return the specified side. * @param differenceElement * @param side * @return * @throws Exception */ public Text getTextForSide(Element differenceElement, Side side) throws Exception { StringBuilder stringBuilder = new StringBuilder(); NodeList nodeList = XPath.selectNSNodes(differenceElement, DIFF_ENTRY_ELEMENT_NAME+"[@"+SIDE_ATTRIBUTE_NAME+" = '"+side+"' or @"+SIDE_ATTRIBUTE_NAME+" = '"+Side.BASE +" = "+ Side.MOD+"']","xdiff="+XMLDiff.XDIFF_NAMESPACE_URI); for (int nodeIndex = 0; nodeIndex < nodeList.getLength(); nodeIndex++) { Element chunkElement = (Element) nodeList.item(nodeIndex); stringBuilder.append(chunkElement.getTextContent()); } return differenceElement.getOwnerDocument().createTextNode(stringBuilder.toString()); } /** * Given two String will create a difference element for the two Strings. * @param baseText * @param otherText * @return * @throws Exception */ public Element getDifferenceElement(String baseText, String otherText) throws Exception { Diff diff = new Diff(baseText, otherText); return getDifferenceElement(diff); } /** * Given two textNodes will create a difference element for the textual content of the two nodes. * The text differencing will default to TokenList.NEW_LINE * @param baseText * @param otherText * @return * @throws Exception */ public Element getDifferenceElement(Text baseText, Text otherText) throws Exception { return getDifferenceElement(baseText, otherText, TokenList.NEW_LINE, null); } /** * Given two textNodes will create a difference element for the textual content of the two nodes. * @param baseText * @param otherText * @param tokenList * @param tokenLists can be null if tokenList isn't equal to CUSTOM * @return * @throws Exception */ public Element getDifferenceElement(Text baseText, Text otherText, TokenList tokenList, ArrayList<ArrayList<Integer>> tokenLists) throws Exception { Diff diff = null; if (tokenList == TokenList.CUSTOM) { diff = new Diff(baseText.getTextContent(), otherText.getTextContent(),tokenLists); } else { diff = new Diff(baseText.getTextContent(), otherText.getTextContent(),tokenList); } return getDifferenceElement(diff); } /** * Will get the difference element for a diff object * @param diff * @return * @throws Exception */ public Element getDifferenceElement(Diff diff) throws Exception { Document document = documentBuilder.newDocument(); Element diffRootElement = document.createElementNS(XMLDiff.XDIFF_NAMESPACE_URI,DIFF_ELEMENT_NAME); document.appendChild(diffRootElement); long baseStartPosition = 1l; long baseStopPosition = 1l; long otherStartPosition = 1l; long otherStopPosition = 1l; InputStreamTokenizer inputStreamTokenizer = null; if (diff.getTokenList() == TokenList.CUSTOM) { this.tokenList = diff.getTokenList(); tokenLists = diff.getTokenLists(); inputStreamTokenizer = new InputStreamTokenizer(diff.getDifferencesAsBytes(), tokenLists); xmlSerializer.export(this, diffRootElement, 0); } else { this.tokenList = diff.getTokenList(); inputStreamTokenizer = new InputStreamTokenizer(diff.getDifferencesAsBytes(), tokenList); xmlSerializer.export(this, diffRootElement, 0); } byte[] buffer = null; Side previousSide = null; String nodeText = ""; //this deals with the special case of data that doesn't end with a line break of some sort. boolean skipRead = false; while(true) { if (skipRead == false) { buffer = inputStreamTokenizer.readBytes(); } else { skipRead = false; } if (buffer.length == 0) { addDiffEntryElement(nodeText, document, previousSide,baseStartPosition,baseStopPosition,otherStartPosition,otherStopPosition); break; } else { DiffEntry diffEntry = DiffEntry.parseLineData(buffer); String text = new String(diffEntry.getData()); /* * The end of files can be tricky be cause they don't have to end in a line delimiter. * So if we get a line that's longer than what we expect, we should save it for the next go around, and use it as a line. * The other option is to append something onto the end of the stream, but this seems like it could have strange consequences */ if (text.length() != diffEntry.getExpectedTextLength()) { text = text.substring(0,diffEntry.getExpectedTextLength()); buffer = Arrays.copyOfRange(diffEntry.getData(),diffEntry.getExpectedTextLength(),diffEntry.getData().length); skipRead = true; } char directionChar = diffEntry.getDirectionChar(); Side currentSide = null; if (directionChar == '+') { currentSide = Side.MOD; } else if (directionChar == '-') { currentSide = Side.BASE; } //we've switched sides, so write the chunk out if (currentSide != previousSide) { addDiffEntryElement(nodeText, document, previousSide,baseStartPosition,baseStopPosition,otherStartPosition,otherStopPosition); //store all of the current values for the next chunk baseStartPosition = diffEntry.getBaseStreamPosition(); baseStopPosition = diffEntry.getBaseStreamPosition(); otherStartPosition = diffEntry.getOtherStreamPosition(); otherStopPosition = diffEntry.getOtherStreamPosition(); nodeText = text; } else { nodeText = nodeText+text; baseStopPosition = diffEntry.getBaseStreamPosition(); otherStopPosition = diffEntry.getOtherStreamPosition(); } previousSide = currentSide; } } return document.getDocumentElement(); } /** * Converts a DifferenceEntry into a DifferenceEntry Element * @param nodeText * @param document * @param previousSide * @param baseStartPosition * @param baseStopPosition * @param otherStartPosition * @param otherStopPosition */ private void addDiffEntryElement(String nodeText, Document document, Side previousSide, long baseStartPosition, long baseStopPosition, long otherStartPosition, long otherStopPosition) { Element diffElement = document.createElementNS(XMLDiff.XDIFF_NAMESPACE_URI,DIFF_ENTRY_ELEMENT_NAME); diffElement.appendChild(document.createTextNode(nodeText)); if (previousSide != null) { diffElement.setAttributeNS(XMLDiff.XDIFF_NAMESPACE_URI,SIDE_ATTRIBUTE_NAME, previousSide.toString()); } else { diffElement.setAttributeNS(XMLDiff.XDIFF_NAMESPACE_URI,SIDE_ATTRIBUTE_NAME, Side.BASE +" = "+ Side.MOD); } diffElement.setAttributeNS(XMLDiff.XDIFF_NAMESPACE_URI,XMLDiff.XDIFF_PREFIX+":"+Side.BASE.toString().toLowerCase()+"Start", baseStartPosition+""); diffElement.setAttributeNS(XMLDiff.XDIFF_NAMESPACE_URI,XMLDiff.XDIFF_PREFIX+":"+Side.BASE.toString().toLowerCase()+"Stop", baseStopPosition+""); diffElement.setAttributeNS(XMLDiff.XDIFF_NAMESPACE_URI,XMLDiff.XDIFF_PREFIX+":"+Side.MOD.toString().toLowerCase()+"Start", otherStartPosition+""); diffElement.setAttributeNS(XMLDiff.XDIFF_NAMESPACE_URI,XMLDiff.XDIFF_PREFIX+":"+Side.MOD.toString().toLowerCase()+"Stop", otherStopPosition+""); if (nodeText.length() != 0) { document.getDocumentElement().appendChild(diffElement); } } /** * Same at other, only takes Text nodes, and returns a TextNode * @param differenceElement * @param side * @param sideText * @return * @throws Exception */ public Text getAlternateSideText(Element differenceElement, Side side, Text sideText) throws Exception { return differenceElement.getOwnerDocument().createTextNode(getAlternateSideText(differenceElement,side, sideText.getTextContent())); } /** * * @param differenceElement (only has to contain differences, doesn't need similarities) * @param Side this tell the processor witch side the sideText belongs to. * @param sideText the text corresponding to the side parameter * @return a String corresponding to the text that's opposite of the Side parameter * For example if the side and side Text are the original, the the resultant Text will be the modified side * @throws Exception */ public String getAlternateSideText(Element differenceElement, Side side, String sideText) throws Exception { String inSideAttributePrefix = XMLDiff.XDIFF_PREFIX+":"+Side.BASE.toString().toLowerCase(); //flip the document names around if (side == Side.MOD) { inSideAttributePrefix = XMLDiff.XDIFF_PREFIX+":"+Side.MOD.toString().toLowerCase(); } xmlSerializer.marshall(differenceElement, this); InputStreamTokenizer inputStreamTokenizer = null; if (tokenList == TokenList.CUSTOM) { inputStreamTokenizer = new InputStreamTokenizer(new ByteArrayInputStream(sideText.getBytes()), tokenLists); } else { inputStreamTokenizer = new InputStreamTokenizer(new ByteArrayInputStream(sideText.getBytes()), tokenList); } StringBuilder stringBuilder = new StringBuilder(); long currentLine = 1l; //don't care about where things are the same since we have the original document, and we might not have the BOTH stuff in the diff NodeList nodeList = XPath.selectNSNodes(differenceElement, "//"+DIFF_ENTRY_ELEMENT_NAME+"[@"+SIDE_ATTRIBUTE_NAME+" = '"+Side.BASE+"' or @"+SIDE_ATTRIBUTE_NAME+" = '"+Side.MOD+"']","xdiff="+XMLDiff.XDIFF_NAMESPACE_URI); for (int nodeIndex = 0; nodeIndex < nodeList.getLength(); nodeIndex++) { Element diffEntryElement = (Element) nodeList.item(nodeIndex); Side chunkSide = Side.valueOf(diffEntryElement.getAttribute(SIDE_ATTRIBUTE_NAME)); long inSideStartLine = Long.parseLong(diffEntryElement.getAttribute(inSideAttributePrefix+"Start")); while (currentLine < inSideStartLine) { stringBuilder.append(new String(inputStreamTokenizer.readBytes())); currentLine++; } if (chunkSide == side) { long inSideStoptLine = Long.parseLong(diffEntryElement.getAttribute(inSideAttributePrefix+"Stop")); while (currentLine <= inSideStoptLine) { //read stuff into the void if it belongs to the sideDocuments side only inputStreamTokenizer.readBytes(); currentLine++; } } else { //add this nodes data stringBuilder.append(diffEntryElement.getTextContent()); } } //read the rest of the data since there are no chucks left at this point, everything should be equal while(true) { byte[] data = inputStreamTokenizer.readBytes(); if (data.length == 0) { break; } else { stringBuilder.append(new String(data)); } } //add the node, and lets get out of here return stringBuilder.toString(); } }