/* * Copyright 2007, Plutext Pty Ltd. * * This file is part of Docx4all. Docx4all is free software: you can redistribute it and/or modify it under the terms of version 3 of the GNU General Public License as published by the Free Software Foundation. Docx4all is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Docx4all. If not, see <http://www.gnu.org/licenses/>. */ package org.docx4all.util; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.math.BigInteger; import java.text.BreakIterator; import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Set; import javax.swing.JEditorPane; import javax.swing.SwingConstants; import javax.swing.text.AttributeSet; import javax.swing.text.BadLocationException; import javax.swing.text.Document; import javax.swing.text.EditorKit; import javax.swing.text.Element; import javax.swing.text.Position; import javax.swing.text.Segment; import javax.swing.text.Style; import javax.swing.text.DefaultStyledDocument.ElementSpec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.docx4all.swing.WordMLTextPane; import org.docx4all.swing.text.BadSelectionException; import org.docx4all.swing.text.DocumentElement; import org.docx4all.swing.text.ElementMLIteratorCallback; import org.docx4all.swing.text.SdtBlockInfo; import org.docx4all.swing.text.StyleSheet; import org.docx4all.swing.text.TextSelector; import org.docx4all.swing.text.WordMLDocument; import org.docx4all.swing.text.WordMLFragment; import org.docx4all.swing.text.WordMLStyleConstants; import org.docx4all.swing.text.WordMLFragment.ElementMLRecord; import org.docx4all.ui.main.Constants; import org.docx4all.ui.main.WordMLEditor; import org.docx4all.xml.DocumentML; import org.docx4all.xml.ElementML; import org.docx4all.xml.ElementMLFactory; import org.docx4all.xml.ElementMLIterator; import org.docx4all.xml.FldComplexML; import org.docx4all.xml.HyperlinkML; import org.docx4all.xml.ParagraphML; import org.docx4all.xml.ParagraphPropertiesML; import org.docx4all.xml.RunContentML; import org.docx4all.xml.RunDelML; import org.docx4all.xml.RunInsML; import org.docx4all.xml.RunML; import org.docx4all.xml.RunPropertiesML; import org.docx4all.xml.SdtBlockML; import org.docx4j.XmlUtils; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.plutext.client.Mediator; /** * @author Jojada Tirtowidjojo - 27/11/2007 */ public class DocUtil { private static Logger log = LoggerFactory.getLogger(DocUtil.class); private final static String TAB = " "; public final static boolean isSharedDocument(WordMLDocument doc) { boolean isShared = false; WordMLEditor resource = (WordMLEditor) WordMLEditor.getInstance(); StringBuilder plutextWebdav = new StringBuilder(); plutextWebdav.append("/"); plutextWebdav.append(resource.getPlutextWebdavUrlKeyword()); plutextWebdav.append("/"); String uri = (String) doc.getProperty(WordMLDocument.FILE_PATH_PROPERTY); if (uri != null && uri.indexOf(plutextWebdav.toString()) > -1) { DocumentElement elem = (DocumentElement) doc.getDefaultRootElement(); DocumentML docML = (DocumentML) elem.getElementML(); WordprocessingMLPackage wmlPackage = docML.getWordprocessingMLPackage(); if (wmlPackage != null) { isShared = XmlUtil.isSharedDocumentPackage(wmlPackage); } } return isShared; } public final static String getChunkingStrategy(WordMLDocument doc) { String theStrategy = null; DocumentElement elem = (DocumentElement) doc.getDefaultRootElement(); DocumentML docML = (DocumentML) elem.getElementML(); WordprocessingMLPackage wmlPackage = docML.getWordprocessingMLPackage(); if (wmlPackage != null) { org.docx4j.docProps.custom.Properties.Property chunkingStrategy = XmlUtil.getCustomProperty( wmlPackage, Constants.PLUTEXT_GROUPING_PROPERTY_NAME); if (chunkingStrategy != null) { theStrategy = chunkingStrategy.getLpwstr(); } } return theStrategy; } /** * Makes the xml content of document become the main document part * of WordprocessingMLPackage. * * @param kit EditorKit instance * @param doc Xml document * @param outputPackage WordprocessingMLPackage that will store the resulting output. * If it is null a brand new WordprocessingMLPackage will be created. * @return The passed in outputPackage argument if it is NOT NULL. * A brand new WordprocessingMLPackage; otherwise. */ public final static WordprocessingMLPackage write( final EditorKit kit, final Document doc, WordprocessingMLPackage outputPackage) { ByteArrayOutputStream out = new ByteArrayOutputStream(); try { kit.write(out, doc, 0, doc.getLength()); } catch (BadLocationException exc) { ;// ignore } catch (IOException exc) { exc.printStackTrace(); } ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); outputPackage = XmlUtil.deserialize(outputPackage, in); return outputPackage; } /** * Reads the xml document part of WordprocessingMLPackage and puts it into * Swing JEditorPane's document. * * @param targetView * The destination view for the reading * @param sourcePackage * WordprocessingMLPackage to read from * @return targetView's document */ public final static Document read( final JEditorPane targetView, final WordprocessingMLPackage sourcePackage) { EditorKit kit = targetView.getEditorKit(); Document theDoc = targetView.getDocument(); ByteArrayOutputStream out = new ByteArrayOutputStream(); XmlUtil.serialize(sourcePackage, out); //ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); InputStreamReader in = null; try { in = new InputStreamReader(new ByteArrayInputStream(out.toByteArray()), "UTF-16" ); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } out = null; try { kit.read(in, theDoc, 0); } catch (BadLocationException exc) { ;// ignore } catch (IOException exc) { exc.printStackTrace(); } return theDoc; } public final static HashMap<BigInteger, SdtBlockInfo> createSdtBlockInfoMap(WordMLDocument doc) { HashMap<BigInteger, SdtBlockInfo> theMap = new HashMap<BigInteger, SdtBlockInfo>(); try { doc.readLock(); DocumentElement root = (DocumentElement) doc .getDefaultRootElement(); for (int i = 0; i < root.getElementCount() - 1; i++) { DocumentElement elem = (DocumentElement) root.getElement(i); ElementML ml = elem.getElementML(); if (ml instanceof SdtBlockML) { try { Position pos = doc.createPosition(elem.getStartOffset()); org.docx4j.wml.SdtBlock sdt = (org.docx4j.wml.SdtBlock) ml .getDocxObject(); SdtBlockInfo info = new SdtBlockInfo(); info.setPosition(pos); info.setSdtBlock(sdt); theMap.put(sdt.getSdtPr().getId().getVal(), info); } catch (BadLocationException exc) { ;//ignore } } } } finally { doc.readUnlock(); } return theMap; } /** * When a text is inserted into a document its attributes are determined by * the text element at the insertion position. This text element is called * input attribute element and has to be editable. * * Given an insertion position there are two potential input attribute * elements, namely the one on the left of the insertion position and on the * right. By default this method returns the one on the left. In the case * where this default input attribute element does not exist or the * insertion position is at the beginning of a paragraph then the other * input attribute element will be visited. * * To override the default behaviour 'bias' parameter can be set to either * Position.Bias.Forward (for selecting the right element) or * Position.Bias.Backward (for the left one). * * @param doc * the document where input attribute element is desired * @param offset * Offset position * @param bias * Null, default behaviour. * Position.Bias.Forward, choose the right element. * Position.Bias.Backward, choose the left element * @return An editable WordMLDocument.TextElement if any; Null, otherwise. */ public final static WordMLDocument.TextElement getInputAttributeElement( WordMLDocument doc, int offset, Position.Bias bias) { WordMLDocument.TextElement theElem = null; if (bias == null) { if (doc.getParagraphMLElement(offset, true).getStartOffset() == offset) { theElem = (WordMLDocument.TextElement) doc .getCharacterElement(offset); } else { int pos = Math.max(offset - 1, 0); theElem = (WordMLDocument.TextElement) doc .getCharacterElement(pos); if (!canBecomeInputAttributeElement(theElem, offset)) { theElem = (WordMLDocument.TextElement) doc .getCharacterElement(offset); } } } else { int pos = (bias == Position.Bias.Forward) ? offset : Math.max( offset - 1, 0); theElem = (WordMLDocument.TextElement) doc.getCharacterElement(pos); } if (!canBecomeInputAttributeElement(theElem, offset)) { theElem = null; } return (WordMLDocument.TextElement) theElem; } /** * Determines whether the passed 'runContent' element can become an input * attribute element at given 'offset' position. * * The desired resulting effect is that when caret is placed at the middle * of hyperlink or tracked change text, user may edit the text but when * caret is at either end of hyperlink or tracked change text he may not * edit. * * @param runContent * @param offset * @return true or false */ private final static boolean canBecomeInputAttributeElement( WordMLDocument.TextElement runContent, int offset) { boolean canBecome = false; if (runContent.isEditable()) { // Check the parent of RunML; ie: runContent's grandparent. ElementML parent = runContent.getElementML().getParent() .getParent(); if (parent instanceof HyperlinkML || parent instanceof RunDelML || parent instanceof RunInsML) { canBecome = (runContent.getStartOffset() < offset && offset < runContent.getEndOffset()); } else { canBecome = true; } } return canBecome; } public final static void saveTextContentToElementML(WordMLDocument.TextElement elem) { if (elem == null) { return; } WordMLDocument doc = (WordMLDocument) elem.getDocument(); try { doc.readLock(); if (elem.getStartOffset() == elem.getEndOffset()) { return; } RunContentML rcml = (RunContentML) elem.getElementML(); if (!rcml.isDummy() && !rcml.isImplied()) { int count = elem.getEndOffset() - elem.getStartOffset(); String text = elem.getDocument().getText(elem.getStartOffset(), count); log.debug("saveTextContentToElementML(): text content=" + text); rcml.setTextContent(text); } } catch (BadLocationException exc) { ;// ignore } finally { doc.readUnlock(); } } public static final boolean canChangeIntoSdt(WordMLDocument doc, int offs, int length) { boolean canChange = false; try { doc.readLock(); DocumentElement elem = (DocumentElement) doc.getSdtBlockMLElement(offs); if (elem == null) { SdtBlockML sdt = ElementMLFactory.createSdtBlockML(); elem = (DocumentElement) doc.getParagraphMLElement(offs, false); if (offs == doc.getLength()) { ;//cannot change the last paragraph in the document } else if (offs == elem.getStartOffset() && elem.getEndOffset() == offs + length) { //Do not need to worry about whether 'elem' is the only child element //because at least 'elem' which is a ParagraphML can be changed into Sdt. //However, remember to check whether 'elem' can accept Sdt //as its sibling. canChange = elem.getElementML().canAddSibling(sdt, true); } else if (offs + length <= elem.getEndOffset()) { //[offs, offs + length] is in elem's span. //'elem' which is a ParagraphML can be changed into Sdt //but still need to check whether 'elem' can accept Sdt //as its sibling. canChange = elem.getElementML().canAddSibling(sdt, true); } else { DocumentElement parent = (DocumentElement) elem.getParentElement(); if (offs + length <= parent.getEndOffset()) { //[offs, offs + length] has to be within parent's span. int start = parent.getElementIndex(offs); int end = parent.getElementIndex(offs + length - 1); boolean changeable = true; while (start <= end && changeable) { DocumentElement temp = (DocumentElement) parent.getElement(start); ElementML ml = (ElementML) temp.getElementML().clone(); changeable = sdt.canAddChild(ml) && temp.getElementML().canAddSibling(sdt, true); start++; } canChange = changeable; } else { //consider as unchangeable } } } } finally { doc.readUnlock(); } return canChange; } public static final boolean canRemoveSdt(WordMLDocument doc, int offs, int length) { boolean removable = true; boolean hasSdt = false; try { doc.readLock(); int pos = offs; while (pos <= offs + length && removable) { DocumentElement elem = (DocumentElement) doc.getSdtBlockMLElement(pos); log.debug("canRemoveSdt(): pos = " + pos + " elem = " + elem); if (elem != null) { hasSdt = true; ElementML sdt = elem.getElementML(); for (int i=0; i < sdt.getChildrenCount() && removable; i++) { ElementML ml = (ElementML) sdt.getChild(i).clone(); removable = sdt.canAddSibling(ml, true); log.debug("canRemoveSdt(): sibling = " + ml + " canAddSibling = " + removable); } } else { elem = (DocumentElement) doc.getParagraphMLElement(pos, false); } log.debug("canRemoveSdt(): removable = " + removable); pos = elem.getEndOffset(); if (pos == offs + length) { //finish pos += 1; } } //while loop } finally { doc.readUnlock(); } return hasSdt && removable; } public static final boolean canInsertNewSdt(WordMLDocument doc, int offs) { SdtBlockML sdt = ElementMLFactory.createSdtBlockML(); boolean canInsert = (getElementToPasteAsSibling(doc, offs, sdt) != null); return canInsert; } public static final boolean hasSdt(WordMLDocument doc, int offs, int length) { boolean hasSdt = false; try { doc.readLock(); final DocumentElement root = (DocumentElement) doc.getDefaultRootElement(); int pos = offs; length = Math.max(length, 1); while (pos < (offs + length) && !hasSdt) { DocumentElement paraE = (DocumentElement) doc.getParagraphMLElement(pos, false); DocumentElement temp = (DocumentElement) paraE.getParentElement(); while (temp != root && !(temp.getElementML() instanceof SdtBlockML)) { temp = (DocumentElement) temp.getParentElement(); } if (temp == root) { pos = paraE.getEndOffset(); } else { //An SdtBlock element is found hasSdt = true; } } } finally { doc.readUnlock(); } return hasSdt; } public static final void setUniqueSdtBlockId( WordMLDocument doc, WordMLFragment fragment) { List<ElementMLRecord> paragraphRecords = fragment.getParagraphRecords(); if (paragraphRecords == null) { return; } try { doc.readLock(); final DocumentElement root = (DocumentElement) doc.getDefaultRootElement(); DocumentML docML = (DocumentML) root.getElementML(); Set<BigInteger> idSet = docML.getSdtBlockIdSet(); for (ElementMLRecord rec : paragraphRecords) { ElementML ml = rec.getElementML(); if (ml instanceof SdtBlockML) { SdtBlockML sdt = (SdtBlockML) ml; String idStr = sdt.getSdtProperties().getPlutextId(); if (idStr != null) { BigInteger id = BigInteger.valueOf(Long.valueOf(idStr)); if (idSet.contains(id)) { while (idSet.contains(id)) { //This while loop won't go for long. String s = Mediator.generateId(); id = BigInteger.valueOf(Long.valueOf(s)); } sdt.getSdtProperties().setPlutextId(id.toString()); } idSet.add(id); } } } } finally { doc.readUnlock(); } } public static final DocumentElement getElementToPasteAsSibling( WordMLDocument doc, int offs, ElementML sibling) { DocumentElement theElem = null; try { doc.readLock(); if (0 <= offs && offs <= doc.getLength()) { theElem = (DocumentElement) doc.getRunMLElement(offs); ElementML ml = theElem.getElementML(); boolean canPaste = ml.canAddSibling(sibling, true); while (!canPaste) { //try parent element. theElem = (DocumentElement) theElem.getParentElement(); ml = theElem.getElementML(); if (theElem != doc.getDefaultRootElement()) { if (ml.isImplied()) { //An implied ParagraphML for example. //Go to next parent. } else { canPaste = ml.canAddSibling(sibling, true); } } else { break; } } if (!canPaste) { theElem = null; } } } finally { doc.readUnlock(); } return theElem; } public static final boolean canMergeSdt(WordMLDocument doc, int offs, int length) { boolean canMerge = false; try { doc.readLock(); Element elem = doc.getSdtBlockMLElement(offs); if (elem != null && elem.getEndOffset() < (offs + length)) { elem = elem.getParentElement(); canMerge = (offs + length) <= elem.getEndOffset(); } } finally { doc.readUnlock(); } return canMerge; } /** * When an Sdt is split by WordMLEditorKit.SplitSdtAction, * it will be split at the beginning of paragraph containing * cursor. Therefore, any Sdt can always be split by such action. * * This method determines whether document 'doc' contains * an Sdt at 'offs' position. The existence of Sdt is * sufficient for determining whether an Sdt can be split. * * @param doc * @param offs * @return true if an Sdt at 'offs' position can be found; * false, otherwise. */ public static final boolean canSplitSdt(WordMLDocument doc, int offs) { boolean canSplit = false; try { doc.readLock(); DocumentElement sdt = (DocumentElement) doc.getSdtBlockMLElement(offs); canSplit = (sdt != null); } finally { doc.readUnlock(); } return canSplit; } public static final int getRevisionStart(WordMLDocument doc, int offs, int direction) { if (offs < 0 || offs >= doc.getLength()) { throw new IllegalArgumentException( "Document length=" + doc.getLength() + ". offs=" + offs); } if (direction != SwingConstants.NEXT && direction != SwingConstants.PREVIOUS) { throw new IllegalArgumentException( "direction=" + direction + ". Either SwingConstants.NEXT or SwingConstants.PREVIOUS is expected."); } int result = -1; try { doc.readLock(); DocumentElement run = getRunMLElement(doc, offs, direction); if (run == null) { ;//return -1 } else { ElementML ml = run.getElementML().getParent(); boolean isRevisionStart = (ml instanceof RunDelML || ml instanceof RunInsML) && ml.getChild(0) == run.getElementML(); if (isRevisionStart) { result = run.getStartOffset(); } else if (direction == SwingConstants.NEXT && run.getEndOffset() < doc.getLength() - 1) { result = getRevisionStart(doc, run.getEndOffset(), direction); } else if (direction == SwingConstants.PREVIOUS && run.getStartOffset() > 0) { result = getRevisionStart(doc, run.getStartOffset(), direction); } } } finally { doc.readUnlock(); } return result; } public static final int getRevisionEnd(WordMLDocument doc, int offs, int direction) { if (offs < 0 || offs >= doc.getLength()) { throw new IllegalArgumentException( "Document length=" + doc.getLength() + ". offs=" + offs); } if (direction != SwingConstants.NEXT && direction != SwingConstants.PREVIOUS) { throw new IllegalArgumentException( "direction=" + direction + ". Either SwingConstants.NEXT or SwingConstants.PREVIOUS is expected."); } int result = -1; try { doc.readLock(); DocumentElement run = getRunMLElement(doc, offs, direction); if (run == null) { ;//return -1 } else { ElementML ml = run.getElementML().getParent(); boolean isRevisionEnd = (ml instanceof RunDelML || ml instanceof RunInsML) && ml.getChild(ml.getChildrenCount()-1) == run.getElementML(); if (isRevisionEnd) { result = run.getEndOffset(); } else if (direction == SwingConstants.NEXT && run.getEndOffset() < doc.getLength() - 1) { result = getRevisionEnd(doc, run.getEndOffset(), direction); } else if (direction == SwingConstants.PREVIOUS && run.getStartOffset() > 0) { result = getRevisionEnd(doc, run.getStartOffset(), direction); } } } finally { doc.readUnlock(); } return result; } public static final DocumentElement getFldComplexStart( WordMLDocument doc, int offs, int direction) { DocumentElement theElem = null; try { doc.readLock(); while (theElem == null && 0 <= offs && offs < doc.getLength()) { theElem = getRunMLElement(doc, offs, direction); if (theElem == null) { break; } RunML runML = (RunML) theElem.getElementML(); if (runML.getFldChar() != null && runML.getFldChar().getFldCharType() == org.docx4j.wml.STFldCharType.BEGIN) { // break } else { offs = (direction == SwingConstants.NEXT) ? theElem.getEndOffset() : theElem.getStartOffset(); theElem = null; } } } finally { doc.readUnlock(); } return theElem; } public static final DocumentElement getFldComplexEnd( WordMLDocument doc, int offs, int direction) { DocumentElement theElem = null; try { doc.readLock(); while (theElem == null && 0 <= offs && offs < doc.getLength()) { theElem = getRunMLElement(doc, offs, direction); if (theElem == null) { break; } RunML runML = (RunML) theElem.getElementML(); if (runML.getFldChar() != null && runML.getFldChar().getFldCharType() == org.docx4j.wml.STFldCharType.END) { // break } else { offs = (direction == SwingConstants.NEXT) ? theElem.getEndOffset() : theElem.getStartOffset(); theElem = null; } } } finally { doc.readUnlock(); } return theElem; } public static final DocumentElement getRunMLElement(WordMLDocument doc, int offs, int direction) { if (offs < 0 || offs > doc.getLength()) { throw new IllegalArgumentException( "Document length=" + doc.getLength() + ". offs=" + offs); } if (direction != SwingConstants.NEXT && direction != SwingConstants.PREVIOUS) { throw new IllegalArgumentException( "direction=" + direction + ". Either SwingConstants.NEXT or SwingConstants.PREVIOUS is expected."); } DocumentElement run = null; try { doc.readLock(); if (direction == SwingConstants.NEXT) { run = (DocumentElement) doc.getRunMLElement(offs); if (run.getStartOffset() == offs) { ;//return run } else if (run.getEndOffset() == doc.getLength()) { run = null; } else { run = (DocumentElement) doc.getRunMLElement(run.getEndOffset()); } } else { run = (DocumentElement) doc.getRunMLElement(offs - 1); if (run.getEndOffset() == offs) { ;//return run } else if (run.getStartOffset() == 0){ run = null; } else { run = (DocumentElement) doc.getRunMLElement(run.getStartOffset() - 1); } } } finally { doc.readUnlock(); } return run; } public static final int getWordStart(WordMLTextPane editor, int offs) throws BadLocationException { WordMLDocument doc = (WordMLDocument) editor.getDocument(); if (offs == doc.getLength()) { throw new BadLocationException("No word at " + offs, offs); } Element para = doc.getParagraphMLElement(offs, true); int paraStart = para.getStartOffset(); int paraEnd = para.getEndOffset(); Segment seg = new Segment(); doc.getText(paraStart, paraEnd - paraStart, seg); if (seg.count > 0) { BreakIterator words = BreakIterator.getWordInstance(editor .getLocale()); words.setText(seg); int wordPosition = seg.offset + offs - paraStart; if (wordPosition >= words.last()) { wordPosition = words.last() - 1; } words.following(wordPosition); offs = paraStart + words.previous() - seg.offset; } return offs; } public static final int getWordEnd(WordMLTextPane editor, int offs) throws BadLocationException { WordMLDocument doc = (WordMLDocument) editor.getDocument(); if (offs == doc.getLength()) { throw new BadLocationException("No word at " + offs, offs); } Element para = doc.getParagraphMLElement(offs, true); int paraStart = para.getStartOffset(); int paraEnd = para.getEndOffset(); Segment seg = new Segment(); doc.getText(paraStart, paraEnd - paraStart, seg); if (seg.count > 0) { BreakIterator words = BreakIterator.getWordInstance(editor .getLocale()); words.setText(seg); int wordPosition = offs - paraStart + seg.offset; if (wordPosition >= words.last()) { wordPosition = words.last() - 1; } offs = paraStart + words.following(wordPosition) - seg.offset; } return offs; } /** * To determine whether DocumentElement's ElementML can be split into two * at 'atIndex' offset position. * * @param elem * @param atIndex * @return true if ElementML can be split; * false, otherwise */ public final static boolean canSplitElementML(DocumentElement elem, int atIndex) { if (elem.getStartOffset() == elem.getEndOffset() || elem.getParentElement() == null) { throw new IllegalArgumentException("Invalid elem=" + elem); } int offset = elem.getStartOffset() + atIndex; if (offset <= elem.getStartOffset() || elem.getEndOffset() <= offset) { throw new IllegalArgumentException("Invalid atIndex=" + atIndex); } boolean canSplit = false; ElementML elemML = elem.getElementML(); if (!elemML.isImplied() && (elemML instanceof SdtBlockML || elemML instanceof ParagraphML || elemML instanceof RunML || elemML instanceof RunContentML)) { WordMLDocument doc = (WordMLDocument) elem.getDocument(); try { new TextSelector(doc, offset, elem.getEndOffset() - offset); canSplit = true; } catch (BadSelectionException exc) { //Leaf element at 'offset' position //may be not editable. } } return canSplit; } /** * Splits DocumentElement's ElementML into two at 'atIndex' offset position. * These two resulting ElementML(s) will become siblings. * One is the existing one and the other is newly created. * * Please note that user is expected to call canSplitElementML() method * before executing this method. * * @param elem * @param atIndex * @return the newly created sibling of DocumentElement's ElementML. */ public final static ElementML splitElementML(DocumentElement elem, int atIndex) { WordMLDocument doc = (WordMLDocument) elem.getDocument(); int offset = elem.getStartOffset() + atIndex; TextSelector ts = null; try { ts = new TextSelector(doc, offset, elem.getEndOffset() - offset); } catch (BadSelectionException exc) { throw new IllegalArgumentException("Unable to split elem=" + elem); } List<ElementML> deletedElementMLs = null; List<DocumentElement> list = ts.getDocumentElements(); //Check first element DocumentElement tempE = list.get(0); if (tempE.isLeaf() && tempE.getStartOffset() < offset) { //Split into two RunContentMLs RunContentML leftML = (RunContentML) tempE.getElementML(); RunContentML rightML = (RunContentML) leftML.clone(); if (!leftML.isDummy()) { try { int start = tempE.getStartOffset(); int length = tempE.getEndOffset() - start; String text = doc.getText(start, length); String left = text.substring(0, offset - start); String right = text.substring(offset - start); leftML.setTextContent(left); rightML.setTextContent(right); } catch (BadLocationException exc) { ;// ignore } } // Prevent leftML from being deleted list.remove(0); deletedElementMLs = DocUtil.deleteElementML(list); // Include rightML as a deleted ElementML deletedElementMLs.add(0, rightML); } else { deletedElementMLs = DocUtil.deleteElementML(list); } list = null; ElementML elemML = elem.getElementML(); ElementML newSibling = null; if (elemML instanceof SdtBlockML) { newSibling = ElementMLFactory.createSdtBlockML(); List<ElementML> paragraphContents = new ArrayList<ElementML>(); for (ElementML ml: deletedElementMLs) { if (!ml.isImplied()) { if (ml instanceof RunML || ml instanceof RunContentML) { paragraphContents.add(ml); } else { newSibling.addChild(ml); } } } deletedElementMLs = null; if (!paragraphContents.isEmpty()) { tempE = (DocumentElement) doc.getParagraphMLElement(offset, false); ParagraphML paraML = (ParagraphML) tempE.getElementML(); ParagraphPropertiesML pPr = (ParagraphPropertiesML) paraML.getParagraphProperties(); if (pPr != null) { pPr = (ParagraphPropertiesML) pPr.clone(); } tempE = (DocumentElement) doc.getRunMLElement(offset); RunML runML = (RunML) tempE.getElementML(); RunPropertiesML rPr = (RunPropertiesML) runML.getRunProperties(); if (rPr != null) { rPr = (RunPropertiesML) rPr.clone(); } ElementML newParaML = ElementMLFactory.createParagraphML(paragraphContents, pPr, rPr); newSibling.addChild(0, newParaML); } } else if (elemML instanceof ParagraphML) { ParagraphML paraML = (ParagraphML) elemML; ParagraphPropertiesML pPr = (ParagraphPropertiesML) paraML.getParagraphProperties(); if (pPr != null) { pPr = (ParagraphPropertiesML) pPr.clone(); } DocumentElement runE = (DocumentElement) doc.getRunMLElement(offset); RunML runML = (RunML) runE.getElementML(); RunPropertiesML rPr = (RunPropertiesML) runML.getRunProperties(); if (rPr != null) { rPr = (RunPropertiesML) rPr.clone(); } newSibling = ElementMLFactory.createParagraphML(deletedElementMLs, pPr, rPr); } else if (elemML instanceof RunML) { RunML runML = (RunML) elemML; RunPropertiesML rPr = (RunPropertiesML) runML.getRunProperties(); if (rPr != null) { rPr = (RunPropertiesML) rPr.clone(); } newSibling = ElementMLFactory.createRunML(deletedElementMLs, rPr); } else { //must be a RunContentML newSibling = deletedElementMLs.get(0); } elemML.addSibling(newSibling, true); return newSibling; } public final static List<ElementML> deleteElementML(List<DocumentElement> list) { List<ElementML> deletedElementMLs = new ArrayList<ElementML>(list.size()); List<FldComplexML> fldComplexes = null; for (int i=0; i < list.size(); i++) { DocumentElement tempE = (DocumentElement) list.get(i); if (log.isDebugEnabled()) { log.debug("deleteElementML(): elem[" + i + "]=" + tempE); } ElementML ml = tempE.getElementML(); ml.delete(); deletedElementMLs.add(ml); if (ml.getGodParent() != null) { if (ml.getGodParent() instanceof FldComplexML) { FldComplexML fc = (FldComplexML) ml.getGodParent(); if (fldComplexes == null) { fldComplexes = new ArrayList<FldComplexML>(); fldComplexes.add(fc); } else if (fldComplexes.lastIndexOf(fc) == -1) { fldComplexes.add(fc); } else { //do nothing } } ml.getGodParent().deleteChild(ml); } } if (fldComplexes != null) { for (FldComplexML fc: fldComplexes) { //Those FldComplexML whose STFldCharType.BEGIN //or STFldCharType.END have been deleted //may have hidden children. //These hidden children also have to be deleted. //Recall that FldComplexML has instruction part //and value part. If instruction part is being //rendered then value part is hidden. RunML first = (RunML) fc.getChild(0); RunML last = (RunML) fc.getChild(fc.getChildrenCount()-1); if (first.getFldChar() == null || last.getFldChar() == null) { List<ElementML> remains = new ArrayList<ElementML>(fc.getChildren()); for (ElementML ml: remains) { //We do not put these remains //into 'deletedElementMLs' //because they are hidden. ml.delete(); } fc.delete(); } } } return deletedElementMLs; } public final static List<ElementSpec> getElementSpecs(ElementML elem) { ElementMLIterator parser = new ElementMLIterator(elem); ElementMLIteratorCallback result = new ElementMLIteratorCallback(); parser.cruise(result); return result.getElementSpecs(); } public final static List<String> getElementNamePath(DocumentElement elem, int pos) { List<String> thePath = null; if (elem.getStartOffset() <= pos && pos < elem.getEndOffset()) { thePath = new ArrayList<String>(); String name = elem.getElementML().getClass().getSimpleName(); thePath.add(name); while (!elem.isLeaf()) { int idx = elem.getElementIndex(pos); elem = (DocumentElement) elem.getElement(idx); ElementML ml = elem.getElementML(); if (ml.getGodParent() != null) { name = ml.getGodParent().getClass().getSimpleName(); thePath.add(name); } name = elem.getElementML().getClass().getSimpleName(); thePath.add(name); } } return thePath; } public final static DocumentElement getCommonParentElement( DocumentElement elem1, DocumentElement elem2) { if (elem1.getDocument() != elem2.getDocument()) { throw new IllegalArgumentException("Elements belong to two different documents"); } List<Element> path = new ArrayList<Element>(); Element temp = elem1; while (temp != null) { path.add(temp); temp = temp.getParentElement(); } temp = elem2; while (temp != null && path.indexOf(temp) == -1) { temp = temp.getParentElement(); } return (DocumentElement) temp; } /** * Returns a list of style names defined in document paragraphs and tables. * * @param doc * @return a list of style IDs. */ public final static List<String> getDefinedParagraphStyles(WordMLDocument doc) { Set<String> styles = new HashSet<String>(); try { doc.readLock(); //Default paragraph style AttributeSet attrs = doc.getStyleSheet().getStyle(StyleSheet.DEFAULT_STYLE); String styleName = (String) attrs.getAttribute( WordMLStyleConstants.DefaultParagraphStyleNameAttribute); styles.add(styleName); DocumentElement root = (DocumentElement) doc.getDefaultRootElement(); DocumentElement paraE = (DocumentElement) doc.getParagraphMLElement(0, false); while (paraE.getStartOffset() < doc.getLength()) { DocumentElement elem = paraE; while (elem != root) { attrs = elem.getAttributes(); String styleID = null; if (attrs.isDefined(WordMLStyleConstants.PStyleAttribute)) { styleID = (String) attrs.getAttribute( WordMLStyleConstants.PStyleAttribute); } if (attrs.isDefined(WordMLStyleConstants.TblStyleAttribute)) { styleID = (String) attrs.getAttribute( WordMLStyleConstants.TblStyleAttribute); } if (styleID != null) { //Search for style name Style temp = doc.getStyleSheet().getIDStyle(styleID); if (temp != null) { styleName = (String) temp.getAttribute(WordMLStyleConstants.StyleUINameAttribute); if (styleName != null) { styles.add(styleName); } } } elem = (DocumentElement) elem.getParentElement(); } //while (elem != root) paraE = (DocumentElement) doc.getParagraphMLElement(paraE.getEndOffset(), false); } //while (pos) } finally { doc.readUnlock(); } List<String> list = new ArrayList<String>(styles.size()); list.addAll(styles); Collections.sort(list); return list; } public final static List<Integer> getOffsetsOfParagraphSignature(WordMLDocument doc) { List<Integer> positions = new ArrayList<Integer>(); try { doc.readLock(); String s = doc.getText(0, doc.getLength()); int idx = s.indexOf(Constants.GROUPING_SIGNATURE); while (idx != -1) { DocumentElement para = (DocumentElement) doc.getParagraphMLElement(idx, false); if (para.getStartOffset() == idx && para.getParentElement() == doc.getDefaultRootElement()) { positions.add(Integer.valueOf(idx)); } idx = s.indexOf(Constants.GROUPING_SIGNATURE, idx + 2); } } catch (BadLocationException exc) { ;//should not happen } finally { doc.readUnlock(); } if (positions.isEmpty()) { positions = null; } return positions; } public final static List<Integer> getOffsetsOfStyledParagraphs( WordMLDocument doc, List<String> selectedStyleNames) { List<Integer> thePositions = new ArrayList<Integer>(); Style defaultStyle = doc.getStyleSheet().getStyle(StyleSheet.DEFAULT_STYLE); String defaultPStyle = (String) defaultStyle.getAttribute( WordMLStyleConstants.DefaultParagraphStyleNameAttribute); final DocumentElement root = (DocumentElement) doc.getDefaultRootElement(); for (int idx = 0; idx < root.getElementCount() - 1; idx++) { DocumentElement elem = (DocumentElement) root.getElement(idx); AttributeSet attrs = elem.getAttributes(); String styleID = null; if (attrs.isDefined(WordMLStyleConstants.PStyleAttribute)) { styleID = (String) attrs.getAttribute( WordMLStyleConstants.PStyleAttribute); } if (attrs.isDefined(WordMLStyleConstants.TblStyleAttribute)) { styleID = (String) attrs.getAttribute( WordMLStyleConstants.TblStyleAttribute); } if (styleID != null) { //Get style name Style temp = doc.getStyleSheet().getIDStyle(styleID); if (temp != null) { String styleName = (String) temp.getAttribute( WordMLStyleConstants.StyleUINameAttribute); if (styleName != null && selectedStyleNames.contains(styleName)) { //if style name is registered, put elem.getStartOffset() //in the returned list. thePositions.add(Integer.valueOf(elem.getStartOffset())); } } } else if (selectedStyleNames.contains(defaultPStyle)) { //Put elem.getStartOffset() in the returned list //because default style is listed in selectedStyleNames. thePositions.add(Integer.valueOf(elem.getStartOffset())); } } //for (idx) loop if (thePositions.isEmpty()) { thePositions = null; } return thePositions; } public final static void displayXml(Document doc) { org.docx4j.wml.Document jaxbDoc = null; if (doc instanceof WordMLDocument) { DocumentElement root = (DocumentElement) doc .getDefaultRootElement(); jaxbDoc = (org.docx4j.wml.Document) root.getElementML() .getDocxObject(); } else { WordprocessingMLPackage wmlPackage = (WordprocessingMLPackage) doc.getProperty( WordMLDocument.WML_PACKAGE_PROPERTY); jaxbDoc = (org.docx4j.wml.Document) wmlPackage.getMainDocumentPart().getJaxbElement(); } List<Object> list = jaxbDoc.getBody().getEGBlockLevelElts(); int i = 0; for (Object obj : list) { String s = XmlUtils.marshaltoString(obj, true); log.debug("displayXml(): BodyChild[" + i + "]=" + s); i++; } } public final static void displayStructure(Document doc) { Element e = doc.getDefaultRootElement(); displayStructure(doc, e, 0); } public final static void displayStructure(Document doc, Element elem, int numberOfTabs) { String leftMargin = getTabSpace(numberOfTabs); //====== Display Element class name ====== StringBuffer sb = new StringBuffer(leftMargin); sb.append("===== Element Class: "); sb.append(elem.getClass().getSimpleName()); log.debug(sb.toString()); //====== Display the Element offset position ====== int startOffset = elem.getStartOffset(); int endOffset = elem.getEndOffset(); sb = new StringBuffer(leftMargin); sb.append("Offsets ["); sb.append(startOffset); sb.append(", "); sb.append(endOffset); sb.append("]"); log.debug(sb.toString()); //====== Display the Element Attributes ====== AttributeSet attr = elem.getAttributes(); Enumeration<?> nameEnum = attr.getAttributeNames(); sb = new StringBuffer(leftMargin); sb.append("ATTRIBUTES:"); log.debug(sb.toString()); while (nameEnum.hasMoreElements()) { sb = new StringBuffer(leftMargin); Object attrName = nameEnum.nextElement(); sb.append(" (" + attrName + ", " + attr.getAttribute(attrName) + ")"); log.debug(sb.toString()); } //====== Display text content for a leaf element ====== if (elem.isLeaf()) { sb = new StringBuffer(leftMargin); try { String text = doc.getText(startOffset, endOffset - startOffset); if (text.length() > 25) { text = text.substring(0, 25); } sb.append("["); int lf = text.indexOf(Constants.NEWLINE); if (lf >= 0) { sb.append(text.substring(0, lf)); sb.append("<<NEWLINE>>"); sb.append(text.substring(lf + 1)); } else { sb.append(text); } sb.append("]"); log.debug(sb.toString()); } catch (BadLocationException ex) { } } //====== Display child elements ====== int count = elem.getElementCount(); for (int i = 0; i < count; i++) { displayStructure(doc, elem.getElement(i), numberOfTabs + 1); } } public final static void displayStructure(List<ElementSpec> list) { int depth = -1; for (int i = 0; i < list.size(); i++) { ElementSpec es = list.get(i); StringBuffer info = new StringBuffer(); ElementML elemML = (es.getAttributes() != null) ? WordMLStyleConstants.getElementML(es.getAttributes()) : null; if (es.getType() == ElementSpec.StartTagType) { if (elemML == null) { info.append(getTabSpace(++depth)); info.append("OPEN <NULL> - ..."); } else { info.append(getTabSpace(++depth)); info.append("OPEN <"); info.append(elemML.getTag()); info.append("> - "); info.append(elemML.toString()); } } else if (es.getType() == ElementSpec.ContentType) { if (elemML == null) { info.append(getTabSpace(depth + 1)); info.append("TEXT - RunContentML=NULL [...]"); } else { String text = ((RunContentML) elemML).getTextContent(); if (text.length() > 25) { text = text.substring(0, 25); } StringBuffer sb = new StringBuffer(); int lf = text.indexOf(Constants.NEWLINE); if (lf >= 0) { sb.append(text.substring(0, lf)); sb.append("<<NEWLINE>>"); sb.append(text.substring(lf + 1)); } else { sb.append(text); } info.append(getTabSpace(depth + 1)); info.append("TEXT - "); info.append(elemML.toString()); info.append("["); info.append(sb.toString()); info.append("]"); } } else { if (elemML == null) { info.append(getTabSpace(depth--)); info.append("CLOSE <NULL> - ..."); } else { info.append(getTabSpace(depth--)); info.append("CLOSE <"); info.append(elemML.getTag()); info.append("> - "); info.append(elemML.toString()); } depth = Math.max(depth, -1); } log.debug(info.toString()); } } private final static String getTabSpace(int numberOfTabs) { StringBuffer theSpace = new StringBuffer(); for (int i = 0; i < numberOfTabs; i++) { theSpace.append(TAB); } return theSpace.toString(); } private DocUtil() { ;//uninstantiable } }// DocUtil class