package org.docx4j.samples;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.docx4j.TraversalUtil;
import org.docx4j.XmlUtils;
import org.docx4j.finders.RangeFinder;
import org.docx4j.jaxb.Context;
import org.docx4j.model.fields.merge.DataFieldName;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.wml.Body;
import org.docx4j.wml.CTBookmark;
import org.docx4j.wml.CTMarkupRange;
import org.docx4j.wml.ContentAccessor;
import org.docx4j.wml.P;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Perform certain bookmark integrity checks, and optionally, write a fixed output docx
*/
public class BookmarksDuplicateCheck {

    /**
     * Whether to attempt a repair: when true, every bookmark start/end reported
     * as faulty is removed from its parent's content list, and (if anything was
     * faulty) the result is saved as OUT_BookmarksRemediated.docx in user.dir.
     * When false, the problems are only reported on System.out.
     */
    private static boolean remediate = true;

    private static org.docx4j.wml.ObjectFactory factory = Context.getWmlObjectFactory();

    /**
     * Loads "your.docx" from the current working directory, inspects the
     * bookmarks found in the document body and, if {@link #remediate} is set,
     * removes the faulty ones and writes the remediated docx.
     *
     * @param args not used
     * @throws Exception if the docx can't be loaded, traversed or saved
     */
    public static void main(String[] args) throws Exception {

        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage
                .load(new java.io.File(System.getProperty("user.dir")
                        + "/your.docx"));
        MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();

        // Before.. (uncomment to dump the document XML prior to any change)
        // System.out.println(XmlUtils.marshaltoString(documentPart.getJaxbElement(), true, true));

        org.docx4j.wml.Document wmlDocumentEl = (org.docx4j.wml.Document) documentPart
                .getJaxbElement();
        Body body = wmlDocumentEl.getBody();

        BookmarksDuplicateCheck bti = new BookmarksDuplicateCheck();
        List<Object> faulty = bti.inspectBookmarks(body.getContent());

        if (remediate) {
            for (Object o : faulty) {
                // CTBookmark (a start) extends CTMarkupRange (an end), so this
                // single check covers both kinds of reported object.
                if (o instanceof CTMarkupRange) {
                    removeFromParent((CTMarkupRange) o);
                }
            }

            if (faulty.isEmpty()) {
                System.out.println("Nothing to fix");
            } else {
                // After.. (uncomment to dump the remediated document XML)
                // System.out.println(XmlUtils.marshaltoString(documentPart.getJaxbElement(), true, true));
                wordMLPackage.save(new java.io.File(System.getProperty("user.dir") + "/OUT_BookmarksRemediated.docx"));
            }
        }
    }

    /**
     * Removes a faulty bookmark start or end from the content list of its
     * parent, reporting on System.out when that isn't possible.
     *
     * @param range the bookmark start ({@link CTBookmark}) or end to remove
     */
    private static void removeFromParent(CTMarkupRange range) {
        Object parent = range.getParent();
        if (parent instanceof ContentAccessor) {
            if (!remove(((ContentAccessor) parent).getContent(), range)) {
                System.out.println("Couldn't find " + describe(range));
            }
        } else {
            // parent may be null (instanceof is false for null), so don't
            // dereference it unconditionally.
            System.out.println("TODO: handle parent:"
                    + (parent == null ? "null" : parent.getClass().getName()));
        }
    }

    /**
     * Describes a range for diagnostics: "start " plus the name for a bookmark
     * start, "end " plus the id otherwise. Either value may print as "null".
     */
    private static String describe(CTMarkupRange range) {
        if (range instanceof CTBookmark) {
            return "start " + ((CTBookmark) range).getName();
        }
        // String concatenation tolerates a null id; ends with no id are
        // deliberately reported as faulty, so this must not dereference it.
        return "end " + range.getId();
    }

    /**
     * Removes the given object (matched by identity) from the list.
     *
     * @param list     the content list to modify
     * @param deletion the object to remove
     * @return true if the object was found and removed
     */
    private static boolean remove(List<?> list, Object deletion) {
        int i = getIndex(list, deletion);
        if (i >= 0) {
            return list.remove(i) != null;
        }
        return false;
    }

    /**
     * Finds the position of an object in a list, comparing by identity against
     * each entry and against the result of XmlUtils.unwrap on each entry (so an
     * entry which merely wraps the object still matches).
     *
     * @return the index of the first match, or -1 if there is none
     */
    private static int getIndex(List<?> list, Object deletion) {
        int i = 0;
        for (Object o : list) {
            if (o == deletion
                    || XmlUtils.unwrap(o) == deletion) {
                return i;
            }
            i++;
        }
        return -1;
    }

    /**
     * Checks the bookmark starts and ends found beneath the given content for:
     * starts missing both name and id, duplicate start names, duplicate start
     * ids, ends with no id, duplicate end ids, starts with no matching end,
     * and ends with no matching start. Findings are printed to System.out.
     *
     * NOTE(review): a start flagged only because its name is a duplicate is
     * returned without its matching end; since its id is still recorded as a
     * known start id, that end is not reported, and will remain in the
     * document after remediation.
     *
     * @param paragraphs the content to traverse (e.g. the body content)
     * @return the objects considered faulty (CTBookmark for starts,
     *         CTMarkupRange for ends), each listed once, never null
     * @throws Exception if traversal fails
     */
    private List<Object> inspectBookmarks(List<Object> paragraphs) throws Exception {

        Set<String> names = new HashSet<String>();
        Set<BigInteger> startIds = new HashSet<BigInteger>();
        Set<BigInteger> endIds = new HashSet<BigInteger>();
        List<Object> faulty = new ArrayList<Object>();

        RangeFinder rt = new RangeFinder("CTBookmark", "CTMarkupRange");
        new TraversalUtil(paragraphs, rt);

        System.out.println("Checking starts " );
        for (CTBookmark bm : rt.getStarts()) {
            BigInteger id = bm.getId();
            String name = bm.getName();

            if (name == null && id == null) {
                System.out.println("Name and ID missing!");
                flag(faulty, bm);
            } else if (name != null && id != null) {
                if (!names.add(name)) {
                    System.out.println("Already have " + name);
                    flag(faulty, bm);
                }
                if (!startIds.add(id)) {
                    System.out.println("Already have " + id.longValue());
                    flag(faulty, bm);
                }
            } else if (name == null) {
                // Only the id is present; a missing name alone is reported
                // but not treated as faulty.
                System.out.println("Name missing for id " + id.longValue());
                if (!startIds.add(id)) {
                    System.out.println(".. and already have " + id.longValue());
                    flag(faulty, bm);
                }
            } else {
                // Only the name is present.
                System.out.println("ID missing for name " + name);
                if (!names.add(name)) {
                    System.out.println(".. and already have " + name);
                    flag(faulty, bm);
                }
            }
        }

        System.out.println("Checking ends " );
        for (CTMarkupRange bm : rt.getEnds()) {
            BigInteger id = bm.getId();
            if (id == null) {
                System.out.println("ID missing!");
                flag(faulty, bm);
            } else if (!endIds.add(id)) {
                System.out.println("Already have " + id.longValue());
                flag(faulty, bm);
            }
        }

        System.out.println("Matching ends" );
        for (BigInteger i : startIds) {
            if (!endIds.contains(i)) {
                System.out.println(" Missing end for start " + i.longValue());
                flag(faulty, find(rt.getStarts(), i)); // so remove the corresponding start
            }
        }

        System.out.println("Matching starts" );
        for (BigInteger i : endIds) {
            if (!startIds.contains(i)) {
                System.out.println(" Missing start for end " + i.longValue());
                flag(faulty, find(rt.getEnds(), i)); // so remove the corresponding end
            }
        }

        System.out.println("Total faulty objects: " + faulty.size());
        return faulty;
    }

    /**
     * Records an object as faulty, unless it is null or that very object has
     * been recorded already. An object can fail several checks; listing it
     * once avoids a second, necessarily unsuccessful, removal attempt.
     */
    private static void flag(List<Object> faulty, Object candidate) {
        if (candidate == null) {
            return;
        }
        for (Object existing : faulty) {
            if (existing == candidate) {
                return;
            }
        }
        faulty.add(candidate);
    }

    /**
     * Returns the first range in the list whose id equals the given id.
     *
     * A single generic method serves both starts and ends: two overloads
     * taking List&lt;CTBookmark&gt; and List&lt;CTMarkupRange&gt; would have
     * the same erasure, which the compiler rejects.
     *
     * @param ranges the starts or ends to search
     * @param id     the id to look for; must not be null
     * @return the matching range, or null if there is none (shouldn't happen)
     */
    private static <T extends CTMarkupRange> T find(List<T> ranges, BigInteger id) {
        for (T range : ranges) {
            // Compare by value; the entry's own id may be null.
            if (id.equals(range.getId())) {
                return range;
            }
        }
        return null; //shouldn't happen
    }
}