package com.tom_roush.pdfbox.pdmodel;

import com.tom_roush.pdfbox.cos.COSArray;
import com.tom_roush.pdfbox.cos.COSBase;
import com.tom_roush.pdfbox.cos.COSDictionary;
import com.tom_roush.pdfbox.cos.COSInteger;
import com.tom_roush.pdfbox.cos.COSName;
import com.tom_roush.pdfbox.pdmodel.common.COSObjectable;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Queue;

/**
 * The page tree, which defines the ordering of pages in the document in an efficient manner.
 *
 * @author John Hewson
 */
public class PDPageTree implements COSObjectable, Iterable<PDPage>
{
    private final COSDictionary root;
    private final PDDocument document; // optional

    /**
     * Constructor for embedding. Creates an empty root node with Type "Pages", an empty Kids
     * array and a Count of zero.
     */
    public PDPageTree()
    {
        root = new COSDictionary();
        root.setItem(COSName.TYPE, COSName.PAGES);
        root.setItem(COSName.KIDS, new COSArray());
        root.setItem(COSName.COUNT, COSInteger.ZERO);
        document = null;
    }

    /**
     * Constructor for reading.
     *
     * @param root A page tree root.
     * @throws IllegalArgumentException if root is null
     */
    public PDPageTree(COSDictionary root)
    {
        if (root == null)
        {
            throw new IllegalArgumentException("root cannot be null");
        }
        this.root = root;
        document = null;
    }

    /**
     * Constructor for reading.
     *
     * @param root A page tree root.
     * @param document The document which contains "root".
     * @throws IllegalArgumentException if root is null
     */
    PDPageTree(COSDictionary root, PDDocument document)
    {
        if (root == null)
        {
            throw new IllegalArgumentException("root cannot be null");
        }
        this.root = root;
        this.document = document;
    }

    /**
     * Returns the given attribute, inheriting from parent tree nodes if necessary.
     *
     * @param node page object
     * @param key the key to look up
     * @return COS value for the given key, or null if neither the node nor any of its
     * ancestors defines it
     */
    public static COSBase getInheritableAttribute(COSDictionary node, COSName key)
    {
        COSBase value = node.getDictionaryObject(key);
        if (value != null)
        {
            return value;
        }

        // not set on this node: walk up via Parent (or its abbreviation P)
        COSDictionary parent = (COSDictionary) node.getDictionaryObject(COSName.PARENT, COSName.P);
        if (parent != null)
        {
            return getInheritableAttribute(parent, key);
        }

        return null;
    }

    /**
     * Returns an iterator which walks all pages in the tree, in order.
     */
    @Override
    public Iterator<PDPage> iterator()
    {
        return new PageIterator(root);
    }

    /**
     * Helper to get kids from malformed PDFs. A missing Kids entry, or one that is not an
     * array, yields an empty list; array entries that are not dictionaries are skipped rather
     * than causing a ClassCastException or a later NullPointerException.
     *
     * @param node page tree node
     * @return list of kids, never null
     */
    private List<COSDictionary> getKids(COSDictionary node)
    {
        List<COSDictionary> result = new ArrayList<COSDictionary>();

        COSBase kidsBase = node.getDictionaryObject(COSName.KIDS);
        if (!(kidsBase instanceof COSArray))
        {
            // probably a malformed PDF
            return result;
        }

        COSArray kids = (COSArray) kidsBase;
        for (int i = 0, size = kids.size(); i < size; i++)
        {
            COSBase kid = kids.getObject(i);
            if (kid instanceof COSDictionary)
            {
                result.add((COSDictionary) kid);
            }
            // else: null or non-dictionary entry in a malformed PDF, ignore it
        }

        return result;
    }

    /**
     * Wraps a COS page dictionary in a PDPage, after checking that it really is a page.
     *
     * @param dict COS dictionary expected to have Type "Page"
     * @return the page object, sharing this tree's document resource cache if a document is set
     * @throws IllegalStateException if the dictionary's Type is not "Page"
     */
    private PDPage toPage(COSDictionary dict)
    {
        // sanity check
        if (dict.getCOSName(COSName.TYPE) != COSName.PAGE)
        {
            throw new IllegalStateException("Expected Page but got " + dict);
        }

        ResourceCache resourceCache = document != null ? document.getResourceCache() : null;
        return new PDPage(dict, resourceCache);
    }

    /**
     * Iterator which walks all pages in the tree, in order. All leaf nodes are collected
     * eagerly when the iterator is constructed.
     */
    private final class PageIterator implements Iterator<PDPage>
    {
        private final Queue<COSDictionary> queue = new ArrayDeque<COSDictionary>();

        private PageIterator(COSDictionary node)
        {
            enqueueKids(node);
        }

        /**
         * Depth-first traversal: intermediate nodes are expanded, leaves are queued.
         */
        private void enqueueKids(COSDictionary node)
        {
            if (isPageTreeNode(node))
            {
                List<COSDictionary> kids = getKids(node);
                for (COSDictionary kid : kids)
                {
                    enqueueKids(kid);
                }
            }
            else
            {
                queue.add(node);
            }
        }

        @Override
        public boolean hasNext()
        {
            return !queue.isEmpty();
        }

        /**
         * @throws NoSuchElementException if the iteration has no more pages
         * @throws IllegalStateException if the next leaf node is not of Type "Page"
         */
        @Override
        public PDPage next()
        {
            COSDictionary next = queue.poll();
            if (next == null)
            {
                // ArrayDeque never holds null elements, so null means the queue is exhausted
                throw new NoSuchElementException();
            }
            return toPage(next);
        }

        @Override
        public void remove()
        {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Returns the page at the given index.
     *
     * @param index zero-based index
     * @return the page at the given index
     * @throws IndexOutOfBoundsException if the index is negative or beyond the root's Count
     * @throws IllegalStateException if the tree is inconsistent (e.g. Count does not match the
     * actual kids) or the node found is not of Type "Page"
     */
    public PDPage get(int index)
    {
        return toPage(get(index + 1, root, 0));
    }

    /**
     * Returns the given COS page using a depth-first search.
     *
     * @param pageNum 1-based page number
     * @param node page tree node to search
     * @param encountered number of pages encountered so far
     * @return COS dictionary of the Page object
     * @throws IndexOutOfBoundsException if pageNum is less than 1 or exceeds the pages
     * accounted for by this node's Count
     * @throws IllegalStateException if the page cannot be located although the Count entries
     * claim it exists
     */
    private COSDictionary get(int pageNum, COSDictionary node, int encountered)
    {
        // page numbers are 1-based, so 0 (i.e. a public index of -1) is out of bounds as well
        if (pageNum < 1)
        {
            throw new IndexOutOfBoundsException("Index out of bounds: " + pageNum);
        }

        if (isPageTreeNode(node))
        {
            int count = node.getInt(COSName.COUNT, 0);
            if (pageNum <= encountered + count)
            {
                // it's a kid of this node
                for (COSDictionary kid : getKids(node))
                {
                    // which kid?
                    if (isPageTreeNode(kid))
                    {
                        int kidCount = kid.getInt(COSName.COUNT, 0);
                        if (pageNum <= encountered + kidCount)
                        {
                            // it's this kid
                            return get(pageNum, kid, encountered);
                        }
                        else
                        {
                            encountered += kidCount;
                        }
                    }
                    else
                    {
                        // single page
                        encountered++;
                        if (pageNum == encountered)
                        {
                            // it's this page
                            return get(pageNum, kid, encountered);
                        }
                    }
                }

                // Count promised the page is below this node, but the kids did not contain it
                throw new IllegalStateException("Index not found: " + pageNum);
            }
            else
            {
                throw new IndexOutOfBoundsException("Index out of bounds: " + pageNum);
            }
        }
        else
        {
            if (encountered == pageNum)
            {
                return node;
            }
            else
            {
                throw new IllegalStateException("Index not found: " + pageNum);
            }
        }
    }

    /**
     * Returns true if the node is a page tree node (i.e. an intermediate node).
     */
    private boolean isPageTreeNode(COSDictionary node)
    {
        // some files such as PDFBOX-2250-229205.pdf don't have Pages set as the Type, so we have
        // to check for the presence of Kids too
        return node.getCOSName(COSName.TYPE) == COSName.PAGES ||
               node.containsKey(COSName.KIDS);
    }

    /**
     * Returns the index of the given page, or -1 if it does not exist.
     *
     * @param page The page to search for.
     * @return the zero-based index of the given page, or -1 if the page is not found.
     */
    public int indexOf(PDPage page)
    {
        SearchContext context = new SearchContext(page);
        if (findPage(context, root))
        {
            return context.index;
        }
        return -1;
    }

    /**
     * Depth-first search for the page held by the context, counting every leaf visited.
     *
     * @param context search state, updated as leaves are visited
     * @param node page tree node whose kids are searched
     * @return true once the searched page has been found
     */
    private boolean findPage(SearchContext context, COSDictionary node)
    {
        for (COSDictionary kid : getKids(node))
        {
            if (context.found)
            {
                break;
            }
            if (isPageTreeNode(kid))
            {
                findPage(context, kid);
            }
            else
            {
                context.visitPage(kid);
            }
        }
        return context.found;
    }

    /**
     * Mutable state of an indexOf search: the dictionary looked for, the zero-based index of
     * the last leaf visited, and whether that leaf was the one searched.
     */
    private static final class SearchContext
    {
        private final COSDictionary searched;
        private int index = -1;
        private boolean found;

        private SearchContext(PDPage page)
        {
            this.searched = page.getCOSObject();
        }

        private void visitPage(COSDictionary current)
        {
            index++;
            found = searched.equals(current);
        }
    }

    /**
     * Returns the number of leaf nodes (page objects) that are descendants of this root within the
     * page tree, as recorded in the root's Count entry.
     */
    public int getCount()
    {
        return root.getInt(COSName.COUNT, 0);
    }

    @Override
    public COSDictionary getCOSObject()
    {
        return root;
    }

    /**
     * Removes the page with the given index from the page tree.
     *
     * @param index zero-based page index
     * @throws IndexOutOfBoundsException if the index is negative or beyond the root's Count
     */
    public void remove(int index)
    {
        COSDictionary node = get(index + 1, root, 0);
        remove(node);
    }

    /**
     * Removes the given page from the page tree.
     *
     * @param page The page to remove.
     */
    public void remove(PDPage page)
    {
        remove(page.getCOSObject());
    }

    /**
     * Removes the given COS page from its parent's Kids array and, if it was present there,
     * decrements the Count of every ancestor.
     */
    private void remove(COSDictionary node)
    {
        // remove from parent's kids
        COSDictionary parent = (COSDictionary) node.getDictionaryObject(COSName.PARENT, COSName.P);
        COSArray kids = (COSArray) parent.getDictionaryObject(COSName.KIDS);
        if (kids.removeObject(node))
        {
            // update ancestor counts
            do
            {
                node = (COSDictionary) node.getDictionaryObject(COSName.PARENT, COSName.P);
                if (node != null)
                {
                    node.setInt(COSName.COUNT, node.getInt(COSName.COUNT) - 1);
                }
            }
            while (node != null);
        }
    }

    /**
     * Adds the given page to this page tree, as the last kid of the root node.
     *
     * @param page The page to add.
     */
    public void add(PDPage page)
    {
        // set parent
        COSDictionary node = page.getCOSObject();
        node.setItem(COSName.PARENT, root);

        // todo: re-balance tree? (or at least group new pages into tree nodes of e.g. 20)

        // add to parent's kids
        COSArray kids = (COSArray) root.getDictionaryObject(COSName.KIDS);
        kids.add(node);

        // update ancestor counts
        do
        {
            node = (COSDictionary) node.getDictionaryObject(COSName.PARENT, COSName.P);
            if (node != null)
            {
                node.setInt(COSName.COUNT, node.getInt(COSName.COUNT) + 1);
            }
        }
        while (node != null);
    }
}