package com.tom_roush.pdfbox.pdfparser;
import android.util.Log;
import com.tom_roush.pdfbox.cos.COSDictionary;
import com.tom_roush.pdfbox.cos.COSName;
import com.tom_roush.pdfbox.cos.COSObjectKey;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
/**
* This class will collect all XRef/trailer objects and creates correct
* xref/trailer information after all objects are read using startxref
* and 'Prev' information (unused XRef/trailer objects are discarded).
*
* In case of missing startxref or wrong startxref pointer all
* XRef/trailer objects are used to create xref table / trailer dictionary
* in order they occur.
*
* For each new xref object/XRef stream method {@link #nextXrefObj(long, XRefType)}
* must be called with start byte position. All following calls to
* {@link #setXRef(COSObjectKey, long)} or {@link #setTrailer(COSDictionary)}
* will add the data for this byte position.
*
* After all objects are parsed the startxref position must be provided
* using {@link #setStartxref(long)}. This is used to build the chain of
* active xref/trailer objects used for creating document trailer and xref table.
*
* @author Timo Böhme
*/
public class XrefTrailerResolver
{
/**
* A class which represents a xref/trailer object.
*/
private class XrefTrailerObj
{
protected COSDictionary trailer = null;
private XRefType xrefType;
private final Map<COSObjectKey, Long> xrefTable = new HashMap<COSObjectKey, Long>();
/**
* Default constructor.
*/
private XrefTrailerObj()
{
xrefType = XRefType.TABLE;
}
}
/**
* The XRefType of a trailer.
*/
public enum XRefType
{
/**
* XRef table type.
*/
TABLE,
/**
* XRef stream type.
*/
STREAM
}
private final Map<Long, XrefTrailerObj> bytePosToXrefMap = new HashMap<Long, XrefTrailerObj>();
private XrefTrailerObj curXrefTrailerObj = null;
private XrefTrailerObj resolvedXrefTrailer = null;
/**
* Returns the first trailer if at least one exists.
*
* @return the first trailer or null
*/
public final COSDictionary getFirstTrailer()
{
if (bytePosToXrefMap.isEmpty())
{
return null;
}
Set<Long> offsets = bytePosToXrefMap.keySet();
SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
return bytePosToXrefMap.get(sortedOffset.first()).trailer;
}
/**
* Returns the last trailer if at least one exists.
*
* @return the last trailer ir null
*/
public final COSDictionary getLastTrailer()
{
if (bytePosToXrefMap.isEmpty())
{
return null;
}
Set<Long> offsets = bytePosToXrefMap.keySet();
SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
return bytePosToXrefMap.get(sortedOffset.last()).trailer;
}
/**
* Signals that a new XRef object (table or stream) starts.
* @param startBytePos the offset to start at
* @param type the type of the Xref object
*/
public void nextXrefObj( final long startBytePos, XRefType type )
{
bytePosToXrefMap.put( startBytePos, curXrefTrailerObj = new XrefTrailerObj() );
curXrefTrailerObj.xrefType = type;
}
/**
* Returns the XRefTxpe of the resolved trailer.
*
* @return the XRefType or null.
*/
public XRefType getXrefType()
{
return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefType;
}
/**
* Populate XRef HashMap of current XRef object.
* Will add an Xreftable entry that maps ObjectKeys to byte offsets in the file.
* @param objKey The objkey, with id and gen numbers
* @param offset The byte offset in this file
*/
public void setXRef( COSObjectKey objKey, long offset )
{
if ( curXrefTrailerObj == null )
{
// should not happen...
Log.w("PdfBox-Android", "Cannot add XRef entry for '" + objKey.getNumber() + "' because XRef start was not signalled." );
return;
}
curXrefTrailerObj.xrefTable.put( objKey, offset );
}
/**
* Adds trailer information for current XRef object.
*
* @param trailer the current document trailer dictionary
*/
public void setTrailer( COSDictionary trailer )
{
if ( curXrefTrailerObj == null )
{
// should not happen...
Log.w("PdfBox-Android", "Cannot add trailer because XRef start was not signalled." );
return;
}
curXrefTrailerObj.trailer = trailer;
}
/**
* Returns the trailer last set by {@link #setTrailer(COSDictionary)}.
*
* @return the current trailer.
*
*/
public COSDictionary getCurrentTrailer()
{
return curXrefTrailerObj.trailer;
}
/**
* Sets the byte position of the first XRef
* (has to be called after very last startxref was read).
* This is used to resolve chain of active XRef/trailer.
*
* In case startxref position is not found we output a
* warning and use all XRef/trailer objects combined
* in byte position order.
* Thus for incomplete PDF documents with missing
* startxref one could call this method with parameter value -1.
*
* @param startxrefBytePosValue starting position of the first XRef
*
*/
public void setStartxref( long startxrefBytePosValue )
{
if ( resolvedXrefTrailer != null )
{
Log.w("PdfBox-Android", "Method must be called only ones with last startxref value." );
return;
}
resolvedXrefTrailer = new XrefTrailerObj();
resolvedXrefTrailer.trailer = new COSDictionary();
XrefTrailerObj curObj = bytePosToXrefMap.get( startxrefBytePosValue );
List<Long> xrefSeqBytePos = new ArrayList<Long>();
if ( curObj == null )
{
// no XRef at given position
Log.w("PdfBox-Android", "Did not found XRef object at specified startxref position " + startxrefBytePosValue );
// use all objects in byte position order (last entries overwrite previous ones)
xrefSeqBytePos.addAll( bytePosToXrefMap.keySet() );
Collections.sort( xrefSeqBytePos );
}
else
{
// copy xref type
resolvedXrefTrailer.xrefType = curObj.xrefType;
// found starting Xref object
// add this and follow chain defined by 'Prev' keys
xrefSeqBytePos.add( startxrefBytePosValue );
while ( curObj.trailer != null )
{
long prevBytePos = curObj.trailer.getLong( COSName.PREV, -1L );
if ( prevBytePos == -1 )
{
break;
}
curObj = bytePosToXrefMap.get( prevBytePos );
if ( curObj == null )
{
Log.w("PdfBox-Android", "Did not found XRef object pointed to by 'Prev' key at position " + prevBytePos );
break;
}
xrefSeqBytePos.add( prevBytePos );
// sanity check to prevent infinite loops
if ( xrefSeqBytePos.size() >= bytePosToXrefMap.size() )
{
break;
}
}
// have to reverse order so that later XRefs will overwrite previous ones
Collections.reverse( xrefSeqBytePos );
}
// merge used and sorted XRef/trailer
for ( Long bPos : xrefSeqBytePos )
{
curObj = bytePosToXrefMap.get( bPos );
if ( curObj.trailer != null )
{
resolvedXrefTrailer.trailer.addAll( curObj.trailer );
}
resolvedXrefTrailer.xrefTable.putAll( curObj.xrefTable );
}
}
/**
* Gets the resolved trailer. Might return <code>null</code> in case
* {@link #setStartxref(long)} was not called before.
*
* @return the trailer if available
*/
public COSDictionary getTrailer()
{
return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.trailer;
}
/**
* Gets the resolved xref table. Might return <code>null</code> in case
* {@link #setStartxref(long)} was not called before.
*
* @return the xrefTable if available
*/
public Map<COSObjectKey, Long> getXrefTable()
{
return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefTable;
}
/** Returns object numbers which are referenced as contained
* in object stream with specified object number.
*
* This will scan resolved xref table for all entries having negated
* stream object number as value.
*
* @param objstmObjNr object number of object stream for which contained object numbers
* should be returned
*
* @return set of object numbers referenced for given object stream
* or <code>null</code> if {@link #setStartxref(long)} was not
* called before so that no resolved xref table exists
*/
public Set<Long> getContainedObjectNumbers( final int objstmObjNr )
{
if ( resolvedXrefTrailer == null )
{
return null;
}
final Set<Long> refObjNrs = new HashSet<Long>();
final long cmpVal = -objstmObjNr;
for ( Entry<COSObjectKey,Long> xrefEntry : resolvedXrefTrailer.xrefTable.entrySet() )
{
if ( xrefEntry.getValue() == cmpVal )
{
refObjNrs.add( xrefEntry.getKey().getNumber() );
}
}
return refObjNrs;
}
}