package com.tom_roush.pdfbox.pdfparser;

import android.util.Log;

import com.tom_roush.pdfbox.cos.COSBase;
import com.tom_roush.pdfbox.cos.COSDocument;
import com.tom_roush.pdfbox.cos.COSObject;
import com.tom_roush.pdfbox.cos.COSStream;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * This will parse a PDF 1.5 object stream and extract all of the objects from the stream.
 *
 * @author Ben Litchfield
 */
public class PDFObjectStreamParser extends BaseParser
{
    /** Objects extracted by {@link #parse()}; stays {@code null} until parsing has started. */
    private List<COSObject> streamObjects = null;

    /** The object stream being parsed; its dictionary supplies the /N entry. */
    private final COSStream stream;

    /**
     * Constructor.
     *
     * @param stream The stream to parse.
     * @param document The document for the current parsing.
     * @throws IOException If there is an error initializing the stream.
     */
    public PDFObjectStreamParser(COSStream stream, COSDocument document) throws IOException
    {
        super(new InputStreamSource(stream.getUnfilteredStream()));
        this.document = document;
        this.stream = stream;
    }

    /**
     * This will parse the tokens in the stream. The header, which holds one
     * (object number, offset) pair per object, is read first; the objects
     * themselves are read afterwards and numbered in header order. The
     * underlying source is closed when parsing finishes, whether it succeeded
     * or failed.
     *
     * @throws IOException If the /N entry is missing or negative, or if there
     * is an error while parsing the stream.
     */
    public void parse() throws IOException
    {
        try
        {
            // The header has to be parsed first; /N says how many pairs it holds.
            int numberOfObjects = stream.getInt( "N" );
            if (numberOfObjects < 0)
            {
                // getInt is documented to yield -1 when the entry is absent. Without this
                // check the negative value would reach the ArrayList constructor and escape
                // as an unchecked IllegalArgumentException instead of a parsing error.
                throw new IOException(
                        "Illegal or missing /N entry in object stream: " + numberOfObjects);
            }

            List<Long> objectNumbers = new ArrayList<Long>( numberOfObjects );
            streamObjects = new ArrayList<COSObject>( numberOfObjects );
            for( int i=0; i<numberOfObjects; i++ )
            {
                long objectNumber = readObjectNumber();
                // the offset is read only to move past it; objects are consumed sequentially
                readLong();
                objectNumbers.add( objectNumber);
            }

            COSObject object;
            COSBase cosObject;
            int objectCounter = 0;
            while( (cosObject = parseDirObject()) != null )
            {
                object = new COSObject(cosObject);
                object.setGenerationNumber(0);
                if (objectCounter >= objectNumbers.size())
                {
                    // more objects than the header announced: keep what was parsed so far
                    Log.e("PdfBox-Android", "/ObjStm (object stream) has more objects than /N " + numberOfObjects);
                    break;
                }
                object.setObjectNumber( objectNumbers.get( objectCounter) );
                streamObjects.add( object );
                Log.d("PdfBox-Android", "parsed=" + object );

                // According to the spec objects within an object stream shall not be enclosed
                // by obj/endobj tags, but there are some pdfs in the wild using those tags
                // skip endobject marker if present
                if (!seqSource.isEOF() && seqSource.peek() == 'e')
                {
                    readLine();
                }
                objectCounter++;
            }
        }
        finally
        {
            seqSource.close();
        }
    }

    /**
     * This will get the objects that were parsed from the stream.
     *
     * @return All of the objects in the stream, or {@code null} if
     * {@link #parse()} has not been called or failed before reading the header.
     */
    public List<COSObject> getObjects()
    {
        return streamObjects;
    }
}