/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdfparser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
/**
* This will parse a PDF 1.5 object stream and extract all of the objects from the stream.
*
* @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
* @version $Revision: 1.6 $
*/
public class PDFObjectStreamParser extends BaseParser
{
    /**
     * Log instance.
     */
    private static final Log log =
        LogFactory.getLog(PDFObjectStreamParser.class);

    /** Objects extracted by {@link #parse()}; null until parse() has been called. */
    private List<COSObject> streamObjects = null;

    /** Object numbers read from the stream header; null until parse() has been called. */
    private List<Integer> objectNumbers = null;

    /** The object stream being parsed; its "N" entry gives the object count. */
    private final COSStream stream;

    /**
     * Constructor.
     *
     * @since Apache PDFBox 1.3.0
     * @param strm The stream to parse.
     * @param doc The document for the current parsing.
     * @param forceParsing flag to skip malformed or otherwise unparseable
     *                     input where possible
     * @throws IOException If there is an error initializing the stream.
     */
    public PDFObjectStreamParser(
            COSStream strm, COSDocument doc, boolean forceParsing)
            throws IOException
    {
        super(strm.getUnfilteredStream(), forceParsing);
        setDocument( doc );
        stream = strm;
    }

    /**
     * Constructor. Delegates to the three-argument constructor using the
     * inherited FORCE_PARSING default.
     *
     * @param strm The stream to parse.
     * @param doc The document for the current parsing.
     *
     * @throws IOException If there is an error initializing the stream.
     */
    public PDFObjectStreamParser(COSStream strm, COSDocument doc)
            throws IOException
    {
        this(strm, doc, FORCE_PARSING);
    }

    /**
     * This will parse the tokens in the stream. The header, consisting of
     * "N" pairs of integers, is read first; the objects that follow are then
     * read one after another and paired, in order, with the object numbers
     * taken from the header. The parser's source is closed when parsing is
     * finished, whether or not it succeeded.
     *
     * If the stream holds more objects than the header declares, an error is
     * logged and the surplus objects are ignored.
     *
     * @throws IOException If there is an error while parsing the stream, or
     * if the "N" entry of the stream is negative.
     */
    public void parse() throws IOException
    {
        try
        {
            //need to first parse the header.
            int numberOfObjects = stream.getInt( "N" );
            if( numberOfObjects < 0 )
            {
                // A negative count cannot be used to size the lists below;
                // report it as a parse error rather than letting ArrayList
                // throw an unchecked IllegalArgumentException.
                throw new IOException(
                    "Invalid or missing /N entry in object stream: " + numberOfObjects );
            }
            objectNumbers = new ArrayList<Integer>( numberOfObjects );
            streamObjects = new ArrayList<COSObject>( numberOfObjects );
            for( int i=0; i<numberOfObjects; i++ )
            {
                int objectNumber = readInt();
                // The second integer of each pair (the offset) must be
                // consumed to advance the parser, but it is not needed
                // because the objects are read sequentially.
                readInt();
                objectNumbers.add( Integer.valueOf( objectNumber ) );
            }

            int objectCounter = 0;
            COSBase cosObject;
            while( (cosObject = parseDirObject()) != null )
            {
                if( objectCounter >= objectNumbers.size() )
                {
                    // More objects in the body than entries in the header:
                    // there is no object number left to assign, so stop
                    // instead of failing with an IndexOutOfBoundsException.
                    log.error( "Object stream has more objects than /N "
                        + numberOfObjects + "; ignoring the rest" );
                    break;
                }
                COSObject object = new COSObject(cosObject);
                object.setGenerationNumber( COSInteger.ZERO );
                COSInteger objNum =
                    COSInteger.get( objectNumbers.get( objectCounter ).intValue() );
                object.setObjectNumber( objNum );
                streamObjects.add( object );
                if(log.isDebugEnabled())
                {
                    log.debug( "parsed=" + object );
                }
                objectCounter++;
            }
        }
        finally
        {
            pdfSource.close();
        }
    }

    /**
     * This will get the objects that were parsed from the stream.
     *
     * @return All of the objects in the stream, or null if {@link #parse()}
     * has not been called yet.
     */
    public List<COSObject> getObjects()
    {
        return streamObjects;
    }
}