/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdfparser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.persistence.util.COSObjectKey;
/**
* This will parse a PDF 1.5 (or better) Xref stream and
* extract the xref information from the stream.
*
* @author <a href="mailto:justinl@basistech.com">Justin LeFebvre</a>
* @version $Revision: 1.0 $
*/
public class PDFXrefStreamParser extends BaseParser
{
private COSStream stream;
private XrefTrailerResolver xrefTrailerResolver;
/**
* Constructor.
*
* @since 1.3.0
* @param strm The stream to parse.
* @param doc The document for the current parsing.
* @param forceParsing flag to skip malformed or otherwise unparseable
* input where possible
* @param xrefTrailerResolver resolver to read the xref/trailer information
*
* @throws IOException If there is an error initializing the stream.
*/
public PDFXrefStreamParser(
COSStream strm, COSDocument doc, boolean forceParsing,
XrefTrailerResolver xrefTrailerResolver )
throws IOException
{
super(strm.getUnfilteredStream(), forceParsing);
setDocument(doc);
stream = strm;
this.xrefTrailerResolver = xrefTrailerResolver;
}
/**
* Parses through the unfiltered stream and populates the xrefTable HashMap.
* @throws IOException If there is an error while parsing the stream.
*/
public void parse() throws IOException
{
try
{
COSArray xrefFormat = (COSArray)stream.getDictionaryObject(COSName.W);
COSArray indexArray = (COSArray)stream.getDictionaryObject(COSName.INDEX);
/*
* If Index doesn't exist, we will use the default values.
*/
if(indexArray == null)
{
indexArray = new COSArray();
indexArray.add(COSInteger.ZERO);
indexArray.add(stream.getDictionaryObject(COSName.SIZE));
}
ArrayList<Integer> objNums = new ArrayList<Integer>();
/*
* Populates objNums with all object numbers available
*/
Iterator<COSBase> indexIter = indexArray.iterator();
while(indexIter.hasNext())
{
int objID = ((COSInteger)indexIter.next()).intValue();
int size = ((COSInteger)indexIter.next()).intValue();
for(int i = 0; i < size; i++)
{
objNums.add(new Integer(objID + i));
}
}
Iterator<Integer> objIter = objNums.iterator();
/*
* Calculating the size of the line in bytes
*/
int w0 = xrefFormat.getInt(0);
int w1 = xrefFormat.getInt(1);
int w2 = xrefFormat.getInt(2);
int lineSize = w0 + w1 + w2;
while(pdfSource.available() > 0 && objIter.hasNext())
{
byte[] currLine = new byte[lineSize];
pdfSource.read(currLine);
int type = 0;
/*
* Grabs the number of bytes specified for the first column in
* the W array and stores it.
*/
for(int i = 0; i < w0; i++)
{
type += (currLine[i] & 0x00ff) << ((w0 - i - 1)* 8);
}
//Need to remember the current objID
Integer objID = objIter.next();
/*
* 3 different types of entries.
*/
switch(type)
{
case 0:
/*
* Skipping free objects
*/
break;
case 1:
int offset = 0;
for(int i = 0; i < w1; i++)
{
offset += (currLine[i + w0] & 0x00ff) << ((w1 - i - 1) * 8);
}
int genNum = 0;
for(int i = 0; i < w2; i++)
{
genNum += (currLine[i + w0 + w1] & 0x00ff) << ((w2 - i - 1) * 8);
}
COSObjectKey objKey = new COSObjectKey(objID.intValue(), genNum);
xrefTrailerResolver.setXRef(objKey, offset);
break;
case 2:
/*
* These objects are handled by the dereferenceObjects() method
* since they're only pointing to object numbers
*/
break;
default:
break;
}
}
}
finally
{
pdfSource.close();
}
}
}