package com.tom_roush.pdfbox.text;
import android.util.Log;
import com.tom_roush.pdfbox.contentstream.PDFStreamEngine;
import com.tom_roush.pdfbox.contentstream.operator.DrawObject;
import com.tom_roush.pdfbox.contentstream.operator.state.Concatenate;
import com.tom_roush.pdfbox.contentstream.operator.state.Restore;
import com.tom_roush.pdfbox.contentstream.operator.state.Save;
import com.tom_roush.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import com.tom_roush.pdfbox.contentstream.operator.state.SetMatrix;
import com.tom_roush.pdfbox.contentstream.operator.text.BeginText;
import com.tom_roush.pdfbox.contentstream.operator.text.EndText;
import com.tom_roush.pdfbox.contentstream.operator.text.MoveText;
import com.tom_roush.pdfbox.contentstream.operator.text.MoveTextSetLeading;
import com.tom_roush.pdfbox.contentstream.operator.text.NextLine;
import com.tom_roush.pdfbox.contentstream.operator.text.SetCharSpacing;
import com.tom_roush.pdfbox.contentstream.operator.text.SetFontAndSize;
import com.tom_roush.pdfbox.contentstream.operator.text.SetTextHorizontalScaling;
import com.tom_roush.pdfbox.contentstream.operator.text.SetTextLeading;
import com.tom_roush.pdfbox.contentstream.operator.text.SetTextRenderingMode;
import com.tom_roush.pdfbox.contentstream.operator.text.SetTextRise;
import com.tom_roush.pdfbox.contentstream.operator.text.SetWordSpacing;
import com.tom_roush.pdfbox.contentstream.operator.text.ShowText;
import com.tom_roush.pdfbox.contentstream.operator.text.ShowTextAdjusted;
import com.tom_roush.pdfbox.contentstream.operator.text.ShowTextLine;
import com.tom_roush.pdfbox.contentstream.operator.text.ShowTextLineAndSpace;
import com.tom_roush.pdfbox.pdmodel.PDPage;
import com.tom_roush.pdfbox.pdmodel.common.PDRectangle;
import com.tom_roush.pdfbox.pdmodel.font.PDFont;
import com.tom_roush.pdfbox.pdmodel.font.PDSimpleFont;
import com.tom_roush.pdfbox.pdmodel.font.PDType3Font;
import com.tom_roush.pdfbox.pdmodel.font.encoding.GlyphList;
import com.tom_roush.pdfbox.pdmodel.graphics.state.PDGraphicsState;
import com.tom_roush.pdfbox.util.Matrix;
import com.tom_roush.pdfbox.util.PDFBoxResourceLoader;
import com.tom_roush.pdfbox.util.Vector;
import java.io.IOException;
import java.io.InputStream;
/**
* PDFStreamEngine subclass for advanced processing of text via TextPosition.
*
* @see com.tom_roush.pdfbox.text.TextPosition
* @author Ben Litchfield
* @author John Hewson
*/
class PDFTextStreamEngine extends PDFStreamEngine
{
private int pageRotation;
private PDRectangle pageSize;
private final GlyphList glyphList;
private Matrix legacyCTM;
/**
* Constructor.
*/
PDFTextStreamEngine() throws IOException
{
addOperator(new BeginText());
addOperator(new Concatenate());
addOperator(new DrawObject()); // special text version
addOperator(new EndText());
addOperator(new SetGraphicsStateParameters());
addOperator(new Save());
addOperator(new Restore());
addOperator(new NextLine());
addOperator(new SetCharSpacing());
addOperator(new MoveText());
addOperator(new MoveTextSetLeading());
addOperator(new SetFontAndSize());
addOperator(new ShowText());
addOperator(new ShowTextAdjusted());
addOperator(new SetTextLeading());
addOperator(new SetMatrix());
addOperator(new SetTextRenderingMode());
addOperator(new SetTextRise());
addOperator(new SetWordSpacing());
addOperator(new SetTextHorizontalScaling());
addOperator(new ShowTextLine());
addOperator(new ShowTextLineAndSpace());
// load additional glyph list for Unicode mapping
String path = "com/tom_roush/pdfbox/resources/glyphlist/additional.txt";
InputStream input;
if(PDFBoxResourceLoader.isReady()) {
input = PDFBoxResourceLoader.getStream(path);
} else {
// Fallback
input = GlyphList.class.getClassLoader().getResourceAsStream(path);
}
glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
}
/**
* This will initialise and process the contents of the stream.
*
* @param page the page to process
* @throws java.io.IOException if there is an error accessing the stream.
*/
@Override
public void processPage(PDPage page) throws IOException
{
this.pageRotation = page.getRotation();
this.pageSize = page.getCropBox();
super.processPage(page);
}
@Override
protected void showText(byte[] string) throws IOException
{
legacyCTM = getGraphicsState().getCurrentTransformationMatrix().clone();
super.showText(string);
}
/**
* This method was originally written by Ben Litchfield for PDFStreamEngine.
*/
@Override
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode,
Vector displacement) throws IOException
{
//
// legacy calculations which were previously in PDFStreamEngine
//
PDGraphicsState state = getGraphicsState();
Matrix ctm = legacyCTM;
float fontSize = state.getTextState().getFontSize();
float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f;
Matrix textMatrix = getTextMatrix();
// 1/2 the bbox is used as the height todo: why?
float glyphHeight = font.getBoundingBox().getHeight() / 2;
// transformPoint from glyph space -> text space
float height = font.getFontMatrix().transformPoint(0, glyphHeight).y;
// (modified) combined displacement, this is calculated *without* taking the character
// spacing and word spacing into account, due to legacy code in TextStripper
float tx = displacement.getX() * fontSize * horizontalScaling;
float ty = 0; // todo: support vertical writing mode
// (modified) combined displacement matrix
Matrix td = Matrix.getTranslateInstance(tx, ty);
// (modified) text rendering matrix
Matrix nextTextRenderingMatrix = td.multiply(textMatrix).multiply(ctm); // text space -> device space
float nextX = nextTextRenderingMatrix.getTranslateX();
float nextY = nextTextRenderingMatrix.getTranslateY();
// (modified) width and height calculations
float dxDisplay = nextX - textRenderingMatrix.getTranslateX();
float dyDisplay = height * textRenderingMatrix.getScalingFactorY();
//
// start of the original method
//
// Note on variable names. There are three different units being used in this code.
// Character sizes are given in glyph units, text locations are initially given in text
// units, and we want to save the data in display units. The variable names should end with
// Text or Disp to represent if the values are in text or disp units (no glyph units are
// saved).
float fontSizeText = getGraphicsState().getTextState().getFontSize();
float horizontalScalingText = getGraphicsState().getTextState().getHorizontalScaling()/100f;
//Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
float glyphSpaceToTextSpaceFactor = 1 / 1000f;
if (font instanceof PDType3Font)
{
// This will typically be 1000 but in the case of a type3 font
// this might be a different number
glyphSpaceToTextSpaceFactor = 1f / font.getFontMatrix().getScaleX();
}
float spaceWidthText = 0;
try
{
// to avoid crash as described in PDFBOX-614, see what the space displacement should be
spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
}
catch (Throwable exception)
{
Log.w("PdfBox-Android", exception.getMessage(), exception);
}
if (spaceWidthText == 0)
{
spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
// the average space width appears to be higher than necessary so make it smaller
spaceWidthText *= .80f;
}
if (spaceWidthText == 0)
{
spaceWidthText = 1.0f; // if could not find font, use a generic value
}
// the space width has to be transformed into display units
float spaceWidthDisplay = spaceWidthText * fontSizeText * horizontalScalingText *
textRenderingMatrix.getScalingFactorX() * ctm.getScalingFactorX();
// use our additional glyph list for Unicode mapping
unicode = font.toUnicode(code, glyphList);
// when there is no Unicode mapping available, Acrobat simply coerces the character code
// into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want
// this, which is why we leave it until this point in PDFTextStreamEngine.
if (unicode == null)
{
if (font instanceof PDSimpleFont)
{
char c = (char) code;
unicode = new String(new char[] { c });
}
else
{
// Acrobat doesn't seem to coerce composite font's character codes, instead it
// skips them. See the "allah2.pdf" TestTextStripper file.
return;
}
}
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
pageSize.getHeight(), textRenderingMatrix, nextX, nextY,
dyDisplay, dxDisplay,
spaceWidthDisplay, unicode, new int[] { code } , font, fontSize,
(int)(fontSize * textRenderingMatrix.getScalingFactorX())));
}
/**
* A method provided as an event interface to allow a subclass to perform some specific
* functionality when text needs to be processed.
*
* @param text The text to be processed.
*/
protected void processTextPosition(TextPosition text)
{
// subclasses can override to provide specific functionality
}
}