/*
* Copyright 2006-2012 ICEsoft Technologies Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS
* IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package org.icepdf.core.util;
import org.icepdf.core.io.SeekableByteArrayInputStream;
import org.icepdf.core.io.SeekableInput;
import org.icepdf.core.io.SeekableInputConstrainedWrapper;
import org.icepdf.core.pobjects.*;
import org.icepdf.core.pobjects.fonts.*;
import org.icepdf.core.pobjects.graphics.*;
import org.icepdf.core.pobjects.graphics.text.GlyphText;
import org.icepdf.core.pobjects.graphics.text.PageText;
import java.awt.*;
import java.awt.geom.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
import java.util.Stack;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* The ContentParser is responsible for parsing a page's content streams. The
* parsed text, images and other PDF object types are added to the page's Shapes
* object for later drawing and display.
*/
public class ContentParser {
// Class logger. Note that the logger name is Class.toString(), which for a
// class is "class " followed by the fully qualified name.
private static final Logger logger =
Logger.getLogger(ContentParser.class.toString());
// Alpha constant exposed to other classes; it is not referenced in the visible
// part of this file. NOTE(review): presumably used for overpaint rendering - confirm.
public static final float OVERPAINT_ALPHA = 0.4f;
// Graphics state used while parsing. Created lazily by the parse methods when
// null, or supplied up front via setGraphicsState (e.g. for Form XObjects).
private GraphicsState graphicState;
// PDF library master object, handed to the colour related consume_* helpers.
private Library library;
// Resource dictionary used to resolve fonts, XObjects, shadings, etc.
private Resources resources;
// (disabled profiling aid) private static HashTable tokenFrequency = new Hashtable(90);
// Flag used to handle the non-text coordinate operand "cm" when it appears
// inside of a text block; set to true when parseText starts.
private boolean inTextBlock;
// The text block affine transform can be altered by the "cm" operand and thus
// the text base affine transform must be accessible outside the parseText method.
private AffineTransform textBlockBase;
/**
 * Creates a content parser bound to a document library and the resource
 * dictionary that content stream operands are resolved against.
 *
 * @param l PDF library master object.
 * @param r resources used to look up named objects (fonts, XObjects,
 *          shadings, ...) referenced by the content stream.
 */
public ContentParser(Library l, Resources r) {
    this.resources = r;
    this.library = l;
}
/* private static void collectTokenFrequency(String token){
Float count = (Float)tokenFrequency.get(token);
float value;
if (count != null){
value = count.floatValue();
value ++;
tokenFrequency.remove(token);
tokenFrequency.put(token, new Float(value));
}
else{
tokenFrequency.put(token, new Float(1));
}
}
private void printTokenFrequency(){
Enumeration enum = tokenFrequency.keys();
while (enum.hasMoreElements()){
String key = (String)enum.nextElement();
Float tmp = (Float)tokenFrequency.get(key);
System.out.print(key + ", ");
if (tmp != null){
System.out.println(tmp.toString());
}
else{
System.out.println("");
}
}
}*/
/**
 * Gets the graphics state currently associated with this parser.
 *
 * @return the graphics state used for content stream parsing; null when no
 *         state has been assigned and no parse method has created one yet.
 */
public GraphicsState getGraphicsState() {
    return this.graphicState;
}
/**
 * Assigns the graphics state that the next parse will start from. Call this
 * before invoking a parse method; when a non-null state is present,
 * {@link #parse(InputStream)} reuses it (resetting its CTM and clip) instead
 * of creating a fresh one.
 *
 * @param graphicState graphics state of this content stream
 */
public void setGraphicsState(GraphicsState graphicState) {
    this.graphicState = graphicState;
}
/**
 * Parses a page's content stream.
 * <p>
 * Tokens are read from the stream one at a time. Operands (anything that is
 * not a String token) are pushed on a stack; operators consume their operands
 * from that stack and update the graphics state, the current path or the
 * returned Shapes. When a graphics state has already been assigned (Form
 * XObject case) it is reused with a fresh CTM and clip, otherwise a new one
 * is created.
 * <p>
 * An IOException from the parser is treated as the normal end of the content
 * stream. Path construction operators that arrive without a current path
 * (no preceding m or re) are ignored after consuming their operands.
 *
 * @param source byte stream containing page content
 * @return a Shapes object containing all the page's text and image shapes.
 * @throws InterruptedException if the current parse thread is interrupted.
 */
public Shapes parse(InputStream source) throws InterruptedException {
    Shapes shapes = new Shapes();
    // Normal, clean content parse where graphics state is null
    if (graphicState == null) {
        graphicState = new GraphicsState(shapes);
    }
    // If not null we have a Form XObject that contains a content stream
    // and we must copy the previous graphics state's draw settings in order
    // to preserve colour and fill data for the XObject's content stream.
    else {
        // the graphics state gets a new coordinate system.
        graphicState.setCTM(new AffineTransform());
        // reset the clipping area.
        graphicState.setClip(null);
        // copy previous stroke info
        setStroke(shapes, graphicState);
        // assign new shapes to the new graphics state
        graphicState.setShapes(shapes);
    }
    if (logger.isLoggable(Level.FINER)) {
        String content;
        if (source instanceof SeekableInput) {
            content = Utils.getContentFromSeekableInput((SeekableInput) source, false);
        } else {
            // the helper consumes the stream, so it hands back a replacement
            InputStream[] inArray = new InputStream[]{source};
            content = Utils.getContentAndReplaceInputStream(inArray, false);
            source = inArray[0];
        }
        logger.finer("Content = " + content);
    }
    // create a parser to get tokens from the stream
    Parser parser = new Parser(source);
    // operand stack to help with the parse
    Stack<Object> stack = new Stack<Object>();
    // text block y offset.
    float yBTstart = 0;
    try {
        // represents a geometric path constructed from straight lines, and
        // quadratic and cubic (Bezier) curves. It can contain
        // multiple sub paths.
        GeneralPath geometricPath = null;
        // loop through each token returned from the parser
        Object tok;
        while (true) {
            if (Thread.interrupted()) {
                throw new InterruptedException("ContentParser thread interrupted");
            }
            tok = parser.getStreamObject();
            // add any names and numbers and every thing else on the
            // stack for future reference
            if (!(tok instanceof String)) {
                stack.push(tok);
            } else {
                // Append a straight line segment from the current point to the
                // point (x, y). The new current point is (x, y).
                if (tok.equals(PdfOps.l_TOKEN)) {
                    float y = ((Number) stack.pop()).floatValue();
                    float x = ((Number) stack.pop()).floatValue();
                    // no current path (missing m/re): consume operands only
                    if (geometricPath != null) {
                        geometricPath.lineTo(x, y);
                    }
                }
                // Begin a new subpath by moving the current point to
                // coordinates (x, y), omitting any connecting line segment. If
                // the previous path construction operator in the current path
                // was also m, the new m overrides it; no vestige of the
                // previous m operation remains in the path.
                else if (tok.equals(PdfOps.m_TOKEN)) {
                    if (geometricPath == null) {
                        geometricPath = new GeneralPath();
                    }
                    float y = ((Number) stack.pop()).floatValue();
                    float x = ((Number) stack.pop()).floatValue();
                    geometricPath.moveTo(x, y);
                }
                // Append a cubic Bezier curve to the current path. The curve
                // extends from the current point to the point (x3, y3), using
                // (x1, y1) and (x2, y2) as the Bezier control points.
                // The new current point is (x3, y3).
                else if (tok.equals(PdfOps.c_TOKEN)) {
                    float y3 = ((Number) stack.pop()).floatValue();
                    float x3 = ((Number) stack.pop()).floatValue();
                    float y2 = ((Number) stack.pop()).floatValue();
                    float x2 = ((Number) stack.pop()).floatValue();
                    float y1 = ((Number) stack.pop()).floatValue();
                    float x1 = ((Number) stack.pop()).floatValue();
                    // no current path (missing m/re): consume operands only
                    if (geometricPath != null) {
                        geometricPath.curveTo(x1, y1, x2, y2, x3, y3);
                    }
                }
                // Stroke the path
                else if (tok.equals(PdfOps.S_TOKEN)) {
                    if (geometricPath != null) {
                        commonStroke(graphicState, shapes, geometricPath);
                        geometricPath = null;
                    }
                }
                // Font selection
                else if (tok.equals(PdfOps.Tf_TOKEN)) {
                    consume_Tf(graphicState, stack, resources);
                }
                // Begin a text object, initializing the text matrix, Tm, and
                // the text line matrix, Tlm, to the identity matrix. Text
                // objects cannot be nested; a second BT cannot appear before
                // an ET.
                else if (tok.equals(PdfOps.BT_TOKEN)) {
                    // start parseText, which parses until ET is reached
                    yBTstart = parseText(parser, shapes, yBTstart);
                }
                // Fill the path, using the nonzero winding number rule to
                // determine the region to fill (see "Nonzero Winding
                // Number Rule" ). Any subpaths that are open are implicitly
                // closed before being filled. f or F
                else if (tok.equals(PdfOps.F_TOKEN) ||
                        tok.equals(PdfOps.f_TOKEN)) {
                    if (geometricPath != null) {
                        geometricPath.setWindingRule(GeneralPath.WIND_NON_ZERO);
                        commonFill(shapes, geometricPath);
                    }
                    geometricPath = null;
                }
                // Saves Graphics State, should copy the entire graphics state onto
                // the graphicsState object's stack
                else if (tok.equals(PdfOps.q_TOKEN)) {
                    graphicState = consume_q(graphicState);
                }
                // Restore Graphics State, should restore the entire graphics state
                // to its former value by popping it from the stack
                else if (tok.equals(PdfOps.Q_TOKEN)) {
                    graphicState = consume_Q(graphicState, shapes);
                }
                // Append a rectangle to the current path as a complete subpath,
                // with lower-left corner (x, y) and dimensions width and height
                // in user space. The operation x y width height re is equivalent to
                // x y m
                // (x + width) y l
                // (x + width) (y + height) l
                // x (y + height) l
                // h
                else if (tok.equals(PdfOps.re_TOKEN)) {
                    if (geometricPath == null) {
                        geometricPath = new GeneralPath();
                    }
                    float h = ((Number) stack.pop()).floatValue();
                    float w = ((Number) stack.pop()).floatValue();
                    float y = ((Number) stack.pop()).floatValue();
                    float x = ((Number) stack.pop()).floatValue();
                    geometricPath.moveTo(x, y);
                    geometricPath.lineTo(x + w, y);
                    geometricPath.lineTo(x + w, y + h);
                    geometricPath.lineTo(x, y + h);
                    geometricPath.lineTo(x, y);
                }
                // Modify the current transformation matrix (CTM) by concatenating the
                // specified matrix
                else if (tok.equals(PdfOps.cm_TOKEN)) {
                    consume_cm(graphicState, stack, inTextBlock, textBlockBase);
                }
                // Close the current sub path by appending a straight line segment
                // from the current point to the starting point of the sub path.
                // This operator terminates the current sub path; appending
                // another segment to the current path will begin a new subpath,
                // even if the new segment begins at the endpoint reached by the
                // h operation. If the current subpath is already closed,
                // h does nothing.
                else if (tok.equals(PdfOps.h_TOKEN)) {
                    if (geometricPath != null) {
                        geometricPath.closePath();
                    }
                }
                // Begin a marked-content sequence with an associated property
                // list, terminated by a balancing EMC operator. tag is a name
                // object indicating the role or significance of the sequence;
                // properties is either an inline dictionary containing the
                // property list or a name object associated with it in the
                // Properties sub dictionary of the current resource dictionary
                else if (tok.equals(PdfOps.BDC_TOKEN)) {
                    stack.pop(); // properties
                    stack.pop(); // name
                }
                // End a marked-content sequence begun by a BMC or BDC operator.
                else if (tok.equals(PdfOps.EMC_TOKEN)) {
                    // no operands, nothing to do
                }
                // External Object (XObject) a graphics object whose contents
                // are defined by a self-contained content stream, separate
                // from the content stream in which it is used. There are three
                // types of external object:
                //
                // - An image XObject (Section 4.8.4, "Image Dictionaries")
                //   represents a sampled visual image such as a photograph.
                // - A form XObject (Section 4.9, "Form XObjects") is a
                //   self-contained description of an arbitrary sequence of
                //   graphics objects.
                // - A PostScript XObject (Section 4.7.1, "PostScript XObjects")
                //   contains a fragment of code expressed in the PostScript
                //   page description language. PostScript XObjects are no
                //   longer recommended to be used. (NOT SUPPORTED)
                //
                // Paint the specified XObject. The operand name must appear as
                // a key in the XObject subdictionary of the current resource
                // dictionary (see Section 3.7.2, "Resource Dictionaries"); the
                // associated value must be a stream whose Type entry, if
                // present, is XObject. The effect of Do depends on the value of
                // the XObject's Subtype entry, which may be Image , Form, or PS
                else if (tok.equals(PdfOps.Do_TOKEN)) {
                    graphicState = consume_Do(graphicState, stack, shapes, resources, true);
                }
                // Fill the path, using the even-odd rule to determine the
                // region to fill
                else if (tok.equals(PdfOps.f_STAR_TOKEN)) {
                    if (geometricPath != null) {
                        // need to apply pattern..
                        geometricPath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
                        commonFill(shapes, geometricPath);
                    }
                    geometricPath = null;
                }
                // Sets the specified parameters in the graphics state. The gs operand
                // points to a name resource which should be an ExtGState object.
                // The graphics state parameters in the ExtGState must be concatenated
                // with the current graphics state.
                else if (tok.equals(PdfOps.gs_TOKEN)) {
                    consume_gs(graphicState, stack, resources);
                }
                // End the path object without filling or stroking it. This
                // operator is a "path-painting no-op," used primarily for the
                // side effect of changing the current clipping path
                else if (tok.equals(PdfOps.n_TOKEN)) {
                    // clipping path outlines are visible when this is not
                    // reset to null
                    geometricPath = null;
                }
                // Set the line width in the graphics state
                else if (tok.equals(PdfOps.w_TOKEN) ||
                        tok.equals(PdfOps.LW_TOKEN)) {
                    consume_w(graphicState, stack, shapes);
                }
                // Modify the current clipping path by intersecting it with the
                // current path, using the nonzero winding number rule to
                // determine which regions lie inside the clipping path.
                else if (tok.equals(PdfOps.W_TOKEN)) {
                    if (geometricPath != null) {
                        geometricPath.setWindingRule(GeneralPath.WIND_NON_ZERO);
                        geometricPath.closePath();
                        graphicState.setClip(geometricPath);
                    }
                }
                // Fill Color with ColorSpace
                else if (tok.equals(PdfOps.sc_TOKEN) ||
                        tok.equals(PdfOps.scn_TOKEN)) {
                    consume_sc(graphicState, stack, library, resources);
                }
                // Close, fill, and then stroke the path, using the nonzero
                // winding number rule to determine the region to fill. This
                // operator has the same effect as the sequence h B. See also
                // "Special Path-Painting Considerations"
                else if (tok.equals(PdfOps.b_TOKEN)) {
                    if (geometricPath != null) {
                        geometricPath.setWindingRule(GeneralPath.WIND_NON_ZERO);
                        geometricPath.closePath();
                        commonFill(shapes, geometricPath);
                        commonStroke(graphicState, shapes, geometricPath);
                    }
                    geometricPath = null;
                }
                // Same as K, but for non-stroking operations.
                else if (tok.equals(PdfOps.k_TOKEN)) { // Fill Color CMYK
                    consume_k(graphicState, stack, library);
                }
                // Same as G but for non-stroking operations
                else if (tok.equals(PdfOps.g_TOKEN)) {
                    consume_g(graphicState, stack, library);
                }
                // Sets the flatness tolerance in the graphics state, NOT SUPPORTED
                // flatness is a number in the range 0 to 100, a value of 0 specifies
                // the default tolerance
                else if (tok.equals(PdfOps.i_TOKEN)) {
                    consume_i(stack);
                }
                // Miter Limit
                else if (tok.equals(PdfOps.M_TOKEN)) {
                    consume_M(graphicState, stack, shapes);
                }
                // Set the line cap style of the graphic state, related to Line Join
                // style
                else if (tok.equals(PdfOps.J_TOKEN)) {
                    consume_J(graphicState, stack, shapes);
                }
                // Same as RG, but for non-stroking operations.
                else if (tok.equals(PdfOps.rg_TOKEN)) { // Fill Color RGB
                    consume_rg(graphicState, stack, library);
                }
                // Sets the line dash pattern in the graphics state. A normal line
                // is [] 0. See Graphics State -> Line dash pattern for more information
                // in the PDF Reference. Java 2d uses the same notation so there
                // is not much work to be done other than parsing the data.
                else if (tok.equals(PdfOps.d_TOKEN)) {
                    consume_d(graphicState, stack, shapes);
                }
                // Append a cubic Bezier curve to the current path. The curve
                // extends from the current point to the point (x3, y3), using
                // the current point and (x2, y2) as the Bezier control points.
                // The new current point is (x3, y3).
                else if (tok.equals(PdfOps.v_TOKEN)) {
                    float y3 = ((Number) stack.pop()).floatValue();
                    float x3 = ((Number) stack.pop()).floatValue();
                    float y2 = ((Number) stack.pop()).floatValue();
                    float x2 = ((Number) stack.pop()).floatValue();
                    // no current path (missing m/re): consume operands only
                    if (geometricPath != null) {
                        geometricPath.curveTo(
                                (float) geometricPath.getCurrentPoint().getX(),
                                (float) geometricPath.getCurrentPoint().getY(),
                                x2,
                                y2,
                                x3,
                                y3);
                    }
                }
                // Set the line join style in the graphics state
                else if (tok.equals(PdfOps.j_TOKEN)) {
                    consume_j(graphicState, stack, shapes);
                }
                // Append a cubic Bezier curve to the current path. The curve
                // extends from the current point to the point (x3, y3), using
                // (x1, y1) and (x3, y3) as the Bezier control points.
                // The new current point is (x3, y3).
                else if (tok.equals(PdfOps.y_TOKEN)) {
                    float y3 = ((Number) stack.pop()).floatValue();
                    float x3 = ((Number) stack.pop()).floatValue();
                    float y1 = ((Number) stack.pop()).floatValue();
                    float x1 = ((Number) stack.pop()).floatValue();
                    // no current path (missing m/re): consume operands only
                    if (geometricPath != null) {
                        geometricPath.curveTo(x1, y1, x3, y3, x3, y3);
                    }
                }
                // Same as CS, but for nonstroking operations.
                else if (tok.equals(PdfOps.cs_TOKEN)) {
                    consume_cs(graphicState, stack, resources);
                }
                // Color rendering intent in the graphics state; the operand
                // is consumed and otherwise ignored
                else if (tok.equals(PdfOps.ri_TOKEN)) {
                    stack.pop();
                }
                // Set the color to use for stroking operations in a device, CIE-based
                // (other than ICCBased), or Indexed color space. The number of operands
                // required and their interpretation depends on the current stroking color space:
                // - For DeviceGray, CalGray, and Indexed color spaces, one operand
                //   is required (n = 1).
                // - For DeviceRGB, CalRGB, and Lab color spaces, three operands are
                //   required (n = 3).
                // - For DeviceCMYK, four operands are required (n = 4).
                else if (tok.equals(PdfOps.SC_TOKEN) ||
                        tok.equals(PdfOps.SCN_TOKEN)) { // Stroke Color with ColorSpace
                    consume_SC(graphicState, stack, library, resources);
                }
                // Fill and then stroke the path, using the nonzero winding
                // number rule to determine the region to fill. This produces
                // the same result as constructing two identical path objects,
                // painting the first with f and the second with S. Note,
                // however, that the filling and stroking portions of the
                // operation consult different values of several graphics state
                // parameters, such as the current color.
                else if (tok.equals(PdfOps.B_TOKEN)) {
                    if (geometricPath != null) {
                        geometricPath.setWindingRule(GeneralPath.WIND_NON_ZERO);
                        commonFill(shapes, geometricPath);
                        commonStroke(graphicState, shapes, geometricPath);
                    }
                    geometricPath = null;
                }
                // Set the stroking color space to DeviceCMYK (or the DefaultCMYK color
                // space; see "Default Color Spaces" on page 227) and set the color to
                // use for stroking operations. Each operand must be a number between
                // 0.0 (zero concentration) and 1.0 (maximum concentration). The
                // behavior of this operator is affected by the overprint mode
                // (see Section 4.5.6, "Overprint Control").
                else if (tok.equals(PdfOps.K_TOKEN)) { // Stroke Color CMYK
                    consume_K(graphicState, stack, library);
                }
                // Type3 operators, update the text state with data from these operands
                else if (tok.equals(PdfOps.d0_TOKEN)) {
                    // save the graphics state
                    graphicState = graphicState.save();
                    // need two pops to get Wx and Wy data
                    float y = ((Number) stack.pop()).floatValue();
                    float x = ((Number) stack.pop()).floatValue();
                    TextState textState = graphicState.getTextState();
                    textState.setType3HorizontalDisplacement(new Point.Float(x, y));
                }
                // Close and stroke the path. This operator has the same effect
                // as the sequence h S.
                else if (tok.equals(PdfOps.s_TOKEN)) {
                    if (geometricPath != null) {
                        geometricPath.closePath();
                        commonStroke(graphicState, shapes, geometricPath);
                        geometricPath = null;
                    }
                }
                // Set the stroking color space to DeviceGray (or the DefaultGray color
                // space; see "Default Color Spaces" ) and set the gray level to use for
                // stroking operations. gray is a number between 0.0 (black)
                // and 1.0 (white).
                else if (tok.equals(PdfOps.G_TOKEN)) {
                    consume_G(graphicState, stack, library);
                }
                // Close, fill, and then stroke the path, using the even-odd
                // rule to determine the region to fill. This operator has the
                // same effect as the sequence h B*. See also "Special
                // Path-Painting Considerations"
                // NOTE(review): stroke is issued before fill here, unlike b/B - confirm intent.
                else if (tok.equals(PdfOps.b_STAR_TOKEN)) {
                    if (geometricPath != null) {
                        geometricPath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
                        geometricPath.closePath();
                        commonStroke(graphicState, shapes, geometricPath);
                        commonFill(shapes, geometricPath);
                    }
                    geometricPath = null;
                }
                // Set the stroking color space to DeviceRGB (or the DefaultRGB color
                // space; see "Default Color Spaces" on page 227) and set the color to
                // use for stroking operations. Each operand must be a number between
                // 0.0 (minimum intensity) and 1.0 (maximum intensity).
                else if (tok.equals(PdfOps.RG_TOKEN)) { // Stroke Color RGB
                    consume_RG(graphicState, stack, library);
                }
                // Set the current color space to use for stroking operations. The
                // operand name must be a name object. If the color space is one that
                // can be specified by a name and no additional parameters (DeviceGray,
                // DeviceRGB, DeviceCMYK, and certain cases of Pattern), the name may be
                // specified directly. Otherwise, it must be a name defined in the
                // ColorSpace sub dictionary of the current resource dictionary; the
                // associated value is an array describing the color space.
                // Note:
                // The names DeviceGray, DeviceRGB, DeviceCMYK, and Pattern always
                // identify the corresponding color spaces directly; they never refer to
                // resources in the ColorSpace sub dictionary. The CS operator also sets
                // the current stroking color to its initial value, which depends on the
                // color space:
                // - In a DeviceGray, DeviceRGB, CalGray, or CalRGB color space, the
                //   initial color has all components equal to 0.0.
                // - In a DeviceCMYK color space, the initial color is
                //   [0.0 0.0 0.0 1.0].
                // - In a Lab or ICCBased color space, the initial color has all
                //   components equal to 0.0 unless that falls outside the intervals
                //   specified by the space's Range entry, in which case the nearest
                //   valid value is substituted.
                // - In an Indexed color space, the initial color value is 0.
                // - In a Separation or DeviceN color space, the initial tint value is
                //   1.0 for all colorants.
                // - In a Pattern color space, the initial color is a pattern object
                //   that causes nothing to be painted.
                else if (tok.equals(PdfOps.CS_TOKEN)) {
                    consume_CS(graphicState, stack, resources);
                }
                // Type3 operator carrying glyph width and bounding box
                else if (tok.equals(PdfOps.d1_TOKEN)) {
                    // save the graphics state
                    graphicState = graphicState.save();
                    // six pops: bounding box corners followed by Wx and Wy.
                    // NOTE(review): the PDF operand order is
                    // "wx wy llx lly urx ury d1", so the first value popped
                    // would be ury; the x/y naming of the four bbox values
                    // below looks swapped - confirm before changing.
                    float x2 = ((Number) stack.pop()).floatValue();
                    float y2 = ((Number) stack.pop()).floatValue();
                    float x1 = ((Number) stack.pop()).floatValue();
                    float y1 = ((Number) stack.pop()).floatValue();
                    float y = ((Number) stack.pop()).floatValue();
                    float x = ((Number) stack.pop()).floatValue();
                    TextState textState = graphicState.getTextState();
                    textState.setType3HorizontalDisplacement(
                            new Point2D.Float(x, y));
                    textState.setType3BBox(new PRectangle(
                            new Point2D.Float(x1, y1),
                            new Point2D.Float(x2, y2)));
                }
                // Fill and then stroke the path, using the even-odd rule to
                // determine the region to fill. This operator produces the same
                // result as B, except that the path is filled as if with f*
                // instead of f. See also "Special Path-Painting Considerations"
                // NOTE(review): stroke is issued before fill here, unlike b/B - confirm intent.
                else if (tok.equals(PdfOps.B_STAR_TOKEN)) {
                    if (geometricPath != null) {
                        geometricPath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
                        commonStroke(graphicState, shapes, geometricPath);
                        commonFill(shapes, geometricPath);
                    }
                    geometricPath = null;
                }
                // Begin a marked-content sequence terminated by a balancing EMC
                // operator. tag is a name object indicating the role or
                // significance of the sequence.
                else if (tok.equals(PdfOps.BMC_TOKEN)) {
                    stack.pop();
                }
                // Begin an inline image object
                else if (tok.equals(PdfOps.BI_TOKEN)) {
                    // start parsing image object, which leads to ID and EI
                    // tokens.
                    // ID - Begin the image data for an inline image object
                    // EI - End an inline image object
                    parseInlineImage(parser, shapes);
                }
                // Begin a compatibility section. Unrecognized operators
                // (along with their operands) will be ignored without error
                // until the balancing EX operator is encountered.
                else if (tok.equals(PdfOps.BX_TOKEN)) {
                    // no operands, nothing to do
                }
                // End a compatibility section begun by a balancing BX operator.
                else if (tok.equals(PdfOps.EX_TOKEN)) {
                    // no operands, nothing to do
                }
                // Modify the current clipping path by intersecting it with the
                // current path, using the even-odd rule to determine which
                // regions lie inside the clipping path.
                else if (tok.equals(PdfOps.W_STAR_TOKEN)) {
                    if (geometricPath != null) {
                        geometricPath.setWindingRule(GeneralPath.WIND_EVEN_ODD);
                        geometricPath.closePath();
                        graphicState.setClip(geometricPath);
                    }
                }
                // Single marked-content point.
                // Designate a marked-content point with an associated property
                // list. tag is a name object indicating the role or significance
                // of the point; properties is either an inline dictionary
                // containing the property list or a name object associated with
                // it in the Properties sub dictionary of the current resource
                // dictionary.
                else if (tok.equals(PdfOps.DP_TOKEN)) {
                    stack.pop(); // properties
                    stack.pop(); // name
                }
                // Designate a marked-content point. tag is a name object
                // indicating the role or significance of the point.
                else if (tok.equals(PdfOps.MP_TOKEN)) {
                    stack.pop();
                }
                // shading operator.
                else if (tok.equals(PdfOps.sh_TOKEN)) {
                    Object o = stack.peek();
                    // if a name then we are dealing with a pattern.
                    if (o instanceof Name) {
                        Name patternName = (Name) stack.pop();
                        Pattern pattern = resources.getShading(patternName.toString());
                        if (pattern != null) {
                            pattern.init();
                            // we paint the shape and color shading as defined
                            // by the pattern dictionary and respect the current clip
                            // apply a rudimentary softmask for a shading.
                            if (graphicState.getSoftMask() != null) {
                                setAlpha(shapes,
                                        graphicState.getAlphaRule(),
                                        0.50f);
                            } else {
                                setAlpha(shapes,
                                        graphicState.getAlphaRule(),
                                        graphicState.getFillAlpha());
                            }
                            shapes.add(pattern.getPaint());
                            shapes.add(graphicState.getClip());
                            shapes.addFillCommand();
                        }
                    }
                }
                // We've seen a couple cases when the text state parameters are written
                // outside of text blocks, this should cover these cases.
                // Character Spacing
                else if (tok.equals(PdfOps.Tc_TOKEN)) {
                    consume_Tc(graphicState, stack);
                }
                // Word spacing
                else if (tok.equals(PdfOps.Tw_TOKEN)) {
                    consume_Tw(graphicState, stack);
                }
                // Text leading
                else if (tok.equals(PdfOps.TL_TOKEN)) {
                    consume_TL(graphicState, stack);
                }
                // Rendering mode
                else if (tok.equals(PdfOps.Tr_TOKEN)) {
                    consume_Tr(graphicState, stack);
                }
                // Horizontal scaling
                else if (tok.equals(PdfOps.Tz_TOKEN)) {
                    consume_Tz(graphicState, stack);
                }
                // Text rise
                else if (tok.equals(PdfOps.Ts_TOKEN)) {
                    consume_Ts(graphicState, stack);
                }
            }
        }
    } catch (IOException e) {
        // eat the result as it is a normal occurrence
        logger.finer("End of Content Stream");
    } catch (NoninvertibleTransformException e) {
        // parsing stops here; report through the logger instead of stderr
        logger.log(Level.WARNING,
                "Non-invertible transform encountered while parsing content stream", e);
    } finally {
        // End of stream set alpha state back to 1.0f, so that other
        // streams aren't applied an incorrect alpha value.
        setAlpha(shapes, AlphaComposite.SRC_OVER, 1.0f);
    }
    // Print off anything left on the stack, any "Stack" traces should
    // indicate a parsing problem or a not supported operand
    while (!stack.isEmpty()) {
        String tmp = stack.pop().toString();
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("STACK=" + tmp);
        }
    }
    shapes.contract();
    return shapes;
}
/**
 * Specialized parse used for extracting text from documents. Only three
 * operators are acted upon: BT (delegated to parseText), Tf (font selection)
 * and Do (XObject content streams); every other operator is ignored.
 *
 * @param source content stream source.
 * @return the Shapes object that was handed to parseText and consume_Do
 *         while walking the stream.
 */
public Shapes parseTextBlocks(InputStream source) {
    // create a parser to get tokens from the stream
    Parser parser = new Parser(source);
    Shapes shapes = new Shapes();
    if (graphicState == null) {
        graphicState = new GraphicsState(shapes);
    }
    try {
        Stack<Object> operands = new Stack<Object>();
        double lastBTStart = 0;
        // walk the stream until the parser hands back null
        for (Object token = parser.getStreamObject();
             token != null;
             token = parser.getStreamObject()) {
            // names, numbers and every thing else that is not an operator
            // are kept on the stack for future reference
            if (!(token instanceof String)) {
                operands.push(token);
                continue;
            }
            if (token.equals(PdfOps.BT_TOKEN)) {
                // parseText consumes tokens until ET is reached
                lastBTStart = parseText(parser, shapes, lastBTStart);
                // free up some memory along the way; a full stack is
                // not needed to consume Tf tokens.
                operands.clear();
            } else if (token.equals(PdfOps.Tf_TOKEN)) {
                // for malformed docs any font must be consumed so that
                // toUnicode values can be resolved.
                consume_Tf(graphicState, operands, resources);
                operands.clear();
            } else if (token.equals(PdfOps.Do_TOKEN)) {
                // pick up on xObject content streams.
                consume_Do(graphicState, operands, shapes, resources, false);
                operands.clear();
            }
        }
        // clear our temporary stack.
        operands.clear();
    } catch (IOException e) {
        // eat the result as it is a normal occurrence
        logger.finer("End of Content Stream");
    }
    shapes.contract();
    return shapes;
}
/**
 * Parses the operators of a text object. Tokens are read from the parser
 * until the ET token is reached; operands are collected on a local stack and
 * consumed when their operator token arrives. Glyphs are drawn into the
 * shapes container and line breaks are reported to its {@link PageText} for
 * text extraction.
 * <p/>
 * While the method runs the {@code inTextBlock} flag is set and the CTM in
 * effect on entry is kept in {@code textBlockBase}; both are reset before
 * the method returns.
 *
 * @param parser          parser supplying the tokens of the text object
 * @param shapes          container of all shapes for the page content being parsed
 * @param previousBTStart y offset recorded for the previous text object.
 * @return y offset recorded by the first Td, TD or Tm operator of this text
 *         object, or 0 if none of them was encountered.
 * @throws java.io.IOException end of content stream is found
 */
float parseText(Parser parser, Shapes shapes, double previousBTStart)
        throws IOException {
    Object nextToken;
    Stack<Object> stack = new Stack<Object>();
    inTextBlock = true;
    // horizontal translation applied to the CTM after showing text; it is
    // undone again before the next line positioning operator is applied.
    float shift = 0;
    // keeps track of previous text placement so that Compatibility and
    // implementation note 57 is respected. That is text drawn after a TJ
    // must not be less then the previous glyphs coords.
    float previousAdvance = 0;
    Point2D.Float advance = new Point2D.Float(0, 0);
    // remember the CTM on entry, it is restored when the block ends.
    textBlockBase = new AffineTransform(graphicState.getCTM());

    // transformation matrix used to map core space to drawing space
    graphicState.getTextState().tmatrix = new AffineTransform();
    graphicState.getTextState().tlmatrix = new AffineTransform();
    graphicState.scale(1, -1);

    // get reference to PageText.
    PageText pageText = shapes.getPageText();
    // previous Td, TD or Tm y coordinate value for text extraction
    boolean isYstart = true;
    float yBTStart = 0;

    // start parsing of the BT block
    nextToken = parser.getStreamObject();
    while (!nextToken.equals("ET")) { // ET - end text object
        // String tokens are operators; anything else is an operand that is
        // pushed on the stack until its operator shows up.
        if (nextToken instanceof String) {

            // Normal text token, string, hex
            if (nextToken.equals(PdfOps.Tj_TOKEN)) {
                Object tjValue = stack.pop();
                StringObject stringObject;
                TextState textState;
                // operands that are not string objects are dropped silently
                if (tjValue instanceof StringObject) {
                    stringObject = (StringObject) tjValue;
                    textState = graphicState.getTextState();
                    // apply transparency
                    setAlpha(shapes, graphicState.getAlphaRule(), graphicState.getFillAlpha());
                    // draw string will take care of text pageText construction
                    Point2D.Float d = (Point2D.Float) drawString(
                            stringObject.getLiteralStringBuffer(
                                    textState.font.getSubTypeFormat(),
                                    textState.font.getFont()),
                            advance,
                            previousAdvance,
                            graphicState.getTextState(),
                            shapes);
                    // move the CTM past the drawn text and remember by how much
                    graphicState.translate(d.x, 0);
                    shift += d.x;
                    previousAdvance = 0;
                    advance.setLocation(0, 0);
                }
            }

            // Character Spacing
            else if (nextToken.equals(PdfOps.Tc_TOKEN)) {
                graphicState.getTextState().cspace = ((Number) stack.pop()).floatValue();
            }

            // Word spacing
            else if (nextToken.equals(PdfOps.Tw_TOKEN)) {
                graphicState.getTextState().wspace = ((Number) stack.pop()).floatValue();
            }

            // move to the start of the next line, offset from the start of the
            // current line by (tx,ty)
            else if (nextToken.equals(PdfOps.Td_TOKEN)) {
                float y = ((Number) stack.pop()).floatValue();
                float x = ((Number) stack.pop()).floatValue();
                double oldY = graphicState.getCTM().getTranslateY();
                // undo the horizontal shift of previously shown text
                graphicState.translate(-shift, 0);
                shift = 0;
                previousAdvance = 0;
                advance.setLocation(0, 0);
                // x,y are expressed in unscaled text space so we need to
                // apply the transform
                graphicState.translate(x * graphicState.getTextState().hScalling, -y );
                float newY = (float) graphicState.getCTM().getTranslateY();
                // capture the y offset of the first Tm, Td, TD of this block.
                if (isYstart) {
                    yBTStart = newY;
                    isYstart = false;
                    if (previousBTStart != yBTStart) {
                        pageText.newLine();
                    }
                }

                // ty will dictate the vertical shift, many pdf will use
                // ty=0 to just do a horizontal shift for layout.
                if (y != 0 && newY != oldY) {
                    pageText.newLine();
                }
            }

            // Transformation matrix
            // tm = |f1 f2 0|
            //      |f3 f4 0|
            //      |f5 f6 0|
            else if (nextToken.equals(PdfOps.Tm_TOKEN)) {
                shift = 0;
                previousAdvance = 0;
                advance.setLocation(0, 0);
                // operands are popped in reverse order
                float f6 = ((Number) stack.pop()).floatValue();
                float f5 = ((Number) stack.pop()).floatValue();
                float f4 = ((Number) stack.pop()).floatValue();
                float f3 = ((Number) stack.pop()).floatValue();
                float f2 = ((Number) stack.pop()).floatValue();
                float f1 = ((Number) stack.pop()).floatValue();
                // Tm replaces the text matrix, so start again from the CTM
                // captured when the text block was entered.
                AffineTransform af = new AffineTransform(textBlockBase);

                // grab old values.
                double oldTransY = graphicState.getCTM().getTranslateY();
                double oldScaleY = graphicState.getCTM().getScaleY();

                // apply the transform
                graphicState.getTextState().tmatrix = new AffineTransform(f1, f2, f3, f4, f5, f6);
                af.concatenate(graphicState.getTextState().tmatrix);
                graphicState.set(af);
                graphicState.scale(1, -1);

                // apply text size.
                applyTextScaling(graphicState);

                // text extraction logic

                // capture the y offset of the first Tm, Td, TD of this block.
                if (isYstart) {
                    yBTStart = f6;
                    isYstart = false;
                    if (previousBTStart != yBTStart) {
                        pageText.newLine();
                    }
                }
                double newTransY = graphicState.getCTM().getTranslateY();
                double newScaleY = graphicState.getCTM().getScaleY();
                // a change of the vertical translation or of the vertical
                // scale magnitude is treated as the start of a new line.
                if (oldTransY != newTransY) {
                    pageText.newLine();
                } else if (Math.abs(oldScaleY) != Math.abs(newScaleY)) {
                    pageText.newLine();
                }
            }

            // Font selection
            else if (nextToken.equals(PdfOps.Tf_TOKEN)) {
                consume_Tf(graphicState, stack, resources);
            }

            // TJ takes a vector whose elements are either strings to show or
            // numbers that adjust the position of the following string.
            else if (nextToken.equals(PdfOps.TJ_TOKEN)) {
                // apply transparency
                setAlpha(shapes, graphicState.getAlphaRule(), graphicState.getFillAlpha());
                Vector v = (Vector) stack.pop();
                StringObject stringObject;
                TextState textState;
                Number f;
                for (Object currentObject : v) {
                    if (currentObject instanceof StringObject) {
                        stringObject = (StringObject) currentObject;
                        textState = graphicState.getTextState();
                        // draw string takes care of PageText extraction
                        advance = (Point2D.Float) drawString(
                                stringObject.getLiteralStringBuffer(
                                        textState.font.getSubTypeFormat(),
                                        textState.font.getFont()),
                                advance, previousAdvance,
                                graphicState.getTextState(), shapes);
                    } else if (currentObject instanceof Number) {
                        // adjustment is given in thousandths of the font size
                        f = (Number) currentObject;
                        advance.x -=
                                f.floatValue() * graphicState.getTextState().currentfont.getSize()
                                        / 1000.0;
                    }
                    previousAdvance = advance.x;
                }
            }

            // Move to the start of the next line, offset from the start of the
            // current line by (tx,ty); also sets the leading to -ty.
            else if (nextToken.equals(PdfOps.TD_TOKEN)) {
                float y = ((Number) stack.pop()).floatValue();
                float x = ((Number) stack.pop()).floatValue();
                graphicState.translate(-shift, 0);
                shift = 0;
                previousAdvance = 0;
                advance.setLocation(0, 0);
                graphicState.translate(x, -y);
                graphicState.getTextState().leading = -y;

                // capture the y offset of the first Tm, Td, TD of this block.
                if (isYstart) {
                    yBTStart = y;
                    isYstart = false;
                }

                // ty will dictate the vertical shift, many pdf will use
                // ty=0 to just do a horizontal shift for layout.
                if (y != 0f) {
                    pageText.newLine();
                }
            }

            // Text leading
            else if (nextToken.equals(PdfOps.TL_TOKEN)) {
                graphicState.getTextState().leading = ((Number) stack.pop()).floatValue();
            }

            // Saves Graphics State, should copy the entire graphics state onto
            // the graphicsState object's stack
            else if (nextToken.equals(PdfOps.q_TOKEN)) {
                graphicState = consume_q(graphicState);
            }
            // Restore Graphics State, should restore the entire graphics state
            // to its former value by popping it from the stack
            else if (nextToken.equals(PdfOps.Q_TOKEN)) {
                graphicState = consume_Q(graphicState, shapes);
            }

            // Modify the current transformation matrix (CTM) by concatenating the
            // specified matrix
            else if (nextToken.equals(PdfOps.cm_TOKEN)) {
                consume_cm(graphicState, stack, inTextBlock, textBlockBase);
            }

            // Move to the start of the next line
            else if (nextToken.equals(PdfOps.T_STAR_TOKEN)) {
                graphicState.translate(-shift, 0);
                shift = 0;
                previousAdvance = 0;
                advance.setLocation(0, 0);
                graphicState.translate(0, graphicState.getTextState().leading);
                // always indicates a new line
                pageText.newLine();
            }
            // Marked content start: its two operands are discarded.
            else if (nextToken.equals(PdfOps.BDC_TOKEN)) {
                stack.pop();
                stack.pop();
            }
            // Marked content end: nothing to do.
            else if (nextToken.equals(PdfOps.EMC_TOKEN)) {
            }

            // Sets the specified parameters in the graphics state. The gs operand
            // points to a name resource which should be an ExtGState object.
            // The graphics state parameters in the ExtGState must be concatenated
            // with the current graphics state.
            else if (nextToken.equals(PdfOps.gs_TOKEN)) {
                consume_gs(graphicState, stack, resources);
            }

            // Set the line width in the graphics state
            else if (nextToken.equals(PdfOps.w_TOKEN) ||
                    nextToken.equals(PdfOps.LW_TOKEN)) {
                consume_w(graphicState, stack, shapes);
            }

            // Fill Color with ColorSpace
            else if (nextToken.equals(PdfOps.sc_TOKEN) ||
                    nextToken.equals(PdfOps.scn_TOKEN)) {
                consume_sc(graphicState, stack, library, resources);
            }

            // Same as K, but for nonstroking operations.
            else if (nextToken.equals(PdfOps.k_TOKEN)) { // Fill Color CMYK
                consume_k(graphicState, stack, library);
            }

            // Same as G but for non-stroking operations
            else if (nextToken.equals(PdfOps.g_TOKEN)) {
                consume_g(graphicState, stack, library);
            }

            // Sets the flatness tolerance in the graphics state, NOT SUPPORTED
            // flatness is a number in the range 0 to 100, a value of 0 specifies
            // the default tolerance
            else if (nextToken.equals(PdfOps.i_TOKEN)) {
                consume_i(stack);
            }

            // Miter Limit
            else if (nextToken.equals(PdfOps.M_TOKEN)) {
                consume_M(graphicState, stack, shapes);
            }

            // Set the line cap style of the graphic state, related to Line Join
            // style
            else if (nextToken.equals(PdfOps.J_TOKEN)) {
                consume_J(graphicState, stack, shapes);
            }

            // Same as RG, but for nonstroking operations.
            else if (nextToken.equals(PdfOps.rg_TOKEN)) { // Fill Color RGB
                consume_rg(graphicState, stack, library);
            }

            // Sets the line dash pattern in the graphics state. A normal line
            // is [] 0. See Graphics State -> Line dash pattern for more information
            // in the PDF Reference. Java 2d uses the same notation so there
            // is not much work to be done other then parsing the data.
            else if (nextToken.equals(PdfOps.d_TOKEN)) {
                consume_d(graphicState, stack, shapes);
            }

            // Set the line join style in the graphics state
            else if (nextToken.equals(PdfOps.j_TOKEN)) {
                consume_j(graphicState, stack, shapes);
            }

            // Same as CS, but for non-stroking operations.
            else if (nextToken.equals(PdfOps.cs_TOKEN)) {
                consume_cs(graphicState, stack, resources);
            }

            // Set the color rendering intent in the graphics state; the
            // operand is discarded.
            else if (nextToken.equals("ri")) {
                stack.pop();
            }

            // Set the color to use for stroking operations in a device, CIE-based
            // (other than ICCBased), or Indexed color space. The number of operands
            // required and their interpretation depends on the current stroking color space:
            //   - For DeviceGray, CalGray, and Indexed color spaces, one operand
            //     is required (n = 1).
            //   - For DeviceRGB, CalRGB, and Lab color spaces, three operands are
            //     required (n = 3).
            //   - For DeviceCMYK, four operands are required (n = 4).
            else if (nextToken.equals(PdfOps.SC_TOKEN) ||
                    nextToken.equals(PdfOps.SCN_TOKEN)) { // Stroke Color with ColorSpace
                consume_SC(graphicState, stack, library, resources);
            }

            // Set the stroking color space to DeviceCMYK (or the DefaultCMYK color
            // space; see "Default Color Spaces" on page 227) and set the color to
            // use for stroking operations. Each operand must be a number between
            // 0.0 (zero concentration) and 1.0 (maximum concentration). The
            // behavior of this operator is affected by the overprint mode
            // (see Section 4.5.6, "Overprint Control").
            else if (nextToken.equals(PdfOps.K_TOKEN)) { // Stroke Color CMYK
                consume_K(graphicState, stack, library);
            }

            // Set the stroking color space to DeviceGray (or the DefaultGray color
            // space; see "Default Color Spaces" ) and set the gray level to use for
            // stroking operations. gray is a number between 0.0 (black)
            // and 1.0 (white).
            else if (nextToken.equals(PdfOps.G_TOKEN)) {
                consume_G(graphicState, stack, library);
            }

            // Set the stroking color space to DeviceRGB (or the DefaultRGB color
            // space; see "Default Color Spaces" on page 227) and set the color to
            // use for stroking operations. Each operand must be a number between
            // 0.0 (minimum intensity) and 1.0 (maximum intensity).
            else if (nextToken.equals(PdfOps.RG_TOKEN)) { // Stroke Color RGB
                consume_RG(graphicState, stack, library);
            }
            // Stroke colour space selection
            else if (nextToken.equals(PdfOps.CS_TOKEN)) {
                consume_CS(graphicState, stack, resources);
            }

            // Rendering mode
            else if (nextToken.equals(PdfOps.Tr_TOKEN)) {
                graphicState.getTextState().rmode = (int) ((Number) stack.pop()).floatValue();
            }

            // Horizontal scaling
            else if (nextToken.equals(PdfOps.Tz_TOKEN)) {
                consume_Tz(graphicState, stack);
            }

            // Text rise
            else if (nextToken.equals(PdfOps.Ts_TOKEN)) {
                graphicState.getTextState().trise = ((Number) stack.pop()).floatValue();
            }

            // Begin a compatibility section. Unrecognized operators (along with
            // their operands) will be ignored without error until the balancing
            // EX operator is encountered. Nothing to do here.
            else if (nextToken.equals(PdfOps.BX_TOKEN)) {
            }
            // End a compatibility section begun by a balancing BX operator.
            else if (nextToken.equals(PdfOps.EX_TOKEN)) {
            }

            // Move to the next line and show a text string.
            else if (nextToken.equals(PdfOps.SINGLE_QUOTE_TOKEN)) {
                graphicState.translate(-shift, graphicState.getTextState().leading);

                // apply transparency
                setAlpha(shapes, graphicState.getAlphaRule(), graphicState.getFillAlpha());

                shift = 0;
                previousAdvance = 0;
                advance.setLocation(0, 0);
                StringObject stringObject = (StringObject) stack.pop();

                TextState textState = graphicState.getTextState();

                Point2D.Float d = (Point2D.Float) drawString(
                        stringObject.getLiteralStringBuffer(
                                textState.font.getSubTypeFormat(),
                                textState.font.getFont()),
                        new Point2D.Float(0, 0), 0, graphicState.getTextState(),
                        shapes);
                graphicState.translate(d.x, 0);
                shift += d.x;
            }

            // Move to the next line and show a text string, using aw as the
            // word spacing and ac as the character spacing (setting the
            // corresponding parameters in the text state). aw and ac are
            // numbers expressed in unscaled text space units.
            else if (nextToken.equals(PdfOps.DOUBLE_QUOTE__TOKEN)) {
                // operands are popped in reverse order: string, ac, aw
                StringObject stringObject = (StringObject) stack.pop();
                graphicState.getTextState().cspace = ((Number) stack.pop()).floatValue();
                graphicState.getTextState().wspace = ((Number) stack.pop()).floatValue();
                graphicState.translate(-shift, graphicState.getTextState().leading);

                // apply transparency
                setAlpha(shapes, graphicState.getAlphaRule(), graphicState.getFillAlpha());

                shift = 0;
                previousAdvance = 0;
                advance.setLocation(0, 0);

                TextState textState = graphicState.getTextState();

                Point2D.Float d = (Point2D.Float) drawString(
                        stringObject.getLiteralStringBuffer(
                                textState.font.getSubTypeFormat(),
                                textState.font.getFont()),
                        new Point2D.Float(0, 0), 0, graphicState.getTextState(),
                        shapes);
                graphicState.translate(d.x, 0);
                shift += d.x;
            }
        }
        // push everything else on the stack for consumptions
        else {
            stack.push(nextToken);
        }

        nextToken = parser.getStreamObject();
    }
    // discard operands that no operator consumed; they are only reported
    // when fine logging is enabled, at that same level.
    while (!stack.isEmpty()) {
        String tmp = stack.pop().toString();
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("Text=" + tmp);
        }
    }
    // restore the CTM that was in effect before the text block started
    graphicState.set(textBlockBase);
    inTextBlock = false;

    return yBTStart;
}
/**
 * Parses an inline image from the content stream. The key/value pairs that
 * precede the "ID" token are collected into a dictionary (abbreviated keys
 * are expanded to their full names), the image bytes that follow are copied
 * into memory, and the decoded image is added to the shapes container.
 *
 * @param p      parser positioned at the first inline image dictionary key
 * @param shapes container the decoded image is added to
 * @throws IOException if reading from the content stream fails; any other
 *                     exception is logged at FINE level and swallowed.
 */
void parseInlineImage(Parser p, Shapes shapes) throws IOException {
    try {
        // collect the image dictionary entries up to the ID token
        Hashtable<Object, Object> imageDictionary = new Hashtable<Object, Object>();
        for (Object key = p.getStreamObject(); !key.equals("ID"); key = p.getStreamObject()) {
            // expand the abbreviated inline image keys
            Object entryKey = key;
            if (key.equals("BPC")) {
                entryKey = new Name("BitsPerComponent");
            } else if (key.equals("CS")) {
                entryKey = new Name("ColorSpace");
            } else if (key.equals("D")) {
                entryKey = new Name("Decode");
            } else if (key.equals("DP")) {
                entryKey = new Name("DecodeParms");
            } else if (key.equals("F")) {
                entryKey = new Name("Filter");
            } else if (key.equals("H")) {
                entryKey = new Name("Height");
            } else if (key.equals("IM")) {
                entryKey = new Name("ImageMask");
            } else if (key.equals("I")) {
                entryKey = new Name("Indexed");
            } else if (key.equals("W")) {
                entryKey = new Name("Width");
            }
            Object entryValue = p.getStreamObject();
            imageDictionary.put(entryKey, entryValue);
        }

        // Inline image data lives inside the (filtered, possibly
        // concatenated) content stream, so it cannot be re-read from the
        // PDF file like a regular stream. The bytes are therefore copied
        // into memory and wrapped so that they look like file backed data.
        ByteArrayOutputStream imageBytes = new ByteArrayOutputStream(4096);
        Object lookahead = p.peek2();
        boolean consumedEI = false;
        while (!consumedEI && lookahead != null && !lookahead.equals(" EI")) {
            consumedEI = p.readLineForInlineImage(imageBytes);
            if (!consumedEI) {
                lookahead = p.peek2();
            }
        }
        if (!consumedEI) {
            // get rid of trash...
            p.getToken();
        }
        imageBytes.flush();
        imageBytes.close();

        byte[] rawImage = imageBytes.toByteArray();
        SeekableByteArrayInputStream rawInput =
                new SeekableByteArrayInputStream(rawImage);
        SeekableInputConstrainedWrapper constrainedInput =
                new SeekableInputConstrainedWrapper(rawInput, 0L, rawImage.length, true);
        Stream imageStream = new Stream(library, imageDictionary, constrainedInput);
        imageStream.setInlineImage(true);
        BufferedImage image =
                imageStream.getImage(graphicState.getFillColor(), resources, true);
        imageStream.dispose(false);

        // temporarily flip and shift the CTM for the image, then restore it
        AffineTransform savedCtm = new AffineTransform(graphicState.getCTM());
        graphicState.scale(1, -1);
        graphicState.translate(0, -1);
        shapes.add(image);
        graphicState.set(savedCtm);
    } catch (IOException e) {
        // stream level failures are propagated to the caller
        throw e;
    } catch (Exception e) {
        logger.log(Level.FINE, "Error parsing inline image.", e);
    }
}
/**
 * Consumes the "G" operator: pops one gray level from the stack and makes
 * DeviceGray the stroke colour space with that gray as the stroke colour.
 *
 * @param graphicState graphics state whose stroke colour is updated
 * @param stack        operand stack; the top element must be a Number
 * @param library      document library used to look up the colour space
 */
private static void consume_G(GraphicsState graphicState, Stack stack,
                              Library library) {
    float gray = ((Number) stack.pop()).floatValue();
    // java.awt.Color rejects components outside of 0.0 - 1.0 with an
    // IllegalArgumentException, so clamp the operand like consume_RG does.
    gray = Math.max(0.0f, Math.min(1.0f, gray));
    // Stroke Color Gray
    graphicState.setStrokeColorSpace(
            PColorSpace.getColorSpace(library, new Name("DeviceGray")));
    graphicState.setStrokeColor(new Color(gray, gray, gray));
}
/**
 * Consumes the "g" operator: pops one gray level from the stack and makes
 * DeviceGray the fill colour space with that gray as the fill colour.
 *
 * @param graphicState graphics state whose fill colour is updated
 * @param stack        operand stack; the top element must be a Number
 * @param library      document library used to look up the colour space
 */
private static void consume_g(GraphicsState graphicState, Stack stack,
                              Library library) {
    float gray = ((Number) stack.pop()).floatValue();
    // java.awt.Color rejects components outside of 0.0 - 1.0 with an
    // IllegalArgumentException, so clamp the operand like consume_rg does.
    gray = Math.max(0.0f, Math.min(1.0f, gray));
    // Fill Color Gray
    graphicState.setFillColorSpace(
            PColorSpace.getColorSpace(library, new Name("DeviceGray")));
    graphicState.setFillColor(new Color(gray, gray, gray));
}
/**
 * Consumes the "RG" operator: pops three colour components, clamps each
 * to the 0.0 - 1.0 range and applies them as a DeviceRGB stroke colour.
 *
 * @param graphicState graphics state whose stroke colour is updated
 * @param stack        operand stack holding red, green and blue on top
 * @param library      document library used to look up the colour space
 */
private static void consume_RG(GraphicsState graphicState, Stack stack,
                               Library library) {
    // operands come off the stack in reverse order: blue, green, red
    float blue = Math.max(0.0f, Math.min(1.0f, ((Number) stack.pop()).floatValue()));
    float green = Math.max(0.0f, Math.min(1.0f, ((Number) stack.pop()).floatValue()));
    float red = Math.max(0.0f, Math.min(1.0f, ((Number) stack.pop()).floatValue()));

    Name deviceRgb = new Name("DeviceRGB");
    graphicState.setStrokeColorSpace(PColorSpace.getColorSpace(library, deviceRgb));
    Color strokeColour = new Color(red, green, blue);
    graphicState.setStrokeColor(strokeColour);
}
/**
 * Consumes the "rg" operator: pops three colour components, clamps each
 * to the 0.0 - 1.0 range and applies them as a DeviceRGB fill colour.
 *
 * @param graphicState graphics state whose fill colour is updated
 * @param stack        operand stack holding red, green and blue on top
 * @param library      document library used to look up the colour space
 */
private static void consume_rg(GraphicsState graphicState, Stack stack,
                               Library library) {
    // operands come off the stack in reverse order: blue, green, red
    float blue = Math.max(0.0f, Math.min(1.0f, ((Number) stack.pop()).floatValue()));
    float green = Math.max(0.0f, Math.min(1.0f, ((Number) stack.pop()).floatValue()));
    float red = Math.max(0.0f, Math.min(1.0f, ((Number) stack.pop()).floatValue()));

    Name deviceRgb = new Name("DeviceRGB");
    graphicState.setFillColorSpace(PColorSpace.getColorSpace(library, deviceRgb));
    Color fillColour = new Color(red, green, blue);
    graphicState.setFillColor(fillColour);
}
/**
 * Consumes the "K" operator: pops four components from the stack and
 * applies them as a DeviceCMYK stroke colour.
 *
 * @param graphicState graphics state whose stroke colour is updated
 * @param stack        operand stack holding c, m, y and k on top
 * @param library      document library used to look up the colour space
 */
private static void consume_K(GraphicsState graphicState, Stack stack,
                              Library library) {
    // operands come off the stack in reverse order: k, y, m, c
    float black = ((Number) stack.pop()).floatValue();
    float yellow = ((Number) stack.pop()).floatValue();
    float magenta = ((Number) stack.pop()).floatValue();
    float cyan = ((Number) stack.pop()).floatValue();
    float[] cmyk = {cyan, magenta, yellow, black};

    PColorSpace deviceCmyk =
            PColorSpace.getColorSpace(library, new Name("DeviceCMYK"));
    graphicState.setStrokeColorSpace(deviceCmyk);
    // the colour space is handed the components in reversed order
    graphicState.setStrokeColor(deviceCmyk.getColor(PColorSpace.reverse(cmyk)));
}
/**
 * Consumes the "k" operator: pops four components from the stack and
 * applies them as a DeviceCMYK fill colour.
 *
 * @param graphicState graphics state whose fill colour is updated
 * @param stack        operand stack holding c, m, y and k on top
 * @param library      document library used to look up the colour space
 */
private static void consume_k(GraphicsState graphicState, Stack stack,
                              Library library) {
    // operands come off the stack in reverse order: k, y, m, c
    float black = ((Number) stack.pop()).floatValue();
    float yellow = ((Number) stack.pop()).floatValue();
    float magenta = ((Number) stack.pop()).floatValue();
    float cyan = ((Number) stack.pop()).floatValue();
    float[] cmyk = {cyan, magenta, yellow, black};

    PColorSpace deviceCmyk =
            PColorSpace.getColorSpace(library, new Name("DeviceCMYK"));
    graphicState.setFillColorSpace(deviceCmyk);
    // the colour space is handed the components in reversed order
    graphicState.setFillColor(deviceCmyk.getColor(PColorSpace.reverse(cmyk)));
}
/**
 * Consumes the "CS" operator: pops a colour space name from the stack and
 * makes the colour space the resources return for it the stroke colour space.
 *
 * @param graphicState graphics state whose stroke colour space is updated
 * @param stack        operand stack; the top element must be a Name
 * @param resources    resources used to resolve the colour space name
 */
private static void consume_CS(GraphicsState graphicState, Stack stack, Resources resources) {
    // Stroke colour space, resolved by name through the resources
    graphicState.setStrokeColorSpace(
            resources.getColorSpace((Name) stack.pop()));
}
/**
 * Consumes the "cs" operator: pops a colour space name from the stack and
 * makes the colour space the resources return for it the fill colour space.
 *
 * @param graphicState graphics state whose fill colour space is updated
 * @param stack        operand stack; the top element must be a Name
 * @param resources    resources used to resolve the colour space name
 */
private static void consume_cs(GraphicsState graphicState, Stack stack, Resources resources) {
    // Fill colour space, resolved by name through the resources
    graphicState.setFillColorSpace(
            resources.getColorSpace((Name) stack.pop()));
}
/**
 * Consumes the "SC" / "SCN" operators which set the stroke colour.
 * <p/>
 * If the top operand is a Name it names a pattern: the pattern is installed
 * in the stroke PatternColor space and, for uncoloured tiling patterns, the
 * numeric components below the name are used as the pattern's colour. If the
 * top operand is a Number, up to four numeric components are popped and used
 * as the stroke colour; the stroke colour space is replaced when the number
 * of components found does not match the current space.
 *
 * @param graphicState graphics state whose stroke colour is updated
 * @param stack        operand stack, must not be empty
 * @param library      document library used when creating colour spaces
 * @param resources    resources used to look up the named pattern
 */
private static void consume_SC(GraphicsState graphicState, Stack stack,
                               Library library, Resources resources) {
    Object o = stack.peek();
    // if a name then we are dealing with a pattern
    if (o instanceof Name) {
        Name patternName = (Name) stack.pop();
        Pattern pattern = resources.getPattern(patternName.toString());
        // Create or update the current PatternColorSpace with an instance
        // of the current pattern. These object will be used later during
        // fill, show text and Do with image masks.
        if (graphicState.getStrokeColorSpace() instanceof PatternColor) {
            PatternColor pc = (PatternColor) graphicState.getStrokeColorSpace();
            pc.setPattern(pattern);
        } else {
            // pass the document library, as consume_sc does for the fill
            // colour space, instead of creating the space without one.
            PatternColor pc = new PatternColor(library, null);
            pc.setPattern(pattern);
            graphicState.setStrokeColorSpace(pc);
        }

        // two cases to take into account:
        // for uncoloured tiling patterns we must parse the component
        // values that specify colour. otherwise we just use the name
        // for all other pattern types.
        if (pattern instanceof TilingPattern) {
            TilingPattern tilingPattern = (TilingPattern) pattern;
            if (tilingPattern.getPaintType() ==
                    TilingPattern.PAINTING_TYPE_UNCOLORED_TILING_PATTERN) {
                // parsing is of the form 'C1...Cn name scn'
                int compLength = graphicState.getStrokeColorSpace().getNumComponents();
                int nCount = 0;
                float colour[] = new float[compLength];
                // peek and pop all of the colour components
                while (!stack.isEmpty() && stack.peek() instanceof Number &&
                        nCount < compLength) {
                    colour[nCount] = ((Number) stack.pop()).floatValue();
                    nCount++;
                }
                // stroke colour to be used when painting.
                graphicState.setStrokeColor(graphicState.getStrokeColorSpace().getColor(colour));
                tilingPattern.setUnColored(
                        graphicState.getStrokeColorSpace().getColor(colour));
            }
        }
    } else if (o instanceof Number) {

        // some pdfs encoding do not explicitly change the default colour
        // space from the default DeviceGrey. The following code checks
        // how many n values are available and if different then current
        // graphicState.strokeColorSpace it is changed as needed

        // first get assumed number of components
        int colorSpaceN = graphicState.getStrokeColorSpace().getNumComponents();

        int nCount = 0;
        // set colour to max of 4 which is cmyk,
        int compLength = 4;
        float colour[] = new float[compLength];
        // peek and pop all of the colour components
        while (!stack.isEmpty() && stack.peek() instanceof Number &&
                nCount < compLength) {
            colour[nCount] = ((Number) stack.pop()).floatValue();
            nCount++;
        }

        // check to see if nCount and colorSpaceN are the same
        if (nCount != colorSpaceN) {
            // change the colour state to nCount equivalent
            graphicState.setStrokeColorSpace(
                    PColorSpace.getColorSpace(library, nCount));
        }
        // shrink the array to the correct length
        float[] f = new float[nCount];
        System.arraycopy(colour, 0, f, 0, nCount);
        graphicState.setStrokeColor(graphicState.getStrokeColorSpace().getColor(f));
    }
}
/**
 * Consumes the "sc" / "scn" operators which set the fill colour.
 * <p/>
 * A Name on top of the stack selects a pattern, which is installed in the
 * fill PatternColor space; for uncoloured tiling patterns the numeric
 * components below the name become the pattern's colour. A Number on top of
 * the stack starts a run of up to four numeric components that become the
 * fill colour; the fill colour space is swapped when the number of
 * components found differs from that of the current space.
 *
 * @param graphicState graphics state whose fill colour is updated
 * @param stack        operand stack, must not be empty
 * @param library      document library used when creating colour spaces
 * @param resources    resources used to look up the named pattern
 */
private static void consume_sc(GraphicsState graphicState, Stack stack,
                               Library library, Resources resources) {
    Object top = stack.peek();

    // pattern case: 'name scn' or 'C1...Cn name scn'
    if (top instanceof Name) {
        Name patternName = (Name) stack.pop();
        Pattern pattern = resources.getPattern(patternName.toString());

        // reuse the current pattern colour space or install a new one; the
        // pattern is consulted later by fill, show text and image mask Do.
        if (graphicState.getFillColorSpace() instanceof PatternColor) {
            ((PatternColor) graphicState.getFillColorSpace()).setPattern(pattern);
        } else {
            PatternColor patternSpace = new PatternColor(library, null);
            patternSpace.setPattern(pattern);
            graphicState.setFillColorSpace(patternSpace);
        }

        // only uncoloured tiling patterns carry colour components
        if (!(pattern instanceof TilingPattern)) {
            return;
        }
        TilingPattern tiling = (TilingPattern) pattern;
        if (tiling.getPaintType() !=
                TilingPattern.PAINTING_TYPE_UNCOLORED_TILING_PATTERN) {
            return;
        }
        int expected = graphicState.getFillColorSpace().getNumComponents();
        float[] components = new float[expected];
        // pop numeric operands until the array is full or a non number is met
        for (int found = 0;
             !stack.isEmpty() && stack.peek() instanceof Number && found < expected;
             found++) {
            components[found] = ((Number) stack.pop()).floatValue();
        }
        // fill colour to be used when painting.
        graphicState.setFillColor(graphicState.getFillColorSpace().getColor(components));
        tiling.setUnColored(
                graphicState.getFillColorSpace().getColor(components));
        return;
    }

    if (!(top instanceof Number)) {
        return;
    }

    // Plain colour components. Some PDFs never switch away from the default
    // DeviceGray space explicitly, so the number of operands found decides
    // whether the fill colour space has to be replaced.
    int currentSpaceComponents = graphicState.getFillColorSpace().getNumComponents();
    // four components (cmyk) is the most that is read
    final int maxComponents = 4;
    float[] operands = new float[maxComponents];
    int found = 0;
    while (!stack.isEmpty() && stack.peek() instanceof Number &&
            found < maxComponents) {
        operands[found] = ((Number) stack.pop()).floatValue();
        found++;
    }
    if (found != currentSpaceComponents) {
        graphicState.setFillColorSpace(
                PColorSpace.getColorSpace(library, found));
    }
    // trim the operand array to the number of components actually read
    float[] trimmed = new float[found];
    System.arraycopy(operands, 0, trimmed, 0, found);
    graphicState.setFillColor(graphicState.getFillColorSpace().getColor(trimmed));
}
/**
 * Consumes the "q" (save graphics state) operator by delegating to
 * {@link GraphicsState#save()}.
 *
 * @param graphicState graphics state to save
 * @return the graphics state returned by the save call; parseText assigns
 *         it as the current graphics state.
 */
private static GraphicsState consume_q(GraphicsState graphicState) {
// collectTokenFrequency(PdfOps.q_TOKEN);
return graphicState.save();
}
/**
 * Consumes the "Q" (restore graphics state) operator.
 *
 * @param graphicState graphics state to restore from
 * @param shapes       shapes container, used only when nothing can be restored
 * @return the restored graphics state, or, when restore() yields null, a
 *         new graphics state with an identity transform (a no-clip command
 *         is added to the shapes in that case).
 */
private static GraphicsState consume_Q(GraphicsState graphicState, Shapes shapes) {
    GraphicsState restored = graphicState.restore();
    if (restored != null) {
        // point to the state returned from the stack
        return restored;
    }
    // nothing to restore, start over with a new state
    GraphicsState fresh = new GraphicsState(shapes);
    fresh.set(new AffineTransform());
    shapes.addNoClipCommand();
    return fresh;
}
/**
 * Consumes the "cm" operator which takes the six operands a b c d e f.
 * <p/>
 * Outside of a text block the matrix is concatenated with the current CTM
 * and the clip is updated with it. Inside of a text block the matrix is
 * handled like a text matrix: it is stored as the text state's tmatrix and
 * concatenated with the text block's base transform.
 *
 * @param graphicState  graphics state to update
 * @param stack         operand stack holding the six matrix values on top
 * @param inTextBlock   true if the operator was found inside a text block
 * @param textBlockBase CTM captured at the start of the text block; only
 *                      read when inTextBlock is true
 */
private static void consume_cm(GraphicsState graphicState, Stack stack,
                               boolean inTextBlock, AffineTransform textBlockBase) {
    // operands come off the stack last to first: f, e, d, c, b, a
    float[] m = new float[6];
    for (int i = m.length - 1; i >= 0; i--) {
        m[i] = ((Number) stack.pop()).floatValue();
    }

    if (inTextBlock) {
        // apply the cm just as we would a Tm, relative to the block's base
        AffineTransform textMatrix =
                new AffineTransform(m[0], m[1], m[2], m[3], m[4], m[5]);
        graphicState.getTextState().tmatrix = textMatrix;
        AffineTransform combined = new AffineTransform(textBlockBase);
        combined.concatenate(graphicState.getTextState().tmatrix);
        graphicState.set(combined);
        graphicState.scale(1, -1);
        // apply text size.
        applyTextScaling(graphicState);
        return;
    }

    // regular case: concatenate the matrix with a copy of the current CTM
    AffineTransform combined = new AffineTransform(graphicState.getCTM());
    combined.concatenate(new AffineTransform(m[0], m[1], m[2], m[3], m[4], m[5]));
    graphicState.set(combined);
    // update the clip with this cm as well
    graphicState.updateClipCM(new AffineTransform(m[0], m[1], m[2], m[3], m[4], m[5]));
}
/**
 * Consumes the "i" (flatness tolerance) operator. The operand is popped
 * from the stack and discarded; no graphics state is changed here.
 *
 * @param stack operand stack, must not be empty
 */
private static void consume_i(Stack stack) {
// collectTokenFrequency(PdfOps.i_TOKEN);
stack.pop();
}
/**
 * Consumes the "J" (line cap style) operator: pops the cap style from the
 * stack, stores it in the graphics state, maps the values 0, 1 and 2 onto
 * the matching BasicStroke cap constants and records the stroke change in
 * the shapes container.
 *
 * @param graphicState graphics state whose line cap is updated
 * @param stack        operand stack; the top element must be a Number
 * @param shapes       shapes container the updated stroke is stored in
 */
private static void consume_J(GraphicsState graphicState, Stack stack, Shapes shapes) {
    int requestedCap = (int) (((Number) stack.pop()).floatValue());
    graphicState.setLineCap(requestedCap);

    switch (graphicState.getLineCap()) {
        case 0:
            // Butt cap, the stroke is squared off at the endpoint of the
            // path with no projection beyond it.
            graphicState.setLineCap(BasicStroke.CAP_BUTT);
            break;
        case 1:
            // Round cap, a semicircular arc with a diameter equal to the
            // line width is drawn around the endpoint and filled in.
            graphicState.setLineCap(BasicStroke.CAP_ROUND);
            break;
        case 2:
            // Projecting square cap, the stroke continues beyond the
            // endpoint for half the line width and is then squared off.
            graphicState.setLineCap(BasicStroke.CAP_SQUARE);
            break;
        default:
            // any other value is left as popped
            break;
    }

    // mark the stroke as changed and store the state in the shapes object
    setStroke(shapes, graphicState);
}
/**
* Processes the "Do" operator: pops an XObject name off the stack and adds
* the content of the named XObject to the shapes collection.
* <p/>
* When the name refers to a Form XObject the graphic state is saved, the
* form is initialised with a copy of that state, and the form's matrix,
* clip (BBox) and alpha are added to shapes, followed by either the form
* itself (transparency groups) or the form's shapes. The graphic state is
* then restored. Any other name is treated as an image, which is only
* looked up and added when <code>viewParse</code> is true.
*
* @param graphicState current graphic state.
* @param stack stack of objects being parsed; the top entry is cast to Name.
* @param shapes shapes object the drawing instructions are appended to.
* @param resources associated resources, used to look up the XObject.
* @param viewParse true indicates parsing is for a normal view. If false
* the consumption of Do will skip Image based xObjects for performance.
* @return the graphic state to continue parsing with; for a form this is
* the state returned by restore() after the initial save().
*/
private static GraphicsState consume_Do(GraphicsState graphicState, Stack stack,
Shapes shapes, Resources resources,
boolean viewParse){
// collectTokenFrequency(PdfOps.Do_TOKEN);
String xobjectName = ((Name) (stack.pop())).getName();
// Form XObject
if (resources.isForm(xobjectName)) {
// Do operator steps:
// 1.) save the graphics context
graphicState = graphicState.save();
// Try and find the named reference 'xobjectName', pass in a copy
// of the current graphics state for the new content stream
Form formXObject = resources.getForm(xobjectName);
if (formXObject != null) {
// init formXObject with its own copy of the graphics state
GraphicsState xformGraphicsState =
new GraphicsState(graphicState);
formXObject.setGraphicsState(xformGraphicsState);
if (formXObject.isTransparencyGroup()) {
// assign the state to the graphic state for later
// processing during the paint
xformGraphicsState.setTransparencyGroup(formXObject.isTransparencyGroup());
xformGraphicsState.setIsolated(formXObject.isIsolated());
xformGraphicsState.setKnockOut(formXObject.isKnockOut());
}
// according to spec the formXObject might not have
// resources reference as a result we pass in the current
// one in the hope that any resources can be found.
formXObject.setParentResources(resources);
formXObject.init();
// 2.) concatenate matrix entry with the current CTM
AffineTransform af =
new AffineTransform(graphicState.getCTM());
af.concatenate(formXObject.getMatrix());
shapes.add(af);
// 3.) Clip according to the form BBox entry
if (graphicState.getClip() != null) {
AffineTransform matrix = formXObject.getMatrix();
Area bbox = new Area(formXObject.getBBox());
Area clip = graphicState.getClip();
// create inverse of matrix so we can transform
// the clip to form space.
try {
matrix = matrix.createInverse();
} catch (NoninvertibleTransformException e) {
// NOTE(review): on failure the un-inverted form matrix is
// still used to transform the clip below.
logger.warning("Error create xObject matrix inverse");
}
// apply the new clip now that they are in the
// same space.
Shape shape = matrix.createTransformedShape(clip);
bbox.intersect(new Area(shape));
shapes.add(bbox);
} else {
// no current clip, so the BBox alone is the clip shape
shapes.add(formXObject.getBBox());
}
shapes.addClipCommand();
// apply transparency in a lossy manner trying to support
// the 2/3 transparency group rules.
if (graphicState.getSoftMask() != null){
// apply a rudimentary soft mask: a fixed 50% alpha.
setAlpha(shapes,
graphicState.getAlphaRule(),
0.50f);
}else{
setAlpha(shapes,
graphicState.getAlphaRule(),
graphicState.getFillAlpha());
}
// 4.) Paint the graphics objects in the form stream.
// If we have a transparency group we paint it
// slightly differently than a regular xObject as we
// need to capture the alpha which is only possible
// by painting the xObject to an image.
if (formXObject.isTransparencyGroup()) {
// add the whole form for further processing.
shapes.add(formXObject);
}
// the down side of painting to an image is that we
// lose quality if there is an affine transform, so
// if it isn't a group transparency we paint the old way
// by just adding the objects to the shapes stack.
else {
shapes.add(formXObject.getShapes());
}
// make sure we add xobject images so we can extract them.
if (formXObject.getShapes() != null) {
shapes.add(formXObject.getShapes().getImages());
}
// update text sprites with geometric path state
if (formXObject.getShapes() != null &&
formXObject.getShapes().getPageText() != null) {
// normalize each sprite using the current CTM.
formXObject.getShapes().getPageText()
.applyXObjectTransform(graphicState.getCTM());
}
shapes.addNoClipCommand();
formXObject.completed();
// clean up resources used by this form object
formXObject.disposeResources(true);
}
// 5.) Restore the saved graphics state (also done when the form
// lookup returned null, balancing the save() above)
graphicState = graphicState.restore();
}
// Image XObject, skipped entirely when viewParse is false
else if (viewParse) {
setAlpha(shapes, graphicState.getAlphaRule(), graphicState.getFillAlpha());
Image im = resources.getImage(xobjectName,
graphicState.getFillColor());
if (im != null) {
// remember the CTM so it can be put back after the image is added
AffineTransform af =
new AffineTransform(graphicState.getCTM());
// flip vertically and shift by one unit while the image is added
graphicState.scale(1, -1);
graphicState.translate(0, -1);
// add the image
shapes.add(im);
// reinstate the CTM captured above
graphicState.set(af);
}
}
return graphicState;
}
/**
 * Processes the "d" operator: pops the dash phase and then the dash array
 * from the stack, stores both in the graphics state and pushes an updated
 * stroke onto the shapes collection.
 * <p/>
 * Negative operands are replaced by their absolute value. An empty dash
 * array, or one that contains no entry greater than zero, selects a solid
 * line (null dash array, zero phase); java.awt.BasicStroke rejects such
 * arrays with an IllegalArgumentException. When an operand has the wrong
 * type the error is logged and the dash state is left untouched.
 *
 * @param graphicState graphics state that receives the dash settings.
 * @param stack        stack of objects being parsed; the dash phase sits
 *                     on top of the dash array.
 * @param shapes       shapes object that receives the updated stroke.
 */
private static void consume_d(GraphicsState graphicState, Stack stack, Shapes shapes) {
    try {
        // pop dashPhase off the stack
        float dashPhase = Math.abs(((Number) stack.pop()).floatValue());
        // pop the dashVector off the stack
        Vector dashVector = (Vector) stack.pop();
        // convert the dash vector to an array of floats, remembering
        // whether at least one usable (positive) length was found
        final int sz = dashVector.size();
        float[] dashArray = new float[sz];
        boolean hasPositiveLength = false;
        for (int i = 0; i < sz; i++) {
            dashArray[i] = Math.abs(((Number) dashVector.get(i)).floatValue());
            if (dashArray[i] > 0) {
                hasPositiveLength = true;
            }
        }
        // An empty or all-zero array cannot be turned into a BasicStroke
        // dash pattern, so fall back to the default solid line.
        if (!hasPositiveLength) {
            dashPhase = 0;
            dashArray = null;
        }
        // assign state now that everything is known to be good from a
        // class cast exception point of view.
        graphicState.setDashArray(dashArray);
        graphicState.setDashPhase(dashPhase);
    } catch (ClassCastException e) {
        logger.log(Level.FINE, "Dash pattern syntax error: ", e);
    }
    // update stroke state with possibly new dash data.
    setStroke(shapes, graphicState);
}
/**
 * Processes the "j" operator: pops the line join style from the stack,
 * stores it in the graphics state and pushes an updated stroke onto the
 * shapes collection.
 * <p/>
 * The join codes 0, 1 and 2 are translated to the matching
 * java.awt.BasicStroke constants; any other value is kept as parsed.
 *
 * @param graphicState graphics state that receives the line join.
 * @param stack        stack of objects being parsed; the top entry is
 *                     cast to Number.
 * @param shapes       shapes object that receives the updated stroke.
 */
private static void consume_j(GraphicsState graphicState, Stack stack, Shapes shapes) {
    final Number joinOperand = (Number) stack.pop();
    graphicState.setLineJoin((int) joinOperand.floatValue());
    switch (graphicState.getLineJoin()) {
        case 0:
            // Miter join: the outer stroke edges of both segments are
            // extended until they meet, like a picture frame corner.
            graphicState.setLineJoin(BasicStroke.JOIN_MITER);
            break;
        case 1:
            // Round join: an arc with a diameter equal to the line width
            // connects the outer edges of the two segments.
            graphicState.setLineJoin(BasicStroke.JOIN_ROUND);
            break;
        case 2:
            // Bevel join: both segments end with butt caps and the
            // resulting notch is filled with a triangle.
            graphicState.setLineJoin(BasicStroke.JOIN_BEVEL);
            break;
        default:
            // unknown join code, keep whatever was parsed
            break;
    }
    // push a stroke that reflects the new join type
    setStroke(shapes, graphicState);
}
/**
 * Pops the line width from the stack, stores it in the graphics state and
 * pushes an updated stroke onto the shapes collection.
 *
 * @param graphicState graphics state that receives the line width.
 * @param stack        stack of objects being parsed; the top entry is
 *                     cast to Number.
 * @param shapes       shapes object that receives the updated stroke.
 */
private static void consume_w(GraphicsState graphicState, Stack stack, Shapes shapes) {
    final Number widthOperand = (Number) stack.pop();
    graphicState.setLineWidth(widthOperand.floatValue());
    setStroke(shapes, graphicState);
}
/**
 * Pops the miter limit from the stack, stores it in the graphics state and
 * pushes an updated stroke onto the shapes collection.
 *
 * @param graphicState graphics state that receives the miter limit.
 * @param stack        stack of objects being parsed; the top entry is
 *                     cast to Number.
 * @param shapes       shapes object that receives the updated stroke.
 */
private static void consume_M(GraphicsState graphicState, Stack stack, Shapes shapes) {
    final Number limitOperand = (Number) stack.pop();
    graphicState.setMiterLimit(limitOperand.floatValue());
    setStroke(shapes, graphicState);
}
/**
 * Processes the "gs" operator: pops the operand from the stack and, when
 * it is a Name that resolves to an extended graphics state in the given
 * resources, merges that state into the current graphics state. Any other
 * operand, or an unknown name, is ignored.
 *
 * @param graphicState graphics state to merge the ExtGState into.
 * @param stack        stack of objects being parsed.
 * @param resources    resources used to look up the named ExtGState.
 */
private static void consume_gs(GraphicsState graphicState, Stack stack, Resources resources) {
    final Object operand = stack.pop();
    if (!(operand instanceof Name)) {
        return;
    }
    final ExtGState namedState =
            resources.getExtGState(((Name) operand).getName());
    if (namedState == null) {
        return;
    }
    graphicState.concatenate(namedState);
}
/**
 * Processes the "Tf" operator: pops the font size and the font name from
 * the stack, looks the font up in the resources and stores both the font
 * and a size-derived instance of it in the text state.
 * <p/>
 * When the named font (or its underlying font file) cannot be found, the
 * first font listed in the resources of the document's first page is used
 * instead, with AWT font substitution temporarily switched on in the
 * FontFactory. The previous substitution setting is always put back, even
 * if the fallback lookup throws.
 *
 * @param graphicState graphics state whose text state receives the font.
 * @param stack        stack of objects being parsed; the size sits on top
 *                     of the font Name.
 * @param resources    resources used to look up the named font.
 */
private static void consume_Tf(GraphicsState graphicState, Stack stack, Resources resources) {
    float size = ((Number) stack.pop()).floatValue();
    Name name2 = (Name) stack.pop();
    // build the new font and initialize it.
    graphicState.getTextState().font = resources.getFont(name2.getName());
    // in the rare case that the font can't be found then we try and build
    // one so the document can be rendered in some shape or form.
    if (graphicState.getTextState().font == null ||
            graphicState.getTextState().font.getFont() == null) {
        // turn on the old awt font engine, as we have a null font
        FontFactory fontFactory = FontFactory.getInstance();
        boolean awtState = fontFactory.isAwtFontSubstitution();
        fontFactory.setAwtFontSubstitution(true);
        try {
            // get the first page's resources, no need to lock the page,
            // already locked.
            Resources res = resources.getLibrary().getCatalog().getPageTree().getPage(0, null).getResources();
            // try and get a font off the first page.
            Object pageFonts = res.getEntries().get("Font");
            if (pageFonts instanceof Hashtable) {
                // get first font
                graphicState.getTextState().font =
                        (org.icepdf.core.pobjects.fonts.Font) resources.getLibrary()
                                .getObject(((Hashtable) pageFonts).elements().nextElement());
                // might get a null pointer but we'll get one on deriveFont too
                graphicState.getTextState().font.init();
            }
        } finally {
            // return the factory to its original state; done in finally so
            // a failure above cannot leave AWT substitution forced on.
            fontFactory.setAwtFontSubstitution(awtState);
        }
        // if no fonts found then we just bail and accept the null pointer
    }
    graphicState.getTextState().currentfont =
            graphicState.getTextState().font.getFont().deriveFont(size);
}
/**
 * Pops a number from the stack and stores it as the text state's
 * <code>cspace</code> value.
 *
 * @param graphicState graphics state whose text state is updated.
 * @param stack        stack of objects being parsed; the top entry is
 *                     cast to Number.
 */
private static void consume_Tc(GraphicsState graphicState, Stack stack) {
    final Number spacingOperand = (Number) stack.pop();
    graphicState.getTextState().cspace = spacingOperand.floatValue();
}
/**
 * Processes the "Tz" operator: pops the operand from the stack and, when
 * it is a number, stores it divided by 100 as the text state's horizontal
 * scaling before applying text scaling to the graphics state. A
 * non-numeric operand is discarded.
 *
 * @param graphicState graphics state whose text state is updated.
 * @param stack        stack of objects being parsed.
 */
private static void consume_Tz(GraphicsState graphicState, Stack stack) {
    final Object operand = stack.pop();
    if (!(operand instanceof Number)) {
        return;
    }
    // the operand is a percentage, the text state keeps a plain factor
    final float percent = ((Number) operand).floatValue();
    graphicState.getTextState().hScalling = percent / 100f;
    // apply the scaling to the graphics state.
    applyTextScaling(graphicState);
}
/**
 * Pops a number from the stack and stores it as the text state's
 * <code>wspace</code> value.
 *
 * @param graphicState graphics state whose text state is updated.
 * @param stack        stack of objects being parsed; the top entry is
 *                     cast to Number.
 */
private static void consume_Tw(GraphicsState graphicState, Stack stack) {
    final Number spacingOperand = (Number) stack.pop();
    graphicState.getTextState().wspace = spacingOperand.floatValue();
}
/**
 * Pops a number from the stack and stores it, truncated to an int, as the
 * text state's <code>rmode</code> value.
 *
 * @param graphicState graphics state whose text state is updated.
 * @param stack        stack of objects being parsed; the top entry is
 *                     cast to Number.
 */
private static void consume_Tr(GraphicsState graphicState, Stack stack) {
    final Number modeOperand = (Number) stack.pop();
    graphicState.getTextState().rmode = (int) modeOperand.floatValue();
}
/**
 * Pops a number from the stack and stores it as the text state's
 * <code>leading</code> value.
 *
 * @param graphicState graphics state whose text state is updated.
 * @param stack        stack of objects being parsed; the top entry is
 *                     cast to Number.
 */
private static void consume_TL(GraphicsState graphicState, Stack stack) {
    final Number leadingOperand = (Number) stack.pop();
    graphicState.getTextState().leading = leadingOperand.floatValue();
}
/**
 * Pops a number from the stack and stores it as the text state's
 * <code>trise</code> value.
 *
 * @param graphicState graphics state whose text state is updated.
 * @param stack        stack of objects being parsed; the top entry is
 *                     cast to Number.
 */
private static void consume_Ts(GraphicsState graphicState, Stack stack) {
    final Number riseOperand = (Number) stack.pop();
    graphicState.getTextState().trise = riseOperand.floatValue();
}
/**
* Utility method for calculating the advance needed for the
* <code>displayText</code> given the string's parsed textState. Each of
* the <code>displayText</code> glyphs is added to a new TextSprite and to
* the page text of the shapes collection; the sprite itself is then added
* to shapes according to the text rendering mode.
*
* @param displayText text that will be drawn to the screen
* @param advance current advance of the last drawn string; it is cast to
* Point2D.Float
* @param previousAdvance last advance of where the string should be drawn
* @param textState formatting properties associated with displayText
* @param shapes collection of all shapes for page content being parsed.
* @return the modified advance which can be used for the next string that
* needs to be drawn, or (0, 0) when <code>displayText</code> is empty
*/
private Point2D drawString(
StringBuilder displayText,
Point2D advance,
float previousAdvance,
TextState textState,
Shapes shapes) {
float advanceX = ((Point2D.Float) advance).x;
float advanceY = ((Point2D.Float) advance).y;
// nothing to draw, report a zero advance
if (displayText.length() == 0) {
return new Point2D.Float(0, 0);
}
// Position of previous glyph, all relative to text block
float lastx = 0, lasty = 0;
// Make sure that the previous advanceX is greater than where we
// are going to place the next glyph, see note 57 in 1.6 spec for more
// information.
char currentChar = displayText.charAt(0);
// Position of the specified glyph relative to the origin of glyphVector
float firstCharWidth = (float) textState.currentfont.echarAdvance(currentChar).getX();
if ((advanceX + firstCharWidth) < previousAdvance) {
advanceX = previousAdvance;
}
// Data needed on font
FontFile currentFont = textState.currentfont;
boolean isVerticalWriting = textState.font.isVerticalWriting();
// int spaceCharacter = currentFont.getSpaceEchar();
// font metrics data
float textRise = textState.trise;
float charcterSpace = textState.cspace;
float whiteSpace = textState.wspace;
int textLength = displayText.length();
// create a new sprite to hold the text objects, using a copy of the
// current CTM
TextSprite textSprites =
new TextSprite(currentFont,
textLength,
new AffineTransform(graphicState.getCTM()));
// glyph placement params
float currentX, currentY;
float newAdvanceX, newAdvanceY;
// System.out.println("-> " + displayText + " " + whiteSpace);
// Iterate through displayText to calculate the new advance value
for (int i = 0; i < textLength; i++) {
currentChar = displayText.charAt(i);
// Position of the specified glyph relative to the origin of glyphVector
// advance is handled by the particular font implementation.
newAdvanceX = (float) currentFont.echarAdvance(currentChar).getX();
// System.out.println(currentChar + " : " + (int)currentChar + " : " + newAdvanceX + " : " +
// currentFont.echarAdvance(currentChar).getX() + " : " + currentFont.echarAdvance(' ').getX());
// the horizontal advance is reused as the vertical advance
newAdvanceY = newAdvanceX;
if (!isVerticalWriting) {
// horizontal writing: place the glyph at the running x offset and
// apply the text rise to y (sup, sub scripts)
currentX = advanceX + lastx;
currentY = lasty - textRise;
lastx += newAdvanceX;
// add the space between chars value
lastx += charcterSpace;
// lastly add space widths,
if (displayText.charAt(i) == 32) { // currently too unreliable: currentFont.getSpaceEchar()
// System.out.println("spacechar " + " : " + (int)currentFont.getSpaceEchar() );
lastx += whiteSpace;
}
} else {
// vertical writing: advance the running y offset (less the text
// rise) and shift the glyph left by half of its advance
lasty += (newAdvanceY - textRise);
currentX = advanceX - (newAdvanceX / 2.0f);
currentY = advanceY + lasty;
}
// get the normalized glyph from the text sprite
GlyphText glyphText = textSprites.addText(
String.valueOf(currentChar), // cid
textState.currentfont.toUnicode(currentChar), // unicode value
currentX, currentY, newAdvanceX);
shapes.getPageText().addGlyph(glyphText);
}
// append the final offsets accumulated over all characters
advanceX += lastx;
advanceY += lasty;
/**
* The text rendering mode, Tmode, determines whether showing text
* causes glyph outlines to be stroked, filled, used as a clipping
* boundary, or some combination of the three.
*
* Modes 4, 5 and 6 are handled below exactly like 0, 1 and 2, and
* mode 7 only adds the sprite; no clipping path is built here.
*
* 0 - Fill text
* 1 - Stroke text
* 2 - fill, then stroke text
* 3 - Neither fill nor stroke text (invisible)
* 4 - Fill text and add to path for clipping
* 5 - Stroke text and add to path for clipping.
* 6 - Fill, then stroke text and add to path for clipping.
* 7 - Add text to path for clipping.
*/
int rmode = textState.rmode;
// System.out.println("RMode " + rmode);
switch (rmode) {
// fill text: 0
case TextState.MODE_FILL:
drawModeFill(textSprites, shapes, rmode);
break;
// Stroke text: 1
case TextState.MODE_STROKE:
drawModeStroke(textSprites, textState, shapes, rmode);
break;
// Fill, then stroke text: 2
case TextState.MODE_FILL_STROKE:
drawModeFillStroke(textSprites, textState, shapes, rmode);
break;
// Neither fill nor stroke text (invisible): 3
case TextState.MODE_INVISIBLE:
// do nothing, the sprite is not added to shapes
break;
// Fill text and add to path for clipping: 4
case TextState.MODE_FILL_ADD:
drawModeFill(textSprites, shapes, rmode);
break;
// Stroke text and add to path for clipping: 5
case TextState.MODE_STROKE_ADD:
drawModeStroke(textSprites, textState, shapes, rmode);
break;
// Fill, then stroke text and add to path for clipping: 6
case TextState.MODE_FILL_STROKE_ADD:
drawModeFillStroke(textSprites, textState, shapes, rmode);
break;
// Add text to path for clipping: 7
case TextState.MODE_ADD:
textSprites.setRMode(rmode);
shapes.add(textSprites);
break;
}
return new Point2D.Float(advanceX, advanceY);
}
/**
 * Adds a filled text sprite to the shapes stack: the current fill colour
 * is added first, followed by the sprite tagged with the given rendering
 * mode.
 *
 * @param textSprites text to add to shapes stack
 * @param shapes      shapes stack
 * @param rmode       rendering mode recorded on the sprite
 */
private void drawModeFill(TextSprite textSprites, Shapes shapes, int rmode) {
    // colour goes on the stack ahead of the text that uses it
    shapes.add(graphicState.getFillColor());
    textSprites.setRMode(rmode);
    shapes.add(textSprites);
}
/**
 * Adds a stroked text sprite to the shapes stack. The line width is
 * temporarily divided by the x scale of the text matrix while the sprite
 * is added, and the original width is reinstated afterwards.
 *
 * @param textSprites text to add to shapes stack
 * @param textState   text state whose text matrix scales the stroke width
 * @param shapes      shapes stack
 * @param rmode       rendering mode recorded on the sprite
 */
private void drawModeStroke(TextSprite textSprites, TextState textState,
                            Shapes shapes, int rmode) {
    // tag the sprite with its mode and the colour used for the outline
    textSprites.setRMode(rmode);
    textSprites.setStrokeColor(graphicState.getStrokeColor());

    // remember the page level width, then switch to the glyph level width
    final float savedLineWidth = graphicState.getLineWidth();
    final float glyphLineWidth =
            (float) (savedLineWidth / textState.tmatrix.getScaleX());
    graphicState.setLineWidth(glyphLineWidth);

    // stroke, colour and text go on the stack in that order
    setStroke(shapes, graphicState);
    shapes.add(graphicState.getStrokeColor());
    shapes.add(textSprites);

    // put the previous width back and publish the matching stroke
    graphicState.setLineWidth(savedLineWidth);
    setStroke(shapes, graphicState);
}
/**
 * Adds a filled and stroked text sprite to the shapes stack. The sprite
 * carries the stroke colour while the fill colour is added to the stack.
 * The line width is temporarily divided by the x scale of the text matrix
 * while the sprite is added, and the original width is reinstated
 * afterwards.
 *
 * @param textSprites text to add to shapes stack
 * @param textState   text state whose text matrix scales the stroke width
 * @param shapes      shapes stack
 * @param rmode       rendering mode recorded on the sprite
 */
private void drawModeFillStroke(TextSprite textSprites, TextState textState,
                                Shapes shapes, int rmode) {
    // tag the sprite with its mode and the colour used for the outline
    textSprites.setRMode(rmode);
    textSprites.setStrokeColor(graphicState.getStrokeColor());

    // remember the page level width, then switch to the glyph level width
    final float savedLineWidth = graphicState.getLineWidth();
    final float glyphLineWidth =
            (float) (savedLineWidth / textState.tmatrix.getScaleX());
    graphicState.setLineWidth(glyphLineWidth);

    // stroke, fill colour and text go on the stack in that order
    setStroke(shapes, graphicState);
    shapes.add(graphicState.getFillColor());
    shapes.add(textSprites);

    // put the previous width back and publish the matching stroke
    graphicState.setLineWidth(savedLineWidth);
    setStroke(shapes, graphicState);
}
/**
 * Shared stroke handling for the stroking operators. Adds the alpha,
 * paint (plain stroke colour, tiling pattern or shading pattern), path
 * and draw command to the shapes stack.
 *
 * @param graphicState  graphics state supplying colour, alpha and pattern.
 * @param shapes        current shapes stack
 * @param geometricPath current path.
 */
private static void commonStroke(GraphicsState graphicState, Shapes shapes, GeneralPath geometricPath) {
    // Overprinting tweaks the stroke alpha; otherwise a knockout is
    // approximated by switching the composite rule to SRC.
    if (graphicState.isOverprintStroking()) {
        setAlpha(shapes, graphicState.getAlphaRule(),
                commonOverPrintAlpha(graphicState.getStrokeAlpha()));
    } else if (graphicState.isKnockOut()) {
        setAlpha(shapes, AlphaComposite.SRC, graphicState.getStrokeAlpha());
    }

    if (!(graphicState.getStrokeColorSpace() instanceof PatternColor)) {
        // regular stroke colour
        setAlpha(shapes, graphicState.getAlphaRule(), graphicState.getStrokeAlpha());
        shapes.add(graphicState.getStrokeColor());
        shapes.add(geometricPath);
        shapes.addDrawCommand();
    } else {
        // pattern colour space: pull the pattern out of it
        Pattern strokePattern =
                ((PatternColor) graphicState.getStrokeColorSpace()).getPattern();
        if (strokePattern != null) {
            if (strokePattern.getPatternType() == Pattern.PATTERN_TYPE_TILING) {
                // no special handling for coloured/uncoloured paint here,
                // that is done when the scn or sc tokens are parsed.
                TilingPattern tiles = (TilingPattern) strokePattern;
                // initialise the pattern inside a save/restore pair
                graphicState = graphicState.save();
                tiles.setParentGraphicState(graphicState);
                tiles.init();
                graphicState = graphicState.restore();
                // 1x1 tiles don't seem to paint, so for those fall back
                // to the uncoloured or first pattern colour.
                if (tiles.getBBox().getWidth() > 1 &&
                        tiles.getBBox().getHeight() > 1) {
                    shapes.add(tiles);
                } else if (tiles.getPaintType() ==
                        TilingPattern.PAINTING_TYPE_UNCOLORED_TILING_PATTERN) {
                    shapes.add(tiles.getUnColored());
                } else {
                    shapes.add(tiles.getFirstColor());
                }
                shapes.add(geometricPath);
                shapes.addDrawCommand();
            } else if (strokePattern.getPatternType() == Pattern.PATTERN_TYPE_SHADING) {
                strokePattern.init();
                shapes.add(strokePattern.getPaint());
                shapes.add(geometricPath);
                shapes.addDrawCommand();
            }
        }
    }

    // after an overprinted stroke, put a SRC_OVER composite with the
    // fill alpha back on the stack.
    if (graphicState.isOverprintStroking()) {
        setAlpha(shapes, AlphaComposite.SRC_OVER, graphicState.getFillAlpha());
    }
}
/**
 * Fudges an alpha value to approximate overprinting on screen.
 * <p/>
 * Values below OVERPAINT_ALPHA are returned unchanged. Values above it,
 * other than exactly 1.0, are reduced by OVERPAINT_ALPHA. Everything else
 * (fully opaque, or exactly OVERPAINT_ALPHA) becomes OVERPAINT_ALPHA.
 *
 * @param alpha alpha constant
 * @return tweaked over printing alpha
 */
private static float commonOverPrintAlpha(float alpha) {
    // already fainter than our over paint value, leave it alone
    if (alpha < OVERPAINT_ALPHA) {
        return alpha;
    }
    // partially transparent: knock the over paint amount off
    if (alpha != 1.0f && alpha > OVERPAINT_ALPHA) {
        return alpha - OVERPAINT_ALPHA;
    }
    return OVERPAINT_ALPHA;
}
/**
 * Common fill operations used by f, F, F*, b, b*, B, B*. Takes into
 * account patternColour and regular old fill colour: the alpha, the paint
 * (fill colour, tiling pattern or shading pattern), the path and a fill
 * command are added to the shapes stack.
 * <p/>
 * A tiling pattern is only added as a pattern when both its BBox width
 * and height are greater than 1, the same guard commonStroke applies;
 * otherwise the uncoloured or first pattern colour is used instead.
 *
 * @param shapes        current shapes stack
 * @param geometricPath current path.
 * @throws NoninvertibleTransformException declared for callers; nothing
 *                                         in this method throws it directly.
 */
private void commonFill(Shapes shapes, GeneralPath geometricPath) throws NoninvertibleTransformException {
    // get current fill alpha and concatenate with overprinting if present
    if (graphicState.isOverprintOther()) {
        setAlpha(shapes, graphicState.getAlphaRule(),
                commonOverPrintAlpha(graphicState.getFillAlpha()));
    }
    // The knockout effect can only be achieved by changing the alpha
    // composite to source.
    else if (graphicState.isKnockOut()) {
        setAlpha(shapes, AlphaComposite.SRC, graphicState.getFillAlpha());
    } else {
        setAlpha(shapes, graphicState.getAlphaRule(), graphicState.getFillAlpha());
    }
    // found a PatternColor
    if (graphicState.getFillColorSpace() instanceof PatternColor) {
        // Create a pointer to the pattern colour
        PatternColor patternColor = (PatternColor) graphicState.getFillColorSpace();
        // grab the pattern from the colour
        Pattern pattern = patternColor.getPattern();
        // Start processing tiling pattern
        if (pattern != null &&
                pattern.getPatternType() == Pattern.PATTERN_TYPE_TILING) {
            // currently not doing any special handling for colour or uncoloured
            // paint, as it is done when the scn or sc tokens are parsed.
            TilingPattern tilingPattern = (TilingPattern) pattern;
            // 1.) save the graphics context
            graphicState = graphicState.save();
            // 2.) install the graphic state
            tilingPattern.setParentGraphicState(graphicState);
            tilingPattern.init();
            // 3.) Restore the saved graphics state
            graphicState = graphicState.restore();
            // Tiles that are not larger than 1 in both dimensions don't
            // seem to paint, so we resort to using the first pattern
            // colour or the uncoloured colour for them.
            if (tilingPattern.getBBox().getWidth() > 1 &&
                    tilingPattern.getBBox().getHeight() > 1) {
                shapes.add(tilingPattern);
            } else {
                // draw partial fill colour
                if (tilingPattern.getPaintType() ==
                        TilingPattern.PAINTING_TYPE_UNCOLORED_TILING_PATTERN) {
                    shapes.add(tilingPattern.getUnColored());
                } else {
                    shapes.add(tilingPattern.getFirstColor());
                }
            }
            shapes.add(geometricPath);
            shapes.addFillCommand();
        } else if (pattern != null &&
                pattern.getPatternType() == Pattern.PATTERN_TYPE_SHADING) {
            pattern.init();
            shapes.add(pattern.getPaint());
            shapes.add(geometricPath);
            shapes.addFillCommand();
        }
    } else {
        // plain fill colour
        shapes.add(graphicState.getFillColor());
        shapes.add(geometricPath);
        shapes.addFillCommand();
    }
    // add old alpha back to stack
    if (graphicState.isOverprintOther()) {
        setAlpha(shapes, graphicState.getAlphaRule(), graphicState.getFillAlpha());
    }
}
/**
 * Builds a BasicStroke from the stroke related values of the given
 * graphics state and adds it to the shapes stack. The values used are the
 * line width, line cap, line join, miter limit, dash array and dash phase.
 *
 * @param shapes       current Shapes object for the page being parsed
 * @param graphicState graphic state used to build this stroke instance.
 */
static void setStroke(Shapes shapes, GraphicsState graphicState) {
    final BasicStroke stroke = new BasicStroke(
            graphicState.getLineWidth(),
            graphicState.getLineCap(),
            graphicState.getLineJoin(),
            graphicState.getMiterLimit(),
            graphicState.getDashArray(),
            graphicState.getDashPhase());
    shapes.add(stroke);
}
/**
 * Applies the text state's horizontal scaling to the main graphics state
 * rather than to the text matrix. According to the original notes the
 * scaling has to be in place before a text write operand occurs,
 * otherwise a call to Tm seems to break text positioning.
 * <p/>
 * Only the sign of the scaling is used: the stored value is first reduced
 * to 1 or -1, so a negative scaling results in a horizontal flip of the
 * graphics state.
 *
 * @param graphicState current graphics state.
 */
private static void applyTextScaling(GraphicsState graphicState) {
    // Reduce the scaling to its sign; using the original non 1 value
    // messes up the layout.
    if (graphicState.getTextState().hScalling >= 0) {
        graphicState.getTextState().hScalling = 1;
    } else {
        graphicState.getTextState().hScalling = -1;
    }
    // scale only along x, y is left untouched
    AffineTransform flip = new AffineTransform(
            graphicState.getTextState().hScalling,
            0, 0, 1, 0, 0);
    // combine with a copy of the current CTM and install the result
    AffineTransform scaledCtm = new AffineTransform(graphicState.getCTM());
    scaledCtm.concatenate(flip);
    graphicState.set(scaledCtm);
}
/**
 * Adds a new AlphaComposite object to the shapes stack. An alpha of zero
 * is treated as a cut out effect for the current content stream, so the
 * given rule is replaced by AlphaComposite.SRC_OUT in that case.
 *
 * @param shapes - current shapes vector to add Alpha Composite to
 * @param rule   - rule to apply to the alphaComposite.
 * @param alpha  - alpha value, opaque = 1.0f.
 */
static void setAlpha(Shapes shapes, int rule, float alpha) {
    final int effectiveRule = (alpha == 0) ? AlphaComposite.SRC_OUT : rule;
    shapes.add(AlphaComposite.getInstance(effectiveRule, alpha));
}
}