/* * Copyright 2006-2017 ICEsoft Technologies Canada Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the * License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.icepdf.core.util.content; import org.icepdf.core.pobjects.Page; import org.icepdf.core.pobjects.graphics.GraphicsState; import org.icepdf.core.pobjects.graphics.Shapes; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.Stack; /** * ContentParser interface for content streams. * * @since 5.0 */ public interface ContentParser { /** * Gets the shapes parsed by the last run of {@link #parse(byte[][], Page)}. * * @return Shapes associated with the content parser. */ Shapes getShapes(); /** * Gets the stack used by the content parser. Under normal execution the * stack should be empty. If the stack has elements remaining then is * generally means that a content parsing error has taken place. * * @return object stack. */ Stack<Object> getStack(); /** * Gets the graphic state object associated with the parser. Needed by * the Type3 font program. * * @return graphic state of the parsed content stream. */ GraphicsState getGraphicsState(); /** * Sets the external graphics state object associated with Form's and * Tiling Patterns. * * @param graphicState graphic state to pass to parser. */ void setGraphicsState(GraphicsState graphicState); /** * Parse the given stream bytes. * * @param streamBytes bytes that make of one or more content streams. * @return an instance of this content parser. * @throws InterruptedException thread was interrupted. * @throws IOException io exception during the pars. */ ContentParser parse(byte[][] streamBytes, Page page) throws InterruptedException, IOException; /** * Optimized text parsing call which will ignore any instructions that * are not related to text extraction. Images and other operands are * ignored speeding up the extraction process. * * @param source byte source to parse. * @return Shapes object which contains the extract PageText object. * @throws UnsupportedEncodingException encoding error. */ Shapes parseTextBlocks(byte[][] source) throws UnsupportedEncodingException, InterruptedException; /** * Sets the scale factor used by some graphic state parameters so that the * to users space CTM scale factor can be applied. In particular some * Type3 glyphs need to take into account this scaling factor. * * @param scale scale factor to apply to various graphic state parameters. */ void setGlyph2UserSpaceScale(float scale); }