/*****************************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 ****************************************************************************/

package org.apache.padaf.preflight.font.type1;

import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_PDFDOC;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_WIN;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.fontbox.cff.Type1CharStringParser;
import org.apache.fontbox.cff.Type1FontUtil;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.encoding.MacRomanEncoding;
import org.apache.pdfbox.encoding.PdfDocEncoding;
import org.apache.pdfbox.encoding.StandardEncoding;
import org.apache.pdfbox.encoding.WinAnsiEncoding;

/**
 * Minimal parser for an embedded Type 1 font program.
 * <p>
 * The parser tokenizes the clear-text part of the program (looking only for
 * the <code>/Encoding</code> definition), then decrypts the eexec part and
 * tokenizes it to collect the <code>/lenIV</code> value and the glyph
 * descriptions found in the <code>/CharStrings</code> dictionary. Everything
 * it collects is stored in a {@link Type1} object returned by {@link #parse()}.
 * <p>
 * Instances are created through the static <code>createParser*</code>
 * factories. An instance keeps a read counter and is meant to be used for a
 * single call to {@link #parse()}.
 */
public class Type1Parser {
  protected static final char NAME_START = '/';
  protected static final String NOTDEF = NAME_START + ".notdef";
  protected static final int DEFAULT_LEN_IV = 4;

  private static final String PS_STANDARD_ENCODING = "StandardEncoding";
  private static final String PS_ISOLATIN_ENCODING = "ISOLatin1Encoding";

  private static final String TOKEN_ENCODING = "US-ASCII";

  /** Size of the chunks used to copy the eexec part out of the stream. */
  private static final int EEXEC_BUFFER_SIZE = 1024;

  private static final String UNEXPECTED_EOF = "Unexpected End Of File";

  /**
   * The PostScript font stream.
   */
  private PeekInputStream fontProgram = null;
  /**
   * The length in bytes of the clear-text portion of the Type1 font program.
   */
  private int clearTextSize = 0;
  /**
   * The length in bytes of the eexec encrypted portion of the type1 font
   * program.
   */
  private int eexecSize = 0;

  /**
   * Number of bytes consumed one by one through
   * {@link #readNextCharacter(PeekInputStream)}. It is compared with
   * {@link #clearTextSize} to find the end of the clear-text part. The
   * counter keeps being updated while the eexec part is tokenized, but its
   * value is no longer used at that point.
   */
  private int numberOfReadBytes = 0;

  /**
   * Object which contains information coming from the parsing.
   */
  private Type1 type1Font = null;

  private Type1Parser(InputStream type1, int length1, int length2, Encoding enc)
      throws IOException {
    super();
    this.fontProgram = new PeekInputStream(type1);
    this.clearTextSize = length1;
    this.eexecSize = length2;
    // ---- Instantiate the Encoding Map, falling back on StandardEncoding
    if (enc != null) {
      this.type1Font = new Type1(enc);
    } else {
      this.type1Font = new Type1(new StandardEncoding());
    }
    this.type1Font.addCidWithLabel(-1, NOTDEF);
  }

  /**
   * Creates a parser which uses the standard encoding.
   *
   * @param fontProgram the stream of the font program extracted from the PDF file.
   * @param clearTextLength the length in bytes of the clear part of the font program.
   * @param eexecLength the length in bytes of the encoded part.
   * @return a new parser bound to the given font program
   * @throws IOException if the parser can't be created from the stream
   */
  public static Type1Parser createParser(InputStream fontProgram,
      int clearTextLength, int eexecLength) throws IOException {
    Encoding encoding = getEncodingObject("");
    return createParserWithEncodingObject(fontProgram, clearTextLength,
        eexecLength, encoding);
  }

  /**
   * Creates a parser which uses the encoding identified by its name in the
   * font dictionary. An unknown name selects the standard encoding.
   *
   * @param fontProgram the stream of the font program extracted from the PDF file.
   * @param clearTextLength the length in bytes of the clear part of the font program.
   * @param eexecLength the length in bytes of the encoded part.
   * @param encodingName The name of encoding which is used by this font program.
   * @return a new parser bound to the given font program
   * @throws IOException if the parser can't be created from the stream
   */
  public static Type1Parser createParserWithEncodingName(
      InputStream fontProgram, int clearTextLength, int eexecLength,
      String encodingName) throws IOException {
    Encoding encoding = getEncodingObject(encodingName);
    return createParserWithEncodingObject(fontProgram, clearTextLength,
        eexecLength, encoding);
  }

  /**
   * Maps an encoding name of the font dictionary to an Encoding object.
   * Both the MacRoman and the MacExpert names are mapped to
   * {@link MacRomanEncoding}; any other unknown (or null) name is mapped to
   * {@link StandardEncoding}.
   */
  private static Encoding getEncodingObject(String encodingName) {
    Encoding encoding = new StandardEncoding();
    if (FONT_DICTIONARY_VALUE_ENCODING_MAC.equals(encodingName)) {
      encoding = new MacRomanEncoding();
    } else if (FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP.equals(encodingName)) {
      encoding = new MacRomanEncoding();
    } else if (FONT_DICTIONARY_VALUE_ENCODING_WIN.equals(encodingName)) {
      encoding = new WinAnsiEncoding();
    } else if (FONT_DICTIONARY_VALUE_ENCODING_PDFDOC.equals(encodingName)) {
      encoding = new PdfDocEncoding();
    }
    return encoding;
  }

  /**
   * Creates a parser which uses the given encoding object.
   *
   * @param fontProgram the stream of the font program extracted from the PDF file.
   * @param clearTextLength the length in bytes of the clear part of the font program.
   * @param eexecLength the length in bytes of the encoded part.
   * @param encoding The encoding object which is used by this font program,
   *          null to use the standard encoding.
   * @return a new parser bound to the given font program
   * @throws IOException if the parser can't be created from the stream
   */
  public static Type1Parser createParserWithEncodingObject(
      InputStream fontProgram, int clearTextLength, int eexecLength,
      Encoding encoding) throws IOException {
    return new Type1Parser(fontProgram, clearTextLength, eexecLength, encoding);
  }

  /**
   * Parses the clear-text part then the eexec part of the font program.
   *
   * @return the object filled with the information found in the font program
   * @throws IOException if the font program can't be read or is malformed
   */
  public Type1 parse() throws IOException {
    parseClearPartOfFontProgram(this.fontProgram);
    decodeAndParseEExecPart(this.fontProgram);
    return this.type1Font;
  }

  private void parseClearPartOfFontProgram(PeekInputStream stream)
      throws IOException {
    skipComments(stream);
    parseFontInformationUntilEncodingPart(stream);
  }

  private void decodeAndParseEExecPart(PeekInputStream stream)
      throws IOException {
    byte[] eexecPart = readEexec(stream);
    byte[] decodedEExecPart = decodeEexec(eexecPart);
    PeekInputStream eexecStream = new PeekInputStream(
        new ByteArrayInputStream(decodedEExecPart));
    parseEExecPart(eexecStream);
  }

  /**
   * Skips the comment lines (lines starting with '%') found at the current
   * position of the stream. The end of the stream is detected by
   * {@link #readLine(PeekInputStream)}.
   */
  private void skipComments(PeekInputStream stream) throws IOException {
    int nextChar = stream.peek();
    while (nextChar == '%') {
      readLine(stream);
      nextChar = stream.peek();
    }
  }

  /**
   * Reads tokens until the "eexec" key word, parsing the /Encoding
   * definition on the way, then skips the bytes which remain before the end
   * of the clear-text part.
   */
  private void parseFontInformationUntilEncodingPart(PeekInputStream stream)
      throws IOException {
    byte[] token = readToken(stream);
    while (!isEExecKeyWord(token)) {
      // add here specific operation to memorize useful information
      if (isEncodingKeyWord(token)) {
        parseEncodingDefinition(stream);
      }
      token = readToken(stream);
    }

    // The end of the clear-text part is detected with an equality test : if
    // the counter is already beyond the declared length the equality can never
    // become true, so fail immediately instead of reading until the end.
    if (this.numberOfReadBytes > this.clearTextSize) {
      throw new IOException("The eexec key word ends after the declared length "
          + "of the clear-text part (" + this.clearTextSize + ")");
    }

    while (!isStartOfEExecReached()) {
      if (readNextCharacter(stream) < 0) {
        throw new IOException(
            "Unexpected End Of File before the end of the clear-text part");
      }
    }
  }

  /**
   * Parses the value which follows the /Encoding key word : either one of
   * the two predefined encoding names or an "n array" followed by a list of
   * "dup cid label put" entries.
   */
  private void parseEncodingDefinition(PeekInputStream stream)
      throws IOException {
    String readableToken = asString(readToken(stream));
    if (PS_ISOLATIN_ENCODING.equals(readableToken)) {
      this.type1Font.initEncodingWithISOLatin1Encoding();
    } else if (PS_STANDARD_ENCODING.equals(readableToken)) {
      this.type1Font.initEncodingWithStandardEncoding();
    } else {
      // the array size isn't used, it only has to be an integer
      try {
        Integer.parseInt(readableToken);
      } catch (NumberFormatException e) {
        throw newIOException(
            "Invalid encoding : Expected int value before \"array\" "
                + "key word if the Encoding isn't Standard or ISOLatin", e);
      }
      throwExceptionIfUnexpectedToken("array", readToken(stream));
      readEndSetEncodingValues(stream);
    }
  }

  /**
   * Tokenizes the decrypted eexec part to find the /lenIV value and the
   * /CharStrings dictionary. Binary parts introduced by "RD" or "-|" outside
   * of the CharStrings dictionary are skipped using the length given by the
   * token which precedes the key word.
   */
  private void parseEExecPart(PeekInputStream stream) throws IOException {
    int lenIV = DEFAULT_LEN_IV;
    byte[] previousToken = new byte[0];
    while (!isEndOfStream(stream)) {
      byte[] token = readToken(stream);
      if (isLenIVKeyWord(token)) {
        // lenIV belong to Private Dictionary.
        // If you create a method to parse PrivateDict, please update this function
        lenIV = parseIntToken(readToken(stream),
            "Invalid lenIV value : integer expected");
      } else if (isBeginOfBinaryPart(token)) {
        int lengthOfBinaryPart = parseIntToken(previousToken,
            "Binary part found but previous token wasn't an integer");
        skipSingleBlankSeparator(stream);
        readBinary(stream, lengthOfBinaryPart);
        token = readToken(stream); // read the end of binary part
      } else if (isCharStringKeyWord(token)) {
        parseCharStringArray(stream, lenIV);
      }
      previousToken = token;
    }
  }

  private void parseCharStringArray(PeekInputStream stream, int lenIV)
      throws IOException {
    int numberOfElements = readNumberOfCharStrings(stream);
    goToBeginOfCharStringElements(stream);

    while (numberOfElements > 0) {
      readCharStringElement(stream, lenIV);
      --numberOfElements;
    }
  }

  /** Consumes tokens up to and including the "begin" key word. */
  private void goToBeginOfCharStringElements(PeekInputStream stream)
      throws IOException {
    byte[] token;
    do {
      token = readToken(stream);
    } while (isNotBeginKeyWord(token));
  }

  /**
   * Reads one "label size RD binary ND" entry of the CharStrings dictionary,
   * decrypts the binary part and stores the resulting glyph description.
   */
  private void readCharStringElement(PeekInputStream stream, int lenIV)
      throws IOException {
    String label = asString(readToken(stream));
    int sizeOfCharString = parseIntToken(readToken(stream),
        "Invalid CharString : Expected size of the description of \"" + label
            + "\"");

    readToken(stream); // skip "RD" or "-|" token
    skipSingleBlankSeparator(stream); // "RD" or "-|" are followed by a space

    byte[] descBinary = readBinary(stream, sizeOfCharString);
    byte[] description = Type1FontUtil.charstringDecrypt(descBinary, lenIV);
    Type1CharStringParser t1p = new Type1CharStringParser();
    List<Object> operations = t1p.parse(description);
    type1Font.addGlyphDescription(label, new GlyphDescription(operations));

    readToken(stream); // skip "ND" or "|-" token
  }

  /**
   * Reads exactly <code>length</code> bytes from the stream.
   *
   * @throws IOException if the length is negative or if the stream ends
   *           before all the bytes have been read
   */
  private byte[] readBinary(PeekInputStream stream, int length)
      throws IOException {
    if (length < 0) {
      throw new IOException("Invalid length of binary part : " + length);
    }
    byte[] data = new byte[length];
    int offset = 0;
    while (offset < length) {
      int read = stream.read(data, offset, length - offset);
      if (read <= 0) {
        throw new IOException(UNEXPECTED_EOF);
      }
      offset += read;
    }
    return data;
  }

  private boolean isNotBeginKeyWord(byte[] token) throws IOException {
    return !"begin".equals(asString(token));
  }

  private boolean isBeginOfBinaryPart(byte[] token) throws IOException {
    String word = asString(token);
    return ("RD".equals(word) || "-|".equals(word));
  }

  private boolean isLenIVKeyWord(byte[] token) throws IOException {
    return "/lenIV".equals(asString(token));
  }

  private boolean isCharStringKeyWord(byte[] token) throws IOException {
    return "/CharStrings".equals(asString(token));
  }

  private int readNumberOfCharStrings(PeekInputStream stream)
      throws IOException {
    return parseIntToken(readToken(stream),
        "Number of CharStrings elements is expected.");
  }

  private void throwExceptionIfUnexpectedToken(String expectedValue,
      byte[] token) throws IOException {
    String valueToCheck = asString(token);
    if (!expectedValue.equals(valueToCheck)) {
      throw new IOException(expectedValue + " was expected but we received "
          + valueToCheck);
    }
  }

  /**
   * Reads the entries of a custom encoding array until the "readonly def"
   * sequence. Each "dup cid label" sequence is stored in the Type1 object;
   * the other tokens (the "put" key words, the initialization loop...) are
   * ignored.
   */
  private void readEndSetEncodingValues(PeekInputStream stream)
      throws IOException {
    byte[] token = readToken(stream);
    boolean lastTokenWasReadOnly = false;
    while (!(lastTokenWasReadOnly && isDefKeyWord(token))) {
      if (isDupKeyWord(token)) {
        byte[] cidToken = readToken(stream);
        String label = asString(readToken(stream));
        int cid = parseIntToken(cidToken,
            "Invalid encoding : Expected CID value before \"" + label
                + "\" label");
        this.type1Font.addCidWithLabel(cid, label);
      } else {
        lastTokenWasReadOnly = isReadOnlyKeyWord(token);
      }
      token = readToken(stream);
    }
  }

  /**
   * Copies at most {@link #eexecSize} bytes of the stream, starting at its
   * current position. Fewer bytes are returned if the stream stops
   * delivering data before.
   */
  private byte[] readEexec(PeekInputStream stream) throws IOException {
    byte[] buffer = new byte[EEXEC_BUFFER_SIZE];
    ByteArrayOutputStream eexecPart = new ByteArrayOutputStream();
    int lr = 0;
    int total = 0;
    do {
      lr = stream.read(buffer, 0, EEXEC_BUFFER_SIZE);
      if (lr > 0) {
        // never copy more than what remains of the eexec part
        int toCopy = Math.min(lr, eexecSize - total);
        eexecPart.write(buffer, 0, toCopy);
        total += toCopy;
      }
    } while (eexecSize > total && lr > 0);
    IOUtils.closeQuietly(eexecPart);
    return eexecPart.toByteArray();
  }

  private byte[] decodeEexec(byte[] eexec) {
    return Type1FontUtil.eexecDecrypt(eexec);
  }

  /**
   * Reads the bytes up to the next end of line. The end of line marker
   * ("\n", "\r" or "\r\n") is part of the returned array.
   *
   * @throws IOException if the stream ends before an end of line marker
   */
  private byte[] readLine(PeekInputStream stream) throws IOException {
    ByteArrayOutputStream line = new ByteArrayOutputStream();
    int currentCharacter;
    do {
      currentCharacter = readNextCharacter(stream);
      if (currentCharacter < 0) {
        // without this guard a stream returning -1 would loop forever
        throw new IOException(
            "Unexpected End Of File during a comment parsing");
      }
      line.write(currentCharacter);
    } while (!('\n' == currentCharacter || '\r' == currentCharacter));

    if ('\r' == currentCharacter && '\n' == stream.peek()) {
      line.write(readNextCharacter(stream));
    }
    return line.toByteArray();
  }

  /**
   * Reads the next token : a string literal, an array, a procedure or, by
   * default, a name or an argument. Leading blank separators are skipped.
   */
  private byte[] readToken(PeekInputStream stream) throws IOException {
    skipBlankSeparators(stream);

    int nextByte = stream.peek();
    if (nextByte < 0) {
      throw new IOException(UNEXPECTED_EOF);
    }

    byte[] token;
    if (nextByte == '(') {
      token = readStringLiteral(stream);
    } else if (nextByte == '[') {
      token = readArray(stream);
    } else if (nextByte == '{') {
      token = readProcedure(stream);
    } else {
      token = readNameOrArgument(stream);
    }
    return token;
  }

  /**
   * Reads a "(...)" string literal, delimiters included. Nested balanced
   * parentheses are allowed and a parenthesis escaped by a backslash doesn't
   * take part in the nesting.
   */
  private byte[] readStringLiteral(PeekInputStream stream) throws IOException {
    return readDelimitedToken(stream, '(', ')', true);
  }

  /** Reads a "[...]" array, delimiters included. */
  private byte[] readArray(PeekInputStream stream) throws IOException {
    return readDelimitedToken(stream, '[', ']', false);
  }

  /** Reads a "{...}" procedure, delimiters included. */
  private byte[] readProcedure(PeekInputStream stream) throws IOException {
    return readDelimitedToken(stream, '{', '}', false);
  }

  /**
   * Reads the bytes of a token enclosed by the given delimiters, keeping
   * track of the nesting level. The first byte of the stream must be the
   * opening delimiter.
   *
   * @param skipEscapedBytes true if a byte preceded by a backslash must be
   *          copied without being interpreted as a delimiter
   */
  private byte[] readDelimitedToken(PeekInputStream stream, int opening,
      int closing, boolean skipEscapedBytes) throws IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    int opened = 0;
    do {
      int currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException(UNEXPECTED_EOF);
      }
      buffer.write(currentByte);

      if (skipEscapedBytes && currentByte == '\\') {
        int escapedByte = readNextCharacter(stream);
        if (escapedByte < 0) {
          throw new IOException(UNEXPECTED_EOF);
        }
        buffer.write(escapedByte);
      } else if (currentByte == opening) {
        opened++;
      } else if (currentByte == closing) {
        opened--;
      }
    } while (opened != 0);
    return buffer.toByteArray();
  }

  /**
   * Reads a name or an argument : all the bytes up to the next blank
   * separator, the next '/' or the next array/procedure delimiter. At least
   * one byte is read.
   */
  private byte[] readNameOrArgument(PeekInputStream stream) throws IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    int nextByte;
    do {
      int currentByte = readNextCharacter(stream);
      if (currentByte < 0) {
        throw new IOException(UNEXPECTED_EOF);
      }
      buffer.write(currentByte);
      nextByte = stream.peek();
      // a negative value ends the token : the last token of a stream isn't
      // necessarily followed by a blank
    } while (nextByte >= 0 && isNotBlankSperator(nextByte)
        && isNotBeginOfName(nextByte) && isNotSeparator(nextByte));

    return buffer.toByteArray();
  }

  private boolean isNotBeginOfName(int character) {
    return ('/' != character);
  }

  private boolean isNotSeparator(int character) {
    return !('{' == character || '}' == character || '[' == character
        || ']' == character);
  }

  /** Reads one byte and updates the counter of read bytes. */
  private int readNextCharacter(PeekInputStream stream) throws IOException {
    int currentByte = stream.read();
    this.numberOfReadBytes++;
    return currentByte;
  }

  private void skipBlankSeparators(PeekInputStream stream) throws IOException {
    int nextByte = stream.peek();
    while (isBlankSperator(nextByte)) {
      readNextCharacter(stream);
      nextByte = stream.peek();
    }
  }

  private void skipSingleBlankSeparator(PeekInputStream stream)
      throws IOException {
    int nextByte = stream.peek();
    if (isBlankSperator(nextByte)) {
      readNextCharacter(stream);
    }
  }

  /**
   * Returns true for the white-space characters of the PostScript language :
   * space, tab, line feed, form feed, carriage return and null.
   */
  private boolean isBlankSperator(int character) {
    return (character == ' ' || character == '\n' || character == '\r'
        || character == '\t' || character == '\f' || character == 0);
  }

  private boolean isNotBlankSperator(int character) {
    return !isBlankSperator(character);
  }

  private boolean isEExecKeyWord(byte[] token) throws IOException {
    return "eexec".equals(asString(token));
  }

  private boolean isDefKeyWord(byte[] token) throws IOException {
    return "def".equals(asString(token));
  }

  private boolean isReadOnlyKeyWord(byte[] token) throws IOException {
    return "readonly".equals(asString(token));
  }

  private boolean isEncodingKeyWord(byte[] token) throws IOException {
    return "/Encoding".equals(asString(token));
  }

  /** Returns true if the token starts an entry of a custom encoding array. */
  private boolean isDupKeyWord(byte[] token) throws IOException {
    return "dup".equals(asString(token));
  }

  private boolean isStartOfEExecReached() {
    return (this.numberOfReadBytes == this.clearTextSize);
  }

  /**
   * Skips the blank separators and checks if there is still something to
   * read. Both ways of signalling the end of the stream are supported : a
   * negative value returned by peek() or an IOException.
   * NOTE(review): PeekInputStream isn't visible from here, confirm which of
   * the two is really used.
   */
  private boolean isEndOfStream(PeekInputStream stream) {
    try {
      skipBlankSeparators(stream);
      return stream.peek() < 0;
    } catch (IOException e) {
      // deliberately ignored : a failing read means nothing is left to parse
      return true;
    }
  }

  /** Converts the bytes of a token to a String. */
  private String asString(byte[] token) throws IOException {
    return new String(token, TOKEN_ENCODING);
  }

  /**
   * Converts a token to an int.
   *
   * @throws IOException with the given message if the token isn't an integer
   */
  private int parseIntToken(byte[] token, String errorMessage)
      throws IOException {
    try {
      return Integer.parseInt(asString(token));
    } catch (NumberFormatException e) {
      throw newIOException(errorMessage, e);
    }
  }

  /** Creates an IOException which keeps track of its cause. */
  private static IOException newIOException(String message, Throwable cause) {
    IOException exception = new IOException(message);
    exception.initCause(cause);
    return exception;
  }
}