Type1Parser.java example

Explorer
with-aes-master
/*****************************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 ****************************************************************************/

package org.apache.padaf.preflight.font.type1;

import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_PDFDOC;
import static org.apache.padaf.preflight.ValidationConstants.FONT_DICTIONARY_VALUE_ENCODING_WIN;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.fontbox.cff.Type1CharStringParser;
import org.apache.fontbox.cff.Type1FontUtil;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.encoding.MacRomanEncoding;
import org.apache.pdfbox.encoding.PdfDocEncoding;
import org.apache.pdfbox.encoding.StandardEncoding;
import org.apache.pdfbox.encoding.WinAnsiEncoding;

/**
 * Parser for a Type 1 font program (as embedded in a PDF file).
 * <p>
 * The font program is read in two steps: the clear-text part is tokenized to
 * find the <code>/Encoding</code> definition, then the eexec part is decrypted
 * and tokenized to find <code>/lenIV</code> and the <code>/CharStrings</code>
 * dictionary. Everything that is collected is stored in a {@link Type1} object.
 * <p>
 * Instances are created with the <code>createParser*</code> factory methods.
 * {@link #parse()} consumes the font program stream, so a parser instance is
 * presumably meant to be used for a single call to {@link #parse()}.
 */
public class Type1Parser {
	protected static final char NAME_START = '/';
	protected static final String NOTDEF = NAME_START + ".notdef";
	protected static final int DEFAULT_LEN_IV = 4;

	private static final String PS_STANDARD_ENCODING = "StandardEncoding";
	private static final String PS_ISOLATIN_ENCODING = "ISOLatin1Encoding";

	/**
	 * Charset used to convert a token to a String before comparing it with a
	 * PostScript key word.
	 */
	private static final String TOKEN_ENCODING = "US-ASCII";

	/**
	 * Size of the buffer used to copy the eexec part of the font program.
	 */
	private static final int EEXEC_BUFFER_SIZE = 1024;

	/**
	 * The PostScript font stream.
	 */
	private final PeekInputStream fontProgram;
	/**
	 * The length in bytes of the clear-text portion of the Type1 font program.
	 */
	private final int clearTextSize;
	/**
	 * The length in bytes of the eexec encrypted portion of the type1 font
	 * program.
	 */
	private final int eexecSize;

	/**
	 * This counter is used to know how many bytes have been read one by one.
	 * It is used to find the end of the clear part of the font. This counter
	 * is updated during the parsing of the eexec part too but it is not used
	 * there.
	 */
	private int numberOfReadBytes = 0;

	/**
	 * Object which contains information coming from the parsing.
	 */
	private final Type1 type1Font;

	private Type1Parser(InputStream type1, int length1, int length2, Encoding enc) throws IOException {
		super();
		this.fontProgram = new PeekInputStream(type1);
		this.clearTextSize = length1;
		this.eexecSize = length2;
		// ---- Instantiate the Encoding Map, StandardEncoding is the fallback
		this.type1Font = new Type1(enc != null ? enc : new StandardEncoding());
		this.type1Font.addCidWithLabel(-1, NOTDEF);
	}

	/**
	 * Create a parser which uses the StandardEncoding.
	 * 
	 * @param fontProgram the stream of the font program extracted from the PDF file.
	 * @param clearTextLength the length in bytes of the clear part of the font program.
	 * @param eexecLength the length in bytes of the encoded part.
	 * @return a new parser
	 * @throws IOException if the parser can't be created from the stream
	 */
	public static Type1Parser createParser(
			InputStream fontProgram, 
			int clearTextLength, 
			int eexecLength) throws IOException {

		Encoding encoding = getEncodingObject("");
		return createParserWithEncodingObject(fontProgram, clearTextLength, eexecLength, encoding);		
	}

	/**
	 * Create a parser which uses the encoding identified by the given name.
	 * An unknown (or null) name selects the StandardEncoding.
	 * 
	 * @param fontProgram the stream of the font program extracted from the PDF file.
	 * @param clearTextLength the length in bytes of the clear part of the font program.
	 * @param eexecLength the length in bytes of the encoded part.
	 * @param encodingName The name of encoding which is used by this font program.
	 * @return a new parser
	 * @throws IOException if the parser can't be created from the stream
	 */
	public static Type1Parser createParserWithEncodingName(
			InputStream fontProgram, 
			int clearTextLength, 
			int eexecLength, 
			String encodingName) throws IOException {

		Encoding encoding = getEncodingObject(encodingName);
		return createParserWithEncodingObject(fontProgram, clearTextLength, eexecLength, encoding);
	}

	/**
	 * Map a font dictionary encoding name to an Encoding object.
	 * 
	 * @param encodingName the encoding name, may be null
	 * @return the matching Encoding, a StandardEncoding if the name isn't recognized
	 */
	private static Encoding getEncodingObject(String encodingName) {
		Encoding encoding = new StandardEncoding();
		if (FONT_DICTIONARY_VALUE_ENCODING_MAC.equals(encodingName)) {
			encoding = new MacRomanEncoding();
		} else if (FONT_DICTIONARY_VALUE_ENCODING_MAC_EXP.equals(encodingName)) {
			// NOTE(review): the expert encoding is mapped on MacRomanEncoding,
			// presumably because no dedicated class is available - confirm
			encoding = new MacRomanEncoding();
		} else if (FONT_DICTIONARY_VALUE_ENCODING_WIN.equals(encodingName)) {
			encoding = new WinAnsiEncoding();
		} else if (FONT_DICTIONARY_VALUE_ENCODING_PDFDOC.equals(encodingName)) {
			encoding = new PdfDocEncoding();
		}
		return encoding;
	}

	/**
	 * Create a parser which uses the given encoding object.
	 * 
	 * @param fontProgram the stream of the font program extracted from the PDF file.
	 * @param clearTextLength the length in bytes of the clear part of the font program.
	 * @param eexecLength the length in bytes of the encoded part.
	 * @param encoding The encoding object which is used by this font program,
	 * 		if null the StandardEncoding is used.
	 * @return a new parser
	 * @throws IOException if the parser can't be created from the stream
	 */
	public static Type1Parser createParserWithEncodingObject(
			InputStream fontProgram, 
			int clearTextLength, 
			int eexecLength, 
			Encoding encoding)  throws IOException {

		return new Type1Parser(fontProgram, clearTextLength, eexecLength, encoding);
	}

	/**
	 * Parse the clear-text part then the eexec part of the font program.
	 * 
	 * @return the object filled in with the encoding and the glyph descriptions
	 * @throws IOException if the font program can't be read or is malformed
	 */
	public Type1 parse() throws IOException {
		parseClearPartOfFontProgram(this.fontProgram);
		decodeAndParseEExecPart(this.fontProgram);
		return this.type1Font;
	}

	private void parseClearPartOfFontProgram(PeekInputStream stream) throws IOException {
		skipComments(stream);
		parseFontInformationUntilEncodingPart(stream);
	}

	private void decodeAndParseEExecPart(PeekInputStream stream) throws IOException {
		byte[] eexecPart = readEexec(stream);
		byte[] decodedEExecPart = decodeEexec(eexecPart);
		PeekInputStream eexecStream = new PeekInputStream(new ByteArrayInputStream(decodedEExecPart));
		parseEExecPart(eexecStream);
	}

	/**
	 * Skip the comment lines which start the font program.
	 */
	private void skipComments(PeekInputStream stream) throws IOException {
		while (stream.peek() == '%') {
			skipLine(stream);
		}
	}

	/**
	 * Consume the current (comment) line, including its end of line marker
	 * (LF, CR or CR LF).
	 * 
	 * @throws IOException if the end of the stream is reached before the end of the line
	 */
	private void skipLine(PeekInputStream stream) throws IOException {
		int currentCharacter;
		do {
			currentCharacter = readNextCharacter(stream);
			if (currentCharacter < 0) {
				// without this check the loop would never end on a truncated stream
				throw new IOException("Unexpected End Of File during a comment parsing");
			}
		} while (currentCharacter != '\n' && currentCharacter != '\r');

		if (currentCharacter == '\r' && stream.peek() == '\n') {
			readNextCharacter(stream);
		}
	}

	/**
	 * Read the tokens of the clear-text part until the "eexec" key word, 
	 * the Encoding definition is parsed on the way. Then go to the first byte 
	 * of the eexec part.
	 */
	private void parseFontInformationUntilEncodingPart(PeekInputStream stream) throws IOException {
		byte[] token = readToken(stream);
		while (!isEExecKeyWord(token)) {
			// add here specific operation to memorize useful information
			if (isEncodingKeyWord(token)) {
				parseEncodingDefinition(stream);
			}
			token = readToken(stream);
		}

		skipToStartOfEExec(stream);
	}

	/**
	 * Consume the bytes which remain between the "eexec" key word and the 
	 * end of the clear-text part (as declared by the clear text length).
	 * 
	 * @throws IOException if the declared clear text length is already exceeded 
	 * 		or if the stream ends before the eexec part
	 */
	private void skipToStartOfEExec(PeekInputStream stream) throws IOException {
		if (this.numberOfReadBytes > this.clearTextSize) {
			// an equality test only would loop until the end of the stream
			throw new IOException("The clear-text part of the font program is longer than the declared length : " 
					+ this.clearTextSize);
		}

		while (!isStartOfEExecReached()) {
			if (readNextCharacter(stream) < 0) {
				throw new IOException("Unexpected End Of File before the eexec part");
			}
		}
	}

	/**
	 * Parse the value which follows the "/Encoding" key word : a predefined 
	 * encoding name or an array definition ("256 array ... readonly def").
	 */
	private void parseEncodingDefinition(PeekInputStream stream) throws IOException {
		String readableToken = asString(readToken(stream));
		if (PS_ISOLATIN_ENCODING.equals(readableToken)) {
			this.type1Font.initEncodingWithISOLatin1Encoding();
		} else if (PS_STANDARD_ENCODING.equals(readableToken)) {
			this.type1Font.initEncodingWithStandardEncoding();
		} else {
			// the value is only checked, the size of the array isn't used 
			parseIntToken(readableToken, "Invalid encoding : Expected int value before \"array\" " 
					+ "key word if the Encoding isn't Standard or ISOLatin");
			throwExceptionIfUnexpectedToken("array", readToken(stream));
			readEndSetEncodingValues(stream);
		}
	}

	/**
	 * Read the tokens of the decoded eexec part until the end of the stream.
	 * Binary parts which don't belong to the CharStrings dictionary are skipped.
	 */
	private void parseEExecPart(PeekInputStream stream) throws IOException {
		int lenIV = DEFAULT_LEN_IV;
		byte[] previousToken = new byte[0];
		while (!isEndOfStream(stream)) {
			byte[] token = readToken(stream);
			if (isLenIVKeyWord(token)) {
				// lenIV belong to Private Dictionary. 
				// If you create a method to parse PrivateDict, please update this function
				lenIV = parseIntToken(asString(readToken(stream)), 
						"Invalid lenIV value : an integer was expected");
			} else if (isBeginOfBinaryPart(token)) {
				// the length of the binary part is the token which precedes "RD" or "-|"
				int lengthOfBinaryPart = parseIntToken(asString(previousToken), 
						"Binary part found but previous token wasn't an integer");
				skipSingleBlankSeparator(stream);
				readFully(stream, lengthOfBinaryPart);
				token = readToken(stream); // read the end of binary part
			} else if (isCharStringKeyWord(token)) {
				parseCharStringArray(stream, lenIV);
			}
			previousToken = token;
		}
	}

	/**
	 * Parse the content of the CharStrings dictionary. The number which follows
	 * the "/CharStrings" key word is used as the maximum number of elements, 
	 * the parsing stops earlier if the "end" key word is found.
	 */
	private void parseCharStringArray(PeekInputStream stream, int lenIV) throws IOException {
		int numberOfElements = readNumberOfCharStrings(stream);
		goToBeginOfCharStringElements(stream);

		for (int remaining = numberOfElements; remaining > 0; --remaining) {
			byte[] labelToken = readToken(stream);
			if (isEndKeyWord(labelToken)) {
				// the dictionary is closed before the declared number of elements
				break;
			}
			readCharStringElement(stream, labelToken, lenIV);
		}
	}

	private void goToBeginOfCharStringElements(PeekInputStream stream) throws IOException {
		byte[] token;
		do {
			token = readToken(stream);
		} while (isNotBeginKeyWord(token));
	}

	/**
	 * Read one element of the CharStrings dictionary ("size RD binary ND"), 
	 * the label of the glyph is already read by the caller.
	 */
	private void readCharStringElement(PeekInputStream stream, byte[] labelToken, int lenIV) throws IOException {
		String label = asString(labelToken);

		int sizeOfCharString = parseIntToken(asString(readToken(stream)), 
				"Size of the CharString is expected after the \"" + label + "\" label");

		readToken(stream); // skip "RD" or "-|" token
		skipSingleBlankSeparator(stream); // "RD" or "-|" are followed by a space

		byte[] descBinary = readFully(stream, sizeOfCharString);
		byte[] description = Type1FontUtil.charstringDecrypt(descBinary, lenIV);
		Type1CharStringParser t1p = new Type1CharStringParser();
		List<Object> operations = t1p.parse(description);
		type1Font.addGlyphDescription(label, new GlyphDescription(operations));

		readToken(stream); // skip "ND" or "|-" token
	}

	/**
	 * Read exactly "length" bytes from the stream.
	 * 
	 * @return the read bytes
	 * @throws IOException if the length is negative or if the stream ends 
	 * 		before "length" bytes are read
	 */
	private byte[] readFully(PeekInputStream stream, int length) throws IOException {
		if (length < 0) {
			throw new IOException("Invalid length of binary part : " + length);
		}

		byte[] data = new byte[length];
		int offset = 0;
		while (offset < length) {
			int read = stream.read(data, offset, length - offset);
			if (read <= 0) {
				throw new IOException("Unexpected End Of File");
			}
			offset += read;
		}
		return data;
	}

	private boolean isNotBeginKeyWord(byte[] token) throws IOException {
		return !"begin".equals(asString(token));
	}

	private boolean isEndKeyWord(byte[] token) throws IOException {
		return "end".equals(asString(token));
	}

	private boolean isBeginOfBinaryPart(byte[] token) throws IOException {
		String word = asString(token);
		return ("RD".equals(word) || "-|".equals(word));
	}

	private boolean isLenIVKeyWord(byte[] token) throws IOException {
		return "/lenIV".equals(asString(token));
	}

	private boolean isCharStringKeyWord(byte[] token) throws IOException {
		return "/CharStrings".equals(asString(token));
	}

	private int readNumberOfCharStrings(PeekInputStream stream) throws IOException {
		return parseIntToken(asString(readToken(stream)), "Number of CharStrings elements is expected.");
	}

	private void throwExceptionIfUnexpectedToken(String expectedValue, byte[] token) throws IOException {
		String valueToCheck = asString(token);
		if (!expectedValue.equals(valueToCheck)) {
			throw new IOException(expectedValue + " was expected but we received " + valueToCheck);
		}
	}

	/**
	 * Read the content of an Encoding array until the "readonly def" key words.
	 * Each "dup cid /label put" sequence is stored in the Type1 object.
	 */
	private void readEndSetEncodingValues(PeekInputStream stream) throws IOException {
		byte[] token = readToken(stream);
		boolean lastTokenWasReadOnly = false;
		while ( !(lastTokenWasReadOnly && isDefKeyWord(token)) ) {
			if (isDupKeyWord(token)) {
				String cid = asString(readToken(stream));
				String label = asString(readToken(stream));
				this.type1Font.addCidWithLabel(
						parseIntToken(cid, "Invalid encoding : Expected CID value before \"" + label + "\" label"), 
						label);
			} else {
				lastTokenWasReadOnly = isReadOnlyKeyWord(token);
			}
			token = readToken(stream);
		}
	}

	/**
	 * Copy the eexec part (at most eexecSize bytes) of the stream.
	 * 
	 * @return the read bytes, it contains less than eexecSize bytes if the 
	 * 		stream is too short
	 */
	private byte[] readEexec(PeekInputStream stream) throws IOException {
		byte[] buffer = new byte[EEXEC_BUFFER_SIZE];
		ByteArrayOutputStream eexecPart = new ByteArrayOutputStream();
		int remaining = this.eexecSize;
		while (remaining > 0) {
			int read = stream.read(buffer, 0, Math.min(buffer.length, remaining));
			if (read <= 0) {
				break;
			}
			eexecPart.write(buffer, 0, read);
			remaining -= read;
		}
		return eexecPart.toByteArray();
	}

	private byte[] decodeEexec(byte[] eexec) {
		return Type1FontUtil.eexecDecrypt(eexec);
	}

	/**
	 * Read the next token : a string literal, an array, a procedure or 
	 * a name/argument. Leading blank separators are skipped.
	 * 
	 * @throws IOException if there are no more token in the stream
	 */
	private byte[] readToken(PeekInputStream stream) throws IOException {
		skipBlankSeparators(stream);

		int nextByte = stream.peek();
		if (nextByte < 0) {
			throw new IOException("Unexpected End Of File");
		}

		switch (nextByte) {
		case '(':
			return readStringLiteral(stream);
		case '[':
			return readArray(stream);
		case '{':
			return readProcedure(stream);
		default:
			return readNameOrArgument(stream);
		}
	}

	private byte[] readStringLiteral(PeekInputStream stream) throws IOException {
		// in a string, a parenthesis preceded by a backslash isn't a delimiter
		return readBalanced(stream, '(', ')', true);
	}

	private byte[] readArray(PeekInputStream stream) throws IOException {
		return readBalanced(stream, '[', ']', false);
	}

	private byte[] readProcedure(PeekInputStream stream) throws IOException {
		return readBalanced(stream, '{', '}', false);
	}

	/**
	 * Read a token enclosed by the given delimiters, nested delimiters are 
	 * supported. The next byte of the stream must be the opening delimiter.
	 * 
	 * @param opening the opening delimiter
	 * @param closing the closing delimiter
	 * @param backslashEscapes true if the byte which follows a backslash must 
	 * 		be copied without being interpreted as a delimiter
	 * @return the token, delimiters included
	 * @throws IOException if the stream ends before the closing delimiter
	 */
	private byte[] readBalanced(PeekInputStream stream, char opening, char closing, boolean backslashEscapes)
	throws IOException {
		ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		int opened = 0;
		do {
			int currentByte = readRequiredCharacter(stream);
			buffer.write(currentByte);

			if (backslashEscapes && currentByte == '\\') {
				buffer.write(readRequiredCharacter(stream));
			} else if (currentByte == opening) {
				opened++;
			} else if (currentByte == closing) {
				opened--;
			}
		} while (opened != 0);

		return buffer.toByteArray();
	}

	/**
	 * Read a name or an argument : at least one byte is read, then the token 
	 * ends before a blank separator, the begin of a name, a separator or the
	 * end of the stream.
	 */
	private byte[] readNameOrArgument(PeekInputStream stream) throws IOException {
		ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		int nextByte;
		do {
			buffer.write(readRequiredCharacter(stream));
			nextByte = stream.peek();
			// a negative value means "end of stream" if peek() follows the 
			// InputStream.read() convention
		} while (nextByte >= 0 && isNotBlankSeparator(nextByte) 
				&& isNotBeginOfName(nextByte) && isNotSeparator(nextByte));

		return buffer.toByteArray();
	}

	private boolean isNotBeginOfName(int character) {
		return (NAME_START != character);
	}

	private boolean isNotSeparator(int character) {
		return !('{' == character || '}' == character || '[' == character || ']' == character);
	}

	/**
	 * Read one byte and update the counter of read bytes.
	 * 
	 * @return the read byte, the value returned by the stream if no byte is available
	 */
	private int readNextCharacter(PeekInputStream stream) throws IOException {
		int currentByte = stream.read();
		if (currentByte >= 0) {
			this.numberOfReadBytes++;
		}
		return currentByte;
	}

	/**
	 * Read one byte, the end of the stream is an error.
	 */
	private int readRequiredCharacter(PeekInputStream stream) throws IOException {
		int currentByte = readNextCharacter(stream);
		if (currentByte < 0) {
			throw new IOException("Unexpected End Of File");
		}
		return currentByte;
	}

	private void skipBlankSeparators(PeekInputStream stream) throws IOException {
		while (isBlankSeparator(stream.peek())) {
			readNextCharacter(stream);
		}
	}

	/**
	 * Skip at most one blank separator. Only one byte must be consumed before 
	 * a binary part because the first byte of the binary data can have the 
	 * value of a blank separator.
	 */
	private void skipSingleBlankSeparator(PeekInputStream stream) throws IOException {
		if (isBlankSeparator(stream.peek())) {
			readNextCharacter(stream);
		}
	}

	/**
	 * Check if the character is a PostScript white-space character 
	 * (space, LF, CR, tab, form feed or NUL).
	 */
	private boolean isBlankSeparator(int character) {
		return (character == ' ' || character == '\n' || character == '\r' 
				|| character == '\t' || character == '\f' || character == 0);
	}

	private boolean isNotBlankSeparator(int character) {
		return !isBlankSeparator(character);
	}

	private boolean isEExecKeyWord(byte[] token) throws IOException {
		return "eexec".equals(asString(token));
	}

	private boolean isDefKeyWord(byte[] token) throws IOException {
		return "def".equals(asString(token));
	}

	private boolean isReadOnlyKeyWord(byte[] token) throws IOException {
		return "readonly".equals(asString(token));
	}

	private boolean isEncodingKeyWord(byte[] token) throws IOException {
		return "/Encoding".equals(asString(token));
	}

	private boolean isDupKeyWord(byte[] token) throws IOException {
		return "dup".equals(asString(token));
	}

	private boolean isStartOfEExecReached() {
		return (this.numberOfReadBytes == this.clearTextSize);
	}

	/**
	 * Skip the blank separators and check if there are no more bytes.
	 * The end of the stream is detected if the stream throws an IOException or
	 * if peek() returns a negative value.
	 */
	private boolean isEndOfStream(PeekInputStream stream) {
		try {
			skipBlankSeparators(stream);
			return stream.peek() < 0;
		} catch (IOException e) {
			return true;
		}
	}

	/**
	 * Convert a token to a String using the token charset.
	 */
	private static String asString(byte[] token) throws IOException {
		return new String(token, TOKEN_ENCODING);
	}

	/**
	 * Convert a token to an int.
	 * 
	 * @param word the token as String
	 * @param errorMessage the message of the exception thrown if the token isn't an integer
	 * @throws IOException if the token isn't an integer
	 */
	private static int parseIntToken(String word, String errorMessage) throws IOException {
		try {
			return Integer.parseInt(word);
		} catch (NumberFormatException e) {
			IOException exception = new IOException(errorMessage);
			exception.initCause(e);
			throw exception;
		}
	}
}