/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.fonts.type1; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map.Entry; import java.util.Scanner; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; public class PostscriptParser { protected static final Log LOG = LogFactory.getLog(PostscriptParser.class); /* Patterns used to identify Postscript elements */ private static final String DICTIONARY = "dict"; private static final String FIXED_ARRAY = "array"; private static final String VARIABLE_ARRAY = "["; private static final String SUBROUTINE = "{"; /* A list of parsed subroutines so if they are encountered during the parsing * phase of another element, they can be read and pattern matched. */ private HashMap<String, PSSubroutine> subroutines = new HashMap<String, PSSubroutine>(); /** * Parses the postscript document and returns a list of elements * @param segment The byte array containing the postscript data * @return A list of found Postscript elements * @throws IOException */ public List<PSElement> parse(byte[] segment) throws IOException { List<PSElement> parsedElements = new ArrayList<PSElement>(); /* Currently only scan and store the top level element. For deeper * Postscript parsing you can push and pop elements from a stack */ PSElement foundElement = null; String operator = null; StringBuilder token = new StringBuilder(); List<String> tokens = new ArrayList<String>(); int startPoint = -1; boolean specialDelimiter = false; boolean lastWasSpecial = false; for (int i = 0; i < segment.length; i++) { byte cur = segment[i]; if (foundElement != null && foundElement.hasMore()) { foundElement.parse(cur, i); continue; } else { char c = (char)cur; if (!lastWasSpecial) { specialDelimiter = (c == '{' || c == '}' || c == '[' || c == ']' || (!token.toString().equals("") && c == '/')); boolean isNotBreak = !(c == ' ' || c == '\r' || cur == 15 || cur == 12 || cur == 10); if (isNotBreak && !specialDelimiter) { token.append(c); continue; } } else { lastWasSpecial = false; token.append(c); if (token.toString().equals("/")) { continue; } } } try { boolean setOp = false; if ((foundElement == null || !foundElement.hasMore()) && token.length() > 1 && token.charAt(0) == '/' && tokens.size() != 1 || hasEndToken(token.toString())) { operator = token.toString(); setOp = true; if (tokens.size() > 2 && tokens.get(tokens.size() - 1).equals("def")) { PSVariable newVar = new PSVariable(tokens.get(0), startPoint); newVar.setValue(tokens.get(1)); newVar.setEndPoint(i - operator.length()); parsedElements.add(newVar); } tokens.clear(); startPoint = i - token.length(); } if (operator != null) { if (foundElement instanceof PSSubroutine) { PSSubroutine sub = (PSSubroutine)foundElement; subroutines.put(sub.getOperator(), sub); parsedElements.add(sub); if (!setOp) { operator = ""; } } else { if (foundElement != null) { if (!hasMatch(foundElement.getOperator(), parsedElements)) { parsedElements.add(foundElement); } else { LOG.warn("Duplicate " + foundElement.getOperator() + " in font file, Ignoring."); } } } //Compare token against patterns and create an element if matched foundElement = createElement(operator, token.toString(), startPoint); } } finally { tokens.add(token.toString()); token = new StringBuilder(); if (specialDelimiter) { specialDelimiter = false; lastWasSpecial = true; //Retrace special postscript character so it can be processed separately i--; } } } return parsedElements; } private boolean hasEndToken(String token) { return token.equals("currentdict"); } private boolean hasMatch(String operator, List<PSElement> elements) { for (PSElement element : elements) { if (element.getOperator().equals(operator)) { return true; } } return false; } public PSElement createElement(String operator, String elementID, int startPoint) { if (operator.equals("")) { return null; } if (elementID.equals(FIXED_ARRAY)) { return new PSFixedArray(operator, startPoint); } else if (elementID.equals(VARIABLE_ARRAY)) { return new PSVariableArray(operator, startPoint); } else if (elementID.equals(SUBROUTINE)) { return new PSSubroutine(operator, startPoint); } else if (!operator.equals("/Private") && elementID.equals(DICTIONARY)) { return new PSDictionary(operator, startPoint); } return null; } /** * A base Postscript element class */ public abstract class PSElement { /* The identifying operator for this element */ protected String operator; private List<Byte> token; /* Determines whether there is any more data to be read whilst parsing */ protected boolean hasMore = true; /* The locations of any entries containing binary data (e.g. arrays) */ protected LinkedHashMap<String, int[]> binaryEntries; /* The tokens parsed from the current element */ protected List<String> tokens; /* Determines whether binary data is currently being read / parsed */ protected boolean readBinary; /* The location of the element within the binary data */ private int startPoint = -1; protected int endPoint = -1; /* A flag to determine if unexpected postscript has been found in the element */ private boolean foundUnexpected; public PSElement(String operator, int startPoint) { this.operator = operator; this.startPoint = startPoint; token = new ArrayList<Byte>(); binaryEntries = new LinkedHashMap<String, int[]>(); tokens = new ArrayList<String>(); } /** * Gets the Postscript element operator * @return The operator returned as a string */ public String getOperator() { return operator; } /** * The start location of the element within the source binary data * @return The start location returned as an integer */ public int getStartPoint() { return startPoint; } /** * The end location of the element within the source binary data * @return The end location returned as an integer */ public int getEndPoint() { return endPoint; } /** * Takes over the task of tokenizing the byte data * @param cur The current byte being read */ public void parse(byte cur, int pos) throws UnsupportedEncodingException { if (!readBinary) { char c = (char)cur; boolean specialDelimiter = (c == '{' || c == '}' || c == '[' || c == ']' || c == '(' || c == ')'); boolean isNotValidBreak = !(c == ' ' || cur == 15 || cur == 12 || c == '\r' || c == 10); if (isNotValidBreak && !specialDelimiter) { token.add(cur); } else { parseToken(pos); } if (specialDelimiter) { token.add(cur); parseToken(pos); } } else { parseByte(cur, pos); } } private void parseToken(int pos) throws UnsupportedEncodingException { byte[] bytesToken = new byte[token.size()]; for (int i = 0; i < token.size(); i++) { bytesToken[i] = token.get(i); } parseToken(new String(bytesToken, "ASCII"), pos); token.clear(); } /** * Passes responsibility for processing the byte stream to the PostScript object * @param cur The byte currently being read * @param pos The position of the given byte */ public abstract void parseByte(byte cur, int pos); /** * Delegates the parse routine to a sub class * @param token The token which to parse */ public abstract void parseToken(String token, int curPos); protected boolean isInteger(String intValue) { try { Integer.parseInt(intValue); return true; } catch (NumberFormatException ex) { return false; } } public LinkedHashMap<String, int[]> getBinaryEntries() { return binaryEntries; } /** * Gets the binary entry location of a given index from the array * @param index The index for which to retrieve the binary data location * @return */ public int[] getBinaryEntryByIndex(int index) { int count = 0; for (Entry<String, int[]> entry : binaryEntries.entrySet()) { if (count == index) { return entry.getValue(); } count++; } return new int[0]; } /** * Determines if more data is still to be parsed for the Postscript element. * @return Returns true if more data exists */ public boolean hasMore() { return hasMore; } /** * Sets a value to be true if an expected entry postscript is found in the element. * An example is where the encoding table may have a series of postscript operators * altering the state of the array. In this case the only option will be to * fully embed the font to avoid incorrect encoding in the resulting subset. * @param foundUnexpected true if unexpected postscript is found. */ protected void setFoundUnexpected(boolean foundUnexpected) { this.foundUnexpected = foundUnexpected; } /** * Returns whether unexpected postscript has been found in the element * @return true if unexpected postscript is found */ public boolean getFoundUnexpected() { return this.foundUnexpected; } } /** * An object representing a Postscript array with a fixed number of entries */ public class PSFixedArray extends PSElement { private String entry = ""; private String token = ""; private boolean finished; protected int binaryLength; /* A list containing each entry and it's contents in the array */ private HashMap<Integer, String> entries; private static final String READ_ONLY = "readonly"; public PSFixedArray(String operator, int startPoint) { super(operator, startPoint); entries = new HashMap<Integer, String>(); } @Override public void parseToken(String token, int curPos) { if (!checkForEnd(token) || token.equals("def")) { hasMore = false; endPoint = curPos; return; } if (token.equals("dup")) { if (entry.startsWith("dup")) { addEntry(entry); } entry = ""; tokens.clear(); } if (!token.equals(READ_ONLY)) { entry += token + " "; } if (!token.trim().equals("")) { tokens.add(token); } if (tokens.size() == 4 && tokens.get(0).equals("dup") && isInteger(tokens.get(2))) { binaryLength = Integer.parseInt(tokens.get(2)); readBinary = true; } } private boolean checkForEnd(String checkToken) { boolean subFound = false; //Check for a subroutine matching that of an array end definition PSSubroutine sub = subroutines.get("/" + checkToken); if (sub != null && sub.getSubroutine().contains("def")) { subFound = true; } if (!finished && (subFound || checkToken.equals("def"))) { finished = true; addEntry(entry); return false; } else { return !finished; } } /** * Gets a map of array entries identified by index * @return Returns the map of array entries */ public HashMap<Integer, String> getEntries() { return entries; } private void addEntry(String entry) { if (!entry.equals("")) { if (entry.indexOf('/') != -1 && entry.charAt(entry.indexOf('/') - 1) != ' ') { entry = entry.replace("/", " /"); } int entryLen; do { entryLen = entry.length(); entry = entry.replace(" ", " "); } while (entry.length() != entryLen); Scanner s = new Scanner(entry).useDelimiter(" "); boolean valid = false; do { s.next(); if (!s.hasNext()) { break; } int id = s.nextInt(); entries.put(id, entry); valid = true; } while (false); if (!valid) { setFoundUnexpected(true); } } } @Override public void parseByte(byte cur, int pos) { if (binaryLength > 0) { token += (char)cur; binaryLength--; } else { if (readBinary) { int bLength = Integer.parseInt(tokens.get(2)); int start = pos - bLength; int end = start + bLength; binaryEntries.put(tokens.get(1), new int[] {start, end}); token = ""; readBinary = false; } else { tokens.add(token); parseToken(token, pos); token = ""; } } } } /** * An object representing a Postscript array with a variable number of entries */ public class PSVariableArray extends PSElement { private int level; private List<String> arrayItems; private String entry = ""; public PSVariableArray(String operator, int startPoint) { super(operator, startPoint); arrayItems = new ArrayList<String>(); } @Override public void parseToken(String token, int curPos) { entry += token + " "; if (level <= 0 && token.length() > 0 && token.charAt(0) == ']') { hasMore = false; endPoint = curPos; return; } /* If the array item is a subroutine, the following keeps track of the current level * of the tokens being parsed so that it can identify the finish */ if (token.equals("{")) { level++; } else if (token.equals("}")) { level--; if (!entry.equals("") && level == 0) { arrayItems.add(entry); entry = ""; } } } /** * Gets a list of found array entries within the variable array * @return Returns the found array elements as a list */ public List<String> getEntries() { return arrayItems; } @Override public void parseByte(byte cur, int pos) { //Not currently used } } /** * An object representing a Postscript subroutine element */ public class PSSubroutine extends PSElement { private int level = 1; private String entry = ""; public PSSubroutine(String operator, int startPoint) { super(operator, startPoint); } @Override public void parseToken(String token, int curPos) { if (level == 0 && token.length() > 0 && (token.equals("def") || token.equals("ifelse") || token.charAt(0) == '}')) { hasMore = false; endPoint = curPos; return; } if (token.equals("{")) { level++; } else if (token.equals("}")) { level--; } entry += token + " "; } /** * Gets the parsed subroutine element as unmodified string * @return The subroutine as a string */ public String getSubroutine() { return entry.trim(); } @Override public void parseByte(byte cur, int pos) { //Not currently used } } /** * An object representing a Postscript dictionary */ public class PSDictionary extends PSElement { /* A list of dictionary entries which they themselves could be variables, * subroutines and arrays, This is currently left as parsed Strings as there is * no need to delve deeper for our current purposes. */ private HashMap<String, String> entries; private String entry = ""; private String token = ""; protected int binaryLength; public PSDictionary(String operator, int startPoint) { super(operator, startPoint); entries = new HashMap<String, String>(); } @Override public void parseToken(String token, int curPos) { if (token.equals("end")) { addEntry(entry); hasMore = false; endPoint = curPos; return; } if (token.startsWith("/")) { if (entry.trim().startsWith("/")) { tokens.clear(); addEntry(entry); } entry = ""; } if (tokens.size() >= 1 || token.startsWith("/")) { tokens.add(token); } entry += token + " "; if (tokens.size() == 3 && tokens.get(0).startsWith("/") && !tokens.get(2).equals("def") && isInteger(tokens.get(1))) { binaryLength = Integer.parseInt(tokens.get(1)); readBinary = true; } } /** * Gets a map of dictionary entries identified by their name * @return Returns the dictionary entries as a map */ public HashMap<String, String> getEntries() { return entries; } private void addEntry(String entry) { Scanner s = new Scanner(entry).useDelimiter(" "); String id = s.next(); entries.put(id, entry); } @Override public void parseByte(byte cur, int pos) { if (binaryLength > 0) { binaryLength--; } else { if (readBinary) { int start = pos - Integer.parseInt(tokens.get(1)); int end = pos; binaryEntries.put(tokens.get(0), new int[] {start, end}); readBinary = false; } else { tokens.add(token); parseToken(token, pos); } } } } /** * An object representing a Postscript variable */ public class PSVariable extends PSElement { /* The value of the parsed Postscript variable. */ private String value = ""; public PSVariable(String operator, int startPoint) { super(operator, startPoint); } @Override public void parseToken(String token, int curPos) { if (token.equals("def")) { hasMore = false; endPoint = curPos; return; } } @Override public void parseByte(byte cur, int pos) { //Not currently used } /** * Sets the value of the Postscript variable value * @param value The value to set */ public void setValue(String value) { this.value = value; } /** * Gets the value of the Postscript variable * @return Returns the value as a String */ public String getValue() { return value; } /** * Sets the end point location of the current Postscript variable. * @param endPoint The end point location as an integer */ public void setEndPoint(int endPoint) { this.endPoint = endPoint; } } }