/* * Copyright 2006-2017 ICEsoft Technologies Canada Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the * License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.icepdf.core.pobjects.fonts.ofont; import org.icepdf.core.io.SeekableInput; import org.icepdf.core.pobjects.Dictionary; import org.icepdf.core.pobjects.Name; import org.icepdf.core.pobjects.Stream; import org.icepdf.core.pobjects.StringObject; import org.icepdf.core.util.Library; import org.icepdf.core.util.Parser; import org.icepdf.core.util.Utils; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; /** * The purpose of the class is to parse a CMap file. A CMap specifies the * mapping from character codes to character selectors. A CMap file defines * the relationship between a character code and the character description * <br> * Character selectors are always CIDs in a CIDFont. A CMap serves a function * analogous to the Encoding dictionary for a simple font. The CMap does not * refer directly to a specific CIDFont; instead, it is combined with it as part * of a CIDkeyed font, represented in PDF as a Type 0 font dictionary. Within * the CMap, the character mappings refer to the associated CIDFont by font * number, which in PDF is always 0. * * @since 1.0 */ class CMap extends Dictionary implements org.icepdf.core.pobjects.fonts.CMap { private static final Logger logger = Logger.getLogger(CMap.class.toString()); /** * Dictionary containing entries that define the character collection for * the CIDFont or CIDFonts associate with the CMap. Specifically the * character collections registry, ordering and supplement is defined. */ private HashMap cIdSystemInfo; /** * PostScript name of the CMap. */ private String cMapName; /** * defines changes to the internal organization of CMap files or the * semantics of CMap operators. The CMapType of CMaps described in * this document. * cMapType = 2 - indicates a ToUnicode cmap * cMapType = 1 - indicates a CMap object * cMapType = 0 - not sure yet, maybe CMap with external CMap reference */ private float cMapType; /** * The name of a predefined CMap, or a stream containing a CMap, that * is to be used as the base for this CMap. This allows the CMap to * be defined differentially, specifying only the character mappings * that differ from the base CMap. */ private Object useCMap; /** * The WMode dictionary entry controls whether the CID-keyed font writes * horizontally or vertically. It indicates which set of metrics will be * used when a base font is shown. An entry of 0 defines horizontal * writing from left to right; an entry of 1 defines vertical writing * from top to bottom. */ private int wMode; /** * Defines the source character code range. Source CMap references must * be in this range. */ private int[][] codeSpaceRange; // determine if cmap is using one or two byte character maps. private boolean oneByte; /** * Defines mappings from character codes to Unicode characters in the * associated font. Expressed in UTF-16BE encoding. */ private HashMap<Integer, char[]> bfChars; /** * Defines mappings from character codes to Unicode character ranges. * Expressed in UTF-16BE encoding. */ private List<CMapRange> bfRange; /** * Define mappings of individual input character codes to CIDS in the * associated CIDFont. */ private HashMap cIdChars; /** * Similar to cIdChars but defines ranges of input codes. */ private HashMap cIdRange; /** * Define mappings if the normal mapping produces a CID for which no glyph * in the associated CIDFont */ private HashMap notDefChars; /** * Similar to notDefChars but defines ranges of input codes. */ private HashMap notDefRange; /** * Stream containing the embbeded CMap */ private Stream cMapStream; private InputStream cMapInputStream; /** * Create a new CMap instance. If the CMap is created from a named object * the dictionary property will be populated with values for the keys * Type, CMapName and CIDSystemInfo which are also repeated in the CMap * file itself. If the CMap file was created from a Font object then they * previously mentioned keys values must be parsed from the CMap file. * * @param library pointer to default library containing all document objects * @param entries HashMap containing all of the dictionary properties associated * with this object. The HashMap will be empty if this object * was created via a Font objects ToUnicode key. * @param cMapStream stream containing CMap data. */ public CMap(Library library, HashMap entries, Stream cMapStream) { super(library, entries); this.cMapStream = cMapStream; } public CMap(Library l, HashMap h, InputStream cMapInputStream) { super(l, h); this.cMapInputStream = cMapInputStream; } public boolean isOneByte() { return oneByte; } public boolean isTwoByte() { return !oneByte; } public boolean isMixedByte() { return false; } public boolean isEmptyMapping() { return false; } /** * Start the parsing of the CMap file. Once completed, all necessary data * should be captured from the CMap file. * <br> * Simple CMap * /CIDInit /ProcSet findresource * begin * 12 dict begin * begincmap * /CIDSystemInfo << * /Registry (Adobe) * /Ordering (UCS) * /Supplement 0 * >> def * /CMapName /Adobe-Identity-UCS def * /CMapType 2 def * 1 begincodespacerange * <00> <FF> * endcodespacerange * 7 beginbfchar * <01> <0054> * <02> <0065> * <03> <0073> * <04> <0074> * <05> <0069> * <06> <006E> * <07> <0067> * endbfchar * 2 beginbfrange * <0000> <005E> <0020> * <005F> <0061>[<00660066> <0066069> <00660066006C>] * endbfrange * endcmap * CMapName currentdict /CMap defineresource pop * end * end */ public void init() { try { // get the byes and push them through the parser to get objects in CMap if (cMapInputStream == null) { cMapInputStream = cMapStream.getDecodedByteArrayInputStream(); } // Print CMap ASCII if (logger.isLoggable(Level.FINER)) { String content; if (cMapInputStream instanceof SeekableInput) { content = Utils.getContentFromSeekableInput((SeekableInput) cMapInputStream, false); } else { InputStream[] inArray = new InputStream[]{cMapInputStream}; content = Utils.getContentAndReplaceInputStream(inArray, false); cMapInputStream = inArray[0]; } logger.finer("<------------------------ CMap"); logger.finer(content); logger.finer("CMap ------------------------> "); } Parser parser = new Parser(cMapInputStream); /** * Start gathering the data from the CMap objects, the CMap file * is fixed in format so this routine doesn't have to be to * complicated */ Object previousToken = null; while (true) { Object token = parser.getStreamObject(); // break out and the end of the stream if (token == null) { break; } // find cIdSystemInfo, not always a named attribute String nameString = token.toString(); if (nameString.toLowerCase().indexOf("cidsysteminfo") >= 0) { // CIDSystemInfo only has one property which should be // always be hash by definition and our parser result token = parser.getStreamObject(); if (token instanceof HashMap) { cIdSystemInfo = (HashMap) token; // always followed by a def token; token = parser.getStreamObject(); } // ignore any other format that isn't a hash } // find main CMap descriptors if (token instanceof Name) { nameString = token.toString(); // find cMapName if (nameString.toLowerCase().indexOf("cmapname") >= 0) { // cmapname will always be a Name object token = parser.getStreamObject(); cMapName = token.toString(); // always followed by a def token; token = parser.getStreamObject(); } // find cMapType if (nameString.toLowerCase().indexOf("cmaptype") >= 0) { // cmapname will always be a float token = parser.getStreamObject(); cMapType = Float.parseFloat(token.toString()); // always followed by a def token; token = parser.getStreamObject(); } // find UseMap if (nameString.toLowerCase().indexOf("usemap") >= 0) { // nothing for now } } // record the actual CMap mappings if (token instanceof String) { String stringToken = (String) token; // find codeSpaceRange if (stringToken.equalsIgnoreCase("begincodespacerange")) { // before begincodespacerange, the number of ranges is defined int numberOfRanges = (int) Float.parseFloat(previousToken.toString()); // a range will always have two hex numbers codeSpaceRange = new int[numberOfRanges][2]; for (int i = 0; i < numberOfRanges; i++) { // low end of range token = parser.getStreamObject(); StringObject hexToken = (StringObject) token; int startRange = hexToken.getUnsignedInt(0, hexToken.getLength()); // high end of range token = parser.getStreamObject(); hexToken = (StringObject) token; int length = hexToken.getLength(); int endRange = hexToken.getUnsignedInt(0, length); codeSpaceRange[i][0] = startRange; codeSpaceRange[i][1] = endRange; if (length == 2) { oneByte = true; } } } // find bfChars if (stringToken.equalsIgnoreCase("beginbfchar")) { // before beginbfchar, the number of ranges is defined int numberOfbfChar = (int) Float.parseFloat(previousToken.toString()); // there can be multiple char maps so we don't want to override previous values. if (bfChars == null) { bfChars = new HashMap<Integer, char[]>(numberOfbfChar); } // a range will always have two hex numbers for (int i = 0; i < numberOfbfChar; i++) { // cid value token = parser.getStreamObject(); StringObject hexToken = (StringObject) token; Integer key = hexToken.getUnsignedInt(0, hexToken.getLength()); // cid mapping value token = parser.getStreamObject(); hexToken = (StringObject) token; char[] value = null; try { value = convertToString(hexToken.getLiteralStringBuffer()); } catch (NumberFormatException e) { logger.log(Level.FINE, "CMAP: ", e); } bfChars.put(key, value); } } // find bfRange if (stringToken.equalsIgnoreCase("beginbfrange")) { int numberOfbfRanges = (int) Float.parseFloat(previousToken.toString()); if (bfRange == null) { bfRange = new ArrayList<CMapRange>(numberOfbfRanges); } StringObject hexToken; Integer startRange; Integer endRange; // work through each range for (int i = 0; i < numberOfbfRanges; i++) { // look for start range. token = parser.getStreamObject(); if (token instanceof StringObject) { hexToken = (StringObject) token; startRange = hexToken.getUnsignedInt(0, hexToken.getLength()); } else { // likely a malformed cmap break; } // end range token = parser.getStreamObject(); if (token instanceof StringObject) { hexToken = (StringObject) token; endRange = hexToken.getUnsignedInt(0, hexToken.getLength()); } else { // likely a malformed cmap break; } // the next token will be vector or another Integer token = parser.getStreamObject(); if (token instanceof List) { bfRange.add(new CMapRange(startRange, endRange, (List) token)); } else { hexToken = (StringObject) token; Integer offset = hexToken.getUnsignedInt(0, hexToken.getLength()); bfRange.add(new CMapRange(startRange, endRange, offset)); } } } /** * CID mappings still need to be implemented but I have * no examples of yet to check. The CID mappings are little * bit different then the bf ranges. */ // find cIdChars if (stringToken.equalsIgnoreCase("begincidchar")) { } // find cIdRange if (stringToken.equalsIgnoreCase("begincidrange")) { } // find notDefChars if (stringToken.equalsIgnoreCase("beginnotdefchar")) { } // find notDefRange if (stringToken.equalsIgnoreCase("beginnotdefrange")) { } } previousToken = token; } } catch (UnsupportedEncodingException e) { logger.log(Level.SEVERE, "CMap parsing error", e); } catch (IOException e) { // eat it, end of file stream } finally { if (cMapInputStream != null) { try { cMapInputStream.close(); } catch (IOException e) { logger.log(Level.FINE, "Error clossing cmap stream", e); } } } } public String toUnicode(char ch) { // check bfChar if (bfChars != null) { char[] tmp = bfChars.get((int) ch); if (tmp != null) { return String.valueOf(tmp); } } // check bfRange for matches, there may be many ranges to check if (bfRange != null) { for (CMapRange aBfRange : bfRange) { if (aBfRange.inRange(ch)) { return String.valueOf(aBfRange.getCMapValue(ch)); } } } return String.valueOf(ch); } /** * The method is called when ever a character code is incounter that has a * FontDescriptor that defines a ToUnicode CMap. The <code>charMap</code> * is mapped according to the CMap rules and a mapped character code is * returned. * * @param charMap value to map against the ToUnicode CMap * @return mapped character value. */ public char toSelector(char charMap) { // print out a mapping for a particular character // if (charMap == 42){ // System.out.println("mapping " + (int)charMap + " " + bfChars); // System.out.println(cIdSystemInfo); // System.out.println(cMapType); // } // for ToUnicode we only need to look at bfChar and bfRange. // bfChar values have a higher precedent then bfRange. // check bfChar if (bfChars != null) { char[] tmp = bfChars.get((int) charMap); if (tmp != null) { return tmp[0]; } } // check bfRange for matches, there may be many ranges to check if (bfRange != null) { for (CMapRange aBfRange : bfRange) { if (aBfRange.inRange(charMap)) { return aBfRange.getCMapValue(charMap)[0]; } } } return charMap; } public char toSelector(char charMap, boolean isCFF) { return toSelector(charMap); } /** * Help class to store data for a CMap bfrange value. CMap bfranges come * in two flavours but there both share a start and end range value. * Characters that fall in this range are mapped with wither the offset * value or to an offset vector. * <br> * Basic offset Mapping * <0000> <005E> <0020> - values that are between <0000> and <005E> are * offset by <0020> ie <0001> maps to <0021>, <004f> maps to <006f> and * <0006F> would not be mapped by this range. * <br> * Vector offset Mapping * <005F> <0061>[<00660066> <0066069> <00660066006C>] - values that are * between <005f> and <0067> are mapped directly to an offset index in the * array. ie <005f> maps to <00660066> and <0060> maps to <0066069> and * finally <0061> maps to <00660066006C>. */ class CMapRange { // start value for a bfrange int startRange = 0; // end value for a bfrange int endRange = 0; // offset mapping int offsetValue = 0; // offset vector List offsetVecor = null; /** * Create a new instance of a CMapRange, when it is a simple range * mapping with an offset value. * * @param startRange start range of mapping * @param endRange end range of mapping * @param offsetValue value to offset a mapping by */ public CMapRange(int startRange, int endRange, int offsetValue) { this.startRange = startRange; this.endRange = endRange; this.offsetValue = offsetValue; } /** * Creat new instance of a CMapRange, when it is a more vector range * mapping. Each valid number in the range maps the a corresponding * value in the vector based on the numbers offset from the start range. * * @param startRange start range of mapping * @param endRange end range of the mapping * @param offsetVecor offset mappped vector */ public CMapRange(int startRange, int endRange, List offsetVecor) { this.startRange = startRange; this.endRange = endRange; this.offsetVecor = offsetVecor; } /** * Checks if a <code>value</code> is in the CMap bfrange. * * @param value value to check for containment * @return true if the cmap falls inside one of the bfranges, false * otherwise. */ public boolean inRange(int value) { return (value >= startRange && value <= endRange); } /** * Get the mapped value of <code>value</code>. It is assumed that * inRange is called before this method is called. If the * <code>value</code> is not in the range then a value of -1 is returned * * @param value value to find corresponding CMap for * @return the mapped CMap value for <code>value</code>, -1 if the * <code>value</code> can not be mapped. */ public char[] getCMapValue(int value) { // case of float offset if (offsetVecor == null) { return new char[]{(char) (offsetValue + (value - startRange))};//value + offsetValue; } else {// case of vector offset // value - startRange will give the index in the vector of the desired // mapping value StringObject hexToken = (StringObject) offsetVecor.get(value - startRange); char[] test = convertToString(hexToken.getLiteralStringBuffer()); return test; } } } // convert to characters. private char[] convertToString(CharSequence s) { if (s == null || s.length() % 2 != 0) { throw new IllegalArgumentException(); } int len = s.length(); if (len == 1) { return new char[]{s.charAt(0)}; } char[] dest = new char[len / 2]; for (int i = 0, j = 0; i < len; i += 2, j++) { dest[j] = (char) ((s.charAt(i) << 8) | s.charAt(i + 1)); } return dest; } }