/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.pdf;

import java.io.IOException;
import java.io.Writer;

/**
 * Class representing ToUnicode CMaps.
 * Here are some documentation resources:
 * <ul>
 * <li>PDF Reference, Second Edition, Section 5.6.4, for general information
 * about CMaps in PDF Files.</li>
 * <li>PDF Reference, Second Edition, Section 5.9, for specific information
 * about ToUnicodeCMaps in PDF Files.</li>
 * <li>
 * <a href="http://partners.adobe.com/asn/developer/pdfs/tn/5411.ToUnicode.pdf">
 * Adobe Technical Note #5411, "ToUnicode Mapping File Tutorial"</a>.</li>
 * </ul>
 */
public class PDFToUnicodeCMap extends PDFCMap {

    /**
     * The array of Unicode characters ordered by character code
     * (maps from character code to Unicode code point).
     * May be <code>null</code>, in which case no mappings are written.
     */
    protected char[] unicodeCharMap;

    /** True if character codes are written as one byte, false for two bytes. */
    private boolean singleByte;

    /**
     * Constructor.
     *
     * @param unicodeCharMap An array of Unicode characters ordered by character code
     *      (maps from character code to Unicode code point). May be <code>null</code>,
     *      in which case the CMap is written without any bfchar/bfrange entries.
     * @param name One of the registered names found in Table 5.14 in PDF
     * Reference, Second Edition.
     * @param sysInfo The attributes of the character collection of the CIDFont.
     * @param singleByte true for single-byte, false for double-byte
     * @throws IllegalArgumentException if <code>singleByte</code> is true and the map
     *      contains more than 256 characters
     */
    public PDFToUnicodeCMap(char[] unicodeCharMap, String name, PDFCIDSystemInfo sysInfo,
            boolean singleByte) {
        super(name, sysInfo);
        // A null map is tolerated by writeBFEntries(), so it must not blow up here either.
        if (singleByte && unicodeCharMap != null && unicodeCharMap.length > 256) {
            throw new IllegalArgumentException("unicodeCharMap may not contain more than"
                    + " 256 characters for single-byte encodings");
        }
        this.unicodeCharMap = unicodeCharMap;
        this.singleByte = singleByte;
    }

    /** {@inheritDoc} */
    protected CMapBuilder createCMapBuilder(Writer writer) {
        return new ToUnicodeCMapBuilder(writer);
    }

    /**
     * Builder that serializes the enclosing ToUnicode CMap. It reads the enclosing
     * instance's character map and single-byte flag, so it is deliberately an inner
     * (non-static) class.
     */
    class ToUnicodeCMapBuilder extends CMapBuilder {

        /**
         * Creates a builder writing to the given Writer.
         * @param writer the Writer the CMap is written to
         */
        public ToUnicodeCMapBuilder(Writer writer) {
            super(writer, null);
        }

        /**
         * Writes the CMap to a Writer.
         * @throws IOException if an I/O error occurs
         */
        public void writeCMap() throws IOException {
            writeCIDInit();
            writeCIDSystemInfo("Adobe", "UCS", 0);
            writeName("Adobe-Identity-UCS");
            writeType("2");
            writeCodeSpaceRange(singleByte);
            writeBFEntries();
            writeWrapUp();
        }

        /**
         * Writes the character mappings for this font: first the stand-alone
         * bfchar entries, then the bfrange entries. Nothing is written if the
         * enclosing CMap has no character map.
         * @throws IOException if an I/O error occurs
         */
        protected void writeBFEntries() throws IOException {
            if (unicodeCharMap != null) {
                writeBFCharEntries(unicodeCharMap);
                writeBFRangeEntries(unicodeCharMap);
            }
        }

        /**
         * Writes the entries for single characters of a base font (only characters which
         * cannot be expressed as part of a character range).
         * @param charArray all the characters to map
         * @throws IOException if an I/O error occurs
         */
        protected void writeBFCharEntries(char[] charArray) throws IOException {
            // First pass: count the stand-alone characters so that each section
            // header can announce its exact number of entries.
            int totalEntries = 0;
            for (int i = 0; i < charArray.length; i++) {
                if (!partOfRange(charArray, i)) {
                    totalEntries++;
                }
            }
            if (totalEntries < 1) {
                return;
            }
            int remainingEntries = totalEntries;
            int charIndex = 0;
            do {
                /* Limited to 100 entries in each section */
                int entriesThisSection = Math.min(remainingEntries, 100);
                writer.write(entriesThisSection + " beginbfchar\n");
                for (int i = 0; i < entriesThisSection; i++) {
                    /* Go to the next char not in a range */
                    // Cannot run off the array: the count above guarantees that
                    // enough stand-alone characters remain.
                    while (partOfRange(charArray, charIndex)) {
                        charIndex++;
                    }
                    writer.write("<" + padCharIndex(charIndex) + "> ");
                    writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
                            + ">\n");
                    charIndex++;
                }
                remainingEntries -= entriesThisSection;
                writer.write("endbfchar\n");
            } while (remainingEntries > 0);
        }

        /**
         * Formats a character code as hex, zero-padded to two digits for single-byte
         * encodings and four digits otherwise.
         * @param charIndex the character code
         * @return the padded hex representation
         */
        private String padCharIndex(int charIndex) {
            return padHexString(Integer.toHexString(charIndex), (singleByte ? 2 : 4));
        }

        /**
         * Writes the entries for character ranges for a base font.
         * @param charArray all the characters to map
         * @throws IOException if an I/O error occurs
         */
        protected void writeBFRangeEntries(char[] charArray) throws IOException {
            // First pass: count the ranges so that each section header can
            // announce its exact number of entries.
            int totalEntries = 0;
            for (int i = 0; i < charArray.length; i++) {
                if (startOfRange(charArray, i)) {
                    totalEntries++;
                }
            }
            if (totalEntries < 1) {
                return;
            }
            int remainingEntries = totalEntries;
            int charIndex = 0;
            do {
                /* Limited to 100 entries in each section */
                int entriesThisSection = Math.min(remainingEntries, 100);
                writer.write(entriesThisSection + " beginbfrange\n");
                for (int i = 0; i < entriesThisSection; i++) {
                    /* Go to the next start of a range */
                    // Cannot run off the array: the count above guarantees that
                    // enough range starts remain.
                    while (!startOfRange(charArray, charIndex)) {
                        charIndex++;
                    }
                    writer.write("<" + padCharIndex(charIndex) + "> ");
                    writer.write("<" + padCharIndex(endOfRange(charArray, charIndex)) + "> ");
                    writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
                            + ">\n");
                    charIndex++;
                }
                remainingEntries -= entriesThisSection;
                writer.write("endbfrange\n");
            } while (remainingEntries > 0);
        }

        /**
         * Find the end of the current range.
         * @param charArray The array which is being tested.
         * @param startOfRange The index to the array element that is the start of
         * the range.
         * @return The index to the element that is the end of the range.
         */
        private int endOfRange(char[] charArray, int startOfRange) {
            int i = startOfRange;
            while (i < charArray.length - 1 && sameRangeEntryAsNext(charArray, i)) {
                i++;
            }
            return i;
        }

        /**
         * Determine whether this array element should be part of a bfchar entry or
         * a bfrange entry.
         * @param charArray The array to be tested.
         * @param arrayIndex The index to the array element to be tested.
         * @return True if this array element should be included in a range.
         */
        private boolean partOfRange(char[] charArray, int arrayIndex) {
            // A range needs at least two elements.
            if (charArray.length < 2) {
                return false;
            }
            // First element: only its successor can make it part of a range.
            if (arrayIndex == 0) {
                return sameRangeEntryAsNext(charArray, 0);
            }
            // Last element: only its predecessor can make it part of a range.
            if (arrayIndex == charArray.length - 1) {
                return sameRangeEntryAsNext(charArray, arrayIndex - 1);
            }
            // Interior element: in a range if it continues one or starts one.
            if (sameRangeEntryAsNext(charArray, arrayIndex - 1)) {
                return true;
            }
            if (sameRangeEntryAsNext(charArray, arrayIndex)) {
                return true;
            }
            return false;
        }

        /**
         * Determine whether two adjacent mappings can be written in the same bfrange
         * entry.
         * @param charArray The array to be tested.
         * @param firstItem The first of the two items in the array to be tested.
         * The second item is firstItem + 1.
         * @return True if all of the following hold: 1) the Unicode value of the next
         * item is sequential with this one, 2) both character codes (array indices)
         * share the same high-order byte, and 3) stepping to the next Unicode value
         * does not overflow the low-order byte of the destination value.
         */
        private boolean sameRangeEntryAsNext(char[] charArray, int firstItem) {
            if (charArray[firstItem] + 1 != charArray[firstItem + 1]) {
                return false;
            }
            if (firstItem / 256 != (firstItem + 1) / 256) {
                return false;
            }
            /*
             * In a bfrange entry only the last byte of the destination string is
             * incremented; the PDF specification leaves the mapping undefined if
             * that byte would be incremented past 255. A destination ending in
             * 0xFF therefore has to terminate the range.
             */
            if ((charArray[firstItem] & 0xFF) == 0xFF) {
                return false;
            }
            return true;
        }

        /**
         * Determine whether this array element should be the start of a bfrange
         * entry.
         * @param charArray The array to be tested.
         * @param arrayIndex The index to the array element to be tested.
         * @return True if this array element is the beginning of a range.
         */
        private boolean startOfRange(char[] charArray, int arrayIndex) {
            // Can't be the start of a range if not part of a range.
            if (!partOfRange(charArray, arrayIndex)) {
                return false;
            }
            // If first element in the array, must be start of a range
            if (arrayIndex == 0) {
                return true;
            }
            // If last element in the array, cannot be start of a range
            if (arrayIndex == charArray.length - 1) {
                return false;
            }
            /*
             * If part of same range as the previous element is, cannot be start
             * of range.
             */
            if (sameRangeEntryAsNext(charArray, arrayIndex - 1)) {
                return false;
            }
            // Otherwise, this is start of a range.
            return true;
        }

        /**
         * Prepends the input string with a sufficient number of "0" characters to
         * get the returned string to be numChars length. Input that is already
         * numChars long or longer is returned unchanged.
         * @param input The input string.
         * @param numChars The minimum characters in the output string.
         * @return The padded string.
         */
        private String padHexString(String input, int numChars) {
            int length = input.length();
            if (length >= numChars) {
                return input;
            }
            StringBuffer returnString = new StringBuffer(numChars);
            for (int i = 1; i <= numChars - length; i++) {
                returnString.append("0");
            }
            returnString.append(input);
            return returnString.toString();
        }

    }

}