/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.fontbox.ttf; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * A "cmap" subtable. * * @author Ben Litchfield */ public class CmapSubtable { private static final Log LOG = LogFactory.getLog(CmapSubtable.class); private static final long LEAD_OFFSET = 0xD800 - (0x10000 >> 10); private static final long SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; private int platformId; private int platformEncodingId; private long subTableOffset; private int[] glyphIdToCharacterCode; private Map<Integer, List<Integer>> glyphIdToCharacterCodeMultiple = new HashMap<>(); private Map<Integer, Integer> characterCodeToGlyphId; /** * This will read the required data from the stream. * * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. */ public void initData(TTFDataStream data) throws IOException { platformId = data.readUnsignedShort(); platformEncodingId = data.readUnsignedShort(); subTableOffset = data.readUnsignedInt(); } /** * This will read the required data from the stream. * * @param cmap the CMAP this encoding belongs to. * @param numGlyphs number of glyphs. * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. */ public void initSubtable(CmapTable cmap, int numGlyphs, TTFDataStream data) throws IOException { data.seek(cmap.getOffset() + subTableOffset); int subtableFormat = data.readUnsignedShort(); long length; long version; if (subtableFormat < 8) { length = data.readUnsignedShort(); version = data.readUnsignedShort(); } else { // read an other UnsignedShort to read a Fixed32 data.readUnsignedShort(); length = data.readUnsignedInt(); version = data.readUnsignedInt(); } switch (subtableFormat) { case 0: processSubtype0(data); break; case 2: processSubtype2(data, numGlyphs); break; case 4: processSubtype4(data, numGlyphs); break; case 6: processSubtype6(data, numGlyphs); break; case 8: processSubtype8(data, numGlyphs); break; case 10: processSubtype10(data, numGlyphs); break; case 12: processSubtype12(data, numGlyphs); break; case 13: processSubtype13(data, numGlyphs); break; case 14: processSubtype14(data, numGlyphs); break; default: throw new IOException("Unknown cmap format:" + subtableFormat); } } /** * Reads a format 8 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype8(TTFDataStream data, int numGlyphs) throws IOException { // --- is32 is a 65536 BITS array ( = 8192 BYTES) int[] is32 = data.readUnsignedByteArray(8192); long nbGroups = data.readUnsignedInt(); // --- nbGroups shouldn't be greater than 65536 if (nbGroups > 65536) { throw new IOException("CMap ( Subtype8 ) is invalid"); } glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); characterCodeToGlyphId = new HashMap<>(numGlyphs); // -- Read all sub header for (long i = 0; i < nbGroups; ++i) { long firstCode = data.readUnsignedInt(); long endCode = data.readUnsignedInt(); long startGlyph = data.readUnsignedInt(); // -- process simple validation if (firstCode > endCode || 0 > firstCode) { throw new IOException("Range invalid"); } for (long j = firstCode; j <= endCode; ++j) { // -- Convert the Character code in decimal if (j > Integer.MAX_VALUE) { throw new IOException("[Sub Format 8] Invalid Character code"); } int currentCharCode; if ((is32[(int) j / 8] & (1 << ((int) j % 8))) == 0) { currentCharCode = (int) j; } else { // the character code uses a 32bits format // convert it in decimal : see http://www.unicode.org/faq//utf_bom.html#utf16-4 long lead = LEAD_OFFSET + (j >> 10); long trail = 0xDC00 + (j & 0x3FF); long codepoint = (lead << 10) + trail + SURROGATE_OFFSET; if (codepoint > Integer.MAX_VALUE) { throw new IOException("[Sub Format 8] Invalid Character code"); } currentCharCode = (int) codepoint; } long glyphIndex = startGlyph + (j - firstCode); if (glyphIndex > numGlyphs || glyphIndex > Integer.MAX_VALUE) { throw new IOException("CMap contains an invalid glyph index"); } glyphIdToCharacterCode[(int) glyphIndex] = currentCharCode; characterCodeToGlyphId.put(currentCharCode, (int) glyphIndex); } } } /** * Reads a format 10 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype10(TTFDataStream data, int numGlyphs) throws IOException { long startCode = data.readUnsignedInt(); long numChars = data.readUnsignedInt(); if (numChars > Integer.MAX_VALUE) { throw new IOException("Invalid number of Characters"); } if (startCode < 0 || startCode > 0x0010FFFF || (startCode + numChars) > 0x0010FFFF || ((startCode + numChars) >= 0x0000D800 && (startCode + numChars) <= 0x0000DFFF)) { throw new IOException("Invalid Characters codes"); } } /** * Reads a format 12 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype12(TTFDataStream data, int numGlyphs) throws IOException { long nbGroups = data.readUnsignedInt(); glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); characterCodeToGlyphId = new HashMap<>(numGlyphs); for (long i = 0; i < nbGroups; ++i) { long firstCode = data.readUnsignedInt(); long endCode = data.readUnsignedInt(); long startGlyph = data.readUnsignedInt(); if (firstCode < 0 || firstCode > 0x0010FFFF || firstCode >= 0x0000D800 && firstCode <= 0x0000DFFF) { throw new IOException("Invalid characters codes"); } if (endCode > 0 && endCode < firstCode || endCode > 0x0010FFFF || endCode >= 0x0000D800 && endCode <= 0x0000DFFF) { throw new IOException("Invalid characters codes"); } for (long j = 0; j <= endCode - firstCode; ++j) { long glyphIndex = startGlyph + j; if (glyphIndex >= numGlyphs) { LOG.warn("Format 12 cmap contains an invalid glyph index"); break; } if (firstCode + j > 0x10FFFF) { LOG.warn("Format 12 cmap contains character beyond UCS-4"); } glyphIdToCharacterCode[(int) glyphIndex] = (int) (firstCode + j); characterCodeToGlyphId.put((int) (firstCode + j), (int) glyphIndex); } } } /** * Reads a format 13 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype13(TTFDataStream data, int numGlyphs) throws IOException { long nbGroups = data.readUnsignedInt(); characterCodeToGlyphId = new HashMap<>(numGlyphs); for (long i = 0; i < nbGroups; ++i) { long firstCode = data.readUnsignedInt(); long endCode = data.readUnsignedInt(); long glyphId = data.readUnsignedInt(); if (glyphId > numGlyphs) { LOG.warn("Format 13 cmap contains an invalid glyph index"); break; } if (firstCode < 0 || firstCode > 0x0010FFFF || (firstCode >= 0x0000D800 && firstCode <= 0x0000DFFF)) { throw new IOException("Invalid Characters codes"); } if ((endCode > 0 && endCode < firstCode) || endCode > 0x0010FFFF || (endCode >= 0x0000D800 && endCode <= 0x0000DFFF)) { throw new IOException("Invalid Characters codes"); } for (long j = 0; j <= endCode - firstCode; ++j) { if (firstCode + j > Integer.MAX_VALUE) { throw new IOException("Character Code greater than Integer.MAX_VALUE"); } if (firstCode + j > 0x10FFFF) { LOG.warn("Format 13 cmap contains character beyond UCS-4"); } glyphIdToCharacterCode[(int) glyphId] = (int) (firstCode + j); characterCodeToGlyphId.put((int) (firstCode + j), (int) glyphId); } } } /** * Reads a format 14 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype14(TTFDataStream data, int numGlyphs) throws IOException { // Unicode Variation Sequences (UVS) // see http://blogs.adobe.com/CCJKType/2013/05/opentype-cmap-table-ramblings.html LOG.warn("Format 14 cmap table is not supported and will be ignored"); } /** * Reads a format 6 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype6(TTFDataStream data, int numGlyphs) throws IOException { int firstCode = data.readUnsignedShort(); int entryCount = data.readUnsignedShort(); // skip emtpy tables if (entryCount == 0) { return; } characterCodeToGlyphId = new HashMap<>(numGlyphs); int[] glyphIdArray = data.readUnsignedShortArray(entryCount); int maxGlyphId = 0; for (int i = 0; i < entryCount; i++) { maxGlyphId = Math.max(maxGlyphId, glyphIdArray[i]); characterCodeToGlyphId.put(firstCode + i, glyphIdArray[i]); } buildGlyphIdToCharacterCodeLookup(maxGlyphId); } /** * Reads a format 4 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype4(TTFDataStream data, int numGlyphs) throws IOException { int segCountX2 = data.readUnsignedShort(); int segCount = segCountX2 / 2; int searchRange = data.readUnsignedShort(); int entrySelector = data.readUnsignedShort(); int rangeShift = data.readUnsignedShort(); int[] endCount = data.readUnsignedShortArray(segCount); int reservedPad = data.readUnsignedShort(); int[] startCount = data.readUnsignedShortArray(segCount); int[] idDelta = data.readUnsignedShortArray(segCount); long idRangeOffsetPosition = data.getCurrentPosition(); int[] idRangeOffset = data.readUnsignedShortArray(segCount); characterCodeToGlyphId = new HashMap<>(numGlyphs); int maxGlyphId = 0; for (int i = 0; i < segCount; i++) { int start = startCount[i]; int end = endCount[i]; int delta = idDelta[i]; int rangeOffset = idRangeOffset[i]; long segmentRangeOffset = idRangeOffsetPosition + (i * 2) + rangeOffset; if (start != 65535 && end != 65535) { for (int j = start; j <= end; j++) { if (rangeOffset == 0) { int glyphid = (j + delta) & 0xFFFF; maxGlyphId = Math.max(glyphid, maxGlyphId); characterCodeToGlyphId.put(j, glyphid); } else { long glyphOffset = segmentRangeOffset + ((j - start) * 2); data.seek(glyphOffset); int glyphIndex = data.readUnsignedShort(); if (glyphIndex != 0) { glyphIndex = (glyphIndex + delta) & 0xFFFF; maxGlyphId = Math.max(glyphIndex, maxGlyphId); characterCodeToGlyphId.put(j, glyphIndex); } } } } } /* * this is the final result key=glyphId, value is character codes Create an array that contains MAX(GlyphIds) * element, or -1 */ if (characterCodeToGlyphId.isEmpty()) { LOG.warn("cmap format 4 subtable is empty"); return; } buildGlyphIdToCharacterCodeLookup(maxGlyphId); } private void buildGlyphIdToCharacterCodeLookup(int maxGlyphId) { glyphIdToCharacterCode = newGlyphIdToCharacterCode(maxGlyphId + 1); for (Entry<Integer, Integer> entry : characterCodeToGlyphId.entrySet()) { if (glyphIdToCharacterCode[entry.getValue()] == -1) { // add new value to the array glyphIdToCharacterCode[entry.getValue()] = entry.getKey(); } else { // there is already a mapping for the given glyphId List<Integer> mappedValues = glyphIdToCharacterCodeMultiple.get(entry.getValue()); if (mappedValues == null) { mappedValues = new ArrayList<>(); glyphIdToCharacterCodeMultiple.put(entry.getValue(), mappedValues); mappedValues.add(glyphIdToCharacterCode[entry.getValue()]); // mark value as multiple mapping glyphIdToCharacterCode[entry.getValue()] = Integer.MIN_VALUE; } mappedValues.add(entry.getKey()); } } } /** * Read a format 2 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype2(TTFDataStream data, int numGlyphs) throws IOException { int[] subHeaderKeys = new int[256]; // ---- keep the Max Index of the SubHeader array to know its length int maxSubHeaderIndex = 0; for (int i = 0; i < 256; i++) { subHeaderKeys[i] = data.readUnsignedShort(); maxSubHeaderIndex = Math.max(maxSubHeaderIndex, subHeaderKeys[i] / 8); } // ---- Read all SubHeaders to avoid useless seek on DataSource SubHeader[] subHeaders = new SubHeader[maxSubHeaderIndex + 1]; for (int i = 0; i <= maxSubHeaderIndex; ++i) { int firstCode = data.readUnsignedShort(); int entryCount = data.readUnsignedShort(); short idDelta = data.readSignedShort(); int idRangeOffset = data.readUnsignedShort() - (maxSubHeaderIndex + 1 - i - 1) * 8 - 2; subHeaders[i] = new SubHeader(firstCode, entryCount, idDelta, idRangeOffset); } long startGlyphIndexOffset = data.getCurrentPosition(); glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); characterCodeToGlyphId = new HashMap<>(numGlyphs); for (int i = 0; i <= maxSubHeaderIndex; ++i) { SubHeader sh = subHeaders[i]; int firstCode = sh.getFirstCode(); int idRangeOffset = sh.getIdRangeOffset(); int idDelta = sh.getIdDelta(); int entryCount = sh.getEntryCount(); data.seek(startGlyphIndexOffset + idRangeOffset); for (int j = 0; j < entryCount; ++j) { // ---- compute the Character Code int charCode = i; charCode = (charCode << 8) + (firstCode + j); // ---- Go to the CharacterCOde position in the Sub Array // of the glyphIndexArray // glyphIndexArray contains Unsigned Short so add (j * 2) bytes // at the index position int p = data.readUnsignedShort(); // ---- compute the glyphIndex if (p > 0) { p = (p + idDelta) % 65536; } if (p >= numGlyphs) { LOG.warn("glyphId " + p + " for charcode " + charCode + " ignored, numGlyphs is " + numGlyphs); continue; } glyphIdToCharacterCode[p] = charCode; characterCodeToGlyphId.put(charCode, p); } } } /** * Initialize the CMapEntry when it is a subtype 0. * * @param data the data stream of the to be parsed ttf font * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype0(TTFDataStream data) throws IOException { byte[] glyphMapping = data.read(256); glyphIdToCharacterCode = newGlyphIdToCharacterCode(256); characterCodeToGlyphId = new HashMap<>(glyphMapping.length); for (int i = 0; i < glyphMapping.length; i++) { int glyphIndex = glyphMapping[i] & 0xFF; glyphIdToCharacterCode[glyphIndex] = i; characterCodeToGlyphId.put(i, glyphIndex); } } /** * Workaround for the fact that glyphIdToCharacterCode doesn't distinguish between * missing character codes and code 0. */ private int[] newGlyphIdToCharacterCode(int size) { int[] gidToCode = new int[size]; Arrays.fill(gidToCode, -1); return gidToCode; } /** * @return Returns the platformEncodingId. */ public int getPlatformEncodingId() { return platformEncodingId; } /** * @param platformEncodingIdValue The platformEncodingId to set. */ public void setPlatformEncodingId(int platformEncodingIdValue) { platformEncodingId = platformEncodingIdValue; } /** * @return Returns the platformId. */ public int getPlatformId() { return platformId; } /** * @param platformIdValue The platformId to set. */ public void setPlatformId(int platformIdValue) { platformId = platformIdValue; } /** * Returns the GlyphId linked with the given character code. * * @param characterCode the given character code to be mapped * @return glyphId the corresponding glyph id for the given character code */ public int getGlyphId(int characterCode) { Integer glyphId = characterCodeToGlyphId.get(characterCode); return glyphId == null ? 0 : glyphId; } /** * Returns the character code for the given GID, or null if there is none. * * @param gid glyph id * @return character code * * @deprecated the mapping may be ambiguous, see {@link #getCharCodes(int)}. The first mapped value is returned by * default. */ public Integer getCharacterCode(int gid) { int code = getCharCode(gid); if (code == -1) { return null; } // ambiguous mapping if (code == Integer.MIN_VALUE) { List<Integer> mappedValues = glyphIdToCharacterCodeMultiple.get(gid); if (mappedValues != null) { // use the first mapping return mappedValues.get(0); } } return code; } private int getCharCode(int gid) { if (gid < 0 || gid >= glyphIdToCharacterCode.length) { return -1; } return glyphIdToCharacterCode[gid]; } /** * Returns all possible character codes for the given gid, or null if there is none. * * @param gid glyph id * @return a list with all character codes the given gid maps to * */ public List<Integer> getCharCodes(int gid) { int code = getCharCode(gid); if (code == -1) { return null; } List<Integer> codes = null; if (code == Integer.MIN_VALUE) { List<Integer> mappedValues = glyphIdToCharacterCodeMultiple.get(gid); if (mappedValues != null) { codes = new ArrayList<>(mappedValues); // sort the list to provide a reliable order Collections.sort(codes); } } else { codes = new ArrayList<>(1); codes.add(code); } return codes; } @Override public String toString() { return "{" + getPlatformId() + " " + getPlatformEncodingId() + "}"; } /** * * Class used to manage CMap - Format 2. * */ private static class SubHeader { private final int firstCode; private final int entryCount; /** * used to compute the GlyphIndex : P = glyphIndexArray.SubArray[pos] GlyphIndex = P + idDelta % 65536. */ private final short idDelta; /** * Number of bytes to skip to reach the firstCode in the glyphIndexArray. */ private final int idRangeOffset; private SubHeader(int firstCodeValue, int entryCountValue, short idDeltaValue, int idRangeOffsetValue) { firstCode = firstCodeValue; entryCount = entryCountValue; idDelta = idDeltaValue; idRangeOffset = idRangeOffsetValue; } /** * @return the firstCode */ private int getFirstCode() { return firstCode; } /** * @return the entryCount */ private int getEntryCount() { return entryCount; } /** * @return the idDelta */ private short getIdDelta() { return idDelta; } /** * @return the idRangeOffset */ private int getIdRangeOffset() { return idRangeOffset; } } }