/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.tom_roush.fontbox.ttf; import android.util.Log; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; /** * A "cmap" subtable. * * @author Ben Litchfield */ public class CmapSubtable { private static final long LEAD_OFFSET = 0xD800 - (0x10000 >> 10); private static final long SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; private int platformId; private int platformEncodingId; private long subTableOffset; private int[] glyphIdToCharacterCode; private final Map<Integer, Integer> characterCodeToGlyphId = new HashMap<Integer, Integer>(); /** * This will read the required data from the stream. * * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. */ public void initData(TTFDataStream data) throws IOException { platformId = data.readUnsignedShort(); platformEncodingId = data.readUnsignedShort(); subTableOffset = data.readUnsignedInt(); } /** * This will read the required data from the stream. * * @param cmap the CMAP this encoding belongs to. * @param numGlyphs number of glyphs. * @param data The stream to read the data from. * @throws IOException If there is an error reading the data. */ public void initSubtable(CmapTable cmap, int numGlyphs, TTFDataStream data) throws IOException { data.seek(cmap.getOffset() + subTableOffset); int subtableFormat = data.readUnsignedShort(); long length; long version; if (subtableFormat < 8) { length = data.readUnsignedShort(); version = data.readUnsignedShort(); } else { // read an other UnsignedShort to read a Fixed32 data.readUnsignedShort(); length = data.readUnsignedInt(); version = data.readUnsignedInt(); } switch (subtableFormat) { case 0: processSubtype0(data); break; case 2: processSubtype2(data, numGlyphs); break; case 4: processSubtype4(data, numGlyphs); break; case 6: processSubtype6(data, numGlyphs); break; case 8: processSubtype8(data, numGlyphs); break; case 10: processSubtype10(data, numGlyphs); break; case 12: processSubtype12(data, numGlyphs); break; case 13: processSubtype13(data, numGlyphs); break; case 14: processSubtype14(data, numGlyphs); break; default: throw new IOException("Unknown cmap format:" + subtableFormat); } } /** * Reads a format 8 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype8(TTFDataStream data, int numGlyphs) throws IOException { // --- is32 is a 65536 BITS array ( = 8192 BYTES) int[] is32 = data.readUnsignedByteArray(8192); long nbGroups = data.readUnsignedInt(); // --- nbGroups shouldn't be greater than 65536 if (nbGroups > 65536) { throw new IOException("CMap ( Subtype8 ) is invalid"); } glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); // -- Read all sub header for (long i = 0; i < nbGroups; ++i) { long firstCode = data.readUnsignedInt(); long endCode = data.readUnsignedInt(); long startGlyph = data.readUnsignedInt(); // -- process simple validation if (firstCode > endCode || 0 > firstCode) { throw new IOException("Range invalid"); } for (long j = firstCode; j <= endCode; ++j) { // -- Convert the Character code in decimal if (j > Integer.MAX_VALUE) { throw new IOException("[Sub Format 8] Invalid Character code"); } int currentCharCode; if ((is32[(int) j / 8] & (1 << ((int) j % 8))) == 0) { currentCharCode = (int) j; } else { // the character code uses a 32bits format // convert it in decimal : see http://www.unicode.org/faq//utf_bom.html#utf16-4 long lead = LEAD_OFFSET + (j >> 10); long trail = 0xDC00 + (j & 0x3FF); long codepoint = (lead << 10) + trail + SURROGATE_OFFSET; if (codepoint > Integer.MAX_VALUE) { throw new IOException("[Sub Format 8] Invalid Character code"); } currentCharCode = (int) codepoint; } long glyphIndex = startGlyph + (j - firstCode); if (glyphIndex > numGlyphs || glyphIndex > Integer.MAX_VALUE) { throw new IOException("CMap contains an invalid glyph index"); } glyphIdToCharacterCode[(int) glyphIndex] = currentCharCode; characterCodeToGlyphId.put(currentCharCode, (int) glyphIndex); } } } /** * Reads a format 10 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype10(TTFDataStream data, int numGlyphs) throws IOException { long startCode = data.readUnsignedInt(); long numChars = data.readUnsignedInt(); if (numChars > Integer.MAX_VALUE) { throw new IOException("Invalid number of Characters"); } if (startCode < 0 || startCode > 0x0010FFFF || (startCode + numChars) > 0x0010FFFF || ((startCode + numChars) >= 0x0000D800 && (startCode + numChars) <= 0x0000DFFF)) { throw new IOException("Invalid Characters codes"); } } /** * Reads a format 12 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype12(TTFDataStream data, int numGlyphs) throws IOException { long nbGroups = data.readUnsignedInt(); glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); for (long i = 0; i < nbGroups; ++i) { long firstCode = data.readUnsignedInt(); long endCode = data.readUnsignedInt(); long startGlyph = data.readUnsignedInt(); if (firstCode < 0 || firstCode > 0x0010FFFF || (firstCode >= 0x0000D800 && firstCode <= 0x0000DFFF)) { throw new IOException("Invalid characters codes"); } if ((endCode > 0 && endCode < firstCode) || endCode > 0x0010FFFF || (endCode >= 0x0000D800 && endCode <= 0x0000DFFF)) { throw new IOException("Invalid characters codes"); } for (long j = 0; j <= endCode - firstCode; ++j) { long glyphIndex = startGlyph + j; if (glyphIndex >= numGlyphs) { throw new IOException("Character Code greater than Integer.MAX_VALUE"); } if (firstCode + j > 0x10FFFF) { Log.w("PdfBox-Android", "Format 12 cmap contains character beyond UCS-4"); } glyphIdToCharacterCode[(int) glyphIndex] = (int) (firstCode + j); characterCodeToGlyphId.put((int) (firstCode + j), (int) glyphIndex); } } } /** * Reads a format 13 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype13(TTFDataStream data, int numGlyphs) throws IOException { long nbGroups = data.readUnsignedInt(); for (long i = 0; i < nbGroups; ++i) { long firstCode = data.readUnsignedInt(); long endCode = data.readUnsignedInt(); long glyphId = data.readUnsignedInt(); if (glyphId > numGlyphs) { Log.w("PdfBox-Android", "Format 13 cmap contains an invalid glyph index"); break; } if (firstCode < 0 || firstCode > 0x0010FFFF || (firstCode >= 0x0000D800 && firstCode <= 0x0000DFFF)) { throw new IOException("Invalid Characters codes"); } if ((endCode > 0 && endCode < firstCode) || endCode > 0x0010FFFF || (endCode >= 0x0000D800 && endCode <= 0x0000DFFF)) { throw new IOException("Invalid Characters codes"); } for (long j = 0; j <= endCode - firstCode; ++j) { if (firstCode + j > Integer.MAX_VALUE) { throw new IOException("Character Code greater than Integer.MAX_VALUE"); } if (firstCode + j > 0x10FFFF) { Log.w("PdfBox-Android", "Format 13 cmap contains character beyond UCS-4"); } glyphIdToCharacterCode[(int) glyphId] = (int) (firstCode + j); characterCodeToGlyphId.put((int) (firstCode + j), (int) glyphId); } } } /** * Reads a format 14 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype14(TTFDataStream data, int numGlyphs) throws IOException { // Unicode Variation Sequences (UVS) // see http://blogs.adobe.com/CCJKType/2013/05/opentype-cmap-table-ramblings.html Log.w("PdfBox-Android", "Format 14 cmap table is not supported and will be ignored"); } /** * Reads a format 6 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype6(TTFDataStream data, int numGlyphs) throws IOException { int firstCode = data.readUnsignedShort(); int entryCount = data.readUnsignedShort(); Map<Integer, Integer> tmpGlyphToChar = new HashMap<Integer, Integer>(); int[] glyphIdArray = data.readUnsignedShortArray(entryCount); for (int i = 0; i < entryCount; i++) { tmpGlyphToChar.put(glyphIdArray[i], firstCode + i); characterCodeToGlyphId.put((firstCode + i), glyphIdArray[i]); } glyphIdToCharacterCode = newGlyphIdToCharacterCode( Collections.max(tmpGlyphToChar.keySet()) + 1); for (Entry<Integer, Integer> entry : tmpGlyphToChar.entrySet()) { // link the glyphId with the right character code glyphIdToCharacterCode[entry.getKey()] = entry.getValue(); } } /** * Reads a format 4 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype4(TTFDataStream data, int numGlyphs) throws IOException { int segCountX2 = data.readUnsignedShort(); int segCount = segCountX2 / 2; int searchRange = data.readUnsignedShort(); int entrySelector = data.readUnsignedShort(); int rangeShift = data.readUnsignedShort(); int[] endCount = data.readUnsignedShortArray(segCount); int reservedPad = data.readUnsignedShort(); int[] startCount = data.readUnsignedShortArray(segCount); int[] idDelta = data.readUnsignedShortArray(segCount); int[] idRangeOffset = data.readUnsignedShortArray(segCount); Map<Integer, Integer> tmpGlyphToChar = new HashMap<Integer, Integer>(); long currentPosition = data.getCurrentPosition(); for (int i = 0; i < segCount; i++) { int start = startCount[i]; int end = endCount[i]; int delta = idDelta[i]; int rangeOffset = idRangeOffset[i]; if (start != 65535 && end != 65535) { for (int j = start; j <= end; j++) { if (rangeOffset == 0) { int glyphid = (j + delta) % 65536; tmpGlyphToChar.put(glyphid, j); characterCodeToGlyphId.put(j, glyphid); } else { long glyphOffset = currentPosition + ((rangeOffset / 2) + (j - start) + (i - segCount)) * 2; data.seek(glyphOffset); int glyphIndex = data.readUnsignedShort(); if (glyphIndex != 0) { glyphIndex += delta; glyphIndex %= 65536; if (!tmpGlyphToChar.containsKey(glyphIndex)) { tmpGlyphToChar.put(glyphIndex, j); characterCodeToGlyphId.put(j, glyphIndex); } } } } } } /* * this is the final result key=glyphId, value is character codes Create an array that contains MAX(GlyphIds) * element, or -1 */ if (tmpGlyphToChar.isEmpty()) { Log.w("PdfBox-Android", "cmap format 4 subtable is empty"); return; } glyphIdToCharacterCode = newGlyphIdToCharacterCode(Collections.max(tmpGlyphToChar.keySet()) + 1); for (Entry<Integer, Integer> entry : tmpGlyphToChar.entrySet()) { // link the glyphId with the right character code glyphIdToCharacterCode[entry.getKey()] = entry.getValue(); } } /** * Read a format 2 subtable. * * @param data the data stream of the to be parsed ttf font * @param numGlyphs number of glyphs to be read * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype2(TTFDataStream data, int numGlyphs) throws IOException { int[] subHeaderKeys = new int[256]; // ---- keep the Max Index of the SubHeader array to know its length int maxSubHeaderIndex = 0; for (int i = 0; i < 256; i++) { subHeaderKeys[i] = data.readUnsignedShort(); maxSubHeaderIndex = Math.max(maxSubHeaderIndex, subHeaderKeys[i] / 8); } // ---- Read all SubHeaders to avoid useless seek on DataSource SubHeader[] subHeaders = new SubHeader[maxSubHeaderIndex + 1]; for (int i = 0; i <= maxSubHeaderIndex; ++i) { int firstCode = data.readUnsignedShort(); int entryCount = data.readUnsignedShort(); short idDelta = data.readSignedShort(); int idRangeOffset = data.readUnsignedShort() - (maxSubHeaderIndex + 1 - i - 1) * 8 - 2; subHeaders[i] = new SubHeader(firstCode, entryCount, idDelta, idRangeOffset); } long startGlyphIndexOffset = data.getCurrentPosition(); glyphIdToCharacterCode = newGlyphIdToCharacterCode(numGlyphs); for (int i = 0; i <= maxSubHeaderIndex; ++i) { SubHeader sh = subHeaders[i]; int firstCode = sh.getFirstCode(); int idRangeOffset = sh.getIdRangeOffset(); int idDelta = sh.getIdDelta(); int entryCount = sh.getEntryCount(); data.seek(startGlyphIndexOffset + idRangeOffset); for (int j = 0; j < entryCount; ++j) { // ---- compute the Character Code int charCode = i; charCode = (charCode << 8) + (firstCode + j); // ---- Go to the CharacterCOde position in the Sub Array // of the glyphIndexArray // glyphIndexArray contains Unsigned Short so add (j * 2) bytes // at the index position int p = data.readUnsignedShort(); // ---- compute the glyphIndex if (p > 0) { p = (p + idDelta) % 65536; } glyphIdToCharacterCode[p] = charCode; characterCodeToGlyphId.put(charCode, p); } } } /** * Initialize the CMapEntry when it is a subtype 0. * * @param data the data stream of the to be parsed ttf font * @throws IOException If there is an error parsing the true type font. */ protected void processSubtype0(TTFDataStream data) throws IOException { byte[] glyphMapping = data.read(256); glyphIdToCharacterCode = newGlyphIdToCharacterCode(256); for (int i = 0; i < glyphMapping.length; i++) { int glyphIndex = (glyphMapping[i] + 256) % 256; glyphIdToCharacterCode[glyphIndex] = i; characterCodeToGlyphId.put(i, glyphIndex); } } /** * Workaround for the fact that glyphIdToCharacterCode doesn't distinguish between * missing character codes and code 0. */ private int[] newGlyphIdToCharacterCode(int size) { int[] gidToCode = new int[size]; Arrays.fill(gidToCode, -1); return gidToCode; } /** * @return Returns the platformEncodingId. */ public int getPlatformEncodingId() { return platformEncodingId; } /** * @param platformEncodingIdValue The platformEncodingId to set. */ public void setPlatformEncodingId(int platformEncodingIdValue) { platformEncodingId = platformEncodingIdValue; } /** * @return Returns the platformId. */ public int getPlatformId() { return platformId; } /** * @param platformIdValue The platformId to set. */ public void setPlatformId(int platformIdValue) { platformId = platformIdValue; } /** * Returns the GlyphId linked with the given character code. * * @param characterCode the given character code to be mapped * @return glyphId the corresponding glyph id for the given character code */ public int getGlyphId(int characterCode) { Integer glyphId = characterCodeToGlyphId.get(characterCode); return glyphId == null ? 0 : glyphId; } /** * Returns the character code for the given GID, or null if there is none. * * @param gid glyph id * @return character code */ public Integer getCharacterCode(int gid) { if (gid < 0 || gid >= glyphIdToCharacterCode.length) { return null; } // workaround for the fact that glyphIdToCharacterCode doesn't distinguish between // missing character codes and code 0. int code = glyphIdToCharacterCode[gid]; if (code == -1) { return null; } return code; } @Override public String toString() { return "{" + getPlatformId() + " " + getPlatformEncodingId() + "}"; } /** * * Class used to manage CMap - Format 2. * */ private class SubHeader { private final int firstCode; private final int entryCount; /** * used to compute the GlyphIndex : P = glyphIndexArray.SubArray[pos] GlyphIndex = P + idDelta % 65536. */ private final short idDelta; /** * Number of bytes to skip to reach the firstCode in the glyphIndexArray. */ private final int idRangeOffset; private SubHeader(int firstCodeValue, int entryCountValue, short idDeltaValue, int idRangeOffsetValue) { firstCode = firstCodeValue; entryCount = entryCountValue; idDelta = idDeltaValue; idRangeOffset = idRangeOffsetValue; } /** * @return the firstCode */ private int getFirstCode() { return firstCode; } /** * @return the entryCount */ private int getEntryCount() { return entryCount; } /** * @return the idDelta */ private short getIdDelta() { return idDelta; } /** * @return the idRangeOffset */ private int getIdRangeOffset() { return idRangeOffset; } } }