package com.tom_roush.pdfbox.pdmodel.font; import android.graphics.Path; import android.util.Log; import com.tom_roush.fontbox.cff.Type2CharString; import com.tom_roush.fontbox.cmap.CMap; import com.tom_roush.fontbox.ttf.CmapSubtable; import com.tom_roush.fontbox.ttf.GlyphData; import com.tom_roush.fontbox.ttf.OTFParser; import com.tom_roush.fontbox.ttf.OpenTypeFont; import com.tom_roush.fontbox.ttf.TTFParser; import com.tom_roush.fontbox.ttf.TrueTypeFont; import com.tom_roush.fontbox.util.BoundingBox; import com.tom_roush.pdfbox.cos.COSBase; import com.tom_roush.pdfbox.cos.COSDictionary; import com.tom_roush.pdfbox.cos.COSName; import com.tom_roush.pdfbox.cos.COSStream; import com.tom_roush.pdfbox.io.IOUtils; import com.tom_roush.pdfbox.pdmodel.common.PDStream; import com.tom_roush.pdfbox.util.Matrix; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.Map; /** * Type 2 CIDFont (TrueType). * * @author Ben Litchfield */ public class PDCIDFontType2 extends PDCIDFont { private final TrueTypeFont ttf; private final int[] cid2gid; private final Map<Integer, Integer> gid2cid; private final boolean hasIdentityCid2Gid; private final boolean isEmbedded; private final boolean isDamaged; private final CmapSubtable cmap; // may be null private Matrix fontMatrix; /** * Constructor. * * @param fontDictionary The font dictionary according to the PDF specification. */ public PDCIDFontType2(COSDictionary fontDictionary, PDType0Font parent) throws IOException { super(fontDictionary, parent); PDFontDescriptor fd = getFontDescriptor(); PDStream ff2Stream = fd.getFontFile2(); PDStream ff3Stream = fd.getFontFile3(); // Acrobat looks in FontFile too, even though it is not in the spec, see PDFBOX-2599 if (ff2Stream == null && ff3Stream == null) { ff2Stream = fd.getFontFile(); } TrueTypeFont ttfFont = null; boolean fontIsDamaged = false; if (ff2Stream != null) { try { // embedded TTFParser ttfParser = new TTFParser(true); ttfFont = ttfParser.parse(ff2Stream.createInputStream()); } catch (NullPointerException e) // TTF parser is buggy { Log.w("PdfBox-Android", "Could not read embedded TTF for font " + getBaseFont(), e); fontIsDamaged = true; } catch (IOException e) { Log.w("PdfBox-Android", "Could not read embedded TTF for font " + getBaseFont(), e); fontIsDamaged = true; } } else if (ff3Stream != null) { try { // embedded OTFParser otfParser = new OTFParser(true); OpenTypeFont otf = otfParser.parse(ff3Stream.createInputStream()); ttfFont = otf; if (otf.isPostScript()) { // todo: we need more abstraction to support CFF fonts here throw new IOException("Not implemented: OpenType font with CFF table " + getBaseFont()); } if (otf.hasLayoutTables()) { Log.e("PdfBox-Android", "OpenType Layout tables used in font " + getBaseFont() + " are not implemented in PDFBox and will be ignored"); } } catch (NullPointerException e) // TTF parser is buggy { fontIsDamaged = true; Log.w("PdfBox-Android", "Could not read embedded OTF for font " + getBaseFont(), e); } catch (IOException e) { fontIsDamaged = true; Log.w("PdfBox-Android", "Could not read embedded OTF for font " + getBaseFont(), e); } } isEmbedded = ttfFont != null; isDamaged = fontIsDamaged; if (ttfFont == null) { // find font or substitute CIDFontMapping mapping = FontMapper.getCIDFont(getBaseFont(), getFontDescriptor(), getCIDSystemInfo()); if (mapping.isCIDFont()) { ttfFont = mapping.getFont(); } else { ttfFont = (TrueTypeFont) mapping.getTrueTypeFont(); } if (mapping.isFallback()) { Log.w("PdfBox-Android", "Using fallback font " + ttfFont.getName() + " for CID-keyed TrueType font " + getBaseFont()); } } ttf = ttfFont; cmap = ttf.getUnicodeCmap(false); cid2gid = readCIDToGIDMap(); gid2cid = invert(cid2gid); COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP); hasIdentityCid2Gid = map instanceof COSName && ((COSName) map).getName().equals("Identity"); } @Override public Matrix getFontMatrix() { if (fontMatrix == null) { // 1000 upem, this is not strictly true fontMatrix = new Matrix(0.001f, 0, 0, 0.001f, 0, 0); } return fontMatrix; } @Override public BoundingBox getBoundingBox() throws IOException { return ttf.getFontBBox(); } private int[] readCIDToGIDMap() throws IOException { int[] cid2gid = null; COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP); if (map instanceof COSStream) { COSStream stream = (COSStream) map; InputStream is = stream.getUnfilteredStream(); byte[] mapAsBytes = IOUtils.toByteArray(is); IOUtils.closeQuietly(is); int numberOfInts = mapAsBytes.length / 2; cid2gid = new int[numberOfInts]; int offset = 0; for (int index = 0; index < numberOfInts; index++) { int gid = (mapAsBytes[offset] & 0xff) << 8 | mapAsBytes[offset + 1] & 0xff; cid2gid[index] = gid; offset += 2; } } return cid2gid; } private Map<Integer, Integer> invert(int[] cid2gid) { if (cid2gid == null) { return null; } Map<Integer, Integer> inverse = new HashMap<Integer, Integer>(); for (int i = 0; i < cid2gid.length; i++) { inverse.put(cid2gid[i], i); } return inverse; } @Override public int codeToCID(int code) { CMap cMap = parent.getCMap(); // Acrobat allows bad PDFs to use Unicode CMaps here instead of CID CMaps, see PDFBOX-1283 if (!cMap.hasCIDMappings() && cMap.hasUnicodeMappings()) { return cMap.toUnicode(code).codePointAt(0); // actually: code -> CID } return cMap.toCID(code); } /** * Returns the GID for the given character code. * * @param code character code * @return GID */ public int codeToGID(int code) throws IOException { if (!isEmbedded) { // The conforming reader shall select glyphs by translating characters from the // encoding specified by the predefined CMap to one of the encodings in the TrueType // font's 'cmap' table. The means by which this is accomplished are implementation- // dependent. boolean hasUnicodeMap = parent.getCMapUCS2() != null; if (cid2gid != null) { // Acrobat allows non-embedded GIDs - todo: can we find a test PDF for this? int cid = codeToCID(code); return cid2gid[cid]; } else if (hasIdentityCid2Gid || !hasUnicodeMap) { // same as above, but for the default Identity CID2GIDMap or when there is no // ToUnicode CMap to fallback to, see PDFBOX-2599 and PDFBOX-2560 // todo: can we find a test PDF for the Identity case? return codeToCID(code); } else { // fallback to the ToUnicode CMap, test with PDFBOX-1422 and PDFBOX-2560 String unicode = parent.toUnicode(code); if (unicode == null) { Log.w("PdfBox-Android", "Failed to find a character mapping for " + code + " in " + getName()); return 0; } else if (unicode.length() > 1) { Log.w("PdfBox-Android", "Trying to map multi-byte character using 'cmap', result will be poor"); } // a non-embedded font always has a cmap (otherwise FontMapper won't load it) return cmap.getGlyphId(unicode.codePointAt(0)); } } else { // If the TrueType font program is embedded, the Type 2 CIDFont dictionary shall contain // a CIDToGIDMap entry that maps CIDs to the glyph indices for the appropriate glyph // descriptions in that font program. int cid = codeToCID(code); if (cid2gid != null) { // use CIDToGIDMap if (cid < cid2gid.length) { return cid2gid[cid]; } else { return 0; } } else { // "Identity" is the default CIDToGIDMap if (cid < ttf.getNumberOfGlyphs()) { return cid; } else { // out of range CIDs map to GID 0 return 0; } } } } @Override public float getHeight(int code) throws IOException { // todo: really we want the BBox, (for text extraction:) return (ttf.getHorizontalHeader().getAscender() + -ttf.getHorizontalHeader().getDescender()) / ttf.getUnitsPerEm(); // todo: shouldn't this be the yMax/yMin? } @Override public float getWidthFromFont(int code) throws IOException { int gid = codeToGID(code); int width = ttf.getAdvanceWidth(gid); int unitsPerEM = ttf.getUnitsPerEm(); if (unitsPerEM != 1000) { width *= 1000f / unitsPerEM; } return width; } @Override public byte[] encode(int unicode) { int cid = -1; if (isEmbedded) { // embedded fonts always use CIDToGIDMap, with Identity as the default if (parent.getCMap().getName().startsWith("Identity-")) { if (cmap != null) { cid = cmap.getGlyphId(unicode); } } else { // if the CMap is predefined then there will be a UCS-2 CMap if (parent.getCMapUCS2() != null) { cid = parent.getCMapUCS2().toCID(unicode); } } // otherwise we require an explicit ToUnicode CMap if (cid == -1) { // todo: invert the ToUnicode CMap? cid = 0; } } else { // a non-embedded font always has a cmap (otherwise it we wouldn't load it) cid = cmap.getGlyphId(unicode); } if (cid == 0) { throw new IllegalArgumentException( String.format("No glyph for U+%04X in font %s", unicode, getName())); } // CID is always 2-bytes (16-bit) for TrueType return new byte[] { (byte)(cid >> 8 & 0xff), (byte)(cid & 0xff) }; } @Override public boolean isEmbedded() { return isEmbedded; } @Override public boolean isDamaged() { return isDamaged; } /** * Returns the embedded or substituted TrueType font. May be an OpenType font if the font is * not embedded. */ public TrueTypeFont getTrueTypeFont() { return ttf; } @Override public Path getPath(int code) throws IOException { if (ttf instanceof OpenTypeFont && ((OpenTypeFont) ttf).isPostScript()) { int cid = codeToCID(code); Type2CharString charstring = ((OpenTypeFont)ttf).getCFF().getFont().getType2CharString(cid); return charstring.getPath(); } else { int gid = codeToGID(code); GlyphData glyph = ttf.getGlyph().getGlyph(gid); if (glyph != null) { return glyph.getPath(); } return new Path(); } } @Override public boolean hasGlyph(int code) throws IOException { return codeToGID(code) != 0; } }