/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.tom_roush.pdfbox.pdmodel.font;

import android.graphics.Path;
import android.util.Log;

import com.tom_roush.fontbox.FontBoxFont;
import com.tom_roush.pdfbox.cos.COSBase;
import com.tom_roush.pdfbox.cos.COSDictionary;
import com.tom_roush.pdfbox.cos.COSName;
import com.tom_roush.pdfbox.pdmodel.font.encoding.DictionaryEncoding;
import com.tom_roush.pdfbox.pdmodel.font.encoding.Encoding;
import com.tom_roush.pdfbox.pdmodel.font.encoding.GlyphList;
import com.tom_roush.pdfbox.pdmodel.font.encoding.MacRomanEncoding;
import com.tom_roush.pdfbox.pdmodel.font.encoding.StandardEncoding;
import com.tom_roush.pdfbox.pdmodel.font.encoding.WinAnsiEncoding;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * A simple font. Simple fonts use a PostScript encoding vector.
 *
 * @author John Hewson
 */
public abstract class PDSimpleFont extends PDFont
{
    /** Code-to-glyph-name encoding; assigned by a constructor or by {@link #readEncoding()}. */
    protected Encoding encoding;

    /** Glyph list used to translate glyph names into Unicode. */
    protected GlyphList glyphList;

    /** Lazily computed result of {@link #isSymbolic()}; {@code null} until first requested. */
    private Boolean isSymbolic;

    /** Codes for which a "No Unicode mapping" warning was already logged. */
    private final Set<Integer> noUnicode = new HashSet<Integer>(); // for logging

    /** Lazily built glyph-name-to-code map, see {@link #getInvertedEncoding()}. */
    private Map<String, Integer> invertedEncoding; // for writing

    /**
     * Constructor for embedding.
     */
    PDSimpleFont()
    {
        super();
    }

    /**
     * Constructor for Standard 14.
     *
     * @param baseFont name of the Standard 14 font
     */
    PDSimpleFont(String baseFont)
    {
        super(baseFont);

        this.encoding = WinAnsiEncoding.INSTANCE;

        // assign the glyph list based on the font
        if ("ZapfDingbats".equals(baseFont))
        {
            glyphList = GlyphList.getZapfDingbats();
        }
        else
        {
            glyphList = GlyphList.getAdobeGlyphList();
        }
    }

    /**
     * Constructor.
     *
     * @param fontDictionary Font dictionary.
     * @throws IOException if the superclass constructor fails
     */
    PDSimpleFont(COSDictionary fontDictionary) throws IOException
    {
        super(fontDictionary);
    }

    /**
     * Reads the Encoding from the Font dictionary or the embedded or substituted font file.
     * Must be called at the end of any subclass constructors.
     *
     * <p>The Encoding entry may be a name or a dictionary. If it is missing, or is of any
     * other type, the encoding is taken from the font file so that {@link #encoding} is
     * always assigned by this method.
     *
     * @throws IOException if the font file could not be read
     */
    protected void readEncoding() throws IOException
    {
        COSBase encodingBase = dict.getDictionaryObject(COSName.ENCODING);
        if (encodingBase instanceof COSName)
        {
            readEncodingFromName((COSName) encodingBase);
        }
        else if (encodingBase instanceof COSDictionary)
        {
            readEncodingFromDictionary((COSDictionary) encodingBase);
        }
        else
        {
            if (encodingBase != null)
            {
                // previously this case silently left the encoding null
                Log.w("PdfBox-Android", "Ignoring Encoding entry of unexpected type: "
                        + encodingBase.getClass().getSimpleName());
            }
            this.encoding = readEncodingFromFont();
        }

        // normalise the standard 14 name, e.g "Symbol,Italic" -> "Symbol"
        String standard14Name = Standard14Fonts.getMappedFontName(getName());

        // assign the glyph list based on the font
        if ("ZapfDingbats".equals(standard14Name))
        {
            glyphList = GlyphList.getZapfDingbats();
        }
        else
        {
            // StandardEncoding and Symbol are in the AGL
            glyphList = GlyphList.getAdobeGlyphList();
        }
    }

    /**
     * Builds the encoding from an Encoding dictionary (optional base encoding plus
     * differences).
     *
     * @param encodingDict the Encoding dictionary of the font
     * @throws IOException if the font file could not be read
     */
    private void readEncodingFromDictionary(COSDictionary encodingDict) throws IOException
    {
        Boolean symbolic = getSymbolicFlag();
        // a missing flag is treated as "not symbolic"
        boolean isFlaggedAsSymbolic = symbolic != null && symbolic;

        Encoding builtIn = null;
        if (!encodingDict.containsKey(COSName.BASE_ENCODING) && isFlaggedAsSymbolic)
        {
            // no explicit base encoding on a symbolic font: use the font's built-in one
            builtIn = readEncodingFromFont();
        }

        this.encoding = new DictionaryEncoding(encodingDict, !isFlaggedAsSymbolic, builtIn);
    }

    /**
     * Resolves a named (predefined) encoding, falling back to the font file's encoding
     * when the name is not recognised.
     *
     * @param encodingName the value of the Encoding entry
     * @throws IOException if the font file could not be read
     */
    private void readEncodingFromName(COSName encodingName) throws IOException
    {
        this.encoding = Encoding.getInstance(encodingName);
        if (this.encoding == null)
        {
            Log.w("PdfBox-Android", "Unknown encoding: " + encodingName.getName());
            // fallback
            this.encoding = readEncodingFromFont();
        }
    }

    /**
     * Called by readEncoding() if the encoding needs to be extracted from the font file.
     *
     * @return the encoding read from the font file
     * @throws IOException if the font file could not be read.
     */
    protected abstract Encoding readEncodingFromFont() throws IOException;

    /**
     * Returns the Encoding vector.
     */
    public Encoding getEncoding()
    {
        return encoding;
    }

    /**
     * Returns the glyph list which this font uses to map glyph names to Unicode.
     */
    public GlyphList getGlyphList()
    {
        return glyphList;
    }

    /**
     * Inverts the font's Encoding. Any duplicate (Name -> Code) mappings will be lost:
     * the first code encountered for a name is the one that is kept.
     *
     * @return cached map from glyph name to character code
     */
    protected Map<String, Integer> getInvertedEncoding()
    {
        if (invertedEncoding != null)
        {
            return invertedEncoding;
        }

        invertedEncoding = new HashMap<String, Integer>();
        Map<Integer, String> codeToName = encoding.getCodeToNameMap();
        for (Map.Entry<Integer, String> entry : codeToName.entrySet())
        {
            if (!invertedEncoding.containsKey(entry.getValue()))
            {
                invertedEncoding.put(entry.getValue(), entry.getKey());
            }
        }
        return invertedEncoding;
    }

    /**
     * Returns true if the font is symbolic (that is, it does not use the Adobe Standard Roman
     * character set).
     */
    public final boolean isSymbolic()
    {
        if (isSymbolic == null)
        {
            Boolean result = isFontSymbolic();
            if (result != null)
            {
                isSymbolic = result;
            }
            else
            {
                // unless we can prove that the font is non-symbolic, we assume that it is symbolic
                isSymbolic = true;
            }
        }
        return isSymbolic;
    }

    /**
     * Internal implementation of isSymbolic, allowing for the fact that the result may be
     * indeterminate.
     *
     * @return {@code true}/{@code false} when symbolic-ness can be decided, {@code null} otherwise
     * @throws IllegalStateException if the encoding is null and this is not a TrueType font
     */
    protected Boolean isFontSymbolic()
    {
        Boolean result = getSymbolicFlag();
        if (result != null)
        {
            return result;
        }

        if (isStandard14())
        {
            String mappedName = Standard14Fonts.getMappedFontName(getName());
            return "Symbol".equals(mappedName) || "ZapfDingbats".equals(mappedName);
        }

        if (encoding == null)
        {
            // sanity check, should never happen
            if (!(this instanceof PDTrueTypeFont))
            {
                throw new IllegalStateException("PDFBox bug: encoding should not be null!");
            }

            // TTF without its non-symbolic flag set must be symbolic
            return true;
        }

        if (encoding instanceof WinAnsiEncoding ||
            encoding instanceof MacRomanEncoding ||
            encoding instanceof StandardEncoding)
        {
            return false;
        }

        if (encoding instanceof DictionaryEncoding)
        {
            // each name in Differences array must also be in the latin character set
            for (String name : ((DictionaryEncoding) encoding).getDifferences().values())
            {
                if (".notdef".equals(name))
                {
                    continue;
                }
                boolean isLatin = WinAnsiEncoding.INSTANCE.contains(name) &&
                                  MacRomanEncoding.INSTANCE.contains(name) &&
                                  StandardEncoding.INSTANCE.contains(name);
                if (!isLatin)
                {
                    return true;
                }
            }
            return false;
        }

        // we don't know
        return null;
    }

    /**
     * Returns the value of the symbolic flag, allowing for the fact that the result may be
     * indeterminate.
     *
     * @return the descriptor's symbolic flag, or {@code null} if there is no font descriptor
     */
    protected final Boolean getSymbolicFlag()
    {
        if (getFontDescriptor() != null)
        {
            // fixme: isSymbolic() defaults to false if the flag is missing so we can't trust this
            return getFontDescriptor().isSymbolic();
        }
        return null;
    }

    @Override
    public String toUnicode(int code) throws IOException
    {
        return toUnicode(code, GlyphList.getAdobeGlyphList());
    }

    /**
     * Maps a character code to Unicode, first through the ToUnicode CMap handled by the
     * superclass and then through the encoding's glyph name and a glyph list.
     *
     * @param code character code
     * @param customGlyphList glyph list to use instead of the AGL
     * @return the Unicode string, or {@code null} if no mapping could be found
     * @throws IOException if the superclass lookup fails
     */
    @Override
    public String toUnicode(int code, GlyphList customGlyphList) throws IOException
    {
        // allow the glyph list to be overridden for the purpose of extracting Unicode
        // we only do this when the font's glyph list is the AGL, to avoid breaking Zapf Dingbats
        GlyphList unicodeGlyphList;
        if (this.glyphList == GlyphList.getAdobeGlyphList())
        {
            unicodeGlyphList = customGlyphList;
        }
        else
        {
            unicodeGlyphList = this.glyphList;
        }

        // first try to use a ToUnicode CMap
        String unicode = super.toUnicode(code);
        if (unicode != null)
        {
            return unicode;
        }

        // if the font is a "simple font" and uses MacRoman/MacExpert/WinAnsi[Encoding]
        // or has Differences with names from only Adobe Standard and/or Symbol, then:
        //
        //    a) Map the character codes to names
        //    b) Look up the name in the Adobe Glyph List to obtain the Unicode value

        String name = null;
        if (encoding != null)
        {
            name = encoding.getName(code);
            unicode = unicodeGlyphList.toUnicode(name);
            if (unicode != null)
            {
                return unicode;
            }
        }

        // if no value has been produced, there is no way to obtain Unicode for the character.
        if (!noUnicode.contains(code))
        {
            // we keep track of which warnings have been issued, so we don't log multiple times
            noUnicode.add(code);
            if (name != null)
            {
                Log.w("PdfBox-Android", "No Unicode mapping for " + name + " (" + code + ") in font " +
                        getName());
            }
            else
            {
                Log.w("PdfBox-Android", "No Unicode mapping for character code " + code +
                        " in font " + getName());
            }
        }

        return null;
    }

    @Override
    public boolean isVertical()
    {
        return false;
    }

    @Override
    protected final float getStandard14Width(int code)
    {
        if (getStandard14AFM() != null)
        {
            String nameInAFM = getEncoding().getName(code);

            // the Adobe AFMs don't include .notdef, but Acrobat uses 250, test with PDFBOX-2334
            if (".notdef".equals(nameInAFM))
            {
                return 250f;
            }

            return getStandard14AFM().getCharacterWidth(nameInAFM);
        }
        throw new IllegalStateException("No AFM");
    }

    @Override
    public boolean isStandard14()
    {
        // this logic is based on Acrobat's behaviour, see PDFBOX-2372
        // the Encoding entry cannot have Differences if we want "standard 14" font handling
        if (getEncoding() instanceof DictionaryEncoding)
        {
            DictionaryEncoding dictionary = (DictionaryEncoding) getEncoding();

            // we also require that the differences are actually different, see PDFBOX-1900 with
            // the file from PDFBOX-2192 on Windows
            Encoding baseEncoding = dictionary.getBaseEncoding();
            for (Map.Entry<Integer, String> entry : dictionary.getDifferences().entrySet())
            {
                if (!entry.getValue().equals(baseEncoding.getName(entry.getKey())))
                {
                    return false;
                }
            }
        }
        return super.isStandard14();
    }

    /**
     * Returns the path for the character with the given name. For some fonts, GIDs may be used
     * instead of names when calling this method.
     *
     * @param name glyph name
     * @return glyph path
     * @throws IOException if the path could not be read
     */
    public abstract Path getPath(String name) throws IOException;

    /**
     * Returns true if the font contains the character with the given name.
     *
     * @param name glyph name
     * @throws IOException if the path could not be read
     */
    public abstract boolean hasGlyph(String name) throws IOException;

    /**
     * Returns the embedded or system font used for rendering. This is never null.
     */
    public abstract FontBoxFont getFontBoxFont();

    @Override
    public void addToSubset(int codePoint)
    {
        throw new UnsupportedOperationException();
    }

    @Override
    public void subset() throws IOException
    {
        // only TTF subsetting via PDType0Font is currently supported
        throw new UnsupportedOperationException();
    }

    @Override
    public boolean willBeSubset()
    {
        return false;
    }
}