/* * Copyright 2006-2017 ICEsoft Technologies Canada Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the * License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.icepdf.core.pobjects.fonts; import org.icepdf.core.pobjects.Dictionary; import org.icepdf.core.pobjects.Name; import org.icepdf.core.pobjects.Resources; import org.icepdf.core.util.Library; import java.util.HashMap; /** * <p>This class represents a PDF object which has a subtype value equal to "Font". * The class does the necessary encoding and Cmap manipulation to allow the proper * display of text that uses this font object.</p> * <br> * <p>This class is generally only used by the ContentParser for laying out * text for display and for text extraction. There are two categories of PDF fonts: * Simple and Composite.<p> * <br> * <h3>Simple Fonts</h3> * <p>There are several types of simple font; all of which have the following * properties:</p> * <ul> * <li>Glyphs in the font are selected by single-byte character codes obtained from a * string that is shown by the text-showing operators. Logically, these codes * index into a table of 256 glyphs; the mapping from codes to glyphs is called * the font's encoding. Each font program has a built-in encoding. Under some * circumstances, the encoding can be altered by means described in Section * 5.5.5, "Character Encoding."</li> * <li>Each glyph has a single set of metrics, including a horizontal displacement * or width, as described in Section 5.1.3, "Glyph Positioning and Metrics." * That is, simple fonts support only horizontal writing mode.</li> * <li>Except for Type 3 fonts and certain standard Type 1 fonts, every font * dictionary contains a subsidiary dictionary, the font descriptor, * containing fontwide metrics and other attributes of the font; see Section * 5.7, "Font Descriptors." Among those attributes is an optional font file * stream containing the font program itself.</li> * </ul> * <br> * <h3>Composite Fonts</h3> * <p>A composite font, also called Type0 font, is one whose glyphs are obtained * from a font like object called a CIDFont. A composite font is represented by * a font dictionary whose Subtype value is Type0. The Type 0 font is known as * the root font, and its associated CID Font is called its descendant.</p> * * @since 1.0 */ public abstract class Font extends Dictionary { public static final Name TYPE = new Name("Font"); public static final Name NAME_KEY = new Name("Name"); public static final Name BASEFONT_KEY = new Name("BaseFont"); public static final Name ENCODING_KEY = new Name("Encoding"); public static final Name FIRST_CHAR_KEY = new Name("FirstChar"); public static final Name LAST_CHAR_KEY = new Name("LastChar"); /** * All glyphs have the same width (as opposed to proportional or * variable-pitch fonts, which have different widths). */ public static final int FONT_FLAG_FIXED_PITCH = 0x1; // bit 1 /** * Glyphs have serifs, which are short strokes drawn at an angle on the top * and bottom of glyph stems. (Sans serif fonts do not have serifs.) */ public static final int FONT_FLAG_SERIF = 0x2; // bit 2 /** * Font contains glyphs outside the Adobe standard Latin character set. * This flag and the Nonsymbolic flag shall not both be set or both be clear. */ public static final int FONT_FLAG_SYMBOLIC = 0x4; // bit 3 /** * Glyphs resemble cursive handwriting. */ public static final int FONT_FLAG_SCRIPT = 0x8; // bit 4 /** * Font uses the Adobe standard Latin character set or a subset of it. */ public static final int FONT_FLAG_NON_SYMBOLIC = 0x20; // bit 6 /** * Glyphs have dominant vertical strokes that are slanted. */ public static final int FONT_FLAG_ITALIC = 0x40; // bit 7 /** * Font contains no lowercase letters; typically used for display purposes, * such as for titles or headlines. */ public static final int FONT_FLAG_ALL_CAP = 0x10000; // bit 17 /** * Glyphs have dominant vertical strokes that are slanted. */ public static final int FONT_FLAG_SMALL_CAP = 0x20000; // bit 18 /** * Font contains no lowercase letters; typically used for display purposes, * such as for titles or headlines. */ public static final int FONT_FLAG_FORCE_BOLD = 0x40000; // bit 19 // Object name always "Font" protected Name name; // The name of the object, Font protected String basefont; // The font subtype, type 0, 1, 2 etc. protected Name subtype; // the encoding name associated with font. protected Name encoding; /** * <p>Indicates that the font used to render this String object is in the * Simple Font family and thus each glyph is represented by one byte.</p> */ public static final int SIMPLE_FORMAT = 1; /** * <p>Indicates that the font used to render this String object is in the * Composite Font family and thus each glyph is represented by at least * one byte.</p> */ public static final int CID_FORMAT = 2; // supType Format, either simple or CID. protected int subTypeFormat = SIMPLE_FORMAT; // The actual Java font that will be used to display the Glyphs protected FontFile font; // The first character code defined in the font's Widths array. protected int firstchar = 32; protected int lastchar = 255; // Font Descriptor used protected FontDescriptor fontDescriptor; // initiated flag protected boolean inited; // AFM flag protected boolean isAFMFont; // vertical writing flag; protected boolean isVerticalWriting; // font substitution being used protected boolean isFontSubstitution; // parent resource, needed by some type3 fonts to access resources. protected Resources parentResource; /** * Map named CMap to Unicode mapping. */ protected static final String[][] TO_UNICODE = { // format: <canonical> <map1> <map2> ... // Chinese (Simplified) {"GBpc-EUC-UCS2", "GBpc-EUC-H", "GBpc-EUC-V"}, {"GBK-EUC-UCS2", "GBK-EUC-H", "GBK-EUC-V"}, {"UniGB-UCS2-H", "GB-EUC-H", "GBT-EUC-H", "GBK2K-H", "GBKp-EUC-H"}, {"UniGB-UCS2-V", "GB-EUC-V", "GBT-EUC-V", "GBK2K-V", "GBKp-EUC-V"}, // Chinese (Traditional) {"B5pc-UCS2", "B5pc-H", "B5pc-V"}, {"ETen-B5-UCS2", "ETen-B5-H", "ETen-B5-V", "ETenms-B5-H", "ETenms-B5-V"}, {"UniCNS-UCS2-H", "HKscs-B5-H", "CNS-EUC-H"}, {"UniCNS-UCS2-V", "HKscs-B5-V", "CNS-EUC-V"}, // Japanese {"90pv-RKSJ-UCS2", "90pv-RKSJ-H", "83pv-RKSJ-H"}, {"90ms-RKSJ-UCS2", "90ms-RKSJ-H", "90ms-RKSJ-V", "90msp-RKSJ-H", "90msp-RKSJ-V"}, {"UniJIS-UCS2-H", "Ext-RKSJ-H", "H", "Add-RKSJ-H", "EUC-H"}, {"UniJIS-UCS2-V", "Ext-RKSJ-V", "V", "Add-RKSJ-V", "EUC-V"}, // Korean {"KSCms-UHC-UCS2", "KSCms-UHC-H", "KSCms-UHC-V", "KSCms-UHC-HW-H", "KSCms-UHC-HW-V"}, {"KSCpc-EUC-UCS2", "KSCpc-EUC-H"}, {"UniKS-UCS2-H", "KSC-EUC-H"}, {"UniKS-UCS2-V", "KSC-EUC-V"} }; // core 14 AFM names protected static final String[] CORE14 = { "Times-Roman", "Times-Bold", "Times-Italic", "Times-BoldItalic", "Helvetica", "Helvetica-Bold", "Helvetica-Oblique", "Helvetica-BoldOblique", "Courier", "Courier-Bold", "Courier-Oblique", "Courier-BoldOblique", "Symbol", "ZapfDingbats" }; // type1 font names. protected static final String[][] TYPE1_FONT_NAME = { {"Times-Roman", "Times New Roman", "TimesNewRoman", "TimesNewRomanPS", "TimesNewRomanPSMT"}, {"Times-Bold", "TimesNewRoman,Bold", "TimesNewRoman-Bold", "TimesNewRomanPS-Bold", "TimesNewRomanPS-BoldMT"}, {"Times-Italic", "TimesNewRoman,Italic", "TimesNewRoman-Italic", "TimesNewRomanPS-Italic", "TimesNewRomanPS-ItalicMT"}, {"Times-BoldItalic", "TimesNewRoman,BoldItalic", "TimesNewRoman-BoldItalic", "TimesNewRomanPS-BoldItalic", "TimesNewRomanPS-BoldItalicMT"}, {"Helvetica", "Arial", "ArialMT"}, {"Helvetica-Bold", "Helvetica,Bold", "Arial,Bold", "Arial-Bold", "Arial-BoldMT"}, {"Helvetica-Oblique", "Helvetica,Italic", "Helvetica-Italic", "Arial,Italic", "Arial-Italic", "Arial-ItalicMT"}, {"Helvetica-BoldOblique", "Helvetica,BoldItalic", "Helvetica-BoldItalic", "Arial,BoldItalic", "Arial-BoldItalic", "Arial-BoldItalicMT"}, {"Courier", "CourierNew", "CourierNewPSMT"}, {"Courier-Bold", "Courier,Bold", "CourierNew,Bold", "CourierNew-Bold", "CourierNewPS-BoldMT"}, {"Courier-Oblique", "Courier,Italic", "CourierNew-Italic", "CourierNew,Italic", "CourierNewPS-ItalicMT"}, {"Courier-BoldOblique", "Courier,BoldItalic", "CourierNew-BoldItalic", "CourierNew,BoldItalic", "CourierNewPS-BoldItalicMT"}, {"Symbol"}, {"ZapfDingbats", "Zapf-Dingbats", "Dingbats"} }; /** * Creates a new instance of a PDF Font. * * @param library Libaray of all objects in PDF * @param entries hash of parsed font attributes */ public Font(Library library, HashMap entries) { super(library, entries); // name of object "Font" name = library.getName(entries, NAME_KEY); // Type of the font, type 0, 1, 2, 3 etc. subtype = library.getName(entries, SUBTYPE_KEY); encoding = library.getName(entries, ENCODING_KEY); // figure out type if (subtype != null) { subTypeFormat = (subtype.getName().toLowerCase().equals("type0") || subtype.getName().toLowerCase().contains("cid")) ? CID_FORMAT : SIMPLE_FORMAT; } int tmpInt = library.getInt(entries, FIRST_CHAR_KEY); if (tmpInt != 0) { firstchar = tmpInt; } tmpInt = library.getInt(entries, LAST_CHAR_KEY); if (tmpInt != 0) { lastchar = tmpInt; } // font name, SanSerif is used as it has a a robust CID, and it // is the most commonly used font family for pdfs basefont = "Serif"; Object tmp = entries.get(BASEFONT_KEY); if (tmp != null && tmp instanceof Name) { basefont = ((Name) tmp).getName(); } } /** * Initiate the font. Retrieve any needed attributes, basically set up the * font so it can be used by the content parser. */ public abstract void init(); /** * Gets the base name of the core 14 fonts, null if it does not match * * @param name name of font to search for canonical name * @return core 14 font name */ protected String getCanonicalName(String name) { for (String[] aTYPE1_FONT_NAME : TYPE1_FONT_NAME) { for (String anATYPE1_FONT_NAME : aTYPE1_FONT_NAME) { if (name.startsWith(anATYPE1_FONT_NAME)) { return aTYPE1_FONT_NAME[0]; } } } return null; } /** * Gets the fonts base name. * * @return fonts base name, "Serif" if none specified. */ public String getBaseFont() { return basefont; } /** * Gets the font name. * * @return string representing the font name */ public Name getName() { return name; } /** * Gets the font subtype value. * * @return string representing the font subtype */ public Name getSubType() { return subtype; } /** * Gets the font subtype format * * @return SIMPLE_FORMAT or CID_FORMAT. */ public int getSubTypeFormat() { return subTypeFormat; } /** * Gets the font encoding name. * * @return font encoding name. */ public Name getEncoding() { return encoding; } /** * <p>Returns a font which can be used to paint the glyphs in the character * set.</p> * * @return value of embedded font. */ public FontFile getFont() { return font; } /** * <p>Returns true if the writing mode is vertical; false, otherwise</p> * * @return true if the writing mode is vertical; false, otherwise. */ public boolean isVerticalWriting() { return isVerticalWriting; } /** * <p>Indicates that this font is an Adobe Core 14 font. </p> * * @return true, if font is a core 14 font; false otherwise. */ public boolean isAFMFont() { return isAFMFont; } public boolean isFontSubstitution() { return isFontSubstitution; } /** * <p>Returns true if the font name is one of the core 14 fonts specified by * Adobe.</p> * * @param fontName name to test if a core 14 font. * @return true, if font name is a core 14 font; false, otherwise. */ public boolean isCore14(String fontName) { for (String aCORE14 : CORE14) { if (fontName.startsWith(aCORE14)) { return true; } } return false; } /** * String representation of the Font object. * * @return string representing Font object attributes. */ public String toString() { return getPObjectReference() + " FONT= " + basefont + " " + entries.toString(); } public Resources getParentResource() { return parentResource; } public void setParentResource(Resources parentResource) { this.parentResource = parentResource; } }