/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.font;
import java.awt.geom.GeneralPath;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.cmap.CMap;
import org.apache.fontbox.ttf.TTFParser;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.fontbox.util.BoundingBox;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;
/**
* A Composite (Type 0) font.
*
* @author Ben Litchfield
*/
public class PDType0Font extends PDFont implements PDVectorFont
{
private static final Log LOG = LogFactory.getLog(PDType0Font.class);
private final PDCIDFont descendantFont;
private final Set<Integer> noUnicode = new HashSet<>();
private CMap cMap, cMapUCS2;
private boolean isCMapPredefined;
private boolean isDescendantCJK;
private PDCIDFontType2Embedder embedder;
/**
* Constructor for reading a Type0 font from a PDF file.
*
* @param fontDictionary The font dictionary according to the PDF specification.
* @throws IOException if the descendant font is missing.
*/
public PDType0Font(COSDictionary fontDictionary) throws IOException
{
super(fontDictionary);
COSArray descendantFonts = (COSArray)dict.getDictionaryObject(COSName.DESCENDANT_FONTS);
COSDictionary descendantFontDictionary = (COSDictionary) descendantFonts.getObject(0);
if (descendantFontDictionary == null)
{
throw new IOException("Missing descendant font dictionary");
}
descendantFont = PDFontFactory.createDescendantFont(descendantFontDictionary, this);
readEncoding();
fetchCMapUCS2();
}
/**
* Private. Creates a new TrueType font for embedding.
*/
private PDType0Font(PDDocument document, TrueTypeFont ttf, boolean embedSubset)
throws IOException
{
embedder = new PDCIDFontType2Embedder(document, dict, ttf, embedSubset, this);
descendantFont = embedder.getCIDFont();
readEncoding();
fetchCMapUCS2();
}
/**
* Loads a TTF to be embedded into a document as a Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param file A TrueType font.
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font file.
*/
public static PDType0Font load(PDDocument doc, File file) throws IOException
{
return new PDType0Font(doc, new TTFParser().parse(file), true);
}
/**
* Loads a TTF to be embedded into a document as a Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param input A TrueType font.
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font load(PDDocument doc, InputStream input) throws IOException
{
return new PDType0Font(doc, new TTFParser().parse(input), true);
}
/**
* Loads a TTF to be embedded into a document as a Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param input A TrueType font.
* @param embedSubset True if the font will be subset before embedding
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font load(PDDocument doc, InputStream input, boolean embedSubset)
throws IOException
{
return new PDType0Font(doc, new TTFParser().parse(input), embedSubset);
}
/**
* Loads a TTF to be embedded into a document as a Type 0 font.
*
* @param doc The PDF document that will hold the embedded font.
* @param ttf A TrueType font.
* @param embedSubset True if the font will be subset before embedding
* @return A Type0 font with a CIDFontType2 descendant.
* @throws IOException If there is an error reading the font stream.
*/
public static PDType0Font load(PDDocument doc, TrueTypeFont ttf, boolean embedSubset)
throws IOException
{
return new PDType0Font(doc, ttf, embedSubset);
}
@Override
public void addToSubset(int codePoint)
{
if (!willBeSubset())
{
throw new IllegalStateException("This font was created with subsetting disabled");
}
embedder.addToSubset(codePoint);
}
@Override
public void subset() throws IOException
{
if (!willBeSubset())
{
throw new IllegalStateException("This font was created with subsetting disabled");
}
embedder.subset();
}
@Override
public boolean willBeSubset()
{
return embedder != null && embedder.needsSubset();
}
/**
* Reads the font's Encoding entry, which should be a CMap name/stream.
*/
private void readEncoding() throws IOException
{
COSBase encoding = dict.getDictionaryObject(COSName.ENCODING);
if (encoding instanceof COSName)
{
// predefined CMap
COSName encodingName = (COSName) encoding;
cMap = CMapManager.getPredefinedCMap(encodingName.getName());
if (cMap != null)
{
isCMapPredefined = true;
}
else
{
throw new IOException("Missing required CMap");
}
}
else if (encoding != null)
{
cMap = readCMap(encoding);
if (cMap == null)
{
throw new IOException("Missing required CMap");
}
else if (!cMap.hasCIDMappings())
{
LOG.warn("Invalid Encoding CMap in font " + getName());
}
}
// check if the descendant font is CJK
PDCIDSystemInfo ros = descendantFont.getCIDSystemInfo();
if (ros != null)
{
isDescendantCJK = ros.getRegistry().equals("Adobe") &&
(ros.getOrdering().equals("GB1") ||
ros.getOrdering().equals("CNS1") ||
ros.getOrdering().equals("Japan1") ||
ros.getOrdering().equals("Korea1"));
}
}
/**
* Fetches the corresponding UCS2 CMap if the font's CMap is predefined.
*/
private void fetchCMapUCS2() throws IOException
{
// if the font is composite and uses a predefined cmap (excluding Identity-H/V)
// or whose descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
// Adobe-Korea1 character collection:
COSName name = dict.getCOSName(COSName.ENCODING);
if (isCMapPredefined && !(name == COSName.IDENTITY_H || name == COSName.IDENTITY_V) ||
isDescendantCJK)
{
// a) Map the character code to a CID using the font's CMap
// b) Obtain the ROS from the font's CIDSystemInfo
// c) Construct a second CMap name by concatenating the ROS in the format "R-O-UCS2"
// d) Obtain the CMap with the constructed name
// e) Map the CID according to the CMap from step d), producing a Unicode value
// todo: not sure how to interpret the PDF spec here, do we always override? or only when Identity-H/V?
String strName = null;
if (isDescendantCJK)
{
strName = descendantFont.getCIDSystemInfo().getRegistry() + "-" +
descendantFont.getCIDSystemInfo().getOrdering() + "-" +
descendantFont.getCIDSystemInfo().getSupplement();
}
else if (name != null)
{
strName = name.getName();
}
// try to find the corresponding Unicode (UC2) CMap
if (strName != null)
{
CMap cMap = CMapManager.getPredefinedCMap(strName);
if (cMap != null)
{
String ucs2Name = cMap.getRegistry() + "-" + cMap.getOrdering() + "-UCS2";
CMap ucs2CMap = CMapManager.getPredefinedCMap(ucs2Name);
if (ucs2CMap != null)
{
cMapUCS2 = ucs2CMap;
}
}
}
}
}
/**
* Returns the PostScript name of the font.
*/
public String getBaseFont()
{
return dict.getNameAsString(COSName.BASE_FONT);
}
/**
* Returns the descendant font.
*/
public PDCIDFont getDescendantFont()
{
return descendantFont;
}
/**
* Returns the font's CMap.
*/
public CMap getCMap()
{
return cMap;
}
/**
* Returns the font's UCS2 CMap, only present this font uses a predefined CMap.
*/
public CMap getCMapUCS2()
{
return cMapUCS2;
}
@Override
public PDFontDescriptor getFontDescriptor()
{
return descendantFont.getFontDescriptor();
}
@Override
public Matrix getFontMatrix()
{
return descendantFont.getFontMatrix();
}
@Override
public boolean isVertical()
{
return cMap.getWMode() == 1;
}
@Override
public float getHeight(int code) throws IOException
{
return descendantFont.getHeight(code);
}
@Override
protected byte[] encode(int unicode) throws IOException
{
return descendantFont.encode(unicode);
}
@Override
public float getAverageFontWidth()
{
return descendantFont.getAverageFontWidth();
}
@Override
public Vector getPositionVector(int code)
{
// units are always 1/1000 text space, font matrix is not used, see FOP-2252
return descendantFont.getPositionVector(code).scale(-1 / 1000f);
}
@Override
public Vector getDisplacement(int code) throws IOException
{
if (isVertical())
{
return new Vector(0, descendantFont.getVerticalDisplacementVectorY(code) / 1000f);
}
else
{
return super.getDisplacement(code);
}
}
@Override
public float getWidth(int code) throws IOException
{
return descendantFont.getWidth(code);
}
@Override
protected float getStandard14Width(int code)
{
throw new UnsupportedOperationException("not suppported");
}
@Override
public float getWidthFromFont(int code) throws IOException
{
return descendantFont.getWidthFromFont(code);
}
@Override
public boolean isEmbedded()
{
return descendantFont.isEmbedded();
}
@Override
public String toUnicode(int code) throws IOException
{
// try to use a ToUnicode CMap
String unicode = super.toUnicode(code);
if (unicode != null)
{
return unicode;
}
if ((isCMapPredefined || isDescendantCJK) && cMapUCS2 != null)
{
// if the font is composite and uses a predefined cmap (excluding Identity-H/V) then
// or if its decendant font uses Adobe-GB1/CNS1/Japan1/Korea1
// a) Map the character code to a character identifier (CID) according to the font?s CMap
int cid = codeToCID(code);
// e) Map the CID according to the CMap from step d), producing a Unicode value
return cMapUCS2.toUnicode(cid);
}
else
{
if (LOG.isWarnEnabled() && !noUnicode.contains(code))
{
// if no value has been produced, there is no way to obtain Unicode for the character.
String cid = "CID+" + codeToCID(code);
LOG.warn("No Unicode mapping for " + cid + " (" + code + ") in font " + getName());
// we keep track of which warnings have been issued, so we don't log multiple times
noUnicode.add(code);
}
return null;
}
}
@Override
public String getName()
{
return getBaseFont();
}
@Override
public BoundingBox getBoundingBox() throws IOException
{
// Will be cached by underlying font
return descendantFont.getBoundingBox();
}
@Override
public int readCode(InputStream in) throws IOException
{
return cMap.readCode(in);
}
/**
* Returns the CID for the given character code. If not found then CID 0 is returned.
*
* @param code character code
* @return CID
*/
public int codeToCID(int code)
{
return descendantFont.codeToCID(code);
}
/**
* Returns the GID for the given character code.
*
* @param code character code
* @return GID
*/
public int codeToGID(int code) throws IOException
{
return descendantFont.codeToGID(code);
}
@Override
public boolean isStandard14()
{
return false;
}
@Override
public boolean isDamaged()
{
return descendantFont.isDamaged();
}
@Override
public String toString()
{
String descendant = null;
if (getDescendantFont() != null)
{
descendant = getDescendantFont().getClass().getSimpleName();
}
return getClass().getSimpleName() + "/" + descendant + " " + getBaseFont();
}
@Override
public GeneralPath getPath(int code) throws IOException
{
return descendantFont.getPath(code);
}
@Override
public GeneralPath getNormalizedPath(int code) throws IOException
{
return descendantFont.getNormalizedPath(code);
}
@Override
public boolean hasGlyph(int code) throws IOException
{
return descendantFont.hasGlyph(code);
}
}