/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.font;
import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath;
import java.awt.geom.Point2D;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.EncodedFont;
import org.apache.fontbox.FontBoxFont;
import org.apache.fontbox.type1.DamagedFontException;
import org.apache.fontbox.type1.Type1Font;
import org.apache.fontbox.util.BoundingBox;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
import org.apache.pdfbox.pdmodel.font.encoding.StandardEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.Type1Encoding;
import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.ZapfDingbatsEncoding;
import org.apache.pdfbox.util.Matrix;
import static org.apache.pdfbox.pdmodel.font.UniUtil.getUniNameOfCodePoint;
import org.apache.pdfbox.pdmodel.font.encoding.SymbolEncoding;
/**
* A PostScript Type 1 Font.
*
* @author Ben Litchfield
*/
public class PDType1Font extends PDSimpleFont implements PDVectorFont
{
// Class logger. Declared ahead of the font constants below because the constructor that
// builds them may log a fallback warning while the class is being initialized.
private static final Log LOG = LogFactory.getLog(PDType1Font.class);
// Shared instances of the standard 14 fonts, one per PostScript base font name.
// todo: replace with enum? or getters?
public static final PDType1Font TIMES_ROMAN = new PDType1Font("Times-Roman");
public static final PDType1Font TIMES_BOLD = new PDType1Font("Times-Bold");
public static final PDType1Font TIMES_ITALIC = new PDType1Font("Times-Italic");
public static final PDType1Font TIMES_BOLD_ITALIC = new PDType1Font("Times-BoldItalic");
public static final PDType1Font HELVETICA = new PDType1Font("Helvetica");
public static final PDType1Font HELVETICA_BOLD = new PDType1Font("Helvetica-Bold");
public static final PDType1Font HELVETICA_OBLIQUE = new PDType1Font("Helvetica-Oblique");
public static final PDType1Font HELVETICA_BOLD_OBLIQUE = new PDType1Font("Helvetica-BoldOblique");
public static final PDType1Font COURIER = new PDType1Font("Courier");
public static final PDType1Font COURIER_BOLD = new PDType1Font("Courier-Bold");
public static final PDType1Font COURIER_OBLIQUE = new PDType1Font("Courier-Oblique");
public static final PDType1Font COURIER_BOLD_OBLIQUE = new PDType1Font("Courier-BoldOblique");
public static final PDType1Font SYMBOL = new PDType1Font("Symbol");
public static final PDType1Font ZAPF_DINGBATS = new PDType1Font("ZapfDingbats");
// alternative names for glyphs which are commonly encountered;
// consulted by getNameInFont() when the rendering font lacks a glyph under its usual name
private static final Map<String, String> ALT_NAMES = new HashMap<>();
// first byte of a font stream that holds a complete PFB file instead of the raw segments
private static final int PFB_START_MARKER = 0x80;
// usual glyph name -> alternative name tried in the rendering font
static
{
ALT_NAMES.put("ff", "f_f");
ALT_NAMES.put("ffi", "f_f_i");
ALT_NAMES.put("ffl", "f_f_l");
ALT_NAMES.put("fi", "f_i");
ALT_NAMES.put("fl", "f_l");
ALT_NAMES.put("st", "s_t");
ALT_NAMES.put("IJ", "I_J");
ALT_NAMES.put("ij", "i_j");
ALT_NAMES.put("ellipsis", "elipsis"); // misspelled in ArialMT
}
/**
* The embedded Type 1 font program, or null when the font is a standard 14 font or no
* embedded font program could be loaded.
*/
private final Type1Font type1font;
/**
* Font used for rendering and metrics: the embedded font when there is one, otherwise the
* font returned by the font mapper (which may be a fallback).
*/
private final FontBoxFont genericFont;
// true when an embedded Type 1 font program is in use
private final boolean isEmbedded;
// true when an embedded font program was present but could not be read
private final boolean isDamaged;
// transform applied to glyph widths in getWidthFromFont(); identity for standard 14 fonts and
// fonts created for embedding, font matrix scaled by 1000 for fonts read from a PDF
private final AffineTransform fontMatrixTransform;
/**
* Cache of already encoded bytes to improve encoding speed; encode(int) looks entries up by
* Unicode code point.
*/
private final Map <Integer,byte[]> codeToBytesMap;
// font matrix taken from the underlying font, set lazily by getFontMatrix()
private Matrix fontMatrix;
// font bounding box, set lazily by getBoundingBox()
private BoundingBox fontBBox;
/**
 * Creates one of the standard 14 fonts. No font program is embedded; the font used for
 * rendering is whatever the font mapper resolves for the base font name.
 *
 * @param baseFont One of the standard 14 PostScript names
 */
private PDType1Font(String baseFont)
{
    super(baseFont);

    // none of the standard 14 instances carries an embedded font program
    // todo: could load the PFB font here if we wanted to support Standard 14 embedding
    type1font = null;
    isEmbedded = false;
    isDamaged = false;
    fontMatrixTransform = new AffineTransform();
    // standard 14 fonts may be accessed concurrently, as they are singletons
    codeToBytesMap = new ConcurrentHashMap<>();

    dict.setItem(COSName.SUBTYPE, COSName.TYPE1);
    dict.setName(COSName.BASE_FONT, baseFont);

    // the two symbolic fonts use their own encoding, everything else is WinAnsi
    if (baseFont.equals("ZapfDingbats"))
    {
        encoding = ZapfDingbatsEncoding.INSTANCE;
    }
    else if (baseFont.equals("Symbol"))
    {
        encoding = SymbolEncoding.INSTANCE;
    }
    else
    {
        encoding = WinAnsiEncoding.INSTANCE;
        dict.setItem(COSName.ENCODING, COSName.WIN_ANSI_ENCODING);
    }

    FontMapping<FontBoxFont> resolved =
            FontMappers.instance().getFontBoxFont(getBaseFont(), getFontDescriptor());
    genericFont = resolved.getFont();
    if (resolved.isFallback())
    {
        // the name is only needed for the log message, so a failure to read it is tolerated
        String substituteName;
        try
        {
            substituteName = genericFont.getName();
        }
        catch (IOException e)
        {
            substituteName = "?";
        }
        LOG.warn("Using fallback font " + substituteName + " for base font " + getBaseFont());
    }
}
/**
* Creates a new Type 1 font for embedding.
*
* @param doc PDF document to write to
* @param pfbIn PFB file stream
* @throws IOException
*/
public PDType1Font(PDDocument doc, InputStream pfbIn) throws IOException
{
PDType1FontEmbedder embedder = new PDType1FontEmbedder(doc, dict, pfbIn, null);
encoding = embedder.getFontEncoding();
glyphList = embedder.getGlyphList();
type1font = embedder.getType1Font();
genericFont = embedder.getType1Font();
isEmbedded = true;
isDamaged = false;
fontMatrixTransform = new AffineTransform();
codeToBytesMap = new HashMap<>();
}
/**
* Creates a new Type 1 font for embedding.
*
* @param doc PDF document to write to
* @param pfbIn PFB file stream
* @param encoding
* @throws IOException
*/
public PDType1Font(PDDocument doc, InputStream pfbIn, Encoding encoding) throws IOException
{
PDType1FontEmbedder embedder = new PDType1FontEmbedder(doc, dict, pfbIn, encoding);
this.encoding = encoding;
glyphList = embedder.getGlyphList();
type1font = embedder.getType1Font();
genericFont = embedder.getType1Font();
isEmbedded = true;
isDamaged = false;
fontMatrixTransform = new AffineTransform();
codeToBytesMap = new HashMap<>();
}
/**
 * Creates a Type 1 font from a Font dictionary in a PDF.
 *
 * <p>If the font descriptor has a /FontFile stream, the embedded font program is parsed after
 * /Length1 and /Length2 have been repaired. A stream that cannot be parsed, or whose lengths
 * are still invalid after the repair, marks the font as damaged. In that case, and when
 * nothing is embedded, the font used for rendering comes from the font mapper.
 *
 * @param fontDictionary font dictionary.
 * @throws IOException if there was an error initializing the font.
 * @throws IllegalArgumentException if /FontFile3 was used.
 */
public PDType1Font(COSDictionary fontDictionary) throws IOException
{
    super(fontDictionary);
    codeToBytesMap = new HashMap<>();

    PDFontDescriptor fd = getFontDescriptor();
    Type1Font t1 = null;
    boolean fontIsDamaged = false;
    if (fd != null)
    {
        // a Type1 font may contain a Type1C font
        PDStream fontFile3 = fd.getFontFile3();
        if (fontFile3 != null)
        {
            throw new IllegalArgumentException("Use PDType1CFont for FontFile3");
        }

        // or it may contain a PFB
        PDStream fontFile = fd.getFontFile();
        if (fontFile != null)
        {
            try
            {
                COSStream stream = fontFile.getCOSObject();
                int length1 = stream.getInt(COSName.LENGTH1);
                int length2 = stream.getInt(COSName.LENGTH2);

                // repair Length1 and Length2 if necessary
                byte[] bytes = fontFile.toByteArray();
                length1 = repairLength1(bytes, length1);
                length2 = repairLength2(bytes, length1, length2);

                if (bytes.length > 0 && (bytes[0] & 0xff) == PFB_START_MARKER)
                {
                    // some bad files embed the entire PFB, see PDFBOX-2607
                    t1 = Type1Font.createWithPFB(bytes);
                }
                else if (length1 < 0 || length2 < 0)
                {
                    // Length1 is negative or lies beyond the end of the stream even after
                    // the repair. Slicing with such values would throw an unchecked
                    // IllegalArgumentException, so the font is treated as damaged instead.
                    LOG.error("Can't read the embedded Type1 font " + fd.getFontName() +
                            ", invalid length data, actual length: " + bytes.length +
                            ", /Length1: " + length1 + ", /Length2: " + length2);
                    fontIsDamaged = true;
                }
                else if (length1 > 0 && length2 > 0)
                {
                    // the PFB embedded as two segments back-to-back; after the repair
                    // length1 + length2 never exceeds bytes.length
                    byte[] segment1 = Arrays.copyOfRange(bytes, 0, length1);
                    byte[] segment2 = Arrays.copyOfRange(bytes, length1, length1 + length2);
                    t1 = Type1Font.createWithSegments(segment1, segment2);
                }
                // otherwise: empty streams are simply ignored
            }
            catch (DamagedFontException e)
            {
                LOG.warn("Can't read damaged embedded Type1 font " + fd.getFontName());
                fontIsDamaged = true;
            }
            catch (IOException e)
            {
                LOG.error("Can't read the embedded Type1 font " + fd.getFontName(), e);
                fontIsDamaged = true;
            }
        }
    }
    isEmbedded = t1 != null;
    isDamaged = fontIsDamaged;
    type1font = t1;

    // find a generic font to use for rendering, could be a .pfb, but might be a .ttf
    if (type1font != null)
    {
        genericFont = type1font;
    }
    else
    {
        FontMapping<FontBoxFont> mapping = FontMappers.instance()
                .getFontBoxFont(getBaseFont(), fd);
        genericFont = mapping.getFont();
        if (mapping.isFallback())
        {
            LOG.warn("Using fallback font " + genericFont.getName() + " for " + getBaseFont());
        }
    }
    readEncoding();

    // glyph widths are transformed with the font matrix scaled by 1000, see getWidthFromFont()
    fontMatrixTransform = getFontMatrix().createAffineTransform();
    fontMatrixTransform.scale(1000, 1000);
}
/**
 * Some Type 1 fonts have an invalid Length1, which causes the binary segment of the font
 * to be truncated, see PDFBOX-2350, PDFBOX-3677. The real end of the first segment is located
 * by searching backwards for 'exec'; if that position is positive and differs from Length1,
 * it is used instead.
 *
 * @param bytes Type 1 stream bytes
 * @param length1 Length1 from the Type 1 stream
 * @return repaired Length1 value, or the given value if no correction applies
 */
private int repairLength1(byte[] bytes, int length1)
{
    // last position at which a four byte 'exec' can still start
    int lastCandidate = bytes.length - 4;

    // begin the backward scan at the claimed end of the first segment when that is usable
    int scanStart = Math.max(0, length1 - 4);
    if (scanStart <= 0 || scanStart > lastCandidate)
    {
        scanStart = lastCandidate;
    }

    int binaryStart = findBinaryOffsetAfterExec(bytes, scanStart);
    if (binaryStart == 0 && length1 > 0)
    {
        // 2nd try with brute force
        binaryStart = findBinaryOffsetAfterExec(bytes, lastCandidate);
    }

    // nothing found, or the declared length was already right
    if (binaryStart <= 0 || length1 - binaryStart == 0)
    {
        return length1;
    }

    if (LOG.isWarnEnabled())
    {
        LOG.warn("Ignored invalid Length1 " + length1 + " for Type 1 font " + getName());
    }
    return binaryStart;
}
/**
 * Searches backwards from {@code startOffset} for the keyword 'exec' and returns the index of
 * the first byte after it that is not a space, tab, CR or LF.
 *
 * @param bytes Type 1 stream bytes
 * @param startOffset index at which the backward search starts; index 0 is never examined
 * @return index following 'exec' and its trailing whitespace, 0 if 'exec' was not found, or
 * {@code startOffset} itself if it is not positive
 */
private static int findBinaryOffsetAfterExec(byte[] bytes, int startOffset)
{
    for (int pos = startOffset; pos > 0; pos--)
    {
        boolean atExec = bytes[pos] == 'e' && bytes[pos + 1] == 'x'
                && bytes[pos + 2] == 'e' && bytes[pos + 3] == 'c';
        if (!atExec)
        {
            continue;
        }
        // skip additional CR LF space characters
        int binaryStart = pos + 4;
        while (binaryStart < bytes.length && " \t\r\n".indexOf(bytes[binaryStart]) >= 0)
        {
            binaryStart++;
        }
        return binaryStart;
    }
    // an exhausted search ends at 0; a non-positive start is handed back unchanged
    return Math.min(startOffset, 0);
}
/**
 * Some Type 1 fonts have an invalid Length2, see PDFBOX-3475. A negative /Length2 brings an
 * IllegalArgumentException in Arrays.copyOfRange(), a huge value eats up memory because of
 * padding. Such values are replaced by the number of bytes that follow the first segment.
 *
 * @param bytes Type 1 stream bytes
 * @param length1 Length1 from the Type 1 stream
 * @param length2 Length2 from the Type 1 stream
 * @return repaired Length2 value
 */
private int repairLength2(byte[] bytes, int length1, int length2)
{
    // bytes that remain in the stream once the first segment is taken off
    int remaining = bytes.length - length1;
    boolean plausible = length2 >= 0 && length2 <= remaining;
    if (plausible)
    {
        return length2;
    }
    LOG.warn("Ignored invalid Length2 " + length2 + " for Type 1 font " + getName());
    return remaining;
}
/**
 * Returns the PostScript name of the font, read from the /BaseFont entry of the font
 * dictionary.
 *
 * @return the value of /BaseFont as a string
 */
public final String getBaseFont()
{
    final String baseFontName = dict.getNameAsString(COSName.BASE_FONT);
    return baseFontName;
}
/**
 * Returns the height of the glyph for the given character code. When standard 14 AFM metrics
 * are available, they are used; otherwise the height is taken from the bounds of the glyph
 * outline in the rendering font.
 *
 * @param code character code
 * @return the glyph height
 * @throws IOException if the glyph could not be read from the font
 */
@Override
public float getHeight(int code) throws IOException
{
    if (getStandard14AFM() != null)
    {
        String afmName = getEncoding().getName(code);
        return getStandard14AFM().getCharacterHeight(afmName); // todo: isn't this the y-advance, not the height?
    }
    else
    {
        // the name in the rendering font is only needed on this path, so it is resolved here
        // rather than up front, sparing the AFM path the glyph lookups
        String name = codeToName(code);
        // todo: should be scaled by font matrix
        return (float) genericFont.getPath(name).getBounds().getHeight();
    }
}
/**
 * Encodes a Unicode code point as the single byte this font's encoding assigns to it.
 * Results are cached per code point.
 *
 * @param unicode Unicode code point
 * @return a one-byte array holding the character code
 * @throws IOException if the rendering font could not be queried
 * @throws IllegalArgumentException if the encoding does not contain the glyph name of the
 * code point, or if the font has no glyph for it
 */
@Override
protected byte[] encode(int unicode) throws IOException
{
    byte[] bytes = codeToBytesMap.get(unicode);
    if (bytes != null)
    {
        return bytes;
    }

    String name = getGlyphList().codePointToName(unicode);
    if (isStandard14())
    {
        // genericFont not needed, thus simplified code
        // this is important on systems with no installed fonts
        if (!encoding.contains(name))
        {
            throw new IllegalArgumentException(
                    String.format("U+%04X ('%s') is not available in this font %s encoding: %s",
                            unicode, name, getName(), encoding.getEncodingName()));
        }
        if (".notdef".equals(name))
        {
            throw new IllegalArgumentException(
                    String.format("No glyph for U+%04X in font %s", unicode, getName()));
        }
    }
    else
    {
        if (!encoding.contains(name))
        {
            throw new IllegalArgumentException(
                    String.format("U+%04X ('%s') is not available in this font %s (generic: %s) encoding: %s",
                            unicode, name, getName(), genericFont.getName(), encoding.getEncodingName()));
        }

        String nameInFont = getNameInFont(name);
        if (".notdef".equals(nameInFont) || !genericFont.hasGlyph(nameInFont))
        {
            throw new IllegalArgumentException(
                    String.format("No glyph for U+%04X in font %s (generic: %s)", unicode, getName(), genericFont.getName()));
        }
    }

    Map<String, Integer> inverted = encoding.getNameToCodeMap();
    int code = inverted.get(name);
    bytes = new byte[] { (byte)code };
    // The cache is read with the Unicode code point, so it has to be written with it too.
    // Storing under the character code would miss on the next lookup and could hand these
    // bytes to an unrelated code point whose value happens to equal the code.
    codeToBytesMap.put(unicode, bytes);
    return bytes;
}
/**
 * Returns the advance width of the glyph for the given code as stored in the rendering font,
 * transformed by the font matrix transform of this font.
 *
 * @param code character code
 * @return the glyph width, or 250 for .notdef when the font is not embedded
 * @throws IOException if the width could not be read from the font
 */
@Override
public float getWidthFromFont(int code) throws IOException
{
    String glyphName = codeToName(code);

    // width of .notdef is ignored for substitutes, see PDFBOX-1900
    boolean substituteNotdef = !isEmbedded && glyphName.equals(".notdef");
    if (substituteNotdef)
    {
        return 250;
    }

    Point2D advance = new Point2D.Float(genericFont.getWidth(glyphName), 0);
    fontMatrixTransform.transform(advance, advance);
    return (float) advance.getX();
}
/**
 * Tells whether this font uses an embedded Type 1 font program.
 *
 * @return true if an embedded font program is in use
 */
@Override
public boolean isEmbedded()
{
    return this.isEmbedded;
}
/**
 * Returns the average character width, taken from the standard 14 AFM metrics when they are
 * available and from the superclass otherwise.
 *
 * @return the average width
 */
@Override
public float getAverageFontWidth()
{
    if (getStandard14AFM() == null)
    {
        // no AFM metrics, fall back to the generic computation
        return super.getAverageFontWidth();
    }
    return getStandard14AFM().getAverageCharacterWidth();
}
/**
 * Reads one character code from the stream; codes of this font are a single byte.
 *
 * @param in stream to read from
 * @return the value returned by {@link InputStream#read()}
 * @throws IOException if reading from the stream fails
 */
@Override
public int readCode(InputStream in) throws IOException
{
    final int singleByteCode = in.read();
    return singleByteCode;
}
/**
 * Determines the built-in encoding of this font. In order of preference: the AFM metrics of
 * a non-embedded standard 14 font, the encoding of the rendering font if it has one, and
 * finally StandardEncoding.
 *
 * @return the encoding derived from the font
 * @throws IOException if the encoding could not be read from the font
 */
@Override
protected Encoding readEncodingFromFont() throws IOException
{
    if (!isEmbedded() && getStandard14AFM() != null)
    {
        // read from AFM
        return new Type1Encoding(getStandard14AFM());
    }

    // extract from Type1 font/substitute
    if (genericFont instanceof EncodedFont)
    {
        EncodedFont encodedFont = (EncodedFont) genericFont;
        return Type1Encoding.fromFontBox(encodedFont.getEncoding());
    }

    // default (only happens with TTFs)
    return StandardEncoding.INSTANCE;
}
/**
 * Returns the embedded Type 1 font program.
 *
 * @return the embedded font, or null if there is none
 */
public Type1Font getType1Font()
{
    return this.type1font;
}
/**
 * Returns the FontBox font used for rendering, which is either the embedded font or the font
 * chosen by the font mapper.
 *
 * @return the rendering font
 */
@Override
public FontBoxFont getFontBoxFont()
{
    return this.genericFont;
}
/**
 * Returns the name of this font, which is its PostScript base font name.
 *
 * @return the same value as {@link #getBaseFont()}
 */
@Override
public String getName()
{
    final String baseFontName = getBaseFont();
    return baseFontName;
}
/**
 * Returns the font bounding box. It is computed on first use and kept for later calls.
 *
 * @return the font bounding box
 * @throws IOException if the bounding box could not be read from the font
 */
@Override
public BoundingBox getBoundingBox() throws IOException
{
    if (fontBBox != null)
    {
        return fontBBox;
    }
    fontBBox = generateBoundingBox();
    return fontBBox;
}
/**
 * Computes the font bounding box. The box from the font descriptor is used unless it is
 * missing or has all four coordinates equal to zero; in that case the box of the rendering
 * font is returned.
 *
 * @return the bounding box
 * @throws IOException if the bounding box could not be read from the rendering font
 */
private BoundingBox generateBoundingBox() throws IOException
{
    PDFontDescriptor descriptor = getFontDescriptor();
    if (descriptor != null)
    {
        PDRectangle rect = descriptor.getFontBoundingBox();
        if (rect != null)
        {
            float llx = rect.getLowerLeftX();
            float lly = rect.getLowerLeftY();
            float urx = rect.getUpperRightX();
            float ury = rect.getUpperRightY();
            // an all-zero box carries no information and is ignored
            if (llx != 0 || lly != 0 || urx != 0 || ury != 0)
            {
                return new BoundingBox(llx, lly, urx, ury);
            }
        }
    }
    return genericFont.getFontBBox();
}
/**
 * Maps a character code to the name of its glyph in the rendering font.
 *
 * @param code character code
 * @return the glyph name to use with the rendering font
 * @throws IOException if the rendering font could not be queried
 */
//@Override
public String codeToName(int code) throws IOException
{
    return getNameInFont(getEncoding().getName(code));
}
/**
 * Maps a PostScript glyph name to the name in the underlying font, for example when
 * using a TTF font we might map "W" to "uni0057". The name itself is tried first, then a
 * commonly encountered alternative name, then the "uni" name of its Unicode value.
 *
 * @param name PostScript glyph name
 * @return the name under which the rendering font has the glyph, or ".notdef" if none of
 * the candidates exists in the font
 * @throws IOException if the rendering font could not be queried
 */
private String getNameInFont(String name) throws IOException
{
    // embedded fonts are always addressed by the original name
    if (isEmbedded() || genericFont.hasGlyph(name))
    {
        return name;
    }

    // try alternative name
    String alternative = ALT_NAMES.get(name);
    if (alternative != null && !name.equals(".notdef") && genericFont.hasGlyph(alternative))
    {
        return alternative;
    }

    // try unicode name
    String unicodeValue = getGlyphList().toUnicode(name);
    if (unicodeValue != null && unicodeValue.length() == 1)
    {
        String uniName = getUniNameOfCodePoint(unicodeValue.codePointAt(0));
        if (genericFont.hasGlyph(uniName))
        {
            return uniName;
        }
    }
    return ".notdef";
}
/**
 * Returns the outline of the glyph with the given PostScript name.
 *
 * @param name PostScript glyph name
 * @return the glyph path; an empty path for .notdef when the font is not embedded
 * @throws IOException if the glyph could not be read from the font
 */
@Override
public GeneralPath getPath(String name) throws IOException
{
    // Acrobat does not draw .notdef for Type 1 fonts, see PDFBOX-2421
    // I suspect that it does do this for embedded fonts though, but this is untested
    boolean suppressNotdef = name.equals(".notdef") && !isEmbedded;
    if (suppressNotdef)
    {
        return new GeneralPath();
    }
    String nameInFont = getNameInFont(name);
    return genericFont.getPath(nameInFont);
}
/**
 * Returns the outline of the glyph for the given character code, looked up through the
 * encoding's glyph name for that code.
 *
 * @param code character code
 * @return the glyph path
 * @throws IOException if the glyph could not be read from the font
 */
@Override
public GeneralPath getPath(int code) throws IOException
{
    return getPath(getEncoding().getName(code));
}
/**
 * Returns the outline of the glyph for the given character code, substituting the path of
 * .notdef when no path is available for the glyph.
 *
 * @param code character code
 * @return the glyph path, or the .notdef path as a substitute
 * @throws IOException if the glyph could not be read from the font
 */
@Override
public GeneralPath getNormalizedPath(int code) throws IOException
{
    GeneralPath glyphPath = getPath(getEncoding().getName(code));
    return glyphPath != null ? glyphPath : getPath(".notdef");
}
/**
 * Tells whether the rendering font has a glyph for the given PostScript name, after the name
 * has been mapped to the name used in that font.
 *
 * @param name PostScript glyph name
 * @return true if the rendering font has the glyph
 * @throws IOException if the rendering font could not be queried
 */
@Override
public boolean hasGlyph(String name) throws IOException
{
    String nameInFont = getNameInFont(name);
    return genericFont.hasGlyph(nameInFont);
}
/**
 * Tells whether the encoding maps the given character code to a glyph name other than
 * .notdef.
 *
 * @param code character code
 * @return true if the code has a glyph name other than .notdef
 * @throws IOException declared by the overridden method
 */
@Override
public boolean hasGlyph(int code) throws IOException
{
    String glyphName = getEncoding().getName(code);
    return !glyphName.equals(".notdef");
}
/**
 * Returns the font matrix. A six-element matrix supplied by the rendering font takes
 * precedence and is cached; without one the matrix of the superclass is returned.
 *
 * @return the font matrix
 */
@Override
public final Matrix getFontMatrix()
{
    if (fontMatrix != null)
    {
        return fontMatrix;
    }

    // PDF specified that Type 1 fonts use a 1000upem matrix, but some fonts specify
    // their own custom matrix anyway, for example PDFBOX-2298
    List<Number> values = null;
    try
    {
        values = genericFont.getFontMatrix();
    }
    catch (IOException e)
    {
        // remember the default so that later calls do not query the font again
        fontMatrix = DEFAULT_FONT_MATRIX;
    }

    if (values == null || values.size() != 6)
    {
        return super.getFontMatrix();
    }

    fontMatrix = new Matrix(
            values.get(0).floatValue(), values.get(1).floatValue(),
            values.get(2).floatValue(), values.get(3).floatValue(),
            values.get(4).floatValue(), values.get(5).floatValue());
    return fontMatrix;
}
/**
 * Tells whether an embedded font program was present but could not be read.
 *
 * @return true if the embedded font is damaged
 */
@Override
public boolean isDamaged()
{
    return this.isDamaged;
}
}