PDType1Font.java example

Explorer
pdfbox-master
- pdfbox-trunk
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel.font;

import java.awt.geom.AffineTransform;
import java.awt.geom.GeneralPath;
import java.awt.geom.Point2D;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.EncodedFont;
import org.apache.fontbox.FontBoxFont;
import org.apache.fontbox.type1.DamagedFontException;
import org.apache.fontbox.type1.Type1Font;
import org.apache.fontbox.util.BoundingBox;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
import org.apache.pdfbox.pdmodel.font.encoding.StandardEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.Type1Encoding;
import org.apache.pdfbox.pdmodel.font.encoding.WinAnsiEncoding;
import org.apache.pdfbox.pdmodel.font.encoding.ZapfDingbatsEncoding;
import org.apache.pdfbox.util.Matrix;


import static org.apache.pdfbox.pdmodel.font.UniUtil.getUniNameOfCodePoint;
import org.apache.pdfbox.pdmodel.font.encoding.SymbolEncoding;

/**
 * A PostScript Type 1 Font.
 *
 * @author Ben Litchfield
 */
public class PDType1Font extends PDSimpleFont implements PDVectorFont
{
    private static final Log LOG = LogFactory.getLog(PDType1Font.class);

    // todo: replace with enum? or getters?
    public static final PDType1Font TIMES_ROMAN = new PDType1Font("Times-Roman");
    public static final PDType1Font TIMES_BOLD = new PDType1Font("Times-Bold");
    public static final PDType1Font TIMES_ITALIC = new PDType1Font("Times-Italic");
    public static final PDType1Font TIMES_BOLD_ITALIC = new PDType1Font("Times-BoldItalic");
    public static final PDType1Font HELVETICA = new PDType1Font("Helvetica");
    public static final PDType1Font HELVETICA_BOLD = new PDType1Font("Helvetica-Bold");
    public static final PDType1Font HELVETICA_OBLIQUE = new PDType1Font("Helvetica-Oblique");
    public static final PDType1Font HELVETICA_BOLD_OBLIQUE = new PDType1Font("Helvetica-BoldOblique");
    public static final PDType1Font COURIER = new PDType1Font("Courier");
    public static final PDType1Font COURIER_BOLD = new PDType1Font("Courier-Bold");
    public static final PDType1Font COURIER_OBLIQUE = new PDType1Font("Courier-Oblique");
    public static final PDType1Font COURIER_BOLD_OBLIQUE = new PDType1Font("Courier-BoldOblique");
    public static final PDType1Font SYMBOL = new PDType1Font("Symbol");
    public static final PDType1Font ZAPF_DINGBATS = new PDType1Font("ZapfDingbats");

    // alternative names for glyphs which are commonly encountered
    private static final Map<String, String> ALT_NAMES = new HashMap<>();
    private static final int PFB_START_MARKER = 0x80;

    static
    {
        ALT_NAMES.put("ff", "f_f");
        ALT_NAMES.put("ffi", "f_f_i");
        ALT_NAMES.put("ffl", "f_f_l");
        ALT_NAMES.put("fi", "f_i");
        ALT_NAMES.put("fl", "f_l");
        ALT_NAMES.put("st", "s_t");
        ALT_NAMES.put("IJ", "I_J");
        ALT_NAMES.put("ij", "i_j");
        ALT_NAMES.put("ellipsis", "elipsis"); // misspelled in ArialMT
    }

    /**
     * embedded font.
     */
    private final Type1Font type1font;
    
    /**
     * embedded or system font for rendering.
     */
    private final FontBoxFont genericFont;
    
    private final boolean isEmbedded;
    private final boolean isDamaged;
    private final AffineTransform fontMatrixTransform;
    /**
     * to improve encoding speed.
     */
    private final Map <Integer,byte[]> codeToBytesMap;
    private Matrix fontMatrix;
    private BoundingBox fontBBox;

    /**
     * Creates a Type 1 standard 14 font for embedding.
     *
     * @param baseFont One of the standard 14 PostScript names
     */
    private PDType1Font(String baseFont)
    {
        super(baseFont);
        
        dict.setItem(COSName.SUBTYPE, COSName.TYPE1);
        dict.setName(COSName.BASE_FONT, baseFont);
        switch (baseFont)
        {
            case "ZapfDingbats":
                encoding = ZapfDingbatsEncoding.INSTANCE;
                break;
            case "Symbol":
                encoding = SymbolEncoding.INSTANCE;
                break;
            default:
                encoding = WinAnsiEncoding.INSTANCE;
                dict.setItem(COSName.ENCODING, COSName.WIN_ANSI_ENCODING);
                break;
        }

        // standard 14 fonts may be accessed concurrently, as they are singletons
        codeToBytesMap = new ConcurrentHashMap<>();

        // todo: could load the PFB font here if we wanted to support Standard 14 embedding
        type1font = null;
        FontMapping<FontBoxFont> mapping = FontMappers.instance()
                                                      .getFontBoxFont(getBaseFont(),
                                                                      getFontDescriptor());
        genericFont = mapping.getFont();
        
        if (mapping.isFallback())
        {
            String fontName;
            try
            {
                fontName = genericFont.getName();
            }
            catch (IOException e)
            {
                fontName = "?";
            }
            LOG.warn("Using fallback font " + fontName + " for base font " + getBaseFont());
        }
        isEmbedded = false;
        isDamaged = false;
        fontMatrixTransform = new AffineTransform();
    }

    /**
     * Creates a new Type 1 font for embedding.
     *
     * @param doc PDF document to write to
     * @param pfbIn PFB file stream
     * @throws IOException
     */
    public PDType1Font(PDDocument doc, InputStream pfbIn) throws IOException
    {
        PDType1FontEmbedder embedder = new PDType1FontEmbedder(doc, dict, pfbIn, null);
        encoding = embedder.getFontEncoding();
        glyphList = embedder.getGlyphList();
        type1font = embedder.getType1Font();
        genericFont = embedder.getType1Font();
        isEmbedded = true;
        isDamaged = false;
        fontMatrixTransform = new AffineTransform();
        codeToBytesMap = new HashMap<>();
    }

    /**
     * Creates a new Type 1 font for embedding.
     *
     * @param doc PDF document to write to
     * @param pfbIn PFB file stream
     * @param encoding
     * @throws IOException
     */
    public PDType1Font(PDDocument doc, InputStream pfbIn, Encoding encoding) throws IOException
    {
        PDType1FontEmbedder embedder = new PDType1FontEmbedder(doc, dict, pfbIn, encoding);
        this.encoding = encoding;
        glyphList = embedder.getGlyphList();
        type1font = embedder.getType1Font();
        genericFont = embedder.getType1Font();
        isEmbedded = true;
        isDamaged = false;
        fontMatrixTransform = new AffineTransform();
        codeToBytesMap = new HashMap<>();
    }

    /**
     * Creates a Type 1 font from a Font dictionary in a PDF.
     * 
     * @param fontDictionary font dictionary.
     * @throws IOException if there was an error initializing the font.
     * @throws IllegalArgumentException if /FontFile3 was used.
     */
    public PDType1Font(COSDictionary fontDictionary) throws IOException
    {
        super(fontDictionary);
        codeToBytesMap = new HashMap<>();

        PDFontDescriptor fd = getFontDescriptor();
        Type1Font t1 = null;
        boolean fontIsDamaged = false;
        if (fd != null)
        {
            // a Type1 font may contain a Type1C font
            PDStream fontFile3 = fd.getFontFile3();
            if (fontFile3 != null)
            {
                throw new IllegalArgumentException("Use PDType1CFont for FontFile3");
            }

            // or it may contain a PFB
            PDStream fontFile = fd.getFontFile();
            if (fontFile != null)
            {
                try
                {
                    COSStream stream = fontFile.getCOSObject();
                    int length1 = stream.getInt(COSName.LENGTH1);
                    int length2 = stream.getInt(COSName.LENGTH2);

                    // repair Length1 and Length2 if necessary
                    byte[] bytes = fontFile.toByteArray();
                    length1 = repairLength1(bytes, length1);
                    length2 = repairLength2(bytes, length1, length2);
                    
                    if (bytes.length > 0 && (bytes[0] & 0xff) == PFB_START_MARKER)
                    {
                        // some bad files embed the entire PFB, see PDFBOX-2607
                        t1 = Type1Font.createWithPFB(bytes);
                    }
                    else
                    {
                        // the PFB embedded as two segments back-to-back
                        byte[] segment1 = Arrays.copyOfRange(bytes, 0, length1);
                        byte[] segment2 = Arrays.copyOfRange(bytes, length1, length1 + length2);

                        // empty streams are simply ignored
                        if (length1 > 0 && length2 > 0)
                        {
                            t1 = Type1Font.createWithSegments(segment1, segment2);
                        }
                    }
                }
                catch (DamagedFontException e)
                {
                    LOG.warn("Can't read damaged embedded Type1 font " + fd.getFontName());
                    fontIsDamaged = true;
                }
                catch (IOException e)
                {
                    LOG.error("Can't read the embedded Type1 font " + fd.getFontName(), e);
                    fontIsDamaged = true;
                }
            }
        }
        isEmbedded = t1 != null;
        isDamaged = fontIsDamaged;
        type1font = t1;

        // find a generic font to use for rendering, could be a .pfb, but might be a .ttf
        if (type1font != null)
        {
            genericFont = type1font;
        }
        else
        {
            FontMapping<FontBoxFont> mapping = FontMappers.instance()
                                                          .getFontBoxFont(getBaseFont(), fd);
            genericFont = mapping.getFont();
            
            if (mapping.isFallback())
            {
                LOG.warn("Using fallback font " + genericFont.getName() + " for " + getBaseFont());
            }
        }
        readEncoding();
        fontMatrixTransform = getFontMatrix().createAffineTransform();
        fontMatrixTransform.scale(1000, 1000);
    }

    /**
     * Some Type 1 fonts have an invalid Length1, which causes the binary segment of the font
     * to be truncated, see PDFBOX-2350, PDFBOX-3677.
     *
     * @param bytes Type 1 stream bytes
     * @param length1 Length1 from the Type 1 stream
     * @return repaired Length1 value
     */
    private int repairLength1(byte[] bytes, int length1)
    {
        // scan backwards from the end of the first segment to find 'exec'
        int offset = Math.max(0, length1 - 4);
        if (offset <= 0 || offset > bytes.length - 4)
        {
            offset = bytes.length - 4;
        }

        offset = findBinaryOffsetAfterExec(bytes, offset);
        if (offset == 0 && length1 > 0)
        {
            // 2nd try with brute force
            offset = findBinaryOffsetAfterExec(bytes, bytes.length - 4);
        }

        if (length1 - offset != 0 && offset > 0)
        {
            if (LOG.isWarnEnabled())
            {
                LOG.warn("Ignored invalid Length1 " + length1 + " for Type 1 font " + getName());
            }
            return offset;
        }

        return length1;
    }

    private static int findBinaryOffsetAfterExec(byte[] bytes, int startOffset)
    {
        int offset = startOffset;
        while (offset > 0)
        {
            if (bytes[offset + 0] == 'e'
                    && bytes[offset + 1] == 'x'
                    && bytes[offset + 2] == 'e'
                    && bytes[offset + 3] == 'c')
            {
                offset += 4;
                // skip additional CR LF space characters
                while (offset < bytes.length && 
                        (bytes[offset] == '\r' || bytes[offset] == '\n' || 
                         bytes[offset] == ' ' || bytes[offset] == '\t'))
                {
                    offset++;
                }
                break;
            }
            offset--;
        }
        return offset;
    }

    /**
     * Some Type 1 fonts have an invalid Length2, see PDFBOX-3475. A negative /Length2 brings an
     * IllegalArgumentException in Arrays.copyOfRange(), a huge value eats up memory because of
     * padding.
     *
     * @param bytes Type 1 stream bytes
     * @param length1 Length1 from the Type 1 stream
     * @param length2 Length2 from the Type 1 stream
     * @return repaired Length2 value
     */
    private int repairLength2(byte[] bytes, int length1, int length2)
    {
        // repair Length2 if necessary
        if (length2 < 0 || length2 > bytes.length - length1)
        {
            LOG.warn("Ignored invalid Length2 " + length2 + " for Type 1 font " + getName());
            return bytes.length - length1;
        }
        return length2;
    }

    /**
     * Returns the PostScript name of the font.
     */
    public final String getBaseFont()
    {
        return dict.getNameAsString(COSName.BASE_FONT);
    }

    @Override
    public float getHeight(int code) throws IOException
    {
        String name = codeToName(code);
        if (getStandard14AFM() != null)
        {
            String afmName = getEncoding().getName(code);
            return getStandard14AFM().getCharacterHeight(afmName); // todo: isn't this the y-advance, not the height?
        }
        else
        {
            // todo: should be scaled by font matrix
            return (float) genericFont.getPath(name).getBounds().getHeight();
        }
    }

    @Override
    protected byte[] encode(int unicode) throws IOException
    {
        byte[] bytes = codeToBytesMap.get(unicode);
        if (bytes != null)
        {
            return bytes;
        }

        String name = getGlyphList().codePointToName(unicode);
        if (isStandard14())
        {
            // genericFont not needed, thus simplified code
            // this is important on systems with no installed fonts
            if (!encoding.contains(name))
            {
                throw new IllegalArgumentException(
                        String.format("U+%04X ('%s') is not available in this font %s encoding: %s",
                                unicode, name, getName(), encoding.getEncodingName()));
            }
            if (".notdef".equals(name))
            {
                throw new IllegalArgumentException(
                        String.format("No glyph for U+%04X in font %s", unicode, getName()));
            }
        }
        else
        {
            if (!encoding.contains(name))
            {
                throw new IllegalArgumentException(
                        String.format("U+%04X ('%s') is not available in this font %s (generic: %s) encoding: %s",
                                unicode, name, getName(), genericFont.getName(), encoding.getEncodingName()));
            }

            String nameInFont = getNameInFont(name);

            if (".notdef".equals(nameInFont) || !genericFont.hasGlyph(nameInFont))
            {
                throw new IllegalArgumentException(
                        String.format("No glyph for U+%04X in font %s (generic: %s)", unicode, getName(), genericFont.getName()));
            }
        }

        Map<String, Integer> inverted = encoding.getNameToCodeMap();
        int code = inverted.get(name);
        bytes = new byte[] { (byte)code };
        codeToBytesMap.put(code, bytes);
        return bytes;
    }

    @Override
    public float getWidthFromFont(int code) throws IOException
    {
        String name = codeToName(code);

        // width of .notdef is ignored for substitutes, see PDFBOX-1900
        if (!isEmbedded && name.equals(".notdef"))
        {
            return 250;
        }
        float width = genericFont.getWidth(name);

        Point2D p = new Point2D.Float(width, 0);
        fontMatrixTransform.transform(p, p);
        return (float)p.getX();
    }

    @Override
    public boolean isEmbedded()
    {
        return isEmbedded;
    }

    @Override
    public float getAverageFontWidth()
    {
        if (getStandard14AFM() != null)
        {
            return getStandard14AFM().getAverageCharacterWidth();
        }
        else
        {
            return super.getAverageFontWidth();
        }
    }

    @Override
    public int readCode(InputStream in) throws IOException
    {
        return in.read();
    }

    @Override
    protected Encoding readEncodingFromFont() throws IOException
    {
        if (!isEmbedded() && getStandard14AFM() != null)
        {
            // read from AFM
            return new Type1Encoding(getStandard14AFM());
        }
        else
        {
            // extract from Type1 font/substitute
            if (genericFont instanceof EncodedFont)
            {
                return Type1Encoding.fromFontBox(((EncodedFont) genericFont).getEncoding());
            }
            else
            {
                // default (only happens with TTFs)
                return StandardEncoding.INSTANCE;
            }
        }
    }

    /**
     * Returns the embedded or substituted Type 1 font, or null if there is none.
     */
    public Type1Font getType1Font()
    {
        return type1font;
    }

    @Override
    public FontBoxFont getFontBoxFont()
    {
        return genericFont;
    }

    @Override
    public String getName()
    {
        return getBaseFont();
    }

    @Override
    public BoundingBox getBoundingBox() throws IOException
    {
        if (fontBBox == null)
        {
            fontBBox = generateBoundingBox();
        }
        return fontBBox;
    }

    private BoundingBox generateBoundingBox() throws IOException
    {
        if (getFontDescriptor() != null) {
            PDRectangle bbox = getFontDescriptor().getFontBoundingBox();
            if (bbox != null &&
                    (bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 ||
                     bbox.getUpperRightX() != 0 || bbox.getUpperRightY() != 0))
            {
                return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(),
                                       bbox.getUpperRightX(), bbox.getUpperRightY());
            }
        }
        return genericFont.getFontBBox();
    }

    //@Override
    public String codeToName(int code) throws IOException
    {
        String name = getEncoding().getName(code);
        return getNameInFont(name);
    }

    /**
     * Maps a PostScript glyph name to the name in the underlying font, for example when
     * using a TTF font we might map "W" to "uni0057".
     */
    private String getNameInFont(String name) throws IOException
    {
        if (isEmbedded() || genericFont.hasGlyph(name))
        {
            return name;
        }
        else
        {
            // try alternative name
            String altName = ALT_NAMES.get(name);
            if (altName != null && !name.equals(".notdef") && genericFont.hasGlyph(altName))
            {
                return altName;
            }
            else
            {
                // try unicode name
                String unicodes = getGlyphList().toUnicode(name);
                if (unicodes != null && unicodes.length() == 1)
                {
                    String uniName = getUniNameOfCodePoint(unicodes.codePointAt(0));
                    if (genericFont.hasGlyph(uniName))
                    {
                        return uniName;
                    }
                }
            }
        }
        return ".notdef";
    }

    @Override
    public GeneralPath getPath(String name) throws IOException
    {
        // Acrobat does not draw .notdef for Type 1 fonts, see PDFBOX-2421
        // I suspect that it does do this for embedded fonts though, but this is untested
        if (name.equals(".notdef") && !isEmbedded)
        {
            return new GeneralPath();
        }
        else
        {
            return genericFont.getPath(getNameInFont(name));
        }
    }

    @Override
    public GeneralPath getPath(int code) throws IOException
    {
        String name = getEncoding().getName(code);
        return getPath(name);
    }

    @Override
    public GeneralPath getNormalizedPath(int code) throws IOException
    {
        String name = getEncoding().getName(code);
        GeneralPath path = getPath(name);
        if (path == null)
        {
            return getPath(".notdef");
        }
        return path;
    }
    
    @Override
    public boolean hasGlyph(String name) throws IOException
    {
        return genericFont.hasGlyph(getNameInFont(name));
    }

    @Override
    public boolean hasGlyph(int code) throws IOException
    {
        return !getEncoding().getName(code).equals(".notdef");
    }

    @Override
    public final Matrix getFontMatrix()
    {
        if (fontMatrix == null)
        {
            // PDF specified that Type 1 fonts use a 1000upem matrix, but some fonts specify
            // their own custom matrix anyway, for example PDFBOX-2298
            List<Number> numbers = null;
            try
            {
                numbers = genericFont.getFontMatrix();
            }
            catch (IOException e)
            {
                fontMatrix = DEFAULT_FONT_MATRIX;
            }
            
            if (numbers != null && numbers.size() == 6)
            {
                fontMatrix = new Matrix(
                        numbers.get(0).floatValue(), numbers.get(1).floatValue(),
                        numbers.get(2).floatValue(), numbers.get(3).floatValue(),
                        numbers.get(4).floatValue(), numbers.get(5).floatValue());
            }
            else
            {
                return super.getFontMatrix();
            }
        }
        return fontMatrix;
    }

    @Override
    public boolean isDamaged()
    {
        return isDamaged;
    }
}