/*
 * Created on 13/nov/2012
 * Copyright 2011 by Andrea Vacondio (andrea.vacondio@gmail.com).
 *
 * This file is part of the Sejda source code
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.sejda.impl.sambox.util;

import static org.apache.commons.lang3.RandomStringUtils.randomAlphanumeric;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeTrue;
import static org.sejda.impl.sambox.util.FontUtils.canDisplay;
import static org.sejda.impl.sambox.util.FontUtils.fontOrFallback;
import static org.sejda.impl.sambox.util.FontUtils.getStandardType1Font;

import java.awt.Color;
import java.awt.Point;
import java.awt.Rectangle;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.fontbox.ttf.TrueTypeFont;
import org.junit.Test;
import org.sejda.core.support.io.IOUtils;
import org.sejda.impl.sambox.component.DefaultPdfSourceOpener;
import org.sejda.impl.sambox.component.PDDocumentHandler;
import org.sejda.impl.sambox.component.PageTextWriter;
import org.sejda.impl.sambox.component.PdfTextExtractorByArea;
import org.sejda.impl.sambox.component.TextWithFont;
import org.sejda.io.SeekableSources;
import org.sejda.model.exception.TaskException;
import org.sejda.model.exception.TaskIOException;
import org.sejda.model.input.PdfStreamSource;
import org.sejda.model.pdf.StandardType1Font;
import org.sejda.sambox.cos.COSName;
import org.sejda.sambox.input.PDFParser;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.PDPage;
import org.sejda.sambox.pdmodel.PDResources;
import org.sejda.sambox.pdmodel.font.FontMappers;
import org.sejda.sambox.pdmodel.font.FontMapping;
import org.sejda.sambox.pdmodel.font.PDFont;
import org.sejda.sambox.pdmodel.font.PDType1Font;
import org.sejda.sambox.pdmodel.graphics.form.PDFormXObject;

/**
 * Tests for {@link FontUtils}: standard Type 1 font mapping, font lookup for text in many scripts, fallback
 * resolution, per-document caching and splitting of text into runs with their resolved fonts.
 *
 * @author Andrea Vacondio
 */
public class FontUtilsTest {

    /** Font used as the "preferred" font by the fallback and resolve tests. */
    private static final PDFont HELVETICA = FontUtils.HELVETICA;

    /**
     * Every {@link StandardType1Font} constant maps to the matching {@link PDType1Font} constant.
     */
    @Test
    public void testGetStandardType1Fontg() {
        assertEquals(PDType1Font.COURIER, getStandardType1Font(StandardType1Font.CURIER));
        assertEquals(PDType1Font.COURIER_BOLD, getStandardType1Font(StandardType1Font.CURIER_BOLD));
        assertEquals(PDType1Font.COURIER_BOLD_OBLIQUE, getStandardType1Font(StandardType1Font.CURIER_BOLD_OBLIQUE));
        assertEquals(PDType1Font.COURIER_OBLIQUE, getStandardType1Font(StandardType1Font.CURIER_OBLIQUE));
        assertEquals(PDType1Font.HELVETICA, getStandardType1Font(StandardType1Font.HELVETICA));
        assertEquals(PDType1Font.HELVETICA_BOLD, getStandardType1Font(StandardType1Font.HELVETICA_BOLD));
        assertEquals(PDType1Font.HELVETICA_BOLD_OBLIQUE,
                getStandardType1Font(StandardType1Font.HELVETICA_BOLD_OBLIQUE));
        assertEquals(PDType1Font.HELVETICA_OBLIQUE, getStandardType1Font(StandardType1Font.HELVETICA_OBLIQUE));
        assertEquals(PDType1Font.TIMES_BOLD, getStandardType1Font(StandardType1Font.TIMES_BOLD));
        assertEquals(PDType1Font.TIMES_BOLD_ITALIC, getStandardType1Font(StandardType1Font.TIMES_BOLD_ITALIC));
        assertEquals(PDType1Font.TIMES_ITALIC, getStandardType1Font(StandardType1Font.TIMES_ITALIC));
        assertEquals(PDType1Font.TIMES_ROMAN, getStandardType1Font(StandardType1Font.TIMES_ROMAN));
        assertEquals(PDType1Font.SYMBOL, getStandardType1Font(StandardType1Font.SYMBOL));
        assertEquals(PDType1Font.ZAPF_DINGBATS, getStandardType1Font(StandardType1Font.ZAPFDINGBATS));
    }

    /**
     * Looks up a font for the given text against a brand new document and always clears the loaded font cache
     * afterwards, so that lookups made through this helper do not leave cached fonts behind.
     *
     * @param s
     *            the text to find a font for
     * @return whatever {@link FontUtils#findFontFor(PDDocument, String)} returns for a new document
     */
    private PDFont findFontFor(String s) {
        try {
            return FontUtils.findFontFor(new PDDocument(), s);
        } finally {
            FontUtils.clearLoadedFontCache();
        }
    }

    /**
     * Helvetica can display latin text, cannot display devanagari text, and a null font can display nothing.
     */
    @Test
    public void testCanDisplay() {
        assertTrue(canDisplay("Chuck", getStandardType1Font(StandardType1Font.HELVETICA)));
        assertFalse(canDisplay("कसौटी", getStandardType1Font(StandardType1Font.HELVETICA)));
        assertFalse(canDisplay("Chuck", null));
    }

    /**
     * A font is found for sample text in each of the listed scripts.
     */
    @Test
    public void testFindFontFor() {
        assertNotNull(findFontFor("ทดสอบ")); // thai
        assertNotNull(findFontFor("αυτό είναι ένα τεστ")); // greek
        assertNotNull(findFontFor("വീട്")); // malayalam
        assertNotNull(findFontFor("मानक")); // hindi
        assertNotNull(findFontFor("జ")); // telugu
        assertNotNull(findFontFor("উ")); // bengali
        assertNotNull(findFontFor("עברית")); // hebrew
        assertNotNull(findFontFor("简化字")); // simplified chinese
        assertNotNull(findFontFor("한국어/조선말")); // korean
        assertNotNull(findFontFor("日本語")); // japanese
        assertNotNull(findFontFor("latin ąćęłńóśźż")); // latin
        assertNotNull(findFontFor("\uFFFD \u2997")); // symbols
        assertNotNull(findFontFor("Newlines\nare\r\nignored")); // newlines
        assertNotNull(findFontFor("\u2984 \u2583 \u2738 ☗⦄✸▃ ")); // symbols
        assertNotNull(findFontFor("ភាសាខ្មែរ")); // khmer
        assertNotNull(findFontFor("ጩ")); // ethiopic
        assertNotNull(findFontFor("پنجابی, ਪੰਜਾਬੀ")); // punjabi
        assertNotNull(findFontFor("தமிழ்")); // tamil
        assertNotNull(findFontFor("ગુજરાતી")); // gujarati
        assertNotNull(findFontFor("န\u103Aမာဘာသာ")); // myanmar
        assertNotNull(findFontFor("հայերէն")); // armenian
        assertNotNull(findFontFor("සිංහල")); // sinhalese
        assertNotNull(findFontFor("ᠮᠣᠩᠭᠣᠯ")); // mongolian
        assertNotNull(findFontFor("ಕನ್ನಡ")); // kannada
        assertNotNull(findFontFor("한국어 조선말")); // korean
        assertNotNull(findFontFor("ଓଡ଼ିଆ ଭାଷା")); // oryia
    }

    /**
     * A font is still found when a single string mixes many scripts.
     */
    @Test
    public void fontForMultipleLanguagesInOneString() {
        assertNotNull(findFontFor(
                "န\u103Aမာဘာသာ සිංහල ગુજરાતી தமிழ் پنجابی, ਪੰਜਾਬੀ ਹਰਜੋਤ ਸਿੰਘ ភាសាខ្មែរጩ latin ąćęłńóśźż ทดสอบ വീട मानक हिन्दी ് జ উ ភាសាខ្មែរ עברית")); // all in one
    }

    /**
     * Writes each sample string to a page, saves the document to a temporary file, parses it back and verifies
     * that the extracted text equals the written text once whitespace is removed from both.
     */
    @Test
    public void roundTripWriteAndRead() throws TaskException, IOException {
        List<String> strings = Arrays.asList("ଓଡ଼ିଆଭାଷା", "한국어", "ગુજરાતી ਪੰਜਾਬੀ தமிழ்", "සිංහල", "န\u103Aမာဘာသာ",
                "هذا هو كل الحق إلى اليسار", "123 יתימאה ןחבמ", "032 ציר הורמון הגדילה",
                "This is الحق Mixed יתימאה ןחבמ",
                "ਹਰਜੋਤ ਸਿੰਘ ភាសាខ្មែរ latin ąćęłńóśźż ทดสอบ വീട मानक हिन्दी ് జ উ ☗⦄✸▃ ", "ציר הורמון הגדילה");

        for (String str : strings) {
            PDDocument doc = new PDDocument();
            PDPage page = new PDPage();
            new PageTextWriter(doc).write(page, new Point(10, 10), str,
                    getStandardType1Font(StandardType1Font.HELVETICA), 10.0d, Color.BLACK);
            doc.addPage(page);

            // NOTE(review): the written document is not closed explicitly here; confirm whether
            // savePDDocument already closes it before adding a close of our own.
            PDDocumentHandler handler = new PDDocumentHandler(doc);
            File tmp = IOUtils.createTemporaryBuffer();
            handler.savePDDocument(tmp);

            // the re-parsed document is backed by the temporary file, release it on every iteration
            try (PDDocument doc2 = PDFParser.parse(SeekableSources.seekableSourceFrom(tmp))) {
                String text = new PdfTextExtractorByArea().extractTextFromArea(doc2.getPage(0),
                        new Rectangle(0, 0, 1000, 1000));
                assertEquals(noWhitespace(str), noWhitespace(text));
            }
        }
    }

    /**
     * @param in
     *            the input text
     * @return the input with every character matched by the regex {@code \s} removed
     */
    private String noWhitespace(String in) {
        return in.replaceAll("\\s", "");
    }

    /**
     * When the preferred font can display the text, the preferred font itself is returned.
     */
    @Test
    public void testFontOrFallbackPositive() {
        assertEquals(HELVETICA, fontOrFallback("Chuck", HELVETICA, new PDDocument()));
    }

    /**
     * When the preferred font cannot display the text, some non null font is returned.
     */
    @Test
    public void testFontOrFallbackNegative() {
        assertNotNull(fontOrFallback("कसौटी", HELVETICA, new PDDocument()));
    }

    /**
     * For this text neither the preferred font nor any fallback is expected, the result is null.
     */
    @Test
    public void testFontOrFallbackNotFoundFallback() {
        assertNull(fontOrFallback("\u1B2A\u1B35\u1B31\u1B29\u1B2E\u1B36, \u1B29\u1B32\u1B29\u1B2E\u1B36", HELVETICA,
                new PDDocument()));
    }

    /**
     * Two lookups against the same document for text of the same script return the very same font instance.
     */
    @Test
    public void testCaching() {
        PDDocument doc = new PDDocument();
        PDFont expected = FontUtils.findFontFor(doc, "ทดสอบ");
        assertNotNull(expected);

        PDFont actual = FontUtils.findFontFor(doc, "ทด");
        // identity, not equality: assertSame reports both instances on failure
        assertSame("Font is cached, same instance is returned", expected, actual);
    }

    /**
     * A font is found for a longer thai sentence.
     */
    @Test
    public void testCanDisplayThai() {
        assertThat(findFontFor("นี่คือการทดสอบ"), is(notNullValue()));
    }

    /**
     * A font is found for georgian text.
     */
    @Test
    public void canDisplayGeorgian() {
        assertNotNull(findFontFor("ქართული ენა"));
    }

    /**
     * {@code canDisplay} answers {@code false} for the font {@code F0} (Arial-BoldMT) found in the resources of
     * form {@code Form2} of the sample document.
     */
    @Test
    public void testCanDisplayType0FontsThatDontThrow() throws TaskIOException, IOException {
        try (PDDocument doc = getTestDoc("pdf/2-up-sample.pdf")) {
            PDResources res = doc.getPage(0).getResources();
            PDFormXObject form = (PDFormXObject) res.getXObject(COSName.getPDFName("Form2"));
            PDResources formRes = form.getResources();
            PDFont font = formRes.getFont(COSName.getPDFName("F0"));

            assertThat(font.getName(), is("Arial-BoldMT"));
            assertThat(FontUtils.canDisplay("Redacted out :)", font), is(false));
        }
    }

    /**
     * For the subset font {@code PXAAAA+Verdana} of the sample document, the original font loaded through
     * {@code FontSubsetting} is named {@code Verdana}. Skipped when Verdana is not installed on the system.
     */
    @Test
    public void testLoadingFullFontFromSystemForSubsetFonts() throws TaskIOException, IOException {
        // relies on Verdana font being present on the system: report the test as skipped, not passed, otherwise
        assumeTrue(isFontAvailableOnSystem("Verdana"));

        try (PDDocument doc = getTestDoc("pdf/subset-font.pdf")) {
            PDResources res = doc.getPage(0).getResources();
            PDFormXObject form = (PDFormXObject) res.getXObject(COSName.getPDFName("Xf1"));
            PDResources formRes = form.getResources();
            PDFont font = formRes.getFont(COSName.getPDFName("F1"));

            assertThat(font.getName(), is("PXAAAA+Verdana"));

            PDFont original = new FontUtils.FontSubsetting(font).loadOriginal(doc);
            assertThat(original.getName(), is("Verdana"));
        }
    }

    /**
     * Opens a classpath resource as a document. The caller is responsible for closing the returned document.
     *
     * @param name
     *            classpath location of the pdf resource
     * @return the opened document
     * @throws TaskIOException
     *             declared by the opener used to open the source
     */
    private PDDocument getTestDoc(String name) throws TaskIOException {
        PdfStreamSource source = PdfStreamSource.newInstanceNoPassword(
                getClass().getClassLoader().getResourceAsStream(name), randomAlphanumeric(16) + ".pdf");

        return new DefaultPdfSourceOpener().open(source).getUnderlyingPDDocument();
    }

    /**
     * @param name
     *            the font name to look up
     * @return true if the font mapper returns a TrueType mapping for the name that is not flagged as a fallback
     */
    private boolean isFontAvailableOnSystem(String name) {
        FontMapping<TrueTypeFont> result = FontMappers.instance().getTrueTypeFont(name, null);
        return result != null && !result.isFallback();
    }

    /**
     * Text alternating between digits and a greek letter is split into runs, where digit runs keep Helvetica and
     * each occurrence of the greek letter gets a different font, including the repeated one.
     */
    @Test
    public void resolveFontsWhenTextRepeats() throws TaskIOException {
        PDDocument doc = new PDDocument();
        List<TextWithFont> textAndFonts = FontUtils.resolveFonts("123α456α789", HELVETICA, doc);

        assertThat(textAndFonts.get(0).getFont().getName(), is("Helvetica"));
        assertThat(textAndFonts.get(0).getText(), is("123"));

        assertThat(textAndFonts.get(1).getFont().getName(), is(not("Helvetica")));
        assertThat(textAndFonts.get(1).getText(), is("α"));

        assertThat(textAndFonts.get(2).getFont().getName(), is("Helvetica"));
        assertThat(textAndFonts.get(2).getText(), is("456"));

        assertThat(textAndFonts.get(3).getFont().getName(), is(not("Helvetica")));
        assertThat(textAndFonts.get(3).getText(), is("α"));
    }

    /**
     * A space between two words is returned as its own run, all three runs using Helvetica.
     */
    @Test
    public void resolveFontsWhenSpaceSeparately() throws TaskIOException {
        PDDocument doc = new PDDocument();
        List<TextWithFont> textAndFonts = FontUtils.resolveFonts("ab cd", HELVETICA, doc);

        assertThat(textAndFonts.get(0).getFont().getName(), is("Helvetica"));
        assertThat(textAndFonts.get(0).getText(), is("ab"));

        assertThat(textAndFonts.get(1).getFont().getName(), is("Helvetica"));
        assertThat(textAndFonts.get(1).getText(), is(" "));

        assertThat(textAndFonts.get(2).getFont().getName(), is("Helvetica"));
        assertThat(textAndFonts.get(2).getText(), is("cd"));
    }

    /**
     * A character no font is resolved for (U+FE0F here) is returned as its own run with a null font, and
     * {@code removeUnsupportedCharacters} strips it from the text.
     */
    @Test
    public void resolveFontsWhenUnsupportedCharacters() throws TaskIOException {
        PDDocument doc = new PDDocument();
        List<TextWithFont> textAndFonts = FontUtils.resolveFonts("ab\uFE0Fcd", HELVETICA, doc);

        assertThat(textAndFonts.get(1).getFont(), is(nullValue()));
        assertThat(textAndFonts.get(1).getText(), is("\uFE0F"));

        assertThat(FontUtils.removeUnsupportedCharacters("ab \uFE0Fcd", doc), is("ab cd"));
    }

    /**
     * Removing unsupported characters leaves right-to-left (hebrew) text untouched.
     */
    @Test
    public void removeUnsupportedCharsDoesNotChangeStringForRTLLanguages() throws TaskIOException {
        PDDocument doc = new PDDocument();
        String text = "עברית";
        assertEquals(text, FontUtils.removeUnsupportedCharacters(text, doc));
    }
}