/*******************************************************************************
 * Copyright (c) 2009, Adobe Systems Incorporated
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * · Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * · Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * · Neither the name of Adobe Systems Incorporated nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *******************************************************************************/

package com.adobe.dp.fb2.convert;

import com.adobe.dp.css.*;
import com.adobe.dp.epub.conv.Version;
import com.adobe.dp.epub.io.BufferedDataSource;
import com.adobe.dp.epub.ncx.TOCEntry;
import com.adobe.dp.epub.opf.*;
import com.adobe.dp.epub.ops.*;
import com.adobe.dp.epub.style.Stylesheet;
import com.adobe.dp.epub.util.ImageDimensions;
import com.adobe.dp.fb2.*;
import com.adobe.dp.otf.DefaultFontLocator;
import com.adobe.dp.otf.FontLocator;
import com.adobe.dp.otf.FontProperties;
import com.adobe.dp.xml.util.StringUtil;

import java.io.*;
import java.util.*;

/**
 * Converts an {@link FB2Document} into an EPUB {@link Publication}.
 * <p>
 * Typical use: optionally call {@link #setTemplate(InputStream)} /
 * {@link #setTemplate(CSSStylesheet)} / {@link #setTemplateFile(String)} and
 * {@link #setFontLocator(FontLocator)}, then
 * {@link #convert(FB2Document, Publication)}, then optionally
 * {@link #embedFonts()}.
 * <p>
 * The converter keeps per-conversion state in instance fields (document,
 * publication, link map, chapter counter); none of it is synchronized.
 */
public class FB2Converter {

	/** Accumulated UTF-16 size above which a forced chapter break is requested. */
	final static private int RESOURCE_THRESHOLD_MAX = 45000;

	/** Minimum UTF-16 size for a section to be treated as "large" on its own. */
	final static private int RESOURCE_THRESHOLD_MIN = 10000;

	// Concrete fonts inserted in front of the generic CSS families by adjustFontList().
	private static CSSQuotedString defaultSansSerifFont = new CSSQuotedString("Arial");

	private static CSSQuotedString defaultSerifFont = new CSSQuotedString("Times New Roman");

	private static CSSQuotedString defaultMonospaceFont = new CSSQuotedString("Courier New");

	/** Source document of the conversion in progress. */
	FB2Document doc;

	/** Target publication of the conversion in progress. */
	Publication epub;

	/** Stylesheet object backing {@link #styles}. */
	Stylesheet stylesheet;

	/** The "OPS/style.css" resource created by convert(). */
	StyleResource styles;

	NCXResource toc;

	/** Counter used to name chapter resources ("OPS/ch&lt;n&gt;.xhtml"). */
	int nameCount = 0;

	/** Maps FB2 element id (String) to its {@link LinkRecord}. */
	Hashtable idMap = new Hashtable();

	/** Template given as an FB2 document; its stylesheets are added to the cascade. */
	FB2Document templateDoc;

	/** Template given as a plain CSS stylesheet. */
	CSSStylesheet templateRules;

	/** Font locator in effect for the current conversion (wraps embedded fonts). */
	FontLocator fontLocator;

	/** Base font locator; set by setFontLocator() or defaulted in convert(). */
	FontLocator defaultFontLocator;

	/** Stylesheet parsed from the "stylesheet.css" class resource; null if loading failed. */
	static CSSStylesheet defaultStylesheet;

	PrintWriter log = new PrintWriter(new OutputStreamWriter(System.out));

	// static FontLocator builtInFontLocator = new BuiltInFontLocator();

	static float[] titleFontSizes = { 2.2f, 1.8f, 1.5f, 1.3f, 1.2f, 1.1f, 1.0f };

	static {
		// Load the built-in stylesheet once per class load. Failures are reported
		// on System.err and leave defaultStylesheet null.
		try {
			InputStream in = FB2Converter.class.getResourceAsStream("stylesheet.css");
			if (in == null) {
				// getResourceAsStream returns null for a missing resource; report it
				// explicitly instead of failing later with an opaque NullPointerException.
				System.err.println("FB2Converter: resource stylesheet.css not found");
			} else {
				CSSParser parser = new CSSParser();
				try {
					defaultStylesheet = parser.readStylesheet(in);
				} finally {
					// close even when parsing throws
					in.close();
				}
				Iterator errs = parser.errors();
				if (errs != null) {
					while (errs.hasNext()) {
						CSSParsingError err = (CSSParsingError) errs.next();
						System.err.println(err.getLine() + ": " + err.getError());
					}
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Bookkeeping for one FB2 id: the hyperlinks that point at it and the
	 * output element that carries it (null until that element is converted).
	 */
	static class LinkRecord {
		/** HyperlinkElement objects referring to this id. */
		Vector sources = new Vector();

		Element target;
	}

	/**
	 * Returns the link record for the given id, creating and registering an
	 * empty one on first use.
	 */
	private LinkRecord getLinkRecord(String id) {
		LinkRecord record = (LinkRecord) idMap.get(id);
		if (record == null) {
			record = new LinkRecord();
			idMap.put(id, record);
		}
		return record;
	}

	/**
	 * Creates "OPS/cover.xhtml", attaches the shared style resource and
	 * appends it to the spine.
	 */
	private OPSResource newCoverPageResource() {
		OPSResource res = epub.createOPSResource("OPS/cover.xhtml");
		res.getDocument().addStyleResource(styles);
		epub.addToSpine(res);
		return res;
	}

	/**
	 * Creates the next chapter resource ("OPS/ch1.xhtml", "OPS/ch2.xhtml", ...),
	 * attaches the shared style resource and appends it to the spine.
	 */
	private OPSResource newOPSResource() {
		String name = "OPS/ch" + (++nameCount) + ".xhtml";
		OPSResource res = epub.createOPSResource(name);
		res.getDocument().addStyleResource(styles);
		epub.addToSpine(res);
		return res;
	}

	/**
	 * Returns the publication's image resource for the named FB2 binary,
	 * creating it under "OPS/images/" on first use.
	 *
	 * @param name
	 *            name of the binary resource in the FB2 document
	 * @return the image resource, or null if the document has no such binary
	 */
	private BitmapImageResource getBitmapImageResource(String name) {
		FB2Binary bin = doc.getBinaryResource(name);
		if (bin == null)
			return null;
		String path = "OPS/images/" + name;
		BitmapImageResource resource = (BitmapImageResource) epub.getResourceByName(path);
		if (resource == null) {
			BufferedDataSource data = new BufferedDataSource();
			try {
				data.getOutputStream().write(bin.getData());
			} catch (IOException e) {
				// keep the original exception as the cause so the stack trace is not lost
				throw new Error("unexpected exception: " + e, e);
			}
			resource = epub.createBitmapImageResource(path, bin.getMediaType(), data);
		}
		return resource;
	}

	/**
	 * Normalizes whitespace: characters with code &lt;= ' ' are dropped at the
	 * start and at the end, and every run of them in between is replaced by a
	 * single space.
	 */
	private String trim(String s) {
		StringBuffer sb = new StringBuffer();
		int len = s.length();
		int i = 0;
		// skip the leading run
		while (i < len && s.charAt(i) <= ' ')
			i++;
		boolean hadSpace = false;
		while (i < len) {
			char c = s.charAt(i);
			if (c > ' ') {
				// emit the pending separator only when more text follows it,
				// which is what drops a trailing run
				if (hadSpace) {
					sb.append(' ');
					hadSpace = false;
				}
				sb.append(c);
			} else
				hadSpace = true;
			i++;
		}
		return sb.toString();
	}

	/**
	 * Returns the text of a quoted CSS string, or toString() of any other value.
	 */
	private String getStringValue(Object val) {
		if (val instanceof CSSQuotedString) {
			return ((CSSQuotedString) val).getText();
		}
		return val.toString();
	}

	/** Tells whether the name is one of the generic families handled here. */
	private boolean isBuiltIn(String name) {
		return name.equals("serif") || name.equals("sans-serif") || name.equals("monospace");
	}

	/**
	 * Makes sure the rule's font-family list names at least one concrete font.
	 * <p>
	 * If the rule has no font-family, or the current font locator has any of
	 * the listed non-generic families (normal weight, regular style), the rule
	 * is left untouched. Otherwise a default font is inserted in front of the
	 * first generic family found (sans-serif, serif or monospace); when the
	 * list contains no generic family the default serif font is appended.
	 */
	private void adjustFontList(BaseRule rule) {
		Object fonts = rule.get("font-family");
		if (fonts == null)
			return;
		int count = CSSValueList.valueCount(fonts, ',');
		for (int i = 0; i < count; i++) {
			String family = getStringValue(CSSValueList.valueAt(fonts, i, ','));
			if (isBuiltIn(family))
				continue;
			FontProperties fp = new FontProperties(family, FontProperties.WEIGHT_NORMAL,
					FontProperties.STYLE_REGULAR);
			if (fontLocator.hasFont(fp))
				return; // found at least one font
		}
		Vector list = new Vector();
		boolean inserted = false;
		for (int i = 0; i < count; i++) {
			Object fn = CSSValueList.valueAt(fonts, i, ',');
			String family = getStringValue(fn);
			if (!inserted) {
				if (family.equals("sans-serif")) {
					list.add(defaultSansSerifFont);
					inserted = true;
				} else if (family.equals("serif")) {
					list.add(defaultSerifFont);
					inserted = true;
				} else if (family.equals("monospace")) {
					list.add(defaultMonospaceFont);
					inserted = true;
				}
			}
			list.add(fn);
		}
		if (!inserted)
			list.add(defaultSerifFont);
		CSSValue[] vals = new CSSValue[list.size()];
		list.copyInto(vals);
		rule.set("font-family", new CSSValueList(',', vals));
	}

	/**
	 * Copies every property of the rule that {@code src} holds for selector
	 * {@code ss} into {@code rule}; does nothing when {@code src} has no such
	 * rule.
	 */
	void mergeRuleStyle(BaseRule rule, CSSStylesheet src, Selector ss) {
		SelectorRule docRule = src.getRuleForSelector(ss, false);
		if (docRule != null) {
			Iterator props = docRule.properties();
			while (props.hasNext()) {
				String prop = (String) props.next();
				rule.set(prop, docRule.get(prop));
			}
		}
	}

	/**
	 * Converts one FB2 node and, recursively, its children into output
	 * elements appended to {@code parent}.
	 *
	 * @param ops
	 *            document in which elements are created
	 * @param parent
	 *            element receiving the converted node
	 * @param fb
	 *            an FB2Element, or any other object, which is added to
	 *            {@code parent} as is
	 * @param entry
	 *            TOC entry under which titled sections are registered; null
	 *            suppresses TOC entries
	 * @param level
	 *            nesting level, incremented for sections and passed down
	 * @param insideTitle
	 *            passed through unchanged to the recursive calls
	 * @param largeResource
	 *            when true the created element is flagged to force a chapter
	 *            break and its children are size-tracked for further breaks
	 * @return the element created for {@code fb}; null when {@code fb} is not
	 *         an FB2Element or is an image whose binary cannot be resolved
	 */
	private Element convertElement(OPSDocument ops, Element parent, Object fb, TOCEntry entry, int level,
			boolean insideTitle, boolean largeResource) {
		if (fb instanceof FB2Element) {
			FB2Element fbe = (FB2Element) fb;
			FB2Title title = null;
			if (fbe instanceof FB2Section) {
				title = ((FB2Section) fbe).getTitle();
			}
			String className = fbe.getName();
			String name = null;
			CascadeResult cascade = fbe.getCascade();
			InlineRule estyle = null;
			if (cascade != null) {
				estyle = cascade.getProperties().getPropertySet();
				estyle = estyle.cloneObject();
				// "-epubgen-name" selects the output element name; it is removed
				// from the cloned style before that style is handed to the element.
				CSSName ename = (CSSName) estyle.get("-epubgen-name");
				if (ename != null)
					name = ename.toString();
				estyle.set("-epubgen-name", null);
				adjustFontList(estyle);
			}
			if (name == null)
				name = "span";
			if (fbe instanceof FB2Section) {
				level++;
			} else if (fbe instanceof FB2StyledText) {
				className = ((FB2StyledText) fbe).getStyleName();
			}
			Element self;
			if (name.equals("image")) {
				ImageElement img = null;
				FB2Image image = (FB2Image) fbe;
				String resourceName = image.getResourceName();
				String alt = image.getAlt();
				String caption = image.getTitle();
				if (resourceName != null) {
					BitmapImageResource resource = getBitmapImageResource(resourceName);
					if (resource != null) {
						img = ops.createImageElement("img");
						img.setImageResource(resource);
						if (alt != null)
							img.setAltText(alt);
						if (cascade != null) {
							// NOTE(review): assumes a property set is always returned
							// for the "content" pseudo-element - confirm.
							InlineRule style = cascade.getProperties().getPropertySetForPseudoElement("content");
							style = style.cloneObject();
							adjustFontList(style);
							img.setDesiredCascadeResult(style);
						}
					}
				}
				if (img == null)
					return null;
				// the image is wrapped in a div that also holds the optional caption
				self = ops.createElement("div");
				self.add(img);
				if (caption != null && caption.length() > 0) {
					InlineRule style = null;
					CSSName tname = null;
					if (cascade != null) {
						style = cascade.getProperties().getPropertySetForPseudoElement("title");
						style = style.cloneObject();
						tname = (CSSName) style.get("-epubgen-name");
						style.set("-epubgen-name", null);
						adjustFontList(style);
					}
					HTMLElement captionElement = ops.createElement(tname == null ? "p" : tname.toString());
					captionElement.setClassName("image-title");
					self.add(captionElement);
					captionElement.add(caption);
					captionElement.setDesiredCascadeResult(style);
				}
			} else if (name.equals("a")) {
				HyperlinkElement a = ops.createHyperlinkElement("a");
				String link = ((FB2Hyperlink) fbe).getLinkedId();
				if (link != null) {
					// the target may not be converted yet; it is resolved at the end of convert()
					LinkRecord record = getLinkRecord(link);
					record.sources.add(a);
				}
				self = a;
			} else if (name.equals("td") || name.equals("th")) {
				FB2TableCell fbt = (FB2TableCell) fbe;
				TableCellElement td = ops.createTableCellElement(name, fbt.getAlign(), fbt.getColSpan(),
						fbt.getRowSpan());
				self = td;
			} else {
				self = ops.createElement(name);
			}
			if (largeResource && self instanceof HTMLElement) {
				((HTMLElement) self).setForceChapterBreak(true);
			}
			self.setClassName(className);
			self.setDesiredCascadeResult(estyle);
			parent.add(self);
			if (fbe.getId() != null) {
				LinkRecord record = getLinkRecord(fbe.getId());
				record.target = self;
			}
			if (title != null && entry != null) {
				TOCEntry subentry = toc.createTOCEntry(trim(title.contentAsString()), self.getSelfRef());
				entry.add(subentry);
				entry = subentry;
			}
			Object[] children = fbe.getChildren();
			// running UTF-16 size of the children seen since the last forced break
			int size = 0;
			for (int i = 0; i < children.length; i++) {
				Object child = children[i];
				boolean large = false;
				boolean over = false;
				int esize = 0;
				if (largeResource) {
					large = isLargeSection(child);
					if (large)
						size = 0; // a large section is flagged for a break itself, so restart the count
					else {
						esize = FB2Element.getUTF16Size(child);
						size += esize;
						// size > esize: never break on the first child counted since the last reset
						over = size > esize && size > RESOURCE_THRESHOLD_MAX;
					}
				}
				Element ce = convertElement(ops, self, child, entry, level, insideTitle, large);
				if (over && ce instanceof HTMLElement) {
					((HTMLElement) ce).setForceChapterBreak(true);
					size = esize;
				}
			}
			return self;
		} else {
			parent.add(fb);
			return null;
		}
	}

	/**
	 * Sets the base font locator used by subsequent conversions. When none is
	 * set, convert() falls back to the DefaultFontLocator for the built-in
	 * directories.
	 */
	public void setFontLocator(FontLocator fontLocator) {
		defaultFontLocator = fontLocator;
	}

	/**
	 * Uses the given stylesheet as the CSS template.
	 *
	 * @throws IOException
	 *             declared for interface compatibility; not thrown by this
	 *             implementation
	 */
	public void setTemplate(CSSStylesheet stylesheet) throws IOException {
		templateRules = stylesheet;
	}

	/**
	 * Reads a template from the stream. The first four bytes are sniffed: a
	 * ZIP signature ("PK\3\4"), a leading '&lt;', or a UTF-8 byte order mark
	 * followed by '&lt;' means the template is an FB2 document; anything else
	 * is parsed as a CSS stylesheet. Whichever kind is read replaces any
	 * template of the other kind.
	 * <p>
	 * The stream is not closed by this method.
	 *
	 * @throws IOException
	 *             on read failure
	 * @throws FB2FormatException
	 *             if the FB2 template cannot be parsed
	 */
	public void setTemplate(InputStream templateStream) throws IOException, FB2FormatException {
		BufferedInputStream in = new BufferedInputStream(templateStream);
		byte[] sniff = new byte[4];
		in.mark(4);
		// a short read leaves zero bytes in sniff, which selects the CSS branch
		in.read(sniff);
		in.reset();
		if ((sniff[0] == 'P' && sniff[1] == 'K' && sniff[2] == 3 && sniff[3] == 4)
				|| sniff[0] == '<'
				|| ((sniff[0] == (byte) 0xef && sniff[1] == (byte) 0xbb && sniff[2] == (byte) 0xbf
						&& sniff[3] == '<'))) {
			// template is FB2 file itself
			templateDoc = new FB2Document(in);
			templateRules = null;
		} else {
			templateDoc = null;
			templateRules = new CSSStylesheet();
			CSSParser parser = new CSSParser();
			parser.readStylesheet(in, templateRules);
		}
	}

	/**
	 * Reads a template from the named file; see
	 * {@link #setTemplate(InputStream)}. I/O and format errors are not
	 * propagated: their stack traces are written to the log.
	 */
	public void setTemplateFile(String file) {
		InputStream in = null;
		try {
			in = new FileInputStream(file);
			setTemplate(in);
		} catch (IOException e) {
			e.printStackTrace(log);
		} catch (FB2FormatException e) {
			e.printStackTrace(log);
		} finally {
			// The file handle used to leak. NOTE(review): closing here assumes the
			// FB2Document constructor and CSSParser read the stream eagerly - confirm.
			if (in != null) {
				try {
					in.close();
				} catch (IOException e) {
					e.printStackTrace(log);
				}
			}
		}
	}

	/**
	 * Tells whether the child is a section of at least RESOURCE_THRESHOLD_MIN
	 * UTF-16 units.
	 */
	private boolean isLargeSection(Object child) {
		return child instanceof FB2Section && ((FB2Section) child).getUTF16Size() >= RESOURCE_THRESHOLD_MIN;
	}

	/**
	 * Converts one body section into the given body element.
	 * <p>
	 * Sections named "footnotes" or "notes" get no TOC entries and their level
	 * is raised by two. A TOC entry for the section title is created only when
	 * the (possibly raised) level is above 1 and TOC entries are enabled.
	 */
	private void convertSection(OPSDocument ops, Element body, FB2Section section, TOCEntry entry, int level) {
		int size = section.getUTF16Size();
		FB2Title title = section.getTitle();
		String sectionName = section.getSectionName();
		if (sectionName != null) {
			if (sectionName.equals("footnotes") || sectionName.equals("notes")) {
				entry = null;
				level += 2;
			}
		}
		if (title != null && level > 1) {
			if (entry != null) {
				TOCEntry subentry = toc.createTOCEntry(trim(title.contentAsString()), body.getSelfRef());
				entry.add(subentry);
				entry = subentry;
			}
		}
		CascadeResult cr = section.getCascade();
		if (cr != null) {
			InlineRule style = cr.getProperties().getPropertySet().cloneObject();
			// Was "-epub-name", a key nothing else in this class reads or writes;
			// aligned with the "-epubgen-name" key that convertElement() clears.
			style.set("-epubgen-name", null);
			adjustFontList(style);
			body.setDesiredCascadeResult(style);
		}
		boolean large = size > RESOURCE_THRESHOLD_MAX;
		Object[] children = section.getChildren();
		for (int i = 0; i < children.length; i++)
			convertElement(ops, body, children[i], entry, level, false, large);
	}

	/**
	 * Checks only the shape of a UUID: 36 characters with dashes at offsets 8,
	 * 13, 18 and 23. The remaining characters are not validated.
	 */
	private boolean isUUID(String id) {
		return id.length() == 36 && id.charAt(8) == '-' && id.charAt(13) == '-' && id.charAt(18) == '-'
				&& id.charAt(23) == '-';
	}

	/**
	 * Builds the cover page for the named binary image: an SVG wrapper sized
	 * to the image's pixel dimensions, stored in "OPS/cover.xhtml", and
	 * registers the image as the publication's cover. Does nothing when the
	 * binary is missing, has no data, or its dimensions cannot be determined.
	 */
	private void addCoverPage(String coverpageImage) {
		FB2Binary binary = doc.getBinaryResource(coverpageImage);
		if (binary == null || binary.getData() == null)
			return;
		int[] dim = ImageDimensions.getImageDimensions(binary.getData());
		if (dim == null)
			return;
		OPSResource coverRes = newCoverPageResource();
		OPSDocument coverDoc = coverRes.getDocument();
		Element body = coverDoc.getBody();
		body.setClassName("cover");
		SelectorRule coverBodyRule = stylesheet.getRuleForSelector(stylesheet.getSimpleSelector("body", "cover"),
				true);
		coverBodyRule.set("oeb-column-number", new CSSNumber(1));
		coverBodyRule.set("margin", new CSSLength(0, "px"));
		coverBodyRule.set("padding", new CSSLength(0, "px"));
		SVGElement svg = coverDoc.createSVGElement("svg");
		svg.setAttribute("viewBox", "0 0 " + dim[0] + " " + dim[1]);
		svg.setClassName("cover-svg");
		body.add(svg);
		SelectorRule svgRule = stylesheet.getRuleForSelector(stylesheet.getSimpleSelector("svg", "cover-svg"),
				true);
		svgRule.set("width", new CSSLength(100, "%"));
		svgRule.set("height", new CSSLength(100, "%"));
		SVGImageElement image = coverDoc.createSVGImageElement("image");
		image.setAttribute("width", Integer.toString(dim[0]));
		image.setAttribute("height", Integer.toString(dim[1]));
		BitmapImageResource resource = getBitmapImageResource(coverpageImage);
		image.setImageResource(resource);
		svg.add(image);
		epub.setCoverImage(resource);
	}

	/**
	 * Adds each stylesheet to the cascade engine and chains an
	 * EmbeddedFontLocator for it in front of the current font locator. A null
	 * array is ignored.
	 */
	private void addStylesheets(CascadeEngine cascadeEngine, CSSStylesheet[] sheets) {
		if (sheets == null)
			return;
		for (int i = 0; i < sheets.length; i++) {
			CSSStylesheet sheet = sheets[i];
			cascadeEngine.add(sheet, null);
			fontLocator = new EmbeddedFontLocator(sheet, fontLocator);
		}
	}

	/**
	 * Points every recorded hyperlink at the element that carries its target
	 * id. Links whose target id was never seen are left without an XRef.
	 */
	private void resolveLinks() {
		Enumeration keys = idMap.keys();
		while (keys.hasMoreElements()) {
			String id = (String) keys.nextElement();
			LinkRecord record = (LinkRecord) idMap.get(id);
			if (record.target != null) {
				XRef ref = record.target.getSelfRef();
				Enumeration sources = record.sources.elements();
				while (sources.hasMoreElements()) {
					HyperlinkElement a = (HyperlinkElement) sources.nextElement();
					a.setXRef(ref);
				}
			}
		}
	}

	/**
	 * Performs the conversion of {@link #doc} into {@link #epub}: metadata and
	 * cover page, style cascade, identifiers, body sections, link resolution
	 * and finally style generation and chapter splitting.
	 */
	private void convert() {
		FB2TitleInfo bookInfo = doc.getTitleInfo();
		styles = epub.createStyleResource("OPS/style.css");
		stylesheet = styles.getStylesheet();

		// --- title-info metadata and cover page ---
		boolean dateAdded = false;
		if (bookInfo != null) {
			String title = bookInfo.getBookTitle();
			epub.addDCMetadata("title", title);
			FB2AuthorInfo[] authors = bookInfo.getAuthors();
			if (authors != null) {
				for (int i = 0; i < authors.length; i++) {
					epub.addDCMetadata("creator", authors[i].toString());
				}
			}
			FB2AuthorInfo[] translators = bookInfo.getTranslators();
			if (translators != null) {
				for (int i = 0; i < translators.length; i++) {
					epub.addMetadata(null, "FB2.book-info.translator", translators[i].toString());
				}
			}
			epub.addDCMetadata("language", bookInfo.getLanguage());
			FB2Section annot = bookInfo.getAnnotation();
			if (annot != null) {
				epub.addDCMetadata("description", annot.contentAsString());
			}
			FB2DateInfo date = bookInfo.getDate();
			if (date != null) {
				String mr = date.getMachineReadable();
				epub.addMetadata(null, "FB2.book-info.date", (mr == null ? date.getHumanReadable() : mr));
				Date d = date.getDate();
				if (d != null) {
					epub.addDCMetadata("date", StringUtil.toShortW3CDTF(d, date.isYearOnly()));
					dateAdded = true;
				}
			}
			FB2GenreInfo[] genres = bookInfo.getGenres();
			if (genres != null) {
				for (int i = 0; i < genres.length; i++)
					epub.addMetadata(null, "FB2.book-info.genre", genres[i].toString());
			}
			FB2SequenceInfo[] sequences = bookInfo.getSequences();
			if (sequences != null) {
				for (int i = 0; i < sequences.length; i++)
					epub.addMetadata(null, "FB2.book-info.sequence", sequences[i].toString());
			}
			String coverpageImage = bookInfo.getCoverpageImage();
			if (coverpageImage != null) {
				addCoverPage(coverpageImage);
			}
		}

		// --- style cascade: built-in sheet, then template, then the document's own sheets ---
		if (defaultFontLocator == null)
			defaultFontLocator = DefaultFontLocator.getInstance(DefaultFontLocator.BUILT_IN_DIRS);
		fontLocator = defaultFontLocator;
		CascadeEngine cascadeEngine = new CascadeEngine();
		cascadeEngine.add(defaultStylesheet, null);
		if (templateRules != null) {
			cascadeEngine.add(templateRules, null);
			fontLocator = new EmbeddedFontLocator(templateRules, fontLocator);
		}
		if (templateDoc != null) {
			addStylesheets(cascadeEngine, templateDoc.getStylesheets());
		}
		addStylesheets(cascadeEngine, doc.getStylesheets());
		long start = System.currentTimeMillis();
		doc.applyStyles(cascadeEngine);
		long end = System.currentTimeMillis();
		System.out.println("Applied styles in " + (end - start) + "ms");

		// --- document-info metadata ---
		FB2DocumentInfo docInfo = doc.getDocumentInfo();
		String ident = null;
		if (docInfo != null) {
			FB2AuthorInfo[] authors = docInfo.getAuthors();
			if (authors != null) {
				for (int i = 0; i < authors.length; i++) {
					epub.addMetadata(null, "FB2.document-info.author", authors[i].toString());
				}
			}
			epub.addMetadata(null, "FB2.document-info.program-used", docInfo.getProgramUsed());
			String[] urls = docInfo.getSrcUrls();
			if (urls != null) {
				for (int i = 0; i < urls.length; i++)
					epub.addMetadata(null, "FB2.document-info.src-url", urls[i]);
			}
			epub.addMetadata(null, "FB2.document-info.src-ocr", docInfo.getSrcOcr());
			FB2Section history = docInfo.getHistory();
			if (history != null)
				epub.addMetadata(null, "FB2.document-info.history", history.contentAsString());
			ident = docInfo.getId();
			epub.addMetadata(null, "FB2.document-info.id", ident);
			epub.addMetadata(null, "FB2.document-info.version", docInfo.getVersion());
			// bookInfo is null when the document has no title-info; the unguarded
			// dereference used to throw a NullPointerException in that case.
			// NOTE(review): the key says document-info but the value is the
			// title-info date - presumably the document-info date was intended; confirm.
			FB2DateInfo date = bookInfo == null ? null : bookInfo.getDate();
			if (date != null) {
				String mr = date.getMachineReadable();
				epub.addMetadata(null, "FB2.document-info.date", (mr == null ? date.getHumanReadable() : mr));
			}
		}

		// --- identifiers: a UUID-shaped document id becomes the urn:uuid identifier ---
		if (ident == null || !isUUID(ident)) {
			epub.generateRandomIdentifier();
			if (ident != null)
				epub.addDCMetadata("identifier", ident);
		} else {
			epub.addDCMetadata("identifier", "urn:uuid:" + ident.toLowerCase());
		}

		// --- publish-info metadata ---
		FB2PublishInfo publishInfo = doc.getPublishInfo();
		if (publishInfo != null) {
			epub.addDCMetadata("publisher", publishInfo.getPublisher());
			epub.addMetadata(null, "FB2.publish-info.book-name", publishInfo.getBookName());
			epub.addMetadata(null, "FB2.publish-info.city", publishInfo.getCity());
			epub.addMetadata(null, "FB2.publish-info.year", publishInfo.getYear());
			if (!dateAdded) {
				// the publication year is the fallback when title-info supplied no usable date
				epub.addDCMetadata("date", publishInfo.getYear());
				dateAdded = true;
			}
			String isbn = publishInfo.getISBN();
			if (isbn != null)
				epub.addDCMetadata("identifier", "isbn:" + isbn.toUpperCase());
		}

		// --- content: one chapter resource per body section ---
		FB2Section[] bodySections = doc.getBodySections();
		toc = epub.getTOC();
		TOCEntry entry = toc.getRootTOCEntry();
		for (int i = 0; i < bodySections.length; i++) {
			OPSResource resource = newOPSResource();
			OPSDocument ops = resource.getDocument();
			Element body = ops.getBody();
			convertSection(ops, body, bodySections[i], entry, 1);
		}

		resolveLinks();

		epub.addMetadata(null, "FB2EPUB.version", Version.VERSION);
		epub.addMetadata(null, "FB2EPUB.conversionDate", StringUtil.dateToW3CDTF(new Date()));
		epub.generateStyles(styles);
		// pass some large number, should split along where marked
		epub.splitLargeChapters(2000000);
	}

	/**
	 * Adds the fonts referenced by the generated styles to the publication,
	 * using the font locator set up by the last conversion. Call after
	 * {@link #convert(FB2Document, Publication)}.
	 */
	public void embedFonts() {
		epub.addFonts(styles, fontLocator);
	}

	/**
	 * Converts the given FB2 document into the given publication.
	 *
	 * @param doc
	 *            source document
	 * @param epub
	 *            publication to populate
	 */
	public void convert(FB2Document doc, Publication epub) {
		this.doc = doc;
		this.epub = epub;
		convert();
	}

	/**
	 * Sets the writer that receives stack traces from
	 * {@link #setTemplateFile(String)}.
	 */
	public void setLog(PrintWriter log) {
		this.log = log;
	}
}