/******************************************************************************* * Copyright (c) 2011-2015 Torkild U. Resheim. * * All rights reserved. This program and the accompanying materials are made * available under the terms of the Eclipse Public License v1.0 which * accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Torkild U. Resheim - initial API and implementation *******************************************************************************/ package org.eclipse.mylyn.docs.epub.core; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.text.MessageFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.eclipse.emf.common.util.EList; import org.eclipse.emf.common.util.URI; import org.eclipse.emf.ecore.resource.Resource; import org.eclipse.emf.ecore.resource.ResourceSet; import org.eclipse.emf.ecore.resource.impl.ResourceSetImpl; import org.eclipse.emf.ecore.xmi.XMLResource; import org.eclipse.emf.ecore.xmi.impl.XMLResourceFactoryImpl; import org.eclipse.mylyn.docs.epub.core.ILogger.Severity; import org.eclipse.mylyn.docs.epub.ocf.Container; import org.eclipse.mylyn.docs.epub.ocf.OCFFactory; import org.eclipse.mylyn.docs.epub.ocf.OCFPackage; import org.eclipse.mylyn.docs.epub.ocf.RootFile; import org.eclipse.mylyn.docs.epub.ocf.RootFiles; import org.eclipse.mylyn.docs.epub.ocf.util.OCFResourceImpl; import org.eclipse.mylyn.internal.docs.epub.core.EPUBFileUtil; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.ext.DefaultHandler2; /** * Represents one EPUB file. One or more publications can be added and will be a part of the distribution when packed. * <p> * The simplest usage of this API may look like the following: * </p> * * <pre> * EPUB epub = new EPUB(); * OPSPublication oebps = new OPSPublication(); * oebps.addItem(new File("chapter.xhtml")); * epub.add(oebps); * epub.pack(new File("book.epub")); * </pre> * <p> * This will create a new EPUB instance and an OPS (which is the typical content of an EPUB) with one chapter. The OPS * will have one chapter with contents from <b>chapter.xhtml</b> and the final result is an EPUB named <b>book.epub</b>. * </p> * * @author Torkild U. Resheim * @see http://www.idpf.org/doc_library/epub/OPS_2.0.1_draft.htm * @see http://www.idpf.org/epub/301/spec/epub-publications.html */ public class EPUB { /** * @since 3.0 */ public enum PublicationVersion { /** Unsupported or undetected publication version. */ UNKNOWN, /** Open Publication Structure (OPS) 2.0.1 */ V2, /** EPUB Publications 3.0.1 */ V3 } /** * SAX parser for detecting the version of an OEBPS contained within an EPUB. */ private class VersionDetector extends DefaultHandler2 { private String versionString; @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if (qName.equals("opf:package") || qName.equals("package")) {//$NON-NLS-1$ //$NON-NLS-2$ versionString = attributes.getValue("version"); //$NON-NLS-1$ } } } /** EPUB MIME type */ public static final String MIMETYPE_EPUB = "application/epub+zip"; //$NON-NLS-1$ /** OEBPS (OPS+OPF) MIME type */ private static final String MIMETYPE_OEBPS = "application/oebps-package+xml"; //$NON-NLS-1$ /** The encoding to use for the OCF */ private static final String OCF_FILE_ENCODING = "UTF-8"; //$NON-NLS-1$ /** Suffix for OCF files */ private static final String OCF_FILE_SUFFIX = "xml"; //$NON-NLS-1$ /** Version of the OCF specification used */ private static final String OCF_VERSION = "1.0"; //$NON-NLS-1$ private ILogger logger; /** The container holding all the publications */ private Container ocfContainer; /** * Creates a new <b>empty</b> instance of an EPUB. Use {@link #add(Publication)} and {@link #pack(File)} to add * publications and ready the EPUB for distribution. */ public EPUB() { ocfContainer = OCFFactory.eINSTANCE.createContainer(); RootFiles rootFiles = OCFFactory.eINSTANCE.createRootFiles(); ocfContainer.setRootfiles(rootFiles); ocfContainer.setVersion(OCF_VERSION); registerOCFResourceFactory(); } public EPUB(ILogger logger) { this(); this.logger = logger; } /** * Adds a new publication (or root file) to the EPUB. Use {@link #add(Publication)} when adding an OEBPS * publication. * <p> * Note that while an {@link EPUB} can technically contain multiple instances of an {@link Publication}, in practice * reading systems does not support this. * </p> * * @param file * the publication to add * @param type * the MIME type of the publication * @see #add(Publication) */ public void add(File file, String type) { String name = type.substring(type.lastIndexOf('/') + 1, type.length()).toUpperCase(); RootFiles rootFiles = ocfContainer.getRootfiles(); int count = rootFiles.getRootfiles().size(); if (count >= 1) { log("Multiple root files is unsupported by most reading systems!", Severity.WARNING); //$NON-NLS-1$ } String rootFileName = count > 0 ? name + "_" + count : name; //$NON-NLS-1$ rootFileName += File.separator + file.getName(); RootFile rootFile = OCFFactory.eINSTANCE.createRootFile(); rootFile.setFullPath(rootFileName); rootFile.setMediaType(type); rootFile.setPublication(file); rootFiles.getRootfiles().add(rootFile); log(MessageFormat.format(Messages.getString("EPUB.1"), rootFile.getFullPath(), //$NON-NLS-1$ rootFile.getMediaType()), Severity.VERBOSE); } /** * Adds a new OEBPS publication to the EPUB. Use {@link #add(File, String)} to add other types of content. * <p> * Note that while an {@link EPUB} can technically contain multiple instances of an {@link Publication}, in practice * reading systems does not support this. * </p> * * @param oebps * the publication to add. * @since 2.0 */ public void add(Publication oebps) { RootFiles rootFiles = ocfContainer.getRootfiles(); int count = rootFiles.getRootfiles().size(); if (count >= 1) { log("Multiple root files is unsupported by most reading systems!", Severity.WARNING); //$NON-NLS-1$ } String rootFileName = count > 0 ? "OEBPS_" + count : "OEBPS"; //$NON-NLS-1$ //$NON-NLS-2$ rootFileName += "/content.opf"; //$NON-NLS-1$ RootFile rootFile = OCFFactory.eINSTANCE.createRootFile(); rootFile.setFullPath(rootFileName); rootFile.setMediaType(MIMETYPE_OEBPS); rootFile.setPublication(oebps); rootFiles.getRootfiles().add(rootFile); log(MessageFormat.format(Messages.getString("EPUB.0"), rootFile.getFullPath(), //$NON-NLS-1$ rootFile.getMediaType()), Severity.VERBOSE); } /** * Utility method for deleting a folder recursively. * * @param folder * the folder to delete */ private void deleteFolder(File folder) { if (folder.isDirectory()) { String[] children = folder.list(); for (String element : children) { deleteFolder(new File(folder, element)); } } folder.delete(); } /** * Returns the container instance of the EPUB. * * @return the container instance */ public Container getContainer() { return ocfContainer; } /** * Returns a list of all <i>OPS publications</i> contained within the EPUB. Publications in unsupported versions * will not be returned. However their existence can still be determined by looking at the * {@link Container#getRootfiles()} result. * * @return a list of all OPS publications * @see {@link #getContainer()} for obtaining the root file container */ public List<Publication> getOPSPublications() { ArrayList<Publication> publications = new ArrayList<Publication>(); EList<RootFile> rootFiles = ocfContainer.getRootfiles().getRootfiles(); for (RootFile rootFile : rootFiles) { if (rootFile.getMediaType().equals(MIMETYPE_OEBPS)) { // May be null if the publications is in an unsupported format. if (rootFile.getPublication() != null) { publications.add((Publication) rootFile.getPublication()); } } } return publications; } /** * Use to check whether or not the specified file is in a supported format and can be opened as an EPUB. If it's not * an EPUB <code>false</code> will be returned. Note that this methods does not test the contents of the EPUB which * may or may not contain unsupported root files. * * @param epubFile * the target EPUB file * @return <code>true</code> if the file can be opened * @throws IOException */ public boolean isEPUB(File epubFile) throws IOException { String mimeType = EPUBFileUtil.getMimeType(epubFile); if (mimeType.equals(MIMETYPE_EPUB)) { return isEPUB(new FileInputStream(epubFile)); } return false; } private static final int BUFFERSIZE = 2048; /** * Used to verify that the given {@link InputStream} contents is an EPUB. As per specification the first entry in * the file must be named "mimetype" and contain the string <i>application/epub+zip</i>. Further verification is not * done at this stage. * * @param inputStream * the EPUB input stream * @return <code>true</code> if the file is an EPUB file * @throws IOException */ public static boolean isEPUB(InputStream inputStream) throws IOException { ZipInputStream in = new ZipInputStream(inputStream); try { byte[] buf = new byte[BUFFERSIZE]; ZipEntry entry = null; if ((entry = in.getNextEntry()) != null) { String entryName = entry.getName(); if (entryName.equals("mimetype")) { //$NON-NLS-1$ String type = new String(); while ((in.read(buf, 0, BUFFERSIZE)) > 0) { type = type + new String(buf); } if (type.trim().equals(EPUB.MIMETYPE_EPUB)) { return true; } } } } catch (IOException e) { return false; } finally { in.close(); } return false; } /** * Determines the publication version of the root file. * * @param rootFile * the root file * @return the publication version */ private PublicationVersion readPublicationVersion(File rootFile) { try { SAXParserFactory factory = SAXParserFactory.newInstance(); VersionDetector vd = new VersionDetector(); SAXParser parser = factory.newSAXParser(); parser.parse(rootFile, vd); if (vd.versionString == null) { return PublicationVersion.UNKNOWN; } String[] segments = vd.versionString.split("\\."); //$NON-NLS-1$ if (segments[0].equals("2") && segments[1].equals("0")) { //$NON-NLS-1$ //$NON-NLS-2$ return PublicationVersion.V2; } else if (segments[0].equals("3") && segments[1].equals("0")) { //$NON-NLS-1$ //$NON-NLS-2$ return PublicationVersion.V3; } else { return PublicationVersion.UNKNOWN; } } catch (ParserConfigurationException | SAXException | IOException e) { return PublicationVersion.UNKNOWN; } } private void log(String message, Severity severity) { if (logger != null) { logger.log(message, severity); } } /** * Assembles the EPUB file using a temporary working folder. The folder will be deleted as soon as the assembly has * completed. * * @param epubFile * the target EPUB file * @throws Exception */ public File pack(File epubFile) throws Exception { File workingFolder = File.createTempFile("epub_", null); //$NON-NLS-1$ if (workingFolder.delete() && workingFolder.mkdirs()) { pack(epubFile, workingFolder); } deleteFolder(workingFolder); return workingFolder; } /** * Assembles the EPUB file using the specified working folder. The contents of the working folder will <b>not</b> be * removed when the operation has completed. If the temporary data is not interesting, use {@link #pack(File)} * instead. * * @param epubFile * the target EPUB file * @param rootFolder * the root folder holding all the EPUB contents * @throws Exception * @see {@link #pack(File)} */ public void pack(File epubFile, File rootFolder) throws Exception { if (ocfContainer.getRootfiles().getRootfiles().isEmpty()) { throw new ValidationException("EPUB does not contain any publications"); //$NON-NLS-1$ } rootFolder.mkdirs(); if (rootFolder.isDirectory() || rootFolder.mkdirs()) { writeOCF(rootFolder); EList<RootFile> publications = ocfContainer.getRootfiles().getRootfiles(); log(MessageFormat.format(Messages.getString("EPUB.2"), epubFile.getAbsolutePath()), Severity.INFO); //$NON-NLS-1$ for (RootFile rootFile : publications) { Object publication = rootFile.getPublication(); File root = new File(rootFolder.getAbsolutePath() + File.separator + rootFile.getFullPath()); if (publication instanceof Publication) { ((Publication) publication).pack(root); } else { if (rootFile.getPublication() instanceof File) { EPUBFileUtil.copy((File) rootFile.getPublication(), root); } else { throw new IllegalArgumentException("Unknown publication type in root file"); //$NON-NLS-1$ } } } EPUBFileUtil.zip(epubFile, rootFolder); log(MessageFormat.format(Messages.getString("EPUB.3"), //$NON-NLS-1$ publications.size()), Severity.INFO); } else { throw new IOException("Could not create working folder in " + rootFolder.getAbsolutePath()); //$NON-NLS-1$ } } /** * Reads the <i>Open Container Format (OCF)</i> formatted list of contents of this EPUB. The result of this * operation is placed in the {@link #ocfContainer} instance. * * @param rootFolder * the folder where the EPUB was unpacked * @throws IOException * @see {@link #unpack(File)} * @see {@link #unpack(File, File)} * @see <a href="http://idpf.org/epub/30/spec/epub30-ocf.html">EPUB3 OCF specification</a> * @see <a href="http://idpf.org/epub/20/spec/OCF_2.0.1_draft.doc">EPUB2 OCF specification</a> */ protected void readOCF(File rootFolder) throws IOException { // These file names are listed in the OCF specification and must not be // changed. File metaFolder = new File(rootFolder.getAbsolutePath() + File.separator + "META-INF"); //$NON-NLS-1$ File containerFile = new File(metaFolder.getAbsolutePath() + File.separator + "container.xml"); //$NON-NLS-1$ ResourceSet resourceSet = new ResourceSetImpl(); URI fileURI = URI.createFileURI(containerFile.getAbsolutePath()); Resource resource = resourceSet.createResource(fileURI); resource.load(null); ocfContainer = (Container) resource.getContents().get(0); } /** * Registers a new resource factory for OCF data structures. This is normally done through Eclipse extension points * but we also need to be able to create this factory without the Eclipse runtime. */ private void registerOCFResourceFactory() { // Register package so that it is available even without the Eclipse // runtime @SuppressWarnings("unused") OCFPackage packageInstance = OCFPackage.eINSTANCE; // Register the file suffix Resource.Factory.Registry.INSTANCE.getExtensionToFactoryMap().put(OCF_FILE_SUFFIX, new XMLResourceFactoryImpl() { @Override public Resource createResource(URI uri) { OCFResourceImpl xmiResource = new OCFResourceImpl(uri); Map<Object, Object> loadOptions = xmiResource.getDefaultLoadOptions(); Map<Object, Object> saveOptions = xmiResource.getDefaultSaveOptions(); // We use extended metadata saveOptions.put(XMLResource.OPTION_EXTENDED_META_DATA, Boolean.TRUE); loadOptions.put(XMLResource.OPTION_EXTENDED_META_DATA, Boolean.TRUE); // Required in order to correctly read in attributes loadOptions.put(XMLResource.OPTION_LAX_FEATURE_PROCESSING, Boolean.TRUE); // Treat "href" attributes as features loadOptions.put(XMLResource.OPTION_USE_ENCODED_ATTRIBUTE_STYLE, Boolean.TRUE); // UTF-8 encoding is required per specification saveOptions.put(XMLResource.OPTION_ENCODING, OCF_FILE_ENCODING); // Do not download any external DTDs. Map<String, Object> parserFeatures = new HashMap<String, Object>(); parserFeatures.put("http://xml.org/sax/features/validation", Boolean.FALSE); //$NON-NLS-1$ parserFeatures.put("http://apache.org/xml/features/nonvalidating/load-external-dtd", //$NON-NLS-1$ Boolean.FALSE); loadOptions.put(XMLResource.OPTION_PARSER_FEATURES, parserFeatures); return xmiResource; } }); } /** * Unpacks the EPUB file to a temporary location and populates the data model with the content. * * @param epubFile * the EPUB file to unpack * @return the location when the EPUB is unpacked * @throws Exception * @see {@link #unpack(File, File)} */ public File unpack(File epubFile) throws Exception { File workingFolder = File.createTempFile("epub_", null); //$NON-NLS-1$ workingFolder.deleteOnExit(); // XXX: Avoid using deleteOnExit() if (workingFolder.delete() && workingFolder.mkdirs()) { unpack(epubFile, workingFolder); } return workingFolder; } /** * Unpacks the given EPUB file into the specified destination and populates the data model with the content. Note * that when the destination folder already exists or is empty the file EPUB will not be unpacked or verified, but * the contents of the destination will be treated as an already unpacked EPUB. If this behaviour is not desired one * should take steps to delete the folder prior to unpacking. * <p> * When performing the unpacking, the modification date of the destination folder will be set to the modification * date of the source EPUB. Additionally the contents of the EPUB will retain the original modification date if set. * </p> * <p> * Multiple OPS root files in the publication will populate the OCF container instance with one {@link Publication} * for each as expected. The contents of the data model starting with the OCF container will be replaced. If the * publication is in an unsupported version it will not be added to the data model. * </p> * * @param epubFile * the EPUB file to unpack * @param rootFolder * the destination folder * @throws Exception * @see {@link #unpack(File)} when destination is not interesting * @see {@link #getContainer()} to obtain the container instance * @see {@link #getOPSPublications()} to get a list of all contained OPS publications */ public void unpack(File epubFile, File rootFolder) throws Exception { if (!isEPUB(epubFile)) { throw new IllegalArgumentException(MessageFormat.format("{0} is not an EPUB file", epubFile)); //$NON-NLS-1$ } if (!rootFolder.exists() || rootFolder.list().length == 0) { EPUBFileUtil.unzip(epubFile, rootFolder); } readOCF(rootFolder); EList<RootFile> rootFiles = ocfContainer.getRootfiles().getRootfiles(); for (RootFile rootFile : rootFiles) { if (rootFile.getMediaType().equals(MIMETYPE_OEBPS)) { File root = new File(rootFolder.getAbsolutePath() + File.separator + rootFile.getFullPath()); switch (readPublicationVersion(root)) { case V2: Publication ops2 = Publication.getVersion2Instance(logger); ops2.unpack(root); rootFile.setPublication(ops2); break; case V3: Publication ops3 = Publication.getVersion3Instance(); ops3.unpack(root); rootFile.setPublication(ops3); break; default: log(MessageFormat.format("Unsupported OEBPS version in root file {0}", rootFile.getFullPath()), //$NON-NLS-1$ Severity.WARNING); break; } } } } /** * Creates a new folder named META-INF and writes the required (as per the OPS specification) <b>container.xml</b> * in that folder. This is part of the packing procedure. * * @param rootFolder * the root folder * @see <a href="http://idpf.org/epub/30/spec/epub30-ocf.html">EPUB3 OCF specification</a> * @see <a href="http://idpf.org/epub/20/spec/OCF_2.0.1_draft.doc">EPUB2 OCF specification</a> */ private void writeOCF(File rootFolder) throws IOException { File metaFolder = new File(rootFolder.getAbsolutePath() + File.separator + "META-INF"); //$NON-NLS-1$ if (metaFolder.mkdir()) { File containerFile = new File(metaFolder.getAbsolutePath() + File.separator + "container.xml"); //$NON-NLS-1$ ResourceSet resourceSet = new ResourceSetImpl(); // Register the packages to make it available during loading. URI fileURI = URI.createFileURI(containerFile.getAbsolutePath()); Resource resource = resourceSet.createResource(fileURI); resource.getContents().add(ocfContainer); resource.save(null); } } }