package nl.siegmann.epublib.epub;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import nl.siegmann.epublib.Constants;
import nl.siegmann.epublib.domain.Book;
import nl.siegmann.epublib.domain.MediaType;
import nl.siegmann.epublib.domain.Resource;
import nl.siegmann.epublib.domain.Resources;
import nl.siegmann.epublib.service.MediatypeService;
import nl.siegmann.epublib.util.ResourceUtil;

import org.rr.commons.utils.StringUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Reads an epub file.
 *
 * @author paul
 *
 */
public class EpubReader {

    private static final Logger log = Logger.getLogger(EpubReader.class.getName());

    private BookProcessor bookProcessor = BookProcessor.IDENTITY_BOOKPROCESSOR;

    /**
     * Reads an epub from the given stream using the default encoding
     * {@link Constants#ENCODING}.
     *
     * @param in the inputstream from which to read the epub; it is closed after reading
     * @param name the name handed to the created {@link Book}
     * @return the book that was read
     * @throws IOException if reading the zip entries fails
     */
    public Book readEpub(InputStream in, String name) throws IOException {
        return readEpub(in, name, Constants.ENCODING);
    }

    /**
     * Reads an epub from the given zip stream using the default encoding
     * {@link Constants#ENCODING}.
     *
     * @param in the zip stream from which to read the epub; it is closed after reading
     * @param name the name handed to the created {@link Book}
     * @return the book that was read
     * @throws IOException if reading the zip entries fails
     */
    public Book readEpub(ZipInputStream in, String name) throws IOException {
        return readEpub(in, Constants.ENCODING, name);
    }

    /**
     * Read epub from inputstream.
     *
     * <p>Note the parameter order: this overload takes the name before the encoding,
     * while the {@link ZipInputStream} and {@link Resources} overloads take the
     * encoding first.
     *
     * @param in the inputstream from which to read the epub; it is wrapped in a
     *        {@link ZipInputStream} and closed after reading
     * @param name the name handed to the created {@link Book}
     * @param encoding the encoding to use for the html files within the epub
     * @return the book that was read
     * @throws IOException if reading the zip entries fails
     */
    public Book readEpub(InputStream in, String name, String encoding) throws IOException {
        // Forward both values explicitly. Calling the two-argument overload here would
        // pass the encoding as the book name and silently drop the requested encoding.
        return readEpub(new ZipInputStream(in), encoding, name);
    }

    /**
     * Reads this EPUB without loading all resources into memory.
     *
     * <p>Unlike {@link #readEpub(Resources, String, String)}, this method does not
     * collect unlisted resources.
     *
     * @param name the file to load; also used as the name of the created {@link Book}
     * @param encoding the encoding for XHTML files
     * @param lazyLoadedTypes a list of the MediaType to load lazily
     * @return the book that was read
     * @throws IOException if the file cannot be opened or read
     */
    public Book readEpubLazy(String name, String encoding, List<MediaType> lazyLoadedTypes) throws IOException {
        Book result = new Book(name);
        Resources resources = readLazyResources(name, encoding, lazyLoadedTypes);
        readPackageAndNcx(result, resources);
        return postProcessBook(result);
    }

    /**
     * Reads this EPUB treating every media type in {@link MediatypeService#mediatypes}
     * as lazily loaded.
     *
     * @param fileName the file to load
     * @param encoding the encoding for XHTML files
     *
     * @return the book that was read
     * @throws IOException if the file cannot be opened or read
     */
    public Book readEpubLazy(String fileName, String encoding) throws IOException {
        return readEpubLazy(fileName, encoding, Arrays.asList(MediatypeService.mediatypes));
    }

    /**
     * Reads all entries of the given zip stream and builds a book from them.
     *
     * @param in the zip stream from which to read the epub; it is closed after reading
     * @param encoding the encoding to use for the html files within the epub
     * @param name the name handed to the created {@link Book}
     * @return the book that was read
     * @throws IOException if reading the zip entries fails
     */
    public Book readEpub(ZipInputStream in, String encoding, String name) throws IOException {
        Resources resources = readResources(in, encoding);
        return readEpub(resources, encoding, name);
    }

    /**
     * Builds a book from already loaded resources.
     *
     * <p>The "mimetype" entry, the container document and the package document are
     * removed from {@code resources} while processing. Every remaining resource whose
     * href is not part of the book's resources is stored as an unlisted resource.
     *
     * @param resources the resources of the epub
     * @param encoding not used by this method
     * @param name the name handed to the created {@link Book}
     * @return the book that was built
     * @throws IOException declared for callers; see the invoked readers
     */
    public Book readEpub(Resources resources, String encoding, String name) throws IOException {
        Book result = new Book(name);
        readPackageAndNcx(result, resources);
        Resources unlistedResources = processUnlistedResources(result, resources);
        result.setUnlistedResources(unlistedResources);
        return postProcessBook(result);
    }

    /**
     * Shared first stage of both read paths: drops the mimetype entry, locates and
     * reads the package document and then the NCX, storing both on the book.
     */
    private void readPackageAndNcx(Book book, Resources resources) {
        handleMimeType(book, resources);
        String packageResourceHref = getPackageResourceHref(resources, book);
        Resource packageResource = processPackageResource(packageResourceHref, book, resources);
        book.setOpfResource(packageResource);
        Resource ncxResource = processNcxResource(packageResource, book);
        book.setNcxResource(ncxResource);
    }

    /**
     * Runs the configured {@link BookProcessor} over the book, if one is set.
     *
     * @return the processor's result, or the given book when no processor is set
     */
    private Book postProcessBook(Book book) {
        if (bookProcessor != null) {
            book = bookProcessor.processBook(book);
        }
        return book;
    }

    /**
     * Delegates to {@link NCXDocument#read}; {@code packageResource} is not used.
     */
    private Resource processNcxResource(Resource packageResource, Book book) {
        return NCXDocument.read(book, this);
    }

    /**
     * Collects every resource of {@code allResources} whose href is not contained in
     * the book's resources. The package href of each collected resource is reset to
     * {@code null}.
     */
    private Resources processUnlistedResources(Book book, Resources allResources) {
        Resources result = new Resources();
        Resources resources = book.getResources();
        Collection<Resource> allResourcesCollection = allResources.getAll();
        for (Resource resource : allResourcesCollection) {
            if (!resources.containsByHref(resource.getHref())) {
                resource.setPackageHref(null);
                result.add(resource);
            }
        }
        return result;
    }

    /**
     * Removes the package document from {@code resources} and reads it into the book.
     * A missing document or a failure while reading is logged as a warning and not
     * propagated.
     *
     * @return the removed package resource, or {@code null} if none was registered
     *         under {@code packageResourceHref}
     */
    private Resource processPackageResource(String packageResourceHref, Book book, Resources resources) {
        Resource packageResource = resources.remove(packageResourceHref);
        try {
            if (packageResource != null) {
                PackageDocumentReader.read(packageResource, this, book, resources);
            } else {
                log.log(Level.WARNING, "Reference " + packageResourceHref + " for epub " + book.getName() + " did not exists");
            }
        } catch (Exception e) {
            log.log(Level.WARNING, e.getMessage() + " " + packageResourceHref + " for epub " + book.getName(), e);
        }
        return packageResource;
    }

    /**
     * Determines the href of the package document from META-INF/container.xml, which
     * is removed from {@code resources} in the process.
     *
     * @return the "full-path" attribute of the first rootfile element, or
     *         "OEBPS/content.opf" when the container is missing or the attribute is
     *         empty; if evaluating the container fails, the failure is logged and the
     *         default is kept
     */
    private String getPackageResourceHref(Resources resources, Book book) {
        String defaultResult = "OEBPS/content.opf";
        String result = defaultResult;

        Resource containerResource = resources.remove("META-INF/container.xml");
        if (containerResource == null) {
            return result;
        }
        try {
            Document document = ResourceUtil.getAsDocument(containerResource);
            Element rootFilesElement = (Element) document.getDocumentElement().getElementsByTagName("rootfiles").item(0);
            Element rootFileElement = (Element) rootFilesElement.getElementsByTagName("rootfile").item(0);
            result = rootFileElement.getAttribute("full-path");
        } catch (Exception e) {
            // Also reached when an expected element is absent (null dereference above).
            log.log(Level.WARNING, e.getMessage() + " META-INF/container.xml for epub " + book.getName(), e);
        }
        if (StringUtil.isEmpty(result)) {
            result = defaultResult;
        }
        return result;
    }

    /**
     * Removes the "mimetype" entry from the resources; its content is not evaluated.
     */
    private void handleMimeType(Book result, Resources resources) {
        resources.remove("mimetype");
    }

    /**
     * Walks the zip entries of the named file. Entries whose media type is contained
     * in {@code lazyLoadedTypes} become resources created from the file name, the
     * entry size and the href only; all other entries are created from the open stream.
     *
     * @param name the epub file to open
     * @param defaultHtmlEncoding the input encoding set on XHTML resources
     * @param lazyLoadedTypes the media types that are not read from the stream here
     * @return the resources found in the file, directories excluded
     * @throws IOException if the file cannot be opened or read
     */
    private Resources readLazyResources(String name, String defaultHtmlEncoding, List<MediaType> lazyLoadedTypes) throws IOException {
        ZipInputStream in = new ZipInputStream(new FileInputStream(name));
        try {
            Resources result = new Resources();
            for (ZipEntry zipEntry = in.getNextEntry(); zipEntry != null; zipEntry = in.getNextEntry()) {
                if (zipEntry.isDirectory()) {
                    continue;
                }

                String href = zipEntry.getName();
                MediaType mediaType = MediatypeService.determineMediaType(href);

                Resource resource;
                if (lazyLoadedTypes.contains(mediaType)) {
                    resource = new Resource(name, zipEntry.getSize(), href);
                } else {
                    // NOTE(review): assumes Resource(in, href) consumes the entry while
                    // the stream is still open, as the eager path in readResources does.
                    resource = new Resource(in, href);
                }

                if (resource.getMediaType() == MediatypeService.XHTML) {
                    resource.setInputEncoding(defaultHtmlEncoding);
                }
                result.add(resource);
            }
            return result;
        } finally {
            // Needs to be closed after reading. Otherwise the file stays locked.
            try {
                in.close();
            } catch (IOException e) {
                /* quietly */
            }
        }
    }

    /**
     * Reads every non-directory entry of the zip stream into a resource and closes
     * the stream afterwards, whether or not reading succeeded.
     *
     * @param in the zip stream to read
     * @param defaultHtmlEncoding the input encoding set on XHTML resources
     * @return the resources found in the stream
     * @throws IOException if reading the zip entries fails
     */
    private Resources readResources(ZipInputStream in, String defaultHtmlEncoding) throws IOException {
        try {
            Resources result = new Resources();
            for (ZipEntry zipEntry = in.getNextEntry(); zipEntry != null; zipEntry = in.getNextEntry()) {
                if (zipEntry.isDirectory()) {
                    continue;
                }
                Resource resource = ResourceUtil.createResource(zipEntry, in);
                if (resource.getMediaType() == MediatypeService.XHTML) {
                    resource.setInputEncoding(defaultHtmlEncoding);
                }
                result.add(resource);
            }
            return result;
        } finally {
            // Need to be closed after reading. Otherwise the file stays locked.
            try {
                in.close();
            } catch (IOException e) {
                /* quietly */
            }
        }
    }
}