package nl.siegmann.epublib.epub;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import java.nio.charset.Charset;
import nl.siegmann.epublib.Constants;
import nl.siegmann.epublib.domain.Book;
import nl.siegmann.epublib.domain.MediaType;
import nl.siegmann.epublib.domain.Resource;
import nl.siegmann.epublib.domain.Resources;
import nl.siegmann.epublib.service.MediatypeService;
import nl.siegmann.epublib.util.ResourceUtil;
import nl.siegmann.epublib.util.StringUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
* Reads an epub file.
*
* @author paul
*
*/
public class EpubReader {
private BookProcessor bookProcessor = BookProcessor.IDENTITY_BOOKPROCESSOR;
public Book readEpub(InputStream in) throws IOException {
return readEpub(in, Constants.CHARACTER_ENCODING);
}
public Book readEpub(ZipInputStream in) throws IOException {
return readEpub(in, Constants.CHARACTER_ENCODING);
}
public Book readEpub(ZipFile zipfile) throws IOException {
return readEpub(zipfile, Constants.CHARACTER_ENCODING);
}
/**
* Read epub from inputstream
*
* @param in the inputstream from which to read the epub
* @param encoding the encoding to use for the html files within the epub
* @return the Book as read from the inputstream
* @throws IOException
*/
public Book readEpub(InputStream in, String encoding) throws IOException {
return readEpub(new ZipInputStream(in, Charset.forName("UTF-8")), encoding);
}
/**
* Reads this EPUB without loading all resources into memory.
*
* @param fileName the file to load
* @param encoding the encoding for XHTML files
* @param lazyLoadedTypes a list of the MediaType to load lazily
* @return this Book without loading all resources into memory.
* @throws IOException
*/
public Book readEpubLazy( String fileName, String encoding, List<MediaType> lazyLoadedTypes ) throws IOException {
Book result = new Book();
Resources resources = readLazyResources(fileName, encoding, lazyLoadedTypes);
handleMimeType(result, resources);
String packageResourceHref = getPackageResourceHref(resources);
Resource packageResource = processPackageResource(packageResourceHref, result, resources);
result.setOpfResource(packageResource);
Resource ncxResource = processNcxResource(packageResource, result);
result.setNcxResource(ncxResource);
result = postProcessBook(result);
return result;
}
/**
* Reads this EPUB without loading any resources into memory.
*
* @param fileName the file to load
* @param encoding the encoding for XHTML files
*
* @return this Book without loading all resources into memory.
* @throws IOException
*/
public Book readEpubLazy( String fileName, String encoding ) throws IOException {
return readEpubLazy(fileName, encoding, Arrays.asList(MediatypeService.mediatypes) );
}
public Book readEpub(ZipInputStream in, String encoding) throws IOException {
return readEpubResources(readResources(in, encoding));
}
public Book readEpub(ZipFile in, String encoding) throws IOException {
return readEpubResources(readResources(in, encoding));
}
public Book readEpubResources(Resources resources) throws IOException{
Book result = new Book();
handleMimeType(result, resources);
String packageResourceHref = getPackageResourceHref(resources);
Resource packageResource = processPackageResource(packageResourceHref, result, resources);
result.setOpfResource(packageResource);
Resource ncxResource = processNcxResource(packageResource, result);
result.setNcxResource(ncxResource);
result = postProcessBook(result);
return result;
}
private Book postProcessBook(Book book) {
if (bookProcessor != null) {
book = bookProcessor.processBook(book);
}
return book;
}
private Resource processNcxResource(Resource packageResource, Book book) {
return NCXDocument.read(book, this);
}
private Resource processPackageResource(String packageResourceHref, Book book, Resources resources) {
Resource packageResource = resources.remove(packageResourceHref);
try {
PackageDocumentReader.read(packageResource, this, book, resources);
} catch (Exception e) {
}
return packageResource;
}
private String getPackageResourceHref(Resources resources) {
String defaultResult = "OEBPS/content.opf";
String result = defaultResult;
Resource containerResource = resources.remove("META-INF/container.xml");
if(containerResource == null) {
return result;
}
try {
Document document = ResourceUtil.getAsDocument(containerResource);
Element rootFileElement = (Element) ((Element) document.getDocumentElement().getElementsByTagName("rootfiles").item(0)).getElementsByTagName("rootfile").item(0);
result = rootFileElement.getAttribute("full-path");
} catch (Exception e) {
}
if(StringUtil.isBlank(result)) {
result = defaultResult;
}
return result;
}
private void handleMimeType(Book result, Resources resources) {
resources.remove("mimetype");
}
private Resources readLazyResources( String fileName, String defaultHtmlEncoding,
List<MediaType> lazyLoadedTypes) throws IOException {
ZipInputStream in = new ZipInputStream(new FileInputStream(fileName), Charset.forName("UTF-8"));
Resources result = new Resources();
for(ZipEntry zipEntry = in.getNextEntry(); zipEntry != null; zipEntry = in.getNextEntry()) {
if(zipEntry.isDirectory()) {
continue;
}
String href = zipEntry.getName();
MediaType mediaType = MediatypeService.determineMediaType(href);
Resource resource;
if ( lazyLoadedTypes.contains(mediaType) ) {
resource = new Resource(fileName, zipEntry.getSize(), href);
} else {
resource = new Resource( in, fileName, (int) zipEntry.getSize(), href );
}
if(resource.getMediaType() == MediatypeService.XHTML) {
resource.setInputEncoding(defaultHtmlEncoding);
}
result.add(resource);
}
return result;
}
private Resources readResources(ZipInputStream in, String defaultHtmlEncoding) throws IOException {
Resources result = new Resources();
for(ZipEntry zipEntry = in.getNextEntry(); zipEntry != null; zipEntry = in.getNextEntry()) {
if(zipEntry.isDirectory()) {
continue;
}
Resource resource = ResourceUtil.createResource(zipEntry, in);
if(resource.getMediaType() == MediatypeService.XHTML) {
resource.setInputEncoding(defaultHtmlEncoding);
}
result.add(resource);
}
return result;
}
private Resources readResources(ZipFile zipFile, String defaultHtmlEncoding) throws IOException {
Resources result = new Resources();
Enumeration<? extends ZipEntry> entries = zipFile.entries();
while(entries.hasMoreElements()){
ZipEntry zipEntry = entries.nextElement();
if(zipEntry != null && !zipEntry.isDirectory()){
Resource resource = ResourceUtil.createResource(zipEntry, zipFile.getInputStream(zipEntry));
if(resource.getMediaType() == MediatypeService.XHTML) {
resource.setInputEncoding(defaultHtmlEncoding);
}
result.add(resource);
}
}
return result;
}
}