/*
* Copyright (c) 2013 Allogy Interactive.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.allogy.app.media;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* This class extracts and parses an EPUB format EBook
*
* @author Yazen Ghannam
*/
public class EPub extends EBook {
// Tag identifying this class in log output.
private static final String LOG_TAG = EPub.class.getName();
// File paths required for parsing epub. They are constants shared by every
// instance because the archive is always unpacked into the same directory.
private static final String mTempPath = "/sdcard/temp/epubReader/";
private static final String mContainerPath = mTempPath + "META-INF/container.xml";
private static final String mOpsPath = mTempPath + "OPS/";
// Location of the OPF package document of the book held by this instance.
private String mOpfPath;
// Size in bytes of the copy buffer used while extracting the archive.
private final static int BUFFER = 2048;
// Text of the section currently being parsed; SectionContentHandler appends
// to it and the constructor stores it in Sections after each parse.
private String mSectionContent;
// Not referenced by any code in this file.
private int numPages;
// Sum of the character lengths of all parsed sections.
private int bookLength;
// Manifest ids of the XHTML items, in the order they were found in the OPF.
private ArrayList<String> TabOfConts;
// Parsed text of each XHTML item, parallel to the content paths.
private ArrayList<String> Sections;
// Metadata read from the OPF; created by XMLparser.parsePackaging().
private MetaData metadata;
// Parser bound to this instance. It was static, which let one EPub replace
// the parser another EPub was still using and kept the last book reachable.
private XMLparser parser;
// File paths to all content and image files of THIS book. These were static,
// so every new EPub appended to the previous book's lists and re-parsed its
// sections; per-instance lists keep each book self-contained.
private final ArrayList<String> contentPaths = new ArrayList<String>();
private final ArrayList<String> imagePaths = new ArrayList<String>();
public EPub(String filename) {
ExtractBook(filename);
System.setProperty("org.xml.sax.driver", "org.xmlpull.v1.sax2.Driver");
parser = new XMLparser();
// Parse the container file and return the location of the OPF
mOpfPath = mTempPath + parser.parseContainer(mContainerPath);
// Parse the packaging file and store all the content paths
TabOfConts = new ArrayList<String>();
parser.parsePackaging(mOpfPath);
Sections = new ArrayList<String>();
bookLength = 0;
// Loop through content paths and parse each content file
for (String s : contentPaths) {
mSectionContent = new String();
parser.parseSection(mOpsPath + s);
Sections.add(mSectionContent);
bookLength += mSectionContent.length();
}
}
/**
 * Gives access to the parsed text of the book, one list entry per content
 * file. The returned list is the internal one, not a copy.
 *
 * @return the list holding all of the epub's text
 */
public ArrayList<String> getSections() {
  return this.Sections;
}
/**
 * Gives access to the epub's table of contents, i.e. the manifest ids
 * collected while the OPF was parsed. The returned list is the internal
 * one, not a copy.
 *
 * @return the table-of-contents list
 */
protected ArrayList<String> getTabOfConts() {
  return this.TabOfConts;
}
/**
 * Extracts the epub (zip) archive at {@code path} into the temporary
 * directory {@code mTempPath}, recreating the archive's directory layout.
 *
 * Extraction is best-effort: a failure is logged and the method returns
 * normally, leaving whatever was extracted so far on disk.
 *
 * @param path location of the epub archive on local disk
 */
private static void ExtractBook(String path) {
  ZipInputStream zin = null;
  try {
    File root = new File(mTempPath);
    root.mkdirs();
    // Canonical prefix every extracted file must stay under.
    String rootPrefix = root.getCanonicalPath() + File.separator;
    zin = new ZipInputStream(
        new BufferedInputStream(new FileInputStream(new File(path))));
    byte[] data = new byte[BUFFER];
    ZipEntry entry;
    // Loop through all files in the archive
    while ((entry = zin.getNextEntry()) != null) {
      // Build the target with File so the platform separator is used; the
      // previous "\\" separator made every nested entry fail on Android.
      File target = new File(root, entry.getName());
      // Skip entries such as "../../x" that would land outside the root.
      if (!target.getCanonicalPath().startsWith(rootPrefix)) {
        continue;
      }
      if (entry.isDirectory()) {
        target.mkdirs();
        continue;
      }
      // Archives are not required to contain explicit directory entries.
      File parent = target.getParentFile();
      if (parent != null) {
        parent.mkdirs();
      }
      BufferedOutputStream dest =
          new BufferedOutputStream(new FileOutputStream(target), BUFFER);
      try {
        int count;
        while ((count = zin.read(data, 0, BUFFER)) != -1) {
          dest.write(data, 0, count);
        }
        dest.flush();
      } finally {
        dest.close();
      }
    }
  } catch (Exception e) {
    // Deliberately broad: extraction has always been best-effort and must
    // not abort construction of the book, but the failure is now reported.
    Logger.getLogger(EPub.class.getName())
        .log(Level.WARNING, "Could not extract epub archive " + path, e);
  } finally {
    if (zin != null) {
      try {
        zin.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing the input fails.
      }
    }
  }
}
public class XMLparser extends DefaultHandler {
// State shared by the parse methods and the nested content handlers.

// Not referenced by any code in this file.
int count;
// Handler that perform() installs on the XMLReader; each parseXxx() method
// replaces it before calling perform().
DefaultHandler contentHandler;
// Never assigned in this file; used by PackagingContentHandler.endElement().
PrintStream fout;
// Not referenced by any code in this file.
public String type = null;
// Attribute value containing ".opf" found by ContainerContentHandler;
// returned by parseContainer(). Shadows the outer class's mOpfPath.
public String mOpfPath = null;
// Name that PackagingContentHandler.characters() tests for "title" and
// "creator"; the initial value "x" matches neither.
public String elementName = "x";
// Booleans for HTML tags
// paragraph and link are only ever cleared (set to false) by the handlers.
public boolean paragraph;
public boolean link;
// Set on <body> start and cleared on </body> by SectionContentHandler; text
// is collected only while it is true.
public boolean body;
/**
 * Parses the "container.xml" file and reports where the OPF file lives.
 *
 * @param mContainerPath location of the container file
 * @return the attribute value containing ".opf" that was found, or
 *         {@code null} when none was seen
 */
public String parseContainer(String mContainerPath) {
  this.contentHandler = new ContainerContentHandler();
  this.perform(mContainerPath);
  final String opfLocation = this.mOpfPath;
  return opfLocation;
}
/**
 * Parses the OPF file. A fresh MetaData object replaces the book's previous
 * one, and the manifest entries are collected by PackagingContentHandler.
 *
 * @param packagingPath location of the OPF file
 */
public void parsePackaging(String packagingPath) {
  this.contentHandler = new PackagingContentHandler();
  EPub.this.metadata = new MetaData();
  this.perform(packagingPath);
}
/**
 * Parses one content file; the text found inside its body element is
 * appended to the enclosing book's current section content.
 *
 * @param sectionPath location of the content file
 */
public void parseSection(String sectionPath) {
  // A parse that aborts inside <body> never sees </body>, so the flag would
  // stay set and the next file's <head> text would be collected too.
  body = false;
  contentHandler = new SectionContentHandler();
  perform(sectionPath);
}
/**
 * Runs a SAX parse of {@code uri} with the currently selected content
 * handler and the local DTD resolver.
 *
 * Failures do not propagate: the file is skipped and a warning naming the
 * file is logged, so one unreadable file does not abort the whole book.
 *
 * @param uri location of the XML document to parse
 */
public void perform(String uri) {
  try {
    XMLReader reader = XMLReaderFactory.createXMLReader();
    reader.setEntityResolver(new DTDResolver());
    reader.setContentHandler(contentHandler);
    reader.parse(uri);
  } catch (IOException e) {
    Logger.getLogger(LOG_TAG).log(Level.WARNING, "Could not read " + uri, e);
  } catch (SAXException e) {
    Logger.getLogger(LOG_TAG).log(Level.WARNING, "Could not parse " + uri, e);
  }
}
/**
 * Handler for "container.xml": remembers the attribute value that points at
 * the "OPF" file.
 */
class ContainerContentHandler extends DefaultHandler {
  @Override
  public void startElement(String namespaceURI, String localName,
      String rawName, Attributes atts) throws SAXException {
    final int total = atts.getLength();
    int index = 0;
    while (index < total) {
      String value = atts.getValue(index);
      // Any attribute mentioning ".opf" counts; the last one seen wins.
      if (value.contains(".opf")) {
        mOpfPath = value;
      }
      index++;
    }
  }
}
/**
 * Handler for the OPF package document. It collects the title and creator
 * metadata and records every manifest item: XHTML items go to the content
 * paths and table of contents, image items go to the image paths.
 */
class PackagingContentHandler extends DefaultHandler {
  /**
   * Appends character data to the metadata field selected by the element
   * currently open. SAX may deliver one text node in several chunks, hence
   * the concatenation.
   */
  @Override
  public void characters(char[] ch, int start, int length) throws SAXException {
    // TODO: Add all MetaData according to EPUB Specifications
    if (elementName == null) {
      return;
    }
    String text = new String(ch, start, length);
    // Parse MetaData information from OPF
    if (elementName.contains("title")) {
      metadata.title = (metadata.title == null) ? text : metadata.title.concat(text);
    }
    if (elementName.contains("creator")) {
      metadata.creator = (metadata.creator == null) ? text : metadata.creator.concat(text);
    }
  }

  @Override
  public void endElement(String namespaceURI, String localName,
      String rawName) throws SAXException {
    // Text that follows a closing tag belongs to no metadata element.
    elementName = null;
    if (localName.equals("a")) {
      link = false;
    } else if (localName.equals("p")) {
      // fout is never assigned in this file; guard it so a stray </p> cannot
      // abort the parse with a NullPointerException.
      if (fout != null) {
        fout.println("\n");
      }
      paragraph = false;
    }
  }

  @Override
  public void startElement(String namespaceURI, String localName,
      String rawName, Attributes atts) throws SAXException {
    // Prefer the local name; fall back to the raw name for parsers that do
    // not perform namespace processing.
    String name = (localName != null && localName.length() > 0) ? localName : rawName;
    // Track the open element so characters() knows where text belongs. This
    // was never updated before, so title and creator were never captured.
    elementName = name;
    if (!"item".equals(name)) {
      return;
    }
    String ref = null;
    String id = null;
    String mediaType = null;
    // Read every attribute first: XML attribute order is not significant, so
    // "media-type" may appear before "href" and "id".
    for (int i = 0; i < atts.getLength(); i++) {
      String attr = atts.getLocalName(i);
      if (attr == null || attr.length() == 0) {
        attr = atts.getQName(i);
      }
      if ("href".equals(attr)) {
        ref = atts.getValue(i);
      } else if ("id".equals(attr)) {
        id = atts.getValue(i);
      } else if ("media-type".equals(attr)) {
        mediaType = atts.getValue(i);
      }
    }
    // An item without a location or a type cannot be used.
    if (ref == null || mediaType == null) {
      return;
    }
    if (mediaType.contains("xhtml")) {
      contentPaths.add(ref);
      TabOfConts.add(id);
    } else if (mediaType.contains("image")) {
      imagePaths.add(ref);
    }
  }
}
/**
 * Handler for a content file: gathers all character data that occurs
 * between the opening and closing body tags into the current section.
 */
class SectionContentHandler extends DefaultHandler {
  @Override
  public void characters(char[] ch, int start, int end) throws SAXException {
    String chunk = new String(ch, start, end);
    if (!body) {
      return;
    }
    mSectionContent = mSectionContent.concat(chunk);
  }

  @Override
  public void endElement(String namespaceURI, String localName,
      String rawName) throws SAXException {
    if (localName.equals("a")) {
      link = false;
      return;
    }
    if (!localName.equals("body")) {
      return;
    }
    // Leaving the body: terminate the section text and stop collecting.
    mSectionContent = mSectionContent.concat("\n");
    body = false;
  }

  @Override
  public void startElement(String namespaceURI, String localName,
      String rawName, Attributes atts) throws SAXException {
    if (!localName.equals("body")) {
      return;
    }
    body = true;
  }
}
/**
 * Maps the XHTML 1.1 DTD and its modularization files to local copies under
 * "/res/raw/dtd/". Any other system identifier resolves to {@code null},
 * which leaves resolution to the parser.
 */
public class DTDResolver implements EntityResolver {
  // Directory of the local copies.
  private final String localDir = "/res/raw/dtd/";
  // The driver DTD lives under its own URL prefix.
  private final String driverUrl = "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd";
  private final String driverFile = "xhtml11.dtd";
  // Every module shares this URL prefix and keeps its file name locally.
  private final String moduleBase = "http://www.w3.org/TR/xhtml-modularization/DTD/";
  private final String[] moduleFiles = {
    "xhtml-inlstyle-1.mod",
    "xhtml-framework-1.mod",
    "xhtml-text-1.mod",
    "xhtml-hypertext-1.mod",
    "xhtml-list-1.mod",
    "xhtml-edit-1.mod",
    "xhtml-bdo-1.mod",
    "xhtml-pres-1.mod",
    "xhtml-inlpres-1.mod",
    "xhtml-link-1.mod",
    "xhtml-meta-1.mod",
    "xhtml-base-1.mod",
    "xhtml-script-1.mod",
    "xhtml-style-1.mod",
    "xhtml-image-1.mod",
    "xhtml-csismap-1.mod",
    "xhtml-ssismap-1.mod",
    "xhtml-param-1.mod",
    "xhtml-object-1.mod",
    "xhtml-table-1.mod",
    "xhtml-form-1.mod",
    "xhtml-struct-1.mod"
  };

  @Override
  public InputSource resolveEntity(String publicID, String systemID)
      throws SAXException {
    if (systemID.equals(driverUrl)) {
      return new InputSource(localDir + driverFile);
    }
    for (String file : moduleFiles) {
      if (systemID.equals(moduleBase + file)) {
        return new InputSource(localDir + file);
      }
    }
    return null;
  }
}
}
/**
 * Plain holder for the book's metadata. Within this file only title and
 * creator are ever filled in (from the OPF); every other field keeps its
 * initial value. The field names look like the Dublin Core element set used
 * by EPUB - NOTE(review): confirm before relying on that.
 */
public class MetaData {
  // --- What the book is -------------------------------------------------
  /** Filled from OPF element text whose element name contains "title". */
  public String title;
  public String description;
  /** Starts out as an empty list, never {@code null}. */
  public ArrayList<String> subjects = new ArrayList<String>();
  public String type;
  public String format;
  public String language;

  // --- Who made it ------------------------------------------------------
  /** Filled from OPF element text whose element name contains "creator". */
  public String creator;
  public String contributor;
  public String publisher;

  // --- Identification and provenance ------------------------------------
  public String identifier;
  public String source;
  public String relation;
  public String date;
  /** Presumably meant "coverage"; the name is kept because it is public. */
  public String converage;
  public String rights;
}
}