package org.rr.jeborker.metadata;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.rr.commons.log.LoggerFactory;
import org.rr.commons.mufs.IResourceHandler;
import org.rr.commons.utils.CommonUtils;
import org.rr.commons.utils.DateConversionUtils;
import org.rr.commons.utils.ListUtils;
import org.rr.commons.utils.StringUtil;
import org.rr.commons.utils.compression.CompressedDataEntry;
import org.rr.commons.utils.compression.FileEntryFilter;
import org.rr.commons.utils.compression.zip.LazyZipEntryStream;
import org.rr.commons.utils.compression.zip.ZipUtils;
import org.rr.jeborker.db.item.EbookPropertyItem;
import org.rr.jeborker.metadata.IMetadataReader.COMMON_METADATA_TYPES;
import nl.siegmann.epublib.domain.Book;
import nl.siegmann.epublib.domain.Identifier;
import nl.siegmann.epublib.domain.Resource;
import nl.siegmann.epublib.domain.Resources;
import nl.siegmann.epublib.epub.EpubReader;
abstract class AEpubMetadataHandler extends AMetadataHandler {
private IResourceHandler ebookResourceHandler;
private Date ebookResourceHandlerTimestamp;
private byte[] containerOpfData = null;
private String opfFileName = null;
protected static interface MetadataEntryType {
String getName();
void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item);
}
static enum EPUB_METADATA_TYPES implements MetadataEntryType {
JB_AGE_SUGGESTION {
public String getName() {
return "jeboorker:age_suggestion";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setAgeSuggestion(metadataProperty.getValueAsString());
}
},JB_KEYWORDS {
public String getName() {
return "jeboorker:keywords";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
List<String> keywords = ListUtils.split(metadataProperty.getValueAsString(), ",");
item.setKeywords(keywords);
}
},CALIBRE_RATING {
public String getName() {
return "calibre:rating";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
Number number = CommonUtils.toNumber(metadataProperty.getValueAsString());
item.setRating(number != null ? number.intValue() : null);
}
},CALIBRE_SERIES_INDEX {
public String getName() {
return "calibre:series_index";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setSeriesIndex(metadataProperty.getValueAsString());
}
},CALIBRE_SERIES {
public String getName() {
return "calibre:series";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
COMMON_METADATA_TYPES.SERIES_NAME.fillItem(metadataProperty, item);
}
},SUBJECT {
public String getName() {
return "subject";
}
@Override
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
COMMON_METADATA_TYPES.GENRE.fillItem(metadataProperty, item);
}
},PUBLISHER {
public String getName() {
return "publisher";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setPublisher(metadataProperty.getValueAsString());
}
},IDENTIFIER {
public String getName() {
return "identifier";
}
@SuppressWarnings("unchecked")
@Override
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
Identifier identifier = ((EpubLibMetadataProperty<Identifier>)metadataProperty).getType();
if("uuid".equalsIgnoreCase(identifier.getScheme())) {
item.setUuid(metadataProperty.getValueAsString());
} else if("idbn".equalsIgnoreCase(identifier.getScheme())) {
item.setIsbn(metadataProperty.getValueAsString());
}
}
},ISBN {
public String getName() {
return "isbn";
}
@Override
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setIsbn(metadataProperty.getValueAsString());
}
},UUID {
public String getName() {
return "uuid";
}
@Override
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setUuid(metadataProperty.getValueAsString());
}
},RIGHTS {
public String getName() {
return "rights";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setRights(metadataProperty.getValueAsString());
}
},LANGUAGE {
public String getName() {
return "language";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setLanguage(metadataProperty.getValueAsString());
}
},DESCRIPTION {
public String getName() {
return "description";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setDescription(metadataProperty.getValueAsString());
}
},TITLE {
public String getName() {
return COMMON_METADATA_TYPES.TITLE.getName();
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
COMMON_METADATA_TYPES.TITLE.fillItem(metadataProperty, item);
}
},DATE {
public String getName() {
return "date";
}
@Override
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setCreationDate(DateConversionUtils.toDate(metadataProperty.getValueAsString()));
}
},PUBLICATION_DATE {
public String getName() {
return "pubdate";
}
@Override
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setPublishingDate(DateConversionUtils.toDate(metadataProperty.getValueAsString()));
}
},CREATION_DATE {
public String getName() {
return "createdate";
}
@Override
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
item.setCreationDate(DateConversionUtils.toDate(metadataProperty.getValueAsString()));
}
},MODIFICATION_DATE {
public String getName() {
return "modifydate";
}
@Override
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
//no EbookPropertyItem
}
},CREATOR {
public String getName() {
return "creator";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
//no EbookPropertyItem
}
},AUTHOR {
public String getName() {
return "author";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
COMMON_METADATA_TYPES.AUTHOR.fillItem(metadataProperty, item);
}
},TYPE {
public String getName() {
return "type";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
//no EbookPropertyItem
}
},CONTRIBUTOR {
public String getName() {
return "contributor";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
//no EbookPropertyItem
}
},FORMAT {
public String getName() {
return "format";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
//no EbookPropertyItem
}
},COVER {
public String getName() {
return "cover";
}
public void fillItem(MetadataProperty metadataProperty, EbookPropertyItem item) {
COMMON_METADATA_TYPES.COVER.fillItem(metadataProperty, item);
}
}
}
AEpubMetadataHandler(IResourceHandler ebookResourceHandler) {
this.ebookResourceHandler = ebookResourceHandler;
this.ebookResourceHandlerTimestamp = ebookResourceHandler.getModifiedAt();
}
/**
* Gets the {@link IResourceHandler} instance for the ebook which is processed
* by this {@link AMetadataHandler} instance.
* @return The desired {@link IResourceHandler} instance.
*/
public List<IResourceHandler> getEbookResource() {
return Collections.singletonList(this.ebookResourceHandler);
}
/**
* Gets the Opf file where the metadata is stored. The information where the opf
* file could be found is stored in the META-INF/container.xml file. This information
* will be extracted from there.
*
* @param zipData The zip data bytes.
* @return The desired file name or <code>null</code> if no one could be found.
* @throws IOException
*/
protected String getOpfFile(final IResourceHandler ebookResource) throws IOException {
if(this.opfFileName == null) {
final String fullPathString = "full-path=";
InputStream contentInputStream = null;
try {
final CompressedDataEntry containerXml = ZipUtils.extract(ebookResource, "META-INF/container.xml");
if(containerXml!=null) {
final String containerXmlData = new String(containerXml.getBytes());
final int fullPathIndex = containerXmlData.indexOf(fullPathString);
if(fullPathIndex!=-1) {
final int startIdx = fullPathIndex + fullPathString.length() + 1;
final int endIdx = containerXmlData.indexOf('"', startIdx);
final String fullPathValue = containerXmlData.substring(startIdx, endIdx);
this.opfFileName = fullPathValue;
}
}
} finally {
if(contentInputStream != null) {
IOUtils.closeQuietly(contentInputStream);
}
}
}
return this.opfFileName;
}
/**
* gets the container opf file content bytes containing the metdadata informations.
*/
protected byte[] getContainerOPF(final IResourceHandler ebookResource) throws IOException {
if (this.containerOpfData == null || isModified()) {
final String opfFile = this.getOpfFile(ebookResource);
if (opfFile != null) {
InputStream contentInputStream = null;
try {
final CompressedDataEntry containerXml = ZipUtils.extract(ebookResource, opfFile);
if (containerXml != null) {
this.containerOpfData = containerXml.getBytes();
} else {
LoggerFactory.logWarning(this, "Could not get file" + opfFile, new RuntimeException("dumpstack"));
}
} finally {
if(contentInputStream != null) {
IOUtils.closeQuietly(contentInputStream);
}
}
}
}
return this.containerOpfData;
}
protected boolean isModified() {
if(this.ebookResourceHandlerTimestamp != null && this.ebookResourceHandler.getModifiedAt() != null) {
return !this.ebookResourceHandler.getModifiedAt().equals(ebookResourceHandlerTimestamp);
}
return true;
}
/**
* Read all entries from, the given zip data and creates a {@link Book} instance from them.
* @throws IOException
*/
protected Book readBook(final InputStream zipData, final IResourceHandler ebookResourceHandler, final boolean lazy) throws IOException {
try {
final EpubReader reader = new EpubReader();
final Resources resources = new Resources();
final EpubZipFileFilter epubZipFileFilter = new EpubZipFileFilter(lazy);
final List<CompressedDataEntry> extracted = ZipUtils.extract(ebookResourceHandler, epubZipFileFilter);
final List<String> lazyEntries = epubZipFileFilter.getLazyEntries();
final List<byte[]> lazyRawEntries = epubZipFileFilter.getLazyRawEntries();
for(CompressedDataEntry entry : extracted) {
Resource resource = new Resource(entry.getBytes(), entry.rawPath);
resources.add(resource);
}
if(lazyEntries.size() == lazyRawEntries.size()) {
for(int i = 0; i < lazyEntries.size(); i++) {
String entry = lazyEntries.get(i);
byte[] rawEntry = lazyRawEntries.get(i);
Resource resource = new Resource(new LazyZipEntryStream(ebookResourceHandler, entry), rawEntry);
resources.add(resource);
}
} else {
throw new IOException("Zip entries not even");
}
final Book epub = reader.readEpub(resources, StringUtil.UTF_8, ebookResourceHandler.getName());
return epub;
} finally {
IOUtils.closeQuietly(zipData);
}
}
/**
* Zip file filter that collects all zip file entries and support lazy handling for
* having not all files to be extracted. Only these files will be extracted which
* are commonly used by the {@link EpubReader}.
*/
private static class EpubZipFileFilter implements FileEntryFilter {
boolean lazy = false;
List<String> lazyEntries = new ArrayList<>();
List<byte[]> lazyRawEntries = new ArrayList<byte[]>();
EpubZipFileFilter(boolean lazy) {
this.lazy = lazy;
}
@Override
public boolean accept(String entry, byte[] rawEntry) {
boolean accept = true;
if(lazy) {
String lowerCaseEntry = entry.toLowerCase();
if(lowerCaseEntry.endsWith("/container.xml")) {
accept = true;
} else if(lowerCaseEntry.endsWith(".opf")) {
accept = true;
} else if(lowerCaseEntry.endsWith(".ncx")) {
accept = true;
} else if(lowerCaseEntry.endsWith("cover.jpg") || lowerCaseEntry.endsWith("cover.jpeg")) {
accept = true;
} else {
accept = false;
}
}
if(!accept) {
lazyEntries.add(entry);
lazyRawEntries.add(rawEntry);
}
return accept;
}
public List<String> getLazyEntries() {
return this.lazyEntries;
}
public List<byte[]> getLazyRawEntries() {
return this.lazyRawEntries;
}
}
}