/*
* Copyright (c) 2015 Felix Husse under MIT License
* see LICENSE file
*/
package de.fatalix.book.importer;
import com.google.gson.Gson;
import de.fatalix.bookery.solr.model.BookEntry;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
/**
*
* @author felix.husse
*/
public class BookMigrator {
private static final Pattern PATTERN = Pattern.compile("[%\\.\"\\*/:<>\\?\\\\\\|\\+,\\.;=\\[\\]]");
public static void clearDB(String solrURL, String solrCore) throws SolrServerException, IOException {
SolrServer server = SolrHandler.createConnection(solrURL, solrCore);
System.out.println("RESET:");
server.deleteByQuery("*:*");
server.commit();
}
public static List<File> findAllBooks(String importPath) throws IOException {
List<File> result = new ArrayList<>();
File importFolder = new File(importPath);
if (!importFolder.isDirectory()) {
throw new IOException(importFolder.getAbsolutePath() + " is not a folder!");
}
return walkTree(result, importFolder);
}
private static List<File> walkTree(List<File> result, File currentFolder) {
if (hasFolderBook(currentFolder)) {
result.add(currentFolder);
} else {
File[] subFolders = currentFolder.listFiles(new FileFilter() {
@Override
public boolean accept(File file) {
return file.isDirectory();
}
});
for (File subFolder : subFolders) {
result = walkTree(result, subFolder);
}
}
return result;
}
private static boolean hasFolderBook(File folder) {
return folder.listFiles(new FileFilter() {
@Override
public boolean accept(File file) {
return file.getName().contains(".epub") || file.getName().contains(".mobi");
}
}).length > 0;
}
public static void filterBooks(List<File> bookFolders, File filteredFolder) throws IOException {
int total = bookFolders.size();
int counter = 0;
int filteredCounter = 0;
int percentageDone = 0;
Gson gson = new Gson();
File coverFilterFolder = new File(filteredFolder, "cover");
File descriptionFilterFolder = new File(filteredFolder, "description");
for (File bookFolder : bookFolders) {
BookEntry bookEntry = importBatchWise(bookFolder, gson);
boolean filtered = false;
if (bookEntry.getCover() == null) {
filteredCounter++;
try {
String validFolderName = toValidFileName(bookEntry.getAuthor() + "-" + bookEntry.getTitle());
FileUtils.moveDirectory(bookFolder, new File(coverFilterFolder,validFolderName));
} catch (IOException ex) {
System.out.println("Catched...");
}
System.out.println("Filtered " + filteredCounter + " of " + total);
filtered = true;
}
if (!filtered && bookEntry.getDescription() == null) {
filteredCounter++;
try {
String validFolderName = toValidFileName(bookEntry.getAuthor() + "-" + bookEntry.getTitle());
FileUtils.moveDirectory(bookFolder, new File(descriptionFilterFolder,validFolderName));
} catch (IOException ex) {
System.out.println("Catched...");
}
System.out.println("Filtered " + filteredCounter + " of " + total);
filtered = true;
}
counter++;
int currentProgress = counter * 100 / total;
if (currentProgress > percentageDone) {
percentageDone = currentProgress;
System.out.println(percentageDone + "% done..");
}
}
System.out.println("Finished processing");
}
private static String toValidFileName(String input) {
return input.replaceAll("[:\\\\/*\"?|<>']", " ");
}
/**
*
* @param solrURL
* @param solrCore
* @param batchSize
* @param importPath
* @param reset
* @throws IOException
* @throws SolrServerException
*/
public static void importBooks(String solrURL, String solrCore, int batchSize, List<File> bookFolders, boolean reset) throws IOException, SolrServerException {
SolrServer server = SolrHandler.createConnection(solrURL, solrCore);
if (reset) {
System.out.println("RESET:");
server.deleteByQuery("*:*");
server.commit();
}
System.out.println("Connection established");
Gson gson = new Gson();
int total = bookFolders.size();
int counter = 0;
List<BookEntry> bookEntries = new ArrayList<>();
for (File bookFolder : bookFolders) {
bookEntries.add(importBatchWise(bookFolder, gson));
counter++;
if (bookEntries.size() >= batchSize) {
UpdateResponse response = SolrHandler.addBeans(server, bookEntries);
if (response.getStatus() != 0) {
throw new SolrServerException("Update failed with CODE " + response.getStatus());
}
bookEntries.clear();
System.out.println("Processed " + counter + " of " + total);
}
}
if (bookEntries.size() > 0) {
UpdateResponse response = SolrHandler.addBeans(server, bookEntries);
if (response.getStatus() != 0) {
throw new SolrServerException("Update failed with CODE " + response.getStatus());
}
bookEntries.clear();
System.out.println("Processed " + counter + " of " + total);
}
}
/**
*
* @param solrURL
* @param solrCore
* @param batchSize
* @param exportPath
*/
public static void exportBooks(String solrURL, String solrCore, int batchSize, String exportPath) throws SolrServerException, IOException {
File exportFolder = new File(exportPath);
if (!exportFolder.isDirectory()) {
throw new IOException(exportFolder.getAbsolutePath() + " is not a folder!");
}
SolrServer server = SolrHandler.createConnection(solrURL, solrCore);
System.out.println("Connection established");
Gson gson = new Gson();
exportBatchWise(server, exportFolder, batchSize, 0, gson);
}
private static BookEntry importBatchWise(File bookFolder, Gson gson) throws IOException {
BookEntry bookEntry = new BookEntry();
for (File file : bookFolder.listFiles()) {
if (file.getName().contains(".mobi")) {
byte[] bookData = Files.readAllBytes(file.toPath());
bookEntry.setMobi(bookData);
} else if (file.getName().contains(".jpg")) {
byte[] coverData = Files.readAllBytes(file.toPath());
bookEntry.setCover(coverData);
} else if (file.getName().contains(".epub")) {
byte[] bookData = Files.readAllBytes(file.toPath());
bookEntry.setEpub(bookData);
} else if (file.getName().contains(".json")) {
BookMetaData bmd = gson.fromJson(IOUtils.toString(new FileInputStream(file), Charset.defaultCharset()), BookMetaData.class);
bookEntry.setAuthor(bmd.getAuthor()).setTitle(bmd.getTitle()).setIsbn(bmd.getIsbn())
.setPublisher(bmd.getPublisher()).setDescription(bmd.getDescription()).setLanguage(bmd.getLanguage())
.setMimeType(bmd.getMimeType()).setUploadDate(bmd.getUploadDate()).setReleaseDate(bmd.getReleaseDate());
} else if (file.getName().contains(".opf")) {
bookEntry = parseOPF(file, bookEntry);
bookEntry.setMimeType("mobi").setUploadDate(new DateTime(DateTimeZone.UTC).toDate());
}
}
return bookEntry;
}
private static void exportBatchWise(SolrServer server, File exportFolder, int batchSize, int offset, Gson gson) throws SolrServerException, IOException {
QueryResponse response = SolrHandler.searchSolrIndex(server, "*:*", batchSize, offset);
List<BookEntry> bookEntries = response.getBeans(BookEntry.class);
System.out.println("Retrieved " + (bookEntries.size() + offset) + " of " + response.getResults().getNumFound());
for (BookEntry bookEntry : bookEntries) {
String bookTitle = bookEntry.getTitle();
bookTitle = bookTitle.replaceAll(":", " ");
File bookFolder = new File(exportFolder, bookEntry.getAuthor() + "-" + bookTitle);
bookFolder.mkdirs();
if (bookEntry.getCover() != null) {
if (bookEntry.getEpub() != null) {
File bookData = new File(bookFolder, bookEntry.getAuthor() + "-" + bookTitle + ".epub");
Files.write(bookData.toPath(), bookEntry.getMobi(), StandardOpenOption.CREATE_NEW);
}
if (bookEntry.getMobi() != null) {
File bookData = new File(bookFolder, bookEntry.getAuthor() + "-" + bookTitle + ".mobi");
Files.write(bookData.toPath(), bookEntry.getMobi(), StandardOpenOption.CREATE_NEW);
}
File coverData = new File(bookFolder, bookEntry.getAuthor() + "-" + bookTitle + ".jpg");
Files.write(coverData.toPath(), bookEntry.getCover(), StandardOpenOption.CREATE_NEW);
File metaDataFile = new File(bookFolder, bookEntry.getAuthor() + "-" + bookTitle + ".json");
BookMetaData metaData = new BookMetaData(bookEntry.getAuthor(), bookEntry.getTitle(), bookEntry.getIsbn(),
bookEntry.getPublisher(), bookEntry.getDescription(), bookEntry.getLanguage(),
bookEntry.getReleaseDate(), bookEntry.getMimeType(), bookEntry.getUploadDate(),
bookEntry.getViewed(), bookEntry.getShared());
gson.toJson(metaData);
Files.write(metaDataFile.toPath(), gson.toJson(metaData).getBytes(), StandardOpenOption.CREATE_NEW);
}
}
if (response.getResults().getNumFound() > offset) {
exportBatchWise(server, exportFolder, batchSize, offset + batchSize, gson);
}
}
private static BookEntry parseOPF(File pathToOPF, BookEntry bmd) throws IOException {
List<String> lines = Files.readAllLines(pathToOPF.toPath(), Charset.forName("UTF-8"));
boolean multiLineDescription = false;
String description = "";
for (String line : lines) {
if (multiLineDescription) {
multiLineDescription = false;
if (line.split("<").length == 1) {
multiLineDescription = true;
description = description + line;
} else {
description = description + line.split("<")[0];
description = StringEscapeUtils.unescapeXml(description);
bmd.setDescription(description);
}
} else if (line.contains("dc:title")) {
String title = line.split(">")[1].split("<")[0];
bmd.setTitle(title);
} else if (line.contains("dc:creator")) {
String creator = line.split(">")[1].split("<")[0];
bmd.setAuthor(creator);
} else if (line.contains("dc:description")) {
String value = line.split(">")[1];
if (value.split("<").length == 1) {
multiLineDescription = true;
description = value;
} else {
value = value.split("<")[0];
value = StringEscapeUtils.unescapeXml(value);
bmd.setDescription(value);
}
} else if (line.contains("dc:publisher")) {
String value = line.split(">")[1].split("<")[0];
bmd.setPublisher(value);
} else if (line.contains("dc:date")) {
String value = line.split(">")[1].split("<")[0];
DateTime dtReleaseDate = new DateTime(value, DateTimeZone.UTC);
if (dtReleaseDate.getYear() != 101) {
bmd.setReleaseDate(dtReleaseDate.toDate());
}
} else if (line.contains("dc:language")) {
String value = line.split(">")[1].split("<")[0];
bmd.setLanguage(value);
} else if (line.contains("opf:scheme=\"ISBN\"")) {
String value = line.split(">")[1].split("<")[0];
bmd.setIsbn(value);
}
}
return bmd;
}
}