package org.rr.jeborker.metadata.download;
import static org.rr.commons.utils.BooleanUtils.not;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.rr.commons.collection.TransformValueList;
import org.rr.commons.log.LoggerFactory;
import org.rr.commons.mufs.IResourceHandler;
import org.rr.commons.mufs.ResourceHandlerFactory;
import org.rr.commons.utils.StringUtil;
import org.rr.commons.utils.ThreadUtils;
class MetadataDownloadUtils {
static byte[] loadPage(URL url) throws IOException {
try {
LoggerFactory.getLogger().log(Level.INFO, "Downloading " + url);
IResourceHandler resourceLoader = ResourceHandlerFactory.getResourceHandler(url);
if(resourceLoader != null) {
return resourceLoader.getContent();
}
LoggerFactory.getLogger(MetadataDownloadUtils.class).log(Level.INFO, "No resource loader for " + url);
} catch (IOException e) {
LoggerFactory.getLogger(MetadataDownloadUtils.class).log(Level.INFO, "Failed load " + url, e);
}
return null;
}
static List<byte[]> loadPages(Iterable<URL> url, int threads) throws IOException {
return ThreadUtils.loopAndWait(url, new ThreadUtils.RunnableImpl<URL, byte[]>() {
@Override
public byte[] run(URL url) {
try {
return loadPage(url);
} catch (IOException e) {
LoggerFactory.getLogger(this).log(Level.INFO, "Failed load " + url, e);
}
return null;
}
}, threads);
}
static Document getDocument(byte[] content, String url) throws IOException {
return Jsoup.parse(new ByteArrayInputStream(content), StringUtil.UTF_8, url);
}
static List<Document> getDocuments(List<byte[]> content, String url) throws IOException {
List<Document> documents = new ArrayList<>(content.size());
for (byte[] bs : content) {
if(content != null) {
documents.add(getDocument(bs, url));
}
}
return documents;
}
/**
* Load the content of given links. All given links has to be relative links.
*
* @param allLinks Links to be loaded.
* @param url The main url for the links.
* @return A list with the content of all links. Can contain <code>null</code> entries if a links could not be loaded.
* @throws IOException
*/
static List<byte[]> loadLinkContent(List<String> allLinks, final String url) throws IOException {
List<byte[]> loadPages = MetadataDownloadUtils.loadPages(new TransformValueList<String, URL>(allLinks) {
@Override
public URL transform(String link) {
try {
String absoluteUrl = url + link;
if(not(url.endsWith("/")) || not(link.startsWith("/"))) {
absoluteUrl = url + "/" + link;
}
return new URL(absoluteUrl);
} catch (MalformedURLException e) {
LoggerFactory.getLogger(this).log(Level.SEVERE, "Failed to create url for " + link, e);
}
return null;
}
}, 10);
return loadPages;
}
static List<URL> getSearchPageUrls(String searchTerm, int pagesToLoad, String queryUrl) throws UnsupportedEncodingException, MalformedURLException {
String encodesSearchPhrase = URLEncoder.encode(searchTerm, StringUtil.UTF_8);
List<URL> urls = new ArrayList<>(pagesToLoad);
for (int i = 0; i < pagesToLoad; i++) {
String position = String.valueOf(i * 10);
String url = MessageFormat.format(queryUrl, new Object[] {encodesSearchPhrase, position});
urls.add(new URL(url));
}
return urls;
}
static byte[] loadImage(String imageUrl) {
if(imageUrl != null) {
LoggerFactory.getLogger(MetadataDownloadUtils.class).log(Level.INFO, "Downloading image from " + imageUrl);
try {
IResourceHandler resourceHandler = ResourceHandlerFactory.getResourceHandler(new URL(imageUrl));
return resourceHandler.getContent();
} catch (Exception e) {
LoggerFactory.getLogger(MetadataDownloadUtils.class).log(Level.WARNING, "Failed to get image " + imageUrl);
}
}
return null;
}
static boolean isIsbn13(String isbn) {
return Pattern.matches("97(?:8|9)([ -])\\d{1,5}\\1\\d{1,7}\\1\\d{1,6}\\1\\d", isbn);
}
static boolean isIsbn10(String isbn) {
return Pattern.matches("^\\d{1,5}([- ])\\d{1,7}\\1\\d{1,6}\\1(\\d|X)$", isbn);
}
}