/*
* Created by Angel Leon (@gubatron), Alden Torres (aldenml)
* Copyright (c) 2011-2014,, FrostWire(R). All rights reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.frostwire.search;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.frostwire.search.domainalias.DomainAliasManager;
import com.frostwire.util.OSUtils;
/**
* @author gubatron
* @author aldenml
*
*/
public abstract class CrawlPagedWebSearchPerformer<T extends CrawlableSearchResult> extends PagedWebSearchPerformer {
private static final Logger LOG = LoggerFactory.getLogger(CrawlPagedWebSearchPerformer.class);
private static final int DEFAULT_CRAWL_TIMEOUT = 10000; // 10 seconds
private static final int DEFAULT_MAGNET_DOWNLOAD_TIMEOUT = OSUtils.isAndroid() ? 4000 : 20000; // 4 seconds for android, 20 seconds for desktop
private static CrawlCache cache = null;
private static MagnetDownloader magnetDownloader = null;
private int numCrawls;
public CrawlPagedWebSearchPerformer(DomainAliasManager domainAliasManager, long token, String keywords, int timeout, int pages, int numCrawls) {
super(domainAliasManager, token, keywords, timeout, pages);
this.numCrawls = numCrawls;
}
public static void setCache(CrawlCache cache) {
CrawlPagedWebSearchPerformer.cache = cache;
}
public static MagnetDownloader getMagnetDownloader() {
return magnetDownloader;
}
public static void setMagnetDownloader(MagnetDownloader magnetDownloader) {
CrawlPagedWebSearchPerformer.magnetDownloader = magnetDownloader;
}
@Override
public void crawl(CrawlableSearchResult sr) {
if (numCrawls > 0) {
numCrawls--;
T obj = cast(sr);
if (obj != null) {
String url = getCrawlUrl(obj);
if (url != null) {
byte[] data = cacheGet(url);
if (data == null) { // not a big deal about synchronization here
LOG.debug("Downloading data for: " + url);
if (url.startsWith("magnet")) {
data = fetchMagnet(url);
} else {
data = fetchBytes(url, sr.getDetailsUrl(), DEFAULT_CRAWL_TIMEOUT);
}
//we put this here optimistically hoping this is actually
//valid data. if no data can be crawled from this we remove it
//from the cache. we do this because this same data may come
//from another search engine and this way we avoid the
//expense of performing another download.
if (data != null) {
cachePut(url, data);
} else {
LOG.warn("Failed to download data: " + url);
}
}
try {
if (data != null) {
List<? extends SearchResult> results = crawlResult(obj, data);
if (results != null) {
onResults(this, results);
}
}
} catch (Throwable e) {
LOG.warn("Error creating crawled results from downloaded data: " + e.getMessage());
cacheRemove(url); // invalidating cache data
}
} else {
try {
List<? extends SearchResult> results = crawlResult(obj, null);
if (results != null) {
onResults(this, results);
}
} catch (Throwable e) {
LOG.warn("Error creating crawled results from search result alone: " + obj.getDetailsUrl() + ", e=" + e.getMessage());
}
}
}
}
}
protected abstract String getCrawlUrl(T sr);
protected abstract List<? extends SearchResult> crawlResult(T sr, byte[] data) throws Exception;
protected byte[] fetchMagnet(String magnet) {
if (magnetDownloader != null) {
return magnetDownloader.download(magnet, DEFAULT_MAGNET_DOWNLOAD_TIMEOUT);
} else {
LOG.warn("Magnet downloader not set, download not supported: " + magnet);
return null;
}
}
private byte[] cacheGet(String key) {
if (cache != null) {
synchronized (cache) {
return cache.get(key);
}
} else {
return null;
}
}
private void cachePut(String key, byte[] data) {
if (cache != null) {
synchronized (cache) {
cache.put(key, data);
}
}
}
private void cacheRemove(String key) {
if (cache != null) {
synchronized (cache) {
cache.remove(key);
}
}
}
@SuppressWarnings("unchecked")
private T cast(CrawlableSearchResult sr) {
try {
return (T) sr;
} catch (ClassCastException e) {
LOG.warn("Something wrong with the logic, need to pass a crawlable search result with the correct type");
}
return null;
}
public static void clearCache() {
if (cache != null) {
synchronized (cache) {
cache.clear();
}
}
}
public static long getCacheSize() {
long result = 0;
if (cache != null) {
synchronized (cache) {
result = cache.size();
}
}
return result;
}
}