package org.limewire.core.impl.search.torrentweb; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.text.MessageFormat; import java.util.ArrayList; import java.util.List; import java.util.Set; import java.util.Map.Entry; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpHead; import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.protocol.HTTP; import org.apache.http.util.EntityUtils; import org.htmlcleaner.HtmlCleaner; import org.htmlcleaner.TagNode; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.limewire.bittorrent.BTData; import org.limewire.bittorrent.Torrent; import org.limewire.core.api.download.DownloadException; import org.limewire.core.api.download.DownloadItem; import org.limewire.core.api.download.DownloadListManager; import org.limewire.core.api.search.Search; import org.limewire.core.api.search.SearchCategory; import org.limewire.core.api.search.SearchListener; import org.limewire.core.api.search.SearchResult; import org.limewire.core.impl.TorrentFactory; import org.limewire.core.impl.download.DownloadItemFactory; import org.limewire.core.impl.download.DownloadItemFactoryRegistry; import org.limewire.core.settings.SearchSettings; import org.limewire.http.httpclient.HttpClientUtils; import org.limewire.http.httpclient.LimeHttpClient; import org.limewire.io.IOUtils; import org.limewire.logging.Log; import org.limewire.logging.LogFactory; import org.limewire.util.FileUtils; import org.limewire.util.URIUtils; import com.google.inject.Inject; import com.google.inject.Provider; import com.google.inject.assistedinject.Assisted; import com.limegroup.gnutella.filters.response.FilterFactory; import com.limegroup.gnutella.filters.response.SearchResultFilter; import com.limegroup.gnutella.http.HttpClientListener; import com.limegroup.gnutella.http.HttpExecutor; import com.limegroup.gnutella.metadata.MetaDataReader; import com.limegroup.gnutella.util.QueryUtils; import com.limegroup.gnutella.xml.LimeXMLDocument; /** * Performs a websearch for torrents. */ public class TorrentWebSearch implements Search { private static final Log LOG = LogFactory.getLog(TorrentWebSearch.class); /** * Content type of torrent files. */ private static final String BITTORENT_CONTENT_TYPE = "application/x-bittorrent"; /** * Conent type of html files. */ private static final String HTML_CONTENT_TYPE = "text/html"; private final HttpExecutor httpExecutor; /** * Template for search uri. */ private final String searchUriTemplate = SearchSettings.TORRENT_WEB_SEARCH_URI_TEMPLATE.get(); private final Provider<LimeHttpClient> httpClient; /** * The query as entered by the user. */ private final String query; private final TorrentUriPrioritizerFactory torrentUriPrioritizerFactory; private final MetaDataReader metaDataReader; private final TorrentFactory torrentFactory; private final SearchResultFilter filter; private final TorrentRobotsTxt torrentRobotsTxt; private volatile boolean stopped = false; private volatile SearchListener searchListener; @Inject public TorrentWebSearch(HttpExecutor httpExecutor, Provider<LimeHttpClient> httpClient, TorrentUriPrioritizerFactory torrentUriPrioritizerFactory, MetaDataReader metaDataReader, TorrentFactory torrentFactory, @Assisted String query, FilterFactory responseFilterFactory, TorrentRobotsTxt torrentRobotsTxt) { this.httpExecutor = httpExecutor; this.httpClient = httpClient; this.torrentUriPrioritizerFactory = torrentUriPrioritizerFactory; this.metaDataReader = metaDataReader; this.torrentFactory = torrentFactory; this.query = query; this.torrentRobotsTxt = torrentRobotsTxt; this.filter = responseFilterFactory.createResultFilter(); } @Inject void register(DownloadItemFactoryRegistry registry, DownloadListManager downloadListManager) { registry.register(new TorrentWebSearchCoreDownloadItemFactory(downloadListManager)); } @Override public void start() { try { HttpGet get = new HttpGet(MessageFormat.format(searchUriTemplate, URIUtils.encodeUriComponent(query))); get.addHeader(HTTP.CONN_DIRECTIVE, HTTP.CONN_CLOSE); httpExecutor.execute(get, new GoogleJsonResponseHandler(query)); } catch (URISyntaxException e) { throw new RuntimeException(e); } } @Override public void stop() { LOG.debug("stop"); stopped = true; } @Override public void addSearchListener(SearchListener searchListener) { assert this.searchListener == null; this.searchListener = searchListener; } @Override public SearchCategory getCategory() { return SearchCategory.TORRENT; } @Override public void removeSearchListener(SearchListener searchListener) { assert this.searchListener == searchListener; this.searchListener = null; } @Override public void repeat() { } private void handleTorrentResult(File torrentFile, URI uri, URI referrer) { BTData torrentData = TorrentWebSearchUtils.parseTorrentFile(torrentFile); if (torrentData != null) { Torrent torrent = null; LimeXMLDocument xmlDocument = null; try { xmlDocument = metaDataReader.readDocument(torrentFile); if (xmlDocument != null) { if (!matchesQuery(xmlDocument)) { LOG.debugf("query {0} does not match doc {1}", query, xmlDocument); return; } torrent = torrentFactory.createTorrentFromXML(xmlDocument); if (torrent != null) { SearchResult result = new TorrentWebSearchResult(torrentData, referrer, torrentFile, torrent); if (filter.allow(result, xmlDocument)) { LOG.debugf("result accepted: {0}", torrent); searchListener.handleSearchResult(this, result); } else{ LOG.debugf("result rejected: {0}", torrent); } } else { LOG.debug("torrent null"); } } } catch (IOException ie) { LOG.debug("error parsing torrent file", ie); } } else { LOG.debug("torrent data null"); } } boolean matchesQuery(LimeXMLDocument xmlDocument) { Set<String> queryTokens = QueryUtils.extractKeywords(query, true); for (Entry<String, String> entry : xmlDocument.getNameValueSet()) { Set<String> valueTokens = QueryUtils.extractKeywords(entry.getValue(), true); if (valueTokens.containsAll(queryTokens)) { return true; } } return false; } private boolean isTorrentFile(File file) { return FileUtils.getFileExtension(file).equals("torrent"); } private boolean isHtmlFile(File file) { return FileUtils.getFileExtension(file).equals("html"); } private void handleGoogleResults(List<URI> uris, String query) { LOG.debugf("results: {0}", uris); for (URI uri : uris) { if (stopped) { LOG.debug("stopping"); break; } if (!torrentRobotsTxt.isAllowed(uri)) { LOG.debugf("not allowed by robots.txt {0}", uri); continue; } File file = getContent(uri); if (file == null) { continue; } if (isTorrentFile(file)) { handleTorrentResult(file, uri, null); } else if (isHtmlFile(file)) { try { List<URI> candidates = extractTorrentUriCandidates(file, uri); TorrentUriPrioritizer prioritizer = torrentUriPrioritizerFactory.create(query, uri); checkForTorrents(prioritizer.prioritize(candidates), prioritizer, uri); } catch (IOException e) { LOG.debug("error parsing html", e); } } } } /** * Extracts all uris from <code>htmlFile</code> that are the targets of anchor * elements and could be potential torrent uris. */ List<URI> extractTorrentUriCandidates(File htmlFile, URI referrer) throws IOException { HtmlCleaner cleaner = new HtmlCleaner(); TagNode tagNode = cleaner.clean(htmlFile); @SuppressWarnings("unchecked") List<TagNode> anchors = tagNode.getElementListHavingAttribute("href", true); List<URI> candidates = new ArrayList<URI>(anchors.size()); for (TagNode node : anchors) { if (!"a".equalsIgnoreCase(node.getName())) { continue; } String href = node.getAttributeByName("href"); LOG.debugf("resolving: {0} with {1}", href, referrer); try { URI link = URIUtils.toURI(href); if (canBeTorrentUri(link)) { candidates.add(link); } else { link = org.apache.http.client.utils.URIUtils.resolve(referrer, link); if (canBeTorrentUri(link)) { candidates.add(link); } else { LOG.debugf("not a potential torrent link: {0}", link); } } } catch (URISyntaxException e) { LOG.debug("error parsing", e); } } return candidates; } private void checkForTorrents(List<URI> candidates, TorrentUriPrioritizer prioritizer, URI referrer) { int count = 0; for (URI uri : candidates) { if (stopped) { LOG.debug("stopping"); break; } if (!torrentRobotsTxt.isAllowed(uri)) { LOG.debugf("not allowed by robots.txt: {0}", uri); continue; } ++count; try { if (isTorrent(uri)) { LOG.debugf("found torrent after {0} checks", count); prioritizer.setIsTorrent(uri, true); File file = getContent(uri); if (file != null && isTorrentFile(file)) { handleTorrentResult(file, uri, referrer); break; } } else { prioritizer.setIsTorrent(uri, false); } } catch (IOException ie) { LOG.debugf(ie, "couldn't head {0}", uri); } } } private boolean canBeTorrentUri(URI uri) { String scheme = uri.getScheme(); if (scheme == null) { return false; } return scheme.equalsIgnoreCase("http") || scheme.equalsIgnoreCase("magnet"); } private boolean isTorrent(URI uri) throws IOException { LOG.debugf("torrent verification request: {0}", uri); if ("magnet".equalsIgnoreCase(uri.getScheme())) { return true; } HttpHead head = new HttpHead(uri); HttpResponse response = null; LimeHttpClient client = httpClient.get(); try { response = client.execute(head); Header header = response.getFirstHeader(HTTP.CONTENT_TYPE); if (header != null) { LOG.debugf("content type: {0}", header); if (BITTORENT_CONTENT_TYPE.equals(header.getValue())) { return true; } } return false; } catch (IOException e) { LOG.error("error with head request", e); throw e; } finally { client.releaseConnection(response); } } private static File createTmpFile(URI uri, String contentType) throws IOException { try { String prefix = URIUtils.encodeUriComponent(uri.toASCIIString()); File file; if (BITTORENT_CONTENT_TYPE.equals(contentType)) { file = File.createTempFile(prefix, ".torrent"); } else if (HTML_CONTENT_TYPE.equals(contentType)) { file = File.createTempFile(prefix, ".html"); } else { return null; } file.deleteOnExit(); return file; } catch (URISyntaxException e) { throw new RuntimeException(e); } } private File getContent(URI uri) { LOG.debugf("get content: {0}", uri); HttpGet get = new HttpGet(uri); HttpResponse response = null; BufferedOutputStream out = null; LimeHttpClient client = httpClient.get(); try { response = client.execute(get); HttpEntity entity = response.getEntity(); if (entity == null) { LOG.debug("no entity"); return null; } Header contentType = entity.getContentType(); if (contentType == null) { LOG.debug("no content type"); return null; } File file = createTmpFile(uri, contentType.getValue()); if (file == null) { LOG.debugf("not file for content type: {0}", contentType); return null; } out = new BufferedOutputStream(new FileOutputStream(file)); entity.writeTo(out); return file; } catch (IOException ie) { LOG.debug("error with GET request", ie); } finally { IOUtils.close(out); client.releaseConnection(response); } return null; } private class GoogleJsonResponseHandler implements HttpClientListener { private final String query; public GoogleJsonResponseHandler(String query) { this.query = query; } @Override public boolean allowRequest(HttpUriRequest request) { return true; } @Override public boolean requestComplete(HttpUriRequest request, HttpResponse response) { HttpEntity entity = response.getEntity(); if (entity != null) { try { String json = EntityUtils.toString(entity); JSONObject object = new JSONObject(json); JSONArray results = object.getJSONObject("responseData").getJSONArray("results"); List<URI> uris = new ArrayList<URI>(results.length()); for (int i = 0; i < results.length(); i++) { try { uris.add(URIUtils.toURI(results.getJSONObject(i).getString("url"))); } catch (URISyntaxException e) { LOG.error("couldn't parse url", e); } } handleGoogleResults(uris, query); } catch (IOException e) { LOG.error("error getting enitity", e); } catch (JSONException e) { LOG.error("error parsing json", e); } finally { HttpClientUtils.releaseConnection(response); } } return false; } @Override public boolean requestFailed(HttpUriRequest request, HttpResponse response, IOException exc) { LOG.debugf(exc, "request failed: {0}, {1}", request, response); return false; } } private static class TorrentWebSearchCoreDownloadItemFactory implements DownloadItemFactory { private final DownloadListManager downloadListManager; public TorrentWebSearchCoreDownloadItemFactory(DownloadListManager downloadListManager) { this.downloadListManager = downloadListManager; } @Override public DownloadItem create(Search search, List<? extends SearchResult> searchResults, File saveFile, boolean overwrite) throws DownloadException { if (searchResults.get(0) instanceof TorrentWebSearchResult) { TorrentWebSearchResult result = (TorrentWebSearchResult)searchResults.get(0); return downloadListManager.addTorrentDownload(result.getTorrentFile(), saveFile, overwrite); } return null; } } }