/* * RSSFeed - Azureus2 Plugin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA */ package org.kmallan.azureus.rssfeed; import javax.swing.text.html.*; import javax.swing.text.MutableAttributeSet; import java.util.*; import java.net.*; import java.io.IOException; /** * Used to parse the HTML on a URL received from the RSS feed. * * First, it looks for a .torrent link. If one is not found, it checks * each URL to see if its HEAD response is of torrent type. * * Created by IntelliJ IDEA. 
* User: Johan Frank * Date: Jan 7, 2005 * Time: 12:48:57 AM */ public class HtmlAnalyzer extends HTMLEditorKit.ParserCallback implements Runnable { private static final int HREF_CHECK_TIMEOUT = 60000; private List hrefs = new ArrayList(); private String torrentUrl = null, baseUrl = null; private StringBuffer text = new StringBuffer(); private ListBean listBean; private String lastURL = null; private boolean lastWasURL = false; private String lastURLtext; public HtmlAnalyzer() { this("", null); } public HtmlAnalyzer(String baseUrl, ListBean listBean) { this.baseUrl = baseUrl; this.listBean = listBean; } public void handleStartTag(HTML.Tag tag, MutableAttributeSet mas, int pos) { if(tag == HTML.Tag.A) { lastURL = (String)mas.getAttribute(HTML.Attribute.HREF); if(lastURL != null) { if(lastURL.indexOf("://") < 0) { try { lastURL = resolveRelativeURL(baseUrl, lastURL); } catch(MalformedURLException e) {} } if(lastURL.toLowerCase().endsWith(".torrent")) { torrentUrl = lastURL; } else { lastWasURL = true; } } } else { lastWasURL = false; } } public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet mutableAttributeSet, int i) { if(tag == HTML.Tag.BR) text.append("\n"); } public void handleEndTag(HTML.Tag tag, int i) { if (tag == HTML.Tag.BODY || tag == HTML.Tag.HTML || tag == HTML.Tag.HEAD) return; else if (tag == HTML.Tag.A && lastWasURL && torrentUrl == null) { // put links with .torrent name in them at the top of parsing list if (lastURLtext != null && lastURLtext.contains(".torrent")) { hrefs.add(0, lastURL); } hrefs.add(lastURL); } else { text.append("\n"); } } public void handleText(char[] chars, int i) { String s = new String(chars); if (lastWasURL) { lastURLtext = s; } text.append(s.replace('<', ' ').replace('>', ' ').trim()); // remove remnants of broken tags :-P } public synchronized String getTorrentUrl() { if(torrentUrl == null &! 
hrefs.isEmpty()) { Plugin.debugOut("No url ending in .torrent, checking " + hrefs.size() + " URL(s) to see if any are application/x-bittorrent"); Plugin.debugOut("After " + (HREF_CHECK_TIMEOUT / 1000) + " seconds, check will abort."); Thread hrefChecker = new Thread(this, "HrefContentCheckerThread"); hrefChecker.start(); try { wait(HREF_CHECK_TIMEOUT); } catch(InterruptedException e) {} hrefChecker.interrupt(); } Plugin.debugOut("returning torrentUrl: " + torrentUrl); return torrentUrl; } public String getPlainText() { return text.toString(); } protected static String resolveRelativeURL(String url, String href) throws MalformedURLException { URL u = new URL(url); String newUrl = u.getProtocol() + "://" + u.getHost(); if(u.getPort() > 0) newUrl += ":" + u.getPort(); if(!href.startsWith("/")) { // path relative to current String path = u.getPath(); // e.g /dir/file.php if(path.indexOf("/") > -1) path = path.substring(0, path.lastIndexOf("/") + 1); // strip file part newUrl += path; // append /dir if(!newUrl.endsWith("/")) newUrl += "/"; } return newUrl + href; } /** * Check all the URLs that don't end in 'torrent' to see if they are actually * torrents */ public void run() { synchronized(this) { String href = null; int count = 1; for(Iterator iter = hrefs.iterator(); iter.hasNext(); ) { href = (String)iter.next(); if(isHrefTorrent(href)) { torrentUrl = href; Plugin.debugOut("found torrent: " + href); break; } updateView(count++); } notifyAll(); } } private void updateView(int count) { if(listBean == null) return; listBean.setState(Downloader.DOWNLOADER_CHECKING); listBean.setAmount(count); listBean.setPercent(hrefs.size()); Plugin.updateView(listBean); } /** * Check one URL to see if it's a torrent by grabbing the HEAD and seeing * if the connection type is of torrent type. 
* * @param href * @return */ private boolean isHrefTorrent(String href) { try { URLConnection conn = new URL(href).openConnection(); if(conn instanceof HttpURLConnection) { ((HttpURLConnection)conn).setRequestMethod("HEAD"); String cookie = listBean.getFeed().getCookie(); if(cookie != null && cookie.length() > 0) conn.setRequestProperty("Cookie", cookie); conn.connect(); String ct = conn.getContentType(); ((HttpURLConnection)conn).disconnect(); if(ct != null) { Plugin.debugOut("href: " + href + " -> " + ct); return ct.toLowerCase().startsWith("application/x-bittorrent"); } } } catch(IOException e) { e.printStackTrace(); } return false; } }