/*
 * Created by Angel Leon (@gubatron), Alden Torres (aldenml)
 * Copyright (c) 2011-2014, FrostWire(R). All rights reserved.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package com.frostwire.search.btjunkie;

import com.frostwire.search.CrawlRegexSearchPerformer;
import com.frostwire.search.PerformersHelper;
import com.frostwire.search.SearchMatcher;
import com.frostwire.search.SearchResult;
import com.frostwire.search.domainalias.DomainAliasManager;
import com.google.code.regexp.Pattern;

import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;

/**
 * Search performer that scrapes Btjunkie result pages with a regular expression
 * and turns every matched table row into a {@link BtjunkieSearchResult}.
 */
public class BtjunkieSearchPerformer extends CrawlRegexSearchPerformer<BtjunkieSearchResult> {

    private static final int MAX_PAGES = 2;
    private static final int MAX_SEARCH_RESULTS = 20;

    /** Matches dates written as {@code yyyy-MM-dd}. */
    private static final Pattern YEAR_MONTH_DATE_PATTERN = Pattern.compile("([\\d]{4})-([\\d]{2})-([\\d]{2})");

    /**
     * One result row. Named groups in order of appearance:
     * detailsUrl, title, magnet, size, date, seeds.
     */
    private static final String HTML_REGEX = "(?is)<tr>.*?"
            + "<td data-href=\".*?\" class=\"type_td\">.*?"
            + "<td data-href=\"(?<detailsUrl>.*?)\" class=\"title_td\">"
            + "<a title=\"View details for [\\d]+ - (?<title>.*?)\" href=\".*?\"><h2>.*?"
            + "<td data-href=\"(?<magnet>.*?)\" class=\"magnet_td\">.*?"
            + "<td class=\"size_td\">(?<size>.*?)</td>.*?"
            + "<td class=\"date_td\">(?<date>.*?)</td>.*?"
            + "<td class=\"seed_td\">(?<seeds>.*?)</td>.*?</tr>";

    private static final Pattern PATTERN = Pattern.compile(HTML_REGEX);

    /**
     * Powers of 1024 indexed by the values of {@link #UNIT_TO_BYTE_MULTIPLIERS_MAP}.
     * The shifts are done on {@code long} operands: an {@code int} shift only uses
     * the low five bits of the distance, which silently corrupts the TB/PB entries.
     */
    private static final long[] BYTE_MULTIPLIERS = new long[]{
            1L, 1L << 10, 1L << 20, 1L << 30, 1L << 40, 1L << 50};

    /** Maps an upper-case unit ("B", "KB", ... "PB") to its index in {@link #BYTE_MULTIPLIERS}. */
    private static final Map<String, Integer> UNIT_TO_BYTE_MULTIPLIERS_MAP;

    /** Number (optionally fractional), optional whitespace, then a unit such as "MB" or "M". */
    private static final java.util.regex.Pattern sizePattern;

    static {
        UNIT_TO_BYTE_MULTIPLIERS_MAP = new HashMap<String, Integer>();
        UNIT_TO_BYTE_MULTIPLIERS_MAP.put("B", 0);
        UNIT_TO_BYTE_MULTIPLIERS_MAP.put("KB", 1);
        UNIT_TO_BYTE_MULTIPLIERS_MAP.put("MB", 2);
        UNIT_TO_BYTE_MULTIPLIERS_MAP.put("GB", 3);
        UNIT_TO_BYTE_MULTIPLIERS_MAP.put("TB", 4);
        UNIT_TO_BYTE_MULTIPLIERS_MAP.put("PB", 5);
        sizePattern = java.util.regex.Pattern.compile(
                "(\\d+(?:\\.\\d+)?|\\.\\d+)\\s*([KMGTP]?B|[KMGTP])",
                java.util.regex.Pattern.CASE_INSENSITIVE);
    }

    /**
     * Creates a performer for the given keywords.
     *
     * @param domainAliasManager forwarded to the superclass
     * @param token              forwarded to the superclass
     * @param keywords           forwarded to the superclass
     * @param timeout            forwarded to the superclass
     */
    public BtjunkieSearchPerformer(DomainAliasManager domainAliasManager, long token, String keywords, int timeout) {
        // The meaning of the three trailing arguments is defined by the superclass constructor.
        super(domainAliasManager, token, keywords, timeout, 1, MAX_PAGES, MAX_SEARCH_RESULTS);
    }

    /**
     * Builds the search URL for a result page.
     *
     * @param page            page number placed in the URL path
     * @param encodedKeywords keywords appended to the URL as-is
     * @return the URL to fetch
     */
    @Override
    protected String getUrl(int page, String encodedKeywords) {
        // NOTE(review): the host is hard-coded here while fromMatcher() uses
        // getDomainNameToUse() - confirm whether this should honor the alias too.
        return "http://btjunkie.eu/all/by-default_sort/desc/page" + page + "/" + encodedKeywords;
    }

    /** @return the compiled row pattern used to scan a result page */
    @Override
    public Pattern getPattern() {
        return PATTERN;
    }

    /**
     * Converts one regex match (one result row) into a search result.
     *
     * @param matcher matcher positioned on a row matched by {@link #getPattern()}
     * @return the search result built from the row's named groups
     */
    @Override
    public BtjunkieSearchResult fromMatcher(SearchMatcher matcher) {
        final String domainName = getDomainNameToUse();
        final String title = matcher.group("title");
        // "magnet" is the third capturing group; the original code read it by index.
        final String magnet = matcher.group("magnet");

        return new BtjunkieSearchResult(
                domainName,
                "http://" + domainName + matcher.group("detailsUrl"),
                parseFileName(title),
                parseDisplayName(title),
                magnet,
                PerformersHelper.parseInfoHash(magnet),
                parseSize(matcher.group("size")),
                parseDate(matcher.group("date")),
                parseSeeds(matcher.group("seeds")));
    }

    /**
     * Parses a {@code yyyy-MM-dd} date into epoch milliseconds (midnight, default time zone).
     *
     * @param group raw text of the date cell, may be {@code null}
     * @return the parsed timestamp, or the current time when the text is not a date
     */
    private long parseDate(String group) {
        Calendar instance = Calendar.getInstance();
        long now = instance.getTimeInMillis();
        if (group == null) {
            return now;
        }

        com.google.code.regexp.Matcher matcher = YEAR_MONTH_DATE_PATTERN.matcher(group.trim());
        if (!matcher.matches()) {
            return now;
        }

        try {
            int year = Integer.parseInt(matcher.group(1));
            int month = Integer.parseInt(matcher.group(2));
            int date = Integer.parseInt(matcher.group(3));
            instance.clear();
            // Calendar months are 0-based (JANUARY == 0); the page uses 1-based months.
            instance.set(year, month - 1, date);
            return instance.getTimeInMillis();
        } catch (RuntimeException e) {
            // Best effort: fall back to "now" rather than dropping the result.
            return now;
        }
    }

    /**
     * Parses the seed count.
     *
     * @param group raw text of the seeds cell, may be {@code null}
     * @return the seed count, or 0 when the text is not an integer
     */
    private int parseSeeds(String group) {
        if (group == null) {
            return 0;
        }
        try {
            return Integer.parseInt(group.trim());
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /** Replaces runs of characters unwanted in a display name with a single space. */
    private String parseDisplayName(String rawdisplayname) {
        return rawdisplayname.replaceAll("[\\:*?\"|\\[\\]]+", " ");
    }

    /** Replaces runs of characters illegal in file names with "_" and appends ".torrent". */
    private String parseFileName(String filename) {
        return filename.replaceAll("[\\\\/:*?\"<>|\\[\\]]+", "_") + ".torrent";
    }

    /**
     * Parses a human readable size such as "700 MB", "1.5GB" or "12 B" into bytes.
     *
     * @param group raw text of the size cell, may be {@code null}
     * @return the size in bytes (fraction truncated), or 0 when no size is recognized
     */
    private long parseSize(String group) {
        if (group == null) {
            return 0;
        }

        Matcher matcher = sizePattern.matcher(group);
        if (!matcher.find()) {
            return 0;
        }

        String unit = matcher.group(2).toUpperCase(Locale.US);
        if (unit.length() == 1 && !"B".equals(unit)) {
            // Accept the short form ("M", "G", ...) and normalize it to the map's keys.
            unit = unit + "B";
        }

        Integer multiplierIndex = UNIT_TO_BYTE_MULTIPLIERS_MAP.get(unit);
        if (multiplierIndex == null) {
            return 0;
        }

        // The pattern guarantees a well-formed decimal, so parseDouble cannot throw.
        // double keeps 53 bits of precision, which float (24 bits) does not for GB+ sizes.
        double amount = Double.parseDouble(matcher.group(1));
        return (long) (amount * BYTE_MULTIPLIERS[multiplierIndex]);
    }

    /** @return the URL to crawl for the given result, taken from its torrent URL */
    @Override
    protected String getCrawlUrl(BtjunkieSearchResult sr) {
        return sr.getTorrentUrl();
    }

    /**
     * Delegates crawling of the downloaded data to {@link PerformersHelper#crawlTorrent}.
     *
     * @param sr   the result being crawled
     * @param data bytes fetched from {@link #getCrawlUrl(BtjunkieSearchResult)}
     * @return whatever {@code PerformersHelper.crawlTorrent} produces for this result
     * @throws Exception propagated from the helper
     */
    @Override
    protected List<? extends SearchResult> crawlResult(BtjunkieSearchResult sr, byte[] data) throws Exception {
        return PerformersHelper.crawlTorrent(this, sr, data);
    }

    /*
     * Disabled manual harness for trying HTML_REGEX against a saved page.

    public static void main(String[] args) throws IOException {
        System.out.println(HTML_REGEX);
        byte[] readAllBytes = Files.readAllBytes(Paths.get("/Users/gubatron/tmp/test.html"));
        String fileStr = new String(readAllBytes,"utf-8");
        com.google.code.regexp.Matcher matcher = PATTERN.matcher(fileStr);
        int found = 0;
        while (matcher.find()) {
            found++;
            System.out.println("\nfound " + found);
            System.out.println("group 1: " + matcher.group(1));
            System.out.println("group 2: " + matcher.group(2));
            System.out.println("group 3: " + matcher.group(3));
            System.out.println("group 4: " + matcher.group(4));
            System.out.println("group 5: " + matcher.group(5));
            System.out.println("group 6: " + matcher.group(6));
        }
    }
    */
}