/*
 * Created on May 6, 2008
 * Created by Paul Gardner
 *
 * Copyright 2008 Vuze, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License only.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 */

package com.aelitis.azureus.core.metasearch.impl.web;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.net.Proxy;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.gudy.azureus2.core3.util.Debug;
import org.gudy.azureus2.core3.util.TorrentUtils;
import org.gudy.azureus2.core3.util.UrlUtils;
import org.gudy.azureus2.plugins.utils.StaticUtilities;
import org.gudy.azureus2.plugins.utils.resourcedownloader.ResourceDownloader;
import org.gudy.azureus2.plugins.utils.resourcedownloader.ResourceDownloaderException;
import org.gudy.azureus2.plugins.utils.resourcedownloader.ResourceDownloaderFactory;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;

import com.aelitis.azureus.core.metasearch.SearchException;
import com.aelitis.azureus.core.metasearch.SearchLoginException;
import com.aelitis.azureus.core.metasearch.SearchParameter;
import com.aelitis.azureus.core.metasearch.impl.*;
import com.aelitis.azureus.core.proxy.AEProxyFactory;
import com.aelitis.azureus.core.proxy.AEProxyFactory.PluginProxy;
import com.aelitis.azureus.core.util.GeneralUtils;
import com.aelitis.azureus.core.vuzefile.VuzeFile;
import com.aelitis.azureus.core.vuzefile.VuzeFileComponent;
import com.aelitis.azureus.core.vuzefile.VuzeFileHandler;
import com.aelitis.azureus.util.ImportExportUtils;
import com.aelitis.azureus.util.UrlFilter;

public abstract class WebEngine extends EngineImpl {

    public static final String AM_TRANSPARENT = "transparent";
    public static final String AM_PROXY       = "proxy";

    private static final boolean NEEDS_AUTH_DEFAULT            = false;
    private static final boolean AUTOMATIC_DATE_PARSER_DEFAULT = true;

    static private final Pattern baseTagPattern = Pattern.compile("(?i)<base.*?href=\"([^\"]+)\".*?>");
    static private final Pattern rootURLPattern = Pattern.compile("(https?://[^/]+)");
    static private final Pattern baseURLPattern = Pattern.compile("(https?://.*/)");

    private String         searchURLFormat;
    private String         timeZone;
    private boolean        automaticDateParser;
    private String         userDateFormat;
    private String         downloadLinkCSS;
    private FieldMapping[] mappings;

    private String rootPage;
    private String basePage;

    private DateParser dateParser;

    private boolean  needsAuth;
    private String   authMethod;
    private String   loginPageUrl;
    private String   iconUrl;
    private String[] requiredCookies;

    private String local_cookies;

    // manual test constructor
    public WebEngine(
        MetaSearchImpl  meta_search,
        int             type,
        long            id,
        long            last_updated,
        float           rank_bias,
        String          name,
        String          searchURLFormat,
        String          timeZone,
        boolean         automaticDateParser,
        String          userDateFormat,
        FieldMapping[]  mappings,
        boolean         needs_auth,
        String          auth_method,
        String          login_url,
        String[]        required_cookies )
    {
        super( meta_search, type, id, last_updated, rank_bias, name );

        this.searchURLFormat     = searchURLFormat;
        this.timeZone            = timeZone;
        this.automaticDateParser = automaticDateParser;
        this.userDateFormat      = userDateFormat;
        this.mappings            = mappings;
        this.needsAuth           = needs_auth;
        this.authMethod          = auth_method;
        this.loginPageUrl        = login_url;
        this.requiredCookies     = required_cookies;

        init();
    }

    // bencoded constructor

    protected WebEngine( MetaSearchImpl meta_search, Map map ) throws IOException {
        super( meta_search, map );

        searchURLFormat     = ImportExportUtils.importString( map, "web.search_url_format" );
        timeZone            = ImportExportUtils.importString( map, "web.time_zone" );
        userDateFormat      = ImportExportUtils.importString( map, "web.date_format" );
        downloadLinkCSS     = ImportExportUtils.importString( map, "web.dl_link_css" );
        needsAuth           = ImportExportUtils.importBoolean( map, "web.needs_auth", NEEDS_AUTH_DEFAULT );
        authMethod          = ImportExportUtils.importString( map, "web.auth_method", WebEngine.AM_TRANSPARENT );
        loginPageUrl        = ImportExportUtils.importString( map, "web.login_page" );
        requiredCookies     = ImportExportUtils.importStringArray( map, "web.required_cookies" );
        automaticDateParser = ImportExportUtils.importBoolean( map, "web.auto_date", AUTOMATIC_DATE_PARSER_DEFAULT );
        iconUrl             = ImportExportUtils.importString( map, "web.icon_url" );

        List maps = (List)map.get( "web.maps" );

        mappings = new FieldMapping[maps.size()];

        for (int i=0;i<mappings.length;i++){
            Map m = (Map)maps.get(i);

            mappings[i] = new FieldMapping(
                    ImportExportUtils.importString( m, "name" ),
                    ((Long)m.get( "field" )).intValue());
        }

        init();
    }
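    // exports this engine to a bencodable map; when 'generic' only values that are set or
    // differ from their defaults are written, otherwise every field is exported unconditionally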
    protected void exportToBencodedMap( Map map, boolean generic ) throws IOException {
        super.exportToBencodedMap( map, generic );

        if ( generic ){
            if ( searchURLFormat != null ){
                ImportExportUtils.exportString( map, "web.search_url_format", searchURLFormat );
            }
            if ( timeZone != null ){
                ImportExportUtils.exportString( map, "web.time_zone", timeZone );
            }
            if ( userDateFormat != null ){
                ImportExportUtils.exportString( map, "web.date_format", userDateFormat );
            }
            if ( downloadLinkCSS != null ){
                ImportExportUtils.exportString( map, "web.dl_link_css", downloadLinkCSS );
            }
            if ( needsAuth != NEEDS_AUTH_DEFAULT ){
                ImportExportUtils.exportBoolean( map, "web.needs_auth", needsAuth );
            }
            if ( authMethod != null && !authMethod.equals( WebEngine.AM_TRANSPARENT )){
                ImportExportUtils.exportString( map, "web.auth_method", authMethod );
            }
            if ( loginPageUrl != null ){
                ImportExportUtils.exportString( map, "web.login_page", loginPageUrl );
            }
            if ( iconUrl != null ){
                ImportExportUtils.exportString( map, "web.icon_url", iconUrl );
            }
            if ( requiredCookies != null && requiredCookies.length > 0 ){
                ImportExportUtils.exportStringArray( map, "web.required_cookies", requiredCookies );
            }
            if ( automaticDateParser != AUTOMATIC_DATE_PARSER_DEFAULT ){
                ImportExportUtils.exportBoolean( map, "web.auto_date", automaticDateParser );
            }
        }else{
            ImportExportUtils.exportString( map, "web.search_url_format", searchURLFormat );
            ImportExportUtils.exportString( map, "web.time_zone", timeZone );
            ImportExportUtils.exportString( map, "web.date_format", userDateFormat );
            ImportExportUtils.exportString( map, "web.dl_link_css", downloadLinkCSS );
            ImportExportUtils.exportBoolean( map, "web.needs_auth", needsAuth );
            ImportExportUtils.exportString( map, "web.auth_method", authMethod );
            ImportExportUtils.exportString( map, "web.login_page", loginPageUrl );
            ImportExportUtils.exportString( map, "web.icon_url", iconUrl );
            ImportExportUtils.exportStringArray( map, "web.required_cookies", requiredCookies );
            ImportExportUtils.exportBoolean( map, "web.auto_date", automaticDateParser );
        }

        List maps = new ArrayList();

        map.put( "web.maps", maps );

        for (int i=0;i<mappings.length;i++){
            FieldMapping fm = mappings[i];

            Map m = new HashMap();

            ImportExportUtils.exportString( m, "name", fm.getName());
            m.put( "field", new Long( fm.getField()));

            maps.add( m );
        }
    }

    // json encoded constructor

    protected WebEngine(
        MetaSearchImpl  meta_search,
        int             type,
        long            id,
        long            last_updated,
        float           rank_bias,
        String          name,
        JSONObject      map )
        throws IOException
    {
        super( meta_search, type, id, last_updated, rank_bias, name, map );

        searchURLFormat = ImportExportUtils.importURL( map, "searchURL" );
        timeZone        = ImportExportUtils.importString( map, "timezone" );
        userDateFormat  = ImportExportUtils.importString( map, "time_format" );
        downloadLinkCSS = ImportExportUtils.importURL( map, "download_link" );
        needsAuth       = ImportExportUtils.importBoolean( map, "needs_auth", false );
        authMethod      = ImportExportUtils.importString( map, "auth_method", WebEngine.AM_TRANSPARENT );
        loginPageUrl    = ImportExportUtils.importURL( map, "login_page" );
        iconUrl         = ImportExportUtils.importURL( map, "icon_url" );
        requiredCookies = ImportExportUtils.importStringArray( map, "required_cookies" );

        automaticDateParser = userDateFormat == null || userDateFormat.trim().length() == 0;

        List maps = (List)map.get( "column_map" );

        List conv_maps = new ArrayList();

        for (int i=0;i<maps.size();i++){
            Map m = (Map)maps.get(i);

            // wha? getting some nulls here :(
            // from JSON like "column_map\":[null,null,{\"group_nb\":\"3
            if ( m == null ){
                continue;
            }

            // backwards compat from when there was a mapping entry
            Map test = (Map)m.get( "mapping" );

            if ( test != null ){
                m = test;
            }

            String vuze_field = ImportExportUtils.importString( m, "vuze_field" ).toUpperCase();
            String field_name = ImportExportUtils.importString( m, "group_nb" );    // regexp case

            if ( field_name == null ){
                field_name = ImportExportUtils.importString( m, "field_name" );     // json case
            }

            if ( vuze_field == null || field_name == null ){
                log( "Missing field mapping name/value in '" + m + "'" );
            }

            int field_id = vuzeFieldToID( vuze_field );

            if ( field_id == -1 ){
                log( "Unrecognised field mapping '" + vuze_field + "'" );

                continue;
            }

            conv_maps.add( new FieldMapping( field_name, field_id ));
        }

        mappings = (FieldMapping[])conv_maps.toArray( new FieldMapping[conv_maps.size()]);

        init();
    }

    protected void exportToJSONObject( JSONObject res ) throws IOException {
        super.exportToJSONObject( res );

        ImportExportUtils.exportJSONURL( res, "searchURL", searchURLFormat );
        ImportExportUtils.exportJSONString( res, "timezone", timeZone );

        if ( downloadLinkCSS != null ){
            ImportExportUtils.exportJSONURL( res, "download_link", downloadLinkCSS );
        }

        ImportExportUtils.exportJSONBoolean( res, "needs_auth", needsAuth );
        ImportExportUtils.exportJSONString( res, "auth_method", authMethod );
        ImportExportUtils.exportJSONURL( res, "login_page", loginPageUrl );
        ImportExportUtils.exportJSONURL( res, "icon_url", iconUrl );
        ImportExportUtils.exportJSONStringArray( res, "required_cookies", requiredCookies );

        if ( !automaticDateParser ){
            ImportExportUtils.exportJSONString( res, "time_format", userDateFormat );
        }

        JSONArray maps = new JSONArray();

        res.put( "column_map", maps );

        for (int i=0;i<mappings.length;i++){
            FieldMapping fm = mappings[i];

            int    field_id    = fm.getField();
            String field_value = vuzeIDToField( field_id );

            if ( field_value == null ){
                log( "JSON export: unknown field id " + field_id );
            }else{
                JSONObject entry = new JSONObject();

                maps.add( entry );

                entry.put( "vuze_field", field_value );

                if ( getType() == ENGINE_TYPE_JSON ){
                    entry.put( "field_name", fm.getName());
                }else{
                    entry.put( "group_nb", fm.getName());
                }
            }
        }
    }
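    // common initialisation: derives rootPage/basePage from the search URL template, builds the
    // date parser and processes any explicit ":COOKIE:" authentication data embedded in the URL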
    protected void init() {
        try {
            Matcher m = rootURLPattern.matcher( searchURLFormat );

            if ( m.find()){
                this.rootPage = m.group(1);
            }
        } catch( Exception e ) {
            // didn't find the root url within the URL
            this.rootPage = null;
        }

        try {
            Matcher m = baseURLPattern.matcher( searchURLFormat );

            if ( m.find()){
                this.basePage = m.group(1);
            }
        } catch( Exception e ) {
            // didn't find the base url within the URL
            this.basePage = null;
        }

        this.dateParser = new DateParserRegex( timeZone, automaticDateParser, userDateFormat );

        local_cookies = getLocalString( LD_COOKIES );

        // normalise to permit == to be safely used when testing method
        authMethod = authMethod.intern();

        // see if we have explicit cookie information in the URL:
        int cook_pos = searchURLFormat.indexOf( ":COOKIE:" );

        if ( cook_pos != -1 ){
            String explicit_cookie = searchURLFormat.substring( cook_pos + 8 );

            setNeedsAuth( true );
            setCookies( explicit_cookie );
            setRequiredCookies( CookieParser.getCookiesNames( explicit_cookie ));

            searchURLFormat = searchURLFormat.substring( 0, cook_pos );

            setPublic( false );

            String name = getName();

            int n_pos = name.indexOf( ":COOKIE:" );

            if ( n_pos != -1 ){
                setName( name.substring( 0, n_pos ));
            }
        }
    }

    public String getNameEx() {
        String url = getRootPage();

        if ( url == null || url.length() == 0 ){
            url = searchURLFormat;
        }

        String name = getName();

        if ( name.indexOf( url ) == -1 ){
            return( name + " (" + url + ")");
        }else{
            return( name );
        }
    }

    public String getReferer() {
        return( getRootPage());
    }

    public boolean supportsContext( String context_key ) {
        try{
            URL url = new URL( searchURLFormat );

            String host = url.getHost();

            if ( org.gudy.azureus2.core3.util.Constants.isAzureusDomain( host )){
                return( true );
            }

            if ( UrlFilter.getInstance().isWhitelisted( searchURLFormat )){
                return( true );
            }

            // allow local addresses for testing purposes
            InetAddress iad = InetAddress.getByName( host );

            if ( iad.isLoopbackAddress() || iad.isLinkLocalAddress() || iad.isSiteLocalAddress()){
                return( true );
            }
        }catch( Throwable e ){
        }

        return( false );
    }

    public boolean isShareable() {
        try{
            return( !UrlUtils.containsPasskey( new URL( searchURLFormat )));
        }catch( Throwable e ){
            return( true );
        }
    }

    protected pageDetails getWebPageContent(
        SearchParameter[]   searchParameters,
        Map<String,String>  searchContext,
        String              headers,
        boolean             only_if_modified )
        throws SearchException
    {
        String searchURL = searchURLFormat;

        try{
            pageDetails details = getWebPageContentSupport( null, null, searchURL, searchParameters, searchContext, headers, only_if_modified );

            return( details );

        }catch( SearchException e ){
            try{
                URL original_url = new URL( searchURL );

                PluginProxy plugin_proxy = AEProxyFactory.getPluginProxy( "getting search results ", original_url );

                if ( plugin_proxy == null ){
                    throw( e );
                }else{
                    URL   url   = plugin_proxy.getURL();
                    Proxy proxy = plugin_proxy.getProxy();

                    boolean ok = false;

                    try{
                        String proxy_host = original_url.getHost() + (original_url.getPort()==-1?"":(":" + original_url.getPort()));

                        pageDetails details = getWebPageContentSupport( proxy, proxy_host, url.toExternalForm(), searchParameters, searchContext, headers, only_if_modified );

                        ok = true;

                        return( details );

                    }finally{
                        plugin_proxy.setOK( ok );
                    }
                }
            }catch( Throwable f ){
                throw( e );
            }
        }
    }
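    // core page fetch: substitutes search parameters into the URL template, supports the
    // POST-via-URL hack ("azmethod=post_basic:..."), applies auth cookies and conditional-GET
    // headers, and sniffs the response character set before returning the decoded page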
    private pageDetails getWebPageContentSupport(
        Proxy               proxy,
        String              proxy_host,
        String              searchURL,
        SearchParameter[]   searchParameters,
        Map<String,String>  searchContext,
        String              headers,
        boolean             only_if_modified )
        throws SearchException
    {
        try {
            TorrentUtils.setTLSDescription( "Search: " + getName());

            if ( requiresLogin()){
                throw new SearchLoginException( "login required" );
            }

            boolean vuze_file = searchURL.toLowerCase().startsWith( "vuze:" );

            if ( !vuze_file ){
                String[] from_strs = new String[ searchParameters.length ];
                String[] to_strs   = new String[ searchParameters.length ];

                for ( int i = 0; i < searchParameters.length; i++ ){
                    SearchParameter parameter = searchParameters[i];

                    from_strs[i] = "%" + parameter.getMatchPattern();
                    to_strs[i]   = URLEncoder.encode( parameter.getValue(), "UTF-8" );
                }

                searchURL = GeneralUtils.replaceAll( searchURL, from_strs, to_strs );

                Iterator<Map.Entry<String, String>> it = searchContext.entrySet().iterator();

                while( it.hasNext()){
                    Map.Entry<String, String> entry = it.next();

                    String key = entry.getKey();

                    if ( supportsContext( key )){
                        if ( searchURL.indexOf('?') == -1 ){
                            searchURL += "?";
                        }else{
                            searchURL += "&";
                        }

                        String value = entry.getValue();

                        searchURL += key + "=" + URLEncoder.encode( value, "UTF-8" );
                    }
                }
            }

            //System.out.println(searchURL);

            // hack to support POST by encoding into URL
            // http://xxxx/index.php?main=search&azmethod=post_basic:SearchString1=%s&SearchString=&search=Search

            ResourceDownloaderFactory rdf = StaticUtilities.getResourceDownloaderFactory();

            URL                initial_url;
            ResourceDownloader initial_url_rd;

            int post_pos = searchURL.indexOf( "azmethod=" );

            if ( post_pos > 0 ){
                String post_params = searchURL.substring( post_pos+9 );

                searchURL = searchURL.substring( 0, post_pos-1 );

                debugLog( "search_url: " + searchURL + ", post=" + post_params );

                initial_url = new URL( searchURL );

                int sep = post_params.indexOf( ':' );

                String type = post_params.substring( 0, sep );

                if ( !type.equals( "post_basic" )){
                    throw( new SearchException( "Only basic type supported" ));
                }

                post_params = post_params.substring( sep+1 );   // already URL encoded

                if ( proxy == null ){
                    initial_url_rd = rdf.create( initial_url, post_params );
                }else{
                    initial_url_rd = rdf.create( initial_url, post_params, proxy );
                }

                initial_url_rd.setProperty( "URL_Content-Type", "application/x-www-form-urlencoded" );

            }else{
                debugLog( "search_url: " + searchURL );

                initial_url = new URL( searchURL );

                if ( proxy == null ){
                    initial_url_rd = rdf.create( initial_url );
                }else{
                    initial_url_rd = rdf.create( initial_url, proxy );
                }
            }

            if ( proxy_host != null ){
                initial_url_rd.setProperty( "URL_HOST", proxy_host );
            }

            setHeaders( initial_url_rd, headers );

            if ( needsAuth && local_cookies != null ){
                initial_url_rd.setProperty( "URL_Cookie", local_cookies );
            }

            if ( only_if_modified ){
                String last_modified = getLocalString( LD_LAST_MODIFIED );
                String etag          = getLocalString( LD_ETAG );

                if ( last_modified != null ){
                    initial_url_rd.setProperty( "URL_If-Modified-Since", last_modified );
                }

                if ( etag != null ){
                    initial_url_rd.setProperty( "URL_If-None-Match", etag );
                }
            }
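            // perform the download: file:// URLs are read directly, everything else goes through a
            // meta-refresh-aware downloader with 304 (not-modified) and Set-Cookie handling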
            InputStream is = null;

            try{
                String content_charset = "UTF-8";

                ResourceDownloader mr_rd = null;

                if ( initial_url.getProtocol().equalsIgnoreCase( "file" )){

                    // handle file://c:/ - map to file:/c:/
                    String str = initial_url.toExternalForm();

                    if ( initial_url.getAuthority() != null ){
                        str = str.replaceFirst( "://", ":/" );
                    }

                    int pos = str.indexOf( '?' );

                    if ( pos != -1 ){
                        str = str.substring( 0, pos );
                    }

                    is = new FileInputStream( new File( new URL( str ).toURI()));

                }else{
                    if ( proxy == null ){
                        initial_url_rd.setProperty( "URL_Connect_Timeout", 10*1000 );
                        initial_url_rd.setProperty( "URL_Read_Timeout", 10*1000 );
                    }

                    mr_rd = rdf.getMetaRefreshDownloader( initial_url_rd );

                    try{
                        is = mr_rd.download();

                    }catch( ResourceDownloaderException e ){
                        Long response = (Long)mr_rd.getProperty( "URL_HTTP_Response" );

                        if ( response != null && response.longValue() == 304 ){
                            // not modified
                            return( new pageDetails( initial_url, initial_url, "" ));
                        }else{
                            throw( e );
                        }
                    }

                    if ( needsAuth ){
                        List cookies_list = (List)mr_rd.getProperty( "URL_Set-Cookie" );

                        List cookies_set = new ArrayList();

                        if ( cookies_list != null ){
                            for ( int i=0; i<cookies_list.size(); i++ ){
                                String[] cookies = ((String)cookies_list.get(i)).split(";");

                                for ( int j=0; j<cookies.length; j++ ){
                                    String cookie = cookies[j].trim();

                                    if ( cookie.indexOf('=') != -1 ){
                                        cookies_set.add( cookie );
                                    }
                                }
                            }
                        }

                        // well, not much we can do with the cookies anyway as in general the ones
                        // set are the ones missing/expired, not the existing ones. That is, we can't
                        // deduce anything from the fact that a required cookie is not 'set' here.
                        // The most we could do is catch a server that explicitly deleted invalid
                        // cookies by expiring it, but I doubt this is a common practice.
                        // Also note the complexity of cookie syntax:
                        //   Set-Cookie: old standard using expires=, new using MaxAge
                        //   Set-Cookie2:
                        // Maybe use http://jcookie.sourceforge.net/ if needed
                    }

                    if ( only_if_modified ){
                        String last_modified = extractProperty( mr_rd.getProperty( "URL_Last-Modified" ));
                        String etag          = extractProperty( mr_rd.getProperty( "URL_ETag" ));

                        if ( last_modified != null ){
                            setLocalString( LD_LAST_MODIFIED, last_modified );
                        }

                        if ( etag != null ){
                            setLocalString( LD_ETAG, etag );
                        }
                    }

                    List cts = (List)mr_rd.getProperty( "URL_Content-Type" );

                    if ( cts != null && cts.size() > 0 ){
                        String content_type = (String)cts.get(0);

                        int pos = content_type.toLowerCase().indexOf( "charset" );

                        if ( pos != -1 ){
                            content_type = content_type.substring( pos+1 );

                            pos = content_type.indexOf('=');

                            if ( pos != -1 ){
                                content_type = content_type.substring( pos+1 ).trim();

                                pos = content_type.indexOf(';');

                                if ( pos != -1 ){
                                    content_type = content_type.substring( 0, pos ).trim();
                                }

                                if ( content_type.startsWith( "\"" )){
                                    content_type = content_type.substring(1).trim();
                                }

                                if ( content_type.endsWith( "\"" )){
                                    content_type = content_type.substring( 0, content_type.length()-1 ).trim();
                                }

                                try{
                                    if ( Charset.isSupported( content_type )){
                                        debugLog( "charset: " + content_type );

                                        content_charset = content_type;
                                    }
                                }catch( Throwable e ){
                                    try{
                                        // handle lowercase 'utf-8' for example
                                        content_type = content_type.toUpperCase();

                                        if ( Charset.isSupported( content_type )){
                                            debugLog( "charset: " + content_type );

                                            content_charset = content_type;
                                        }
                                    }catch( Throwable f ){
                                        log( "Content type '" + content_type + "' not supported", f );
                                    }
                                }
                            }
                        }
                    }
                }

                ByteArrayOutputStream baos = new ByteArrayOutputStream(8192);

                byte[] buffer = new byte[8192];

                while( true ){
                    int len = is.read( buffer );

                    if ( len <= 0 ){
                        break;
                    }

                    baos.write( buffer, 0, len );
                }

                byte[] data = baos.toByteArray();

                if ( vuze_file ){
                    try{
                        VuzeFileHandler vfh = VuzeFileHandler.getSingleton();

                        VuzeFile vf = vfh.loadVuzeFile( data );

                        vfh.handleFiles( new VuzeFile[]{ vf }, VuzeFileComponent.COMP_TYPE_NONE );

                    }catch( Throwable e ){
                        Debug.out( e );
                    }

                    return( new pageDetails( initial_url, initial_url, null ));
                }
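                // the transport-level charset may be wrong or missing, so also sniff the document:
                // an XML declaration encoding first, then an http-equiv meta tag; if one is found the
                // declaration is rewritten to utf-8 and the body re-decoded with the sniffed charset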
                String page = null;

                String content = new String( data, 0, Math.min( data.length, 2048 ), content_charset );

                String lc_content = content.toLowerCase();

                {
                    // first look for xml charset
                    // e.g. <?xml version="1.0" encoding="windows-1251" ?>

                    int pos1 = lc_content.indexOf( "<?xml" );

                    if ( pos1 != -1 ){
                        int pos2 = lc_content.indexOf( "?>" );

                        if ( pos2 != -1 ){
                            int pos3 = lc_content.indexOf( "encoding", pos1 );

                            if ( pos3 != -1 ){
                                pos3 = lc_content.indexOf( "\"", pos3 );
                            }

                            if ( pos3 > pos1 && pos3 < pos2 ){
                                pos3++;

                                int pos4 = lc_content.indexOf( "\"", pos3 );

                                if ( pos4 > pos3 && pos4 < pos2 ){
                                    String encoding = content.substring( pos3, pos4 ).trim();

                                    try{
                                        if ( Charset.isSupported( encoding )){
                                            debugLog( "charset from xml tag: " + encoding );

                                            content_charset = encoding;

                                            // some feeds have crap at the start which makes pos2 mismatch for the above '?' - adjust if necessary
                                            int data_start = pos2;
                                            int max_skip   = 64;

                                            while( data[data_start] != '?' && max_skip-- > 0 ){
                                                data_start++;
                                            }

                                            page = content.substring( 0, pos3 ) + "utf-8" + content.substring( pos4, pos2 ) +
                                                   new String( data, data_start, data.length - data_start, content_charset );
                                        }
                                    }catch( Throwable e ){
                                        log( "Content type '" + encoding + "' not supported", e );
                                    }
                                }
                            }
                        }
                    }
                }

                if ( page == null ){
                    // next look for http-equiv charset
                    // e.g. <meta http-equiv="Content-Type" content="text/html; charset=windows-1251" />

                    int pos = 0;

                    while( true ){
                        int pos1 = lc_content.indexOf( "http-equiv", pos );

                        if ( pos1 != -1 ){
                            int pos2 = lc_content.indexOf( ">", pos1 );

                            if ( pos2 != -1 ){
                                int pos3 = lc_content.indexOf( "charset", pos1 );

                                if ( pos3 != -1 && pos3 < pos2 ){
                                    pos3 = lc_content.indexOf( "=", pos3 );

                                    if ( pos3 != -1 ){
                                        pos3++;

                                        int pos4 = lc_content.indexOf( "\"", pos3 );

                                        if ( pos4 != -1 ){
                                            int pos5 = lc_content.indexOf( ";", pos3 );

                                            if ( pos5 != -1 && pos5 < pos4 ){
                                                pos4 = pos5;
                                            }

                                            String encoding = content.substring( pos3, pos4 ).trim();

                                            try{
                                                if ( Charset.isSupported( encoding )){
                                                    debugLog( "charset from http-equiv : " + encoding );

                                                    content_charset = encoding;

                                                    // some feeds have crap at the start which makes pos2 mismatch for the above '?' - adjust if necessary
                                                    int data_start = pos2;
                                                    int max_skip   = 64;

                                                    while( data[data_start] != '?' && max_skip-- > 0 ){
                                                        data_start++;
                                                    }

                                                    page = content.substring( 0, pos3 ) + "utf-8" + content.substring( pos4, pos2 ) +
                                                           new String( data, data_start, data.length - data_start, content_charset );
                                                }
                                            }catch( Throwable e ){
                                                log( "Content type '" + encoding + "' not supported", e );
                                            }

                                            break;
                                        }
                                    }
                                }

                                pos = pos2;
                            }else{
                                break;
                            }
                        }else{
                            break;
                        }
                    }
                }
                if ( page == null ){
                    page = new String( data, content_charset );
                }

                debugLog( "page:" );
                debugLog( page );

                // List cookie = (List)url_rd.getProperty( "URL_Set-Cookie" );

                try {
                    Matcher m = baseTagPattern.matcher( page );

                    if ( m.find()){
                        basePage = m.group(1);

                        debugLog( "base_page: " + basePage );
                    }
                } catch( Exception e ) {
                    // no BASE tag in the page
                }

                URL final_url = initial_url;

                if ( mr_rd != null ){
                    URL x = (URL)mr_rd.getProperty( "URL_URL" );

                    if ( x != null ){
                        final_url = x;
                    }
                }

                return( new pageDetails( initial_url, final_url, page ));

            }finally{
                if ( is != null ){
                    is.close();
                }
            }
        }catch( SearchException e ){
            throw( e );

        }catch( Throwable e ) {
            // e.printStackTrace();

            debugLog( "Failed to load page: " + Debug.getNestedExceptionMessageAndStack(e));

            throw( new SearchException( "Failed to load page", e ));

        }finally{
            TorrentUtils.setTLSDescription( null );
        }
    }

    protected String extractProperty( Object o ) {
        if ( o instanceof String ){
            return((String)o);

        }else if ( o instanceof List ){
            List l = (List)o;

            if ( l.size() > 0 ){
                if ( l.size() > 1 ){
                    Debug.out( "Property has multiple values!" );
                }

                Object x = l.get(0);

                if ( x instanceof String ){
                    return((String)x);
                }else{
                    Debug.out( "Property value isn't a String:" + x );
                }
            }
        }

        return( null );
    }

    protected void setHeaders( ResourceDownloader rd, String encoded_headers ) {
        UrlUtils.setBrowserHeaders( rd, encoded_headers, rootPage );
    }

    public String getIcon() {
        if ( iconUrl != null ){
            return iconUrl;
        }

        if ( rootPage != null ){
            return rootPage + "/favicon.ico";
        }

        return null;
    }

    protected FieldMapping[] getMappings() {
        return( mappings );
    }

    public boolean supportsField( int field_id ) {
        for (int i=0;i<mappings.length;i++){
            if ( mappings[i].getField() == field_id ){
                return( true );
            }
        }

        return( false );
    }

    protected String getRootPage() {
        return( rootPage );
    }

    protected String getBasePage() {
        return( basePage );
    }

    protected DateParser getDateParser() {
        return( dateParser );
    }

    public String getDownloadLinkCSS() {
        if ( downloadLinkCSS == null ){
            return( "" );
        }

        return( downloadLinkCSS );
    }
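    // login is required when the engine needs authentication but the stored cookies don't
    // contain all of the cookies listed in requiredCookies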
    public boolean requiresLogin() {
        return needsAuth && !CookieParser.cookiesContain( requiredCookies, local_cookies );
    }

    public void setCookies( String cookies ) {
        this.local_cookies = cookies;

        setLocalString( LD_COOKIES, cookies );
    }

    public String getSearchUrl( boolean raw ) {
        if ( raw ){
            return( searchURLFormat );
        }else{
            return( getSearchUrl());
        }
    }

    public String getSearchUrl() {
        return( searchURLFormat.replaceAll( "%s", "" ));
    }

    public void setSearchUrl( String str ) {
        searchURLFormat = str;

        init();
    }

    public String getLoginPageUrl() {
        // Let's try with no login page url
        // return loginPageUrl;

        return searchURLFormat.replaceAll( "%s", "" );
    }

    public void setLoginPageUrl( String loginPageUrl ) {
        this.loginPageUrl = loginPageUrl;
    }

    public String[] getRequiredCookies() {
        return requiredCookies;
    }

    public void setRequiredCookies( String[] requiredCookies ) {
        this.requiredCookies = requiredCookies;
    }

    public boolean isNeedsAuth() {
        return needsAuth;
    }

    public boolean isAuthenticated() {
        return( isNeedsAuth());
    }

    protected void setNeedsAuth( boolean b ) {
        needsAuth = b;
    }

    public String getAuthMethod() {
        return( authMethod );
    }

    public String getCookies() {
        return local_cookies;
    }

    public String getString() {
        return( getString( false ));
    }

    public String getString( boolean full ) {
        return( super.getString() +
                (full?(", url=" + searchURLFormat ):"") +
                ", auth=" + isNeedsAuth() +
                (isNeedsAuth()?" [cookies=" + local_cookies + "]":"" ));
    }

    public static class pageDetails {

        private URL    initial_url;
        private URL    final_url;
        private String content;

        protected pageDetails( URL _initial_url, URL _final_url, String _content ) {
            initial_url = _initial_url;
            final_url   = _final_url;
            content     = _content;
        }

        public URL getInitialURL() {
            return( initial_url );
        }

        public URL getFinalURL() {
            return( final_url );
        }

        public String getContent() {
            return( content );
        }
    }
}