package com.limegroup.gnutella.search;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.limegroup.gnutella.Connection;
import com.limegroup.gnutella.ConnectionManager;
import com.limegroup.gnutella.ErrorService;
import com.limegroup.gnutella.ForMeReplyHandler;
import com.limegroup.gnutella.GUID;
import com.limegroup.gnutella.ManagedConnection;
import com.limegroup.gnutella.MessageRouter;
import com.limegroup.gnutella.ReplyHandler;
import com.limegroup.gnutella.RouterService;
import com.limegroup.gnutella.messages.BadPacketException;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.routing.QueryRouteTable;

/**
 * This class is a factory for creating <tt>QueryRequest</tt> instances
 * for dynamic queries.  Dynamic queries adjust to the varying conditions of
 * a query, such as the number of results received, the number of nodes
 * hit or theoretically hit, etc.  This class makes it convenient to
 * rapidly generate <tt>QueryRequest</tt>s with similar characteristics,
 * such as guids, the query itself, the xml query, etc, but with customized
 * settings, such as the TTL.
 */
public final class QueryHandler {

    private static final Log LOG = LogFactory.getLog(QueryHandler.class);

    /**
     * Constant for the number of results to look for.
     */
    private final int RESULTS;

    /**
     * Constant for the max TTL for a query.
     */
    public static final byte MAX_QUERY_TTL = (byte) 6;

    /**
     * The number of results to try to get if we're an Ultrapeer originating
     * the query.
     */
    public static final int ULTRAPEER_RESULTS = 150;

    /**
     * Ultrapeers seem to get less results - lets give them a little boost.
     */
    public static final double UP_RESULT_BUMP = 1.15;

    /**
     * The number of results to try to get if the query came from an old
     * leaf -- they are connected to 2 other Ultrapeers that may or may
     * not use this algorithm.
     */
    private static final int OLD_LEAF_RESULTS = 20;

    /**
     * The number of results to try to get for new leaves -- they only
     * maintain 2 connections and don't generate as much overall traffic,
     * so give them a little more.
     */
    private static final int NEW_LEAF_RESULTS = 38;

    /**
     * The number of results to try to get for queries by hash -- really
     * small since you need relatively few exact matches.
     */
    private static final int HASH_QUERY_RESULTS = 10;

    /**
     * If Leaf Guidance is in effect, the maximum number of hits to route.
     */
    private static final int MAXIMUM_ROUTED_FOR_LEAVES = 75;

    /**
     * The number of milliseconds to wait per query hop.  So, if we send
     * out a TTL=3 query, we will then wait TTL*_timeToWaitPerHop
     * milliseconds.  As the query continues and we gather more data
     * regarding the popularity of the file, this number may decrease.
     */
    private volatile long _timeToWaitPerHop = 2400;

    /**
     * Variable for the number of milliseconds to shave off of the time
     * to wait per hop after a certain point in the query.  As the query
     * continues, the time to shave may increase as well.
     */
    private volatile long _timeToDecreasePerHop = 10;

    /**
     * Variable for the number of times we've decremented the per hop wait
     * time.  This is used to determine how much more we should decrement
     * it on this pass.
     */
    private volatile int _numDecrements = 0;

    /**
     * Constant for the maximum number of milliseconds the entire query
     * can last.  The query expires when this limit is reached.
     */
    public static final int MAX_QUERY_TIME = 200 * 1000;

    /**
     * Handle to the <tt>MessageRouter</tt> instance.  Non-final for
     * testing purposes.
     */
    private static MessageRouter _messageRouter =
        RouterService.getMessageRouter();

    /**
     * Handle to the <tt>ConnectionManager</tt> instance.  Non-final for
     * testing purposes.
     */
    private static ConnectionManager _connectionManager =
        RouterService.getConnectionManager();

    /**
     * Variable for the number of results the leaf reports it has.
     */
    private volatile int _numResultsReportedByLeaf = 0;

    /**
     * Variable for the next time after which a query should be sent.
     */
    private volatile long _nextQueryTime = 0;

    /**
     * The theoretical number of hosts that have been reached by this query.
     */
    private volatile int _theoreticalHostsQueried = 1;

    /**
     * Constant for the <tt>ResultCounter</tt> for this query -- used
     * to access the number of replies returned.
     */
    private final ResultCounter RESULT_COUNTER;

    /**
     * Constant list of connections that have already been queried.
     */
    private final List QUERIED_CONNECTIONS = new ArrayList();

    /**
     * <tt>List</tt> of TTL=1 probe connections that we've already used.
     */
    private final List QUERIED_PROBE_CONNECTIONS = new ArrayList();

    /**
     * The time the query started.
     */
    private volatile long _queryStartTime = 0;

    /**
     * The current time, taken each time the query is initiated again.
     */
    private volatile long _curTime = 0;

    /**
     * <tt>ReplyHandler</tt> for replies received for this query.
     */
    private final ReplyHandler REPLY_HANDLER;

    /**
     * Constant for the <tt>QueryRequest</tt> used to build new queries.
     */
    final QueryRequest QUERY;

    /**
     * Boolean for whether or not the query has been forwarded to leaves of
     * this ultrapeer.
     */
    private volatile boolean _forwardedToLeaves = false;

    /**
     * Boolean for whether or not we've sent the probe query.
     */
    private boolean _probeQuerySent;

    /**
     * Used to preference which connections to use when searching.
     * If the search comes from a leaf with a certain locale preference
     * then those connections (of this ultrapeer) which match the
     * locale will be used before the other connections.
     */
    private final String _prefLocale;

    /**
     * Private constructor to ensure that only this class creates new
     * <tt>QueryHandler</tt> instances.
     *
     * @param query the <tt>QueryRequest</tt> to construct a handler for
     * @param results the number of results to get -- this varies based
     *  on the type of servant sending the request and is respected unless
     *  it's a query for a specific hash, in which case we try to get
     *  far fewer matches, ignoring this parameter
     * @param handler the <tt>ReplyHandler</tt> for routing replies
     * @param counter the <tt>ResultCounter</tt> that keeps track of how
     *  many results have been returned for this query
     * @throws IllegalArgumentException if <tt>query</tt>, <tt>handler</tt>
     *  or <tt>counter</tt> is <tt>null</tt>
     */
    private QueryHandler(QueryRequest query, int results, ReplyHandler handler,
                         ResultCounter counter) {
        if (query == null) {
            throw new IllegalArgumentException("null query");
        }
        if (handler == null) {
            throw new IllegalArgumentException("null reply handler");
        }
        if (counter == null) {
            throw new IllegalArgumentException("null result counter");
        }

        // a query carrying URNs is a hash query and needs far fewer matches
        boolean isHashQuery = !query.getQueryUrns().isEmpty();
        QUERY = query;
        RESULTS = isHashQuery ? HASH_QUERY_RESULTS : results;

        REPLY_HANDLER = handler;
        RESULT_COUNTER = counter;
        _prefLocale = handler.getLocalePref();
    }

    /**
     * Factory constructor for generating a new <tt>QueryHandler</tt>
     * for the given <tt>QueryRequest</tt>.  Targets
     * <tt>ULTRAPEER_RESULTS</tt> results.
     *
     * @param query the <tt>QueryRequest</tt> instance containing data
     *  for this set of queries
     * @param handler the <tt>ReplyHandler</tt> for routing the replies
     * @param counter the <tt>ResultCounter</tt> that keeps track of how
     *  many results have been returned for this query
     * @return the <tt>QueryHandler</tt> instance for this query
     */
    public static QueryHandler createHandler(QueryRequest query,
                                             ReplyHandler handler,
                                             ResultCounter counter) {
        return new QueryHandler(query, ULTRAPEER_RESULTS, handler, counter);
    }

    /**
     * Factory constructor for generating a new <tt>QueryHandler</tt>
     * for the given <tt>QueryRequest</tt>.  Used by supernodes to run
     * their own queries (ties up to ForMeReplyHandler.instance()).
     *
     * @param query the <tt>QueryRequest</tt> instance containing data
     *  for this set of queries
     * @param counter the <tt>ResultCounter</tt> that keeps track of how
     *  many results have been returned for this query
     * @return the <tt>QueryHandler</tt> instance for this query
     */
    public static QueryHandler createHandlerForMe(QueryRequest query,
                                                  ResultCounter counter) {
        // because UPs seem to get less results, give them more than usual
        return new QueryHandler(query,
                                (int)(ULTRAPEER_RESULTS * UP_RESULT_BUMP),
                                ForMeReplyHandler.instance(), counter);
    }

    /**
     * Factory constructor for generating a new <tt>QueryHandler</tt>
     * for the given <tt>QueryRequest</tt>.  Targets
     * <tt>OLD_LEAF_RESULTS</tt> results.
     *
     * @param query the <tt>QueryRequest</tt> instance containing data
     *  for this set of queries
     * @param handler the <tt>ReplyHandler</tt> for routing the replies
     * @param counter the <tt>ResultCounter</tt> that keeps track of how
     *  many results have been returned for this query
     * @return the <tt>QueryHandler</tt> instance for this query
     */
    public static QueryHandler createHandlerForOldLeaf(QueryRequest query,
                                                       ReplyHandler handler,
                                                       ResultCounter counter) {
        return new QueryHandler(query, OLD_LEAF_RESULTS, handler, counter);
    }

    /**
     * Factory constructor for generating a new <tt>QueryHandler</tt>
     * for the given <tt>QueryRequest</tt>.  Targets
     * <tt>NEW_LEAF_RESULTS</tt> results.
     *
     * @param query the <tt>QueryRequest</tt> instance containing data
     *  for this set of queries
     * @param handler the <tt>ReplyHandler</tt> for routing the replies
     * @param counter the <tt>ResultCounter</tt> that keeps track of how
     *  many results have been returned for this query
     * @return the <tt>QueryHandler</tt> instance for this query
     */
    public static QueryHandler createHandlerForNewLeaf(QueryRequest query,
                                                       ReplyHandler handler,
                                                       ResultCounter counter) {
        return new QueryHandler(query, NEW_LEAF_RESULTS, handler, counter);
    }

    /**
     * Factory method for creating new <tt>QueryRequest</tt> instances with
     * the same guid, query, xml query, urn types, etc.
     *
     * @param query the <tt>QueryRequest</tt> to base the new query on
     * @param ttl the time to live of the new query
     * @return a new <tt>QueryRequest</tt> instance with all of the
     *  pre-defined parameters and the specified TTL, or <tt>null</tt> if
     *  rebuilding a network query failed with a
     *  <tt>BadPacketException</tt> (the failure is reported to
     *  <tt>ErrorService</tt>)
     * @throws IllegalArgumentException if the ttl is less than 1 or
     *  greater than <tt>MAX_QUERY_TTL</tt>
     * @throws NullPointerException if the <tt>query</tt> argument is
     *  <tt>null</tt>
     */
    public static QueryRequest createQuery(QueryRequest query, byte ttl) {
        // the TTL is validated before the query, as callers may rely on
        // the IllegalArgumentException taking precedence
        if (ttl < 1) {
            throw new IllegalArgumentException("ttl too low: " + ttl);
        }
        if (ttl > MAX_QUERY_TTL) {
            throw new IllegalArgumentException("ttl too high: " + ttl);
        }
        if (query == null) {
            throw new NullPointerException("null query");
        }

        // build it from scratch if it's from us
        if (query.getHops() == 0) {
            return QueryRequest.createQuery(query, ttl);
        }

        try {
            return QueryRequest.createNetworkQuery(query.getGUID(), ttl,
                                                   query.getHops(),
                                                   query.getPayload(),
                                                   query.getNetwork());
        } catch (BadPacketException e) {
            // this should never happen, since the query was already
            // read from the network, so report an error
            ErrorService.error(e);
            return null;
        }
    }

    /**
     * Convenience method for creating a new query with the given TTL
     * with this <tt>QueryHandler</tt>.
     *
     * @param ttl the time to live for the new query
     * @return the new query, as returned by
     *  <tt>createQuery(QueryRequest, byte)</tt> for this handler's query
     */
    QueryRequest createQuery(byte ttl) {
        return createQuery(QUERY, ttl);
    }

    /**
     * Sends the query to the current connections.  If the query is not
     * yet ready to be processed, this returns immediately.
     */
    public void sendQuery() {
        if (hasEnoughResults()) {
            return;
        }

        _curTime = System.currentTimeMillis();
        if (_curTime < _nextQueryTime) {
            return;
        }

        if (LOG.isTraceEnabled()) {
            LOG.trace("Query = " + QUERY.getQuery() +
                      ", numHostsQueried: " + _theoreticalHostsQueried);
        }

        if (_queryStartTime == 0) {
            _queryStartTime = _curTime;
        }

        // handle 3 query cases

        // 1) If we haven't sent the query to our leaves, send it
        if (!_forwardedToLeaves) {
            _forwardedToLeaves = true;
            QueryRouteTable qrt =
                RouterService.getMessageRouter().getQueryRouteTable();

            QueryRequest query = createQuery(QUERY, (byte)1);

            _theoreticalHostsQueried += 25;

            // send the query to our leaves if there's a hit and wait,
            // otherwise we'll move on to the probe
            if (qrt != null && qrt.contains(query)) {
                RouterService.getMessageRouter().
                    forwardQueryRequestToLeaves(query, REPLY_HANDLER);
                _nextQueryTime =
                    System.currentTimeMillis() + _timeToWaitPerHop;
                return;
            }
        }

        // 2) If we haven't sent the probe query, send it
        if (!_probeQuerySent) {
            ProbeQuery pq =
                new ProbeQuery(_connectionManager.getInitializedConnections(),
                               this);
            long timeToWait = pq.getTimeToWait();
            _theoreticalHostsQueried += pq.sendProbe();
            _nextQueryTime = System.currentTimeMillis() + timeToWait;
            _probeQuerySent = true;
            return;
        }

        // 3) If we haven't yet satisfied the query, keep trying.
        // Send a normal query -- make a copy of the connections because
        // we'll be modifying it.
        int newHosts =
            sendQuery(
                new ArrayList(
                    _connectionManager.getInitializedConnections()));
        if (newHosts == 0) {
            // if we didn't query any new hosts, wait awhile for new
            // connections to potentially appear
            _nextQueryTime = System.currentTimeMillis() + 6000;
        }
        _theoreticalHostsQueried += newHosts;

        // if we've already queried quite a few hosts, not gotten
        // many results, and have been querying for awhile, start
        // decreasing the per-hop wait time
        if (_timeToWaitPerHop > 100 &&
            (System.currentTimeMillis() - _queryStartTime) > 6000) {
            decreaseTimeToWaitPerHop();
        }
    }

    /**
     * Shortens the per-hop wait time by the current decrement (never below
     * 100 milliseconds) and grows the decrement for the next pass.
     */
    private void decreaseTimeToWaitPerHop() {
        _timeToWaitPerHop -= _timeToDecreasePerHop;

        int resultFactor =
            Math.max(1,
                     (RESULTS/2) - (30*RESULT_COUNTER.getNumResults()));

        int decrementFactor = Math.max(1, (_numDecrements/6));

        // the current decrease is weighted based on the number
        // of results returned and on the number of connections
        // we've tried -- the fewer results and the more
        // connections, the more the decrease
        int currentDecrease = Math.max(5, resultFactor * decrementFactor);
        _timeToDecreasePerHop += currentDecrease;

        _numDecrements++;
        if (_timeToWaitPerHop < 100) {
            _timeToWaitPerHop = 100;
        }
    }

    /**
     * Sends a query to one of the specified <tt>List</tt> of connections.
     * This is the heart of the dynamic query.  We dynamically calculate the
     * appropriate TTL to use based on our current estimate of how widely the
     * file is distributed, how many connections we have, etc.
     *
     * Connections matching this handler's preferred locale are tried
     * first; only when none of those remain unqueried is the full list
     * used.
     *
     * @param ultrapeersAll the <tt>List</tt> of Gnutella connections to send
     *  queries over -- this list is modified (entries are removed and the
     *  order may be shuffled), so callers must pass a modifiable copy
     * @return the number of new hosts theoretically reached by this
     *  query iteration
     */
    private int sendQuery(List ultrapeersAll) {

        // we want to try to use all connections matching the preferred
        // locale first -- the method returns a copy
        List ultrapeers =
            _connectionManager.getInitializedConnectionsMatchLocale(
                _prefLocale);

        // forget about queried connections that no longer exist
        QUERIED_CONNECTIONS.retainAll(ultrapeersAll);
        QUERIED_PROBE_CONNECTIONS.retainAll(ultrapeersAll);

        // if we did get a list of connections that matches the locale
        // of the query
        if (!ultrapeers.isEmpty()) {
            // at this point ultrapeers could become empty
            removeQueriedConnections(ultrapeers);
        }

        if (ultrapeers.isEmpty()) {
            ultrapeers = ultrapeersAll;
            // now, remove any connections we've used from our current list
            // of connections to try
            removeQueriedConnections(ultrapeers);
        }

        int length = ultrapeers.size();
        if (LOG.isTraceEnabled()) {
            LOG.trace("potential querier size: " + length);
        }

        // add randomization to who we send our queries to
        Collections.shuffle(ultrapeers);

        // weed out all connections that aren't yet stable
        ManagedConnection mc = firstStableConnection(ultrapeers);

        int remainingConnections = length + QUERIED_PROBE_CONNECTIONS.size();

        // return if we don't have any connections to query at this time
        if (remainingConnections == 0) {
            return 0;
        }

        // pretend we have fewer connections than we do in case we
        // lose some
        if (remainingConnections > 4) {
            remainingConnections -= 4;
        }

        boolean probeConnection = false;

        // mc can still be null if the list of connections was empty
        // or none of the candidates is stable yet
        if (mc == null) {
            // if we have no connections to query, simply return for now
            if (QUERIED_PROBE_CONNECTIONS.isEmpty()) {
                return 0;
            }

            // we actually remove this from the list to make sure that
            // QUERIED_CONNECTIONS and QUERIED_PROBE_CONNECTIONS do
            // not have any of the same entries, as this connection
            // will be added to QUERIED_CONNECTIONS
            mc = (ManagedConnection)QUERIED_PROBE_CONNECTIONS.remove(0);
            probeConnection = true;
        }

        // prefer the leaf's own count when leaf guidance is in effect
        int results = (_numResultsReportedByLeaf > 0 ?
                       _numResultsReportedByLeaf :
                       RESULT_COUNTER.getNumResults());
        double resultsPerHost =
            (double)results/(double)_theoreticalHostsQueried;

        int resultsNeeded = RESULTS - results;
        // with no results so far there is no rate to extrapolate from,
        // so fall back to a fixed horizon
        int hostsToQuery = 40000;
        if (resultsPerHost != 0) {
            hostsToQuery = (int)(resultsNeeded/resultsPerHost);
        }

        int hostsToQueryPerConnection = hostsToQuery/remainingConnections;

        byte ttl = calculateNewTTL(hostsToQueryPerConnection,
                                   mc.getNumIntraUltrapeerConnections(),
                                   mc.headers().getMaxTTL());

        // If we're sending the query down a probe connection and we've
        // already used that connection, or that connection doesn't have
        // a hit for the query, send it at TTL=2.  In these cases,
        // sending the query at TTL=1 is pointless because we've either
        // already sent this query, or the Ultrapeer doesn't have a
        // match anyway
        if (ttl == 1 &&
            ((mc.isUltrapeerQueryRoutingConnection() &&
              !mc.shouldForwardQuery(QUERY)) || probeConnection)) {
            ttl = 2;
        }
        QueryRequest query = createQuery(QUERY, ttl);

        // send out the query on the network, returning the number of new
        // hosts theoretically reached
        return sendQueryToHost(query, mc, this);
    }

    /**
     * Removes every connection this handler has already queried (normally
     * or as a probe) from the given list.
     */
    private void removeQueriedConnections(List connections) {
        connections.removeAll(QUERIED_CONNECTIONS);
        connections.removeAll(QUERIED_PROBE_CONNECTIONS);
    }

    /**
     * Returns the first connection in the list that is stable as of
     * <tt>_curTime</tt>, or <tt>null</tt> if there is none.
     */
    private ManagedConnection firstStableConnection(List connections) {
        for (int i = 0; i < connections.size(); i++) {
            ManagedConnection curConnection =
                (ManagedConnection)connections.get(i);

            // if the connection hasn't been up for long, don't use it,
            // as the replies will never make it back to us if the
            // connection is dropped, wasting bandwidth
            if (curConnection.isStable(_curTime)) {
                return curConnection;
            }
        }
        return null;
    }

    /**
     * Sends a query to the specified host, records the connection as
     * queried, and schedules the handler's next query time.
     *
     * @param query the <tt>QueryRequest</tt> to send
     * @param mc the <tt>ManagedConnection</tt> to send the query to
     * @param handler the <tt>QueryHandler</tt> whose bookkeeping is updated
     * @return the number of new hosts theoretically hit by this query, or
     *  0 if the query did not go through
     */
    static int sendQueryToHost(QueryRequest query,
                               ManagedConnection mc,
                               QueryHandler handler) {

        // send the query directly along the connection, but if the query
        // didn't go through send back 0....
        if (!_messageRouter.originateQuery(query, mc)) {
            return 0;
        }

        byte ttl = query.getTTL();

        // adds the connection to the list of probe connections if it's
        // a TTL=1 query to a connection that supports probe extensions,
        // otherwise add it to the list of connections we've queried
        if (ttl == 1 && mc.supportsProbeQueries()) {
            handler.QUERIED_PROBE_CONNECTIONS.add(mc);
        } else {
            handler.QUERIED_CONNECTIONS.add(mc);
            if (LOG.isTraceEnabled()) {
                LOG.trace("QUERIED_CONNECTIONS.size() = " +
                          handler.QUERIED_CONNECTIONS.size());
            }
        }

        if (LOG.isTraceEnabled()) {
            LOG.trace("Querying host " + mc.getAddress() + " with ttl " +
                      query.getTTL());
        }

        // wait proportionally to how far the query will travel
        handler._nextQueryTime = System.currentTimeMillis() +
            (ttl * handler._timeToWaitPerHop);

        return calculateNewHosts(mc, ttl);
    }

    /**
     * Calculates the new TTL to use based on the number of hosts per
     * connection we still need to query.
     *
     * @param hostsToQueryPerConnection the number of hosts we should reach
     *  on each remaining connection, to the best of our knowledge
     * @param degree the out-degree of the next connection
     * @param maxTTL the maximum TTL the connection will allow; values above
     *  <tt>MAX_QUERY_TTL</tt> are capped to it
     * @return the TTL to use for the next connection
     */
    private static byte calculateNewTTL(int hostsToQueryPerConnection,
                                        int degree, byte maxTTL) {

        if (maxTTL > MAX_QUERY_TTL) {
            maxTTL = MAX_QUERY_TTL;
        }

        // not the most efficient algorithm -- should use Math.log, but
        // that's ok
        for (byte i = 1; i < MAX_QUERY_TTL; i++) {

            // biased towards lower TTLs since the horizon expands so
            // quickly
            int hosts = (int)(16.0*calculateNewHosts(degree, i));
            if (hosts >= hostsToQueryPerConnection) {
                return (i > maxTTL) ? maxTTL : i;
            }
        }
        return maxTTL;
    }

    /**
     * Calculate the number of new hosts that would be added to the
     * theoretical horizon if a query with the given ttl were sent down
     * the given connection.
     *
     * @param conn the <tt>Connection</tt> that will receive the query
     * @param ttl the TTL of the query to add
     * @return the theoretical number of new hosts reached
     */
    private static int calculateNewHosts(Connection conn, byte ttl) {
        return calculateNewHosts(conn.getNumIntraUltrapeerConnections(), ttl);
    }

    /**
     * Calculate the number of new hosts that would be added to the
     * theoretical horizon if a query with the given ttl were sent to
     * a node with the given degree.  This is not precise because we're
     * assuming that the nodes connected to the node in question also
     * have the same degree, but there's not much we can do about it!
     *
     * @param degree the degree of the node that will receive the query
     * @param ttl the TTL of the query to add
     * @return the sum of <tt>(degree-1)^(hop-1)</tt> for each hop from 1
     *  to <tt>ttl</tt>, truncated to an int
     */
    private static int calculateNewHosts(int degree, byte ttl) {
        double newHosts = 0;
        for (; ttl > 0; ttl--) {
            newHosts += Math.pow((degree-1), ttl-1);
        }
        return (int)newHosts;
    }

    /**
     * Returns whether or not this query has received enough results.
     *
     * @return <tt>true</tt> if this query has received enough results,
     *  has reached too large a theoretical horizon, or has run for longer
     *  than <tt>MAX_QUERY_TIME</tt>; <tt>false</tt> otherwise, including
     *  when the query hasn't started yet
     */
    public boolean hasEnoughResults() {
        // return false if the query hasn't started yet
        if (_queryStartTime == 0) {
            return false;
        }

        // ----------------
        // NOTE: as agreed, _numResultsReportedByLeaf is the number of results
        // the leaf has received/consumed by a filter DIVIDED by 4 (4 being the
        // number of UPs connection it maintains).  That is why we don't divide
        // it here or anything.  We aren't sure if this mixes well with
        // BearShare's use but oh well....
        // ----------------
        // if leaf guidance is in effect, we have different criteria.
        if (_numResultsReportedByLeaf > 0) {
            // we shouldn't route too much regardless of what the leaf says
            if (RESULT_COUNTER.getNumResults() >= MAXIMUM_ROUTED_FOR_LEAVES) {
                return true;
            }
            // if the leaf is happy, so are we....
            if (_numResultsReportedByLeaf > RESULTS) {
                return true;
            }
        }
        // leaf guidance is not in effect or we are doing our own query
        else if (RESULT_COUNTER.getNumResults() >= RESULTS) {
            return true;
        }

        // if our theoretical horizon has gotten too high, consider
        // it enough results
        // precisely what this number should be is somewhat hard to determine
        // because, while connections have a specific degree, the degree of
        // the connections on subsequent hops cannot be determined
        if (_theoreticalHostsQueried > 110000) {
            return true;
        }

        // return true if we've been querying for longer than the specified
        // maximum -- compared as a long so the elapsed time is never
        // narrowed
        long queryLength = System.currentTimeMillis() - _queryStartTime;
        return queryLength > MAX_QUERY_TIME;
    }

    /**
     * Use this to modify the number of results as reported by the leaf you
     * are querying for.  The stored value only ever increases; a smaller
     * report is ignored.
     *
     * @param numResults the number of results reported by the leaf
     */
    public void updateLeafResults(int numResults) {
        if (numResults > _numResultsReportedByLeaf) {
            _numResultsReportedByLeaf = numResults;
        }
    }

    /**
     * Returns the number of results as reported by the leaf.  At least 0.
     *
     * @return the number of results reported by the leaf
     */
    public int getNumResultsReportedByLeaf() {
        return _numResultsReportedByLeaf;
    }

    /**
     * Accessor for the <tt>ReplyHandler</tt> instance for the connection
     * issuing this request.
     *
     * @return the <tt>ReplyHandler</tt> for the connection issuing this
     *  request
     */
    public ReplyHandler getReplyHandler() {
        return REPLY_HANDLER;
    }

    /**
     * Accessor for the time to wait per hop, in milliseconds,
     * for this QueryHandler.
     *
     * @return the time to wait per hop in milliseconds for this
     *  QueryHandler
     */
    public long getTimeToWaitPerHop() {
        return _timeToWaitPerHop;
    }

    // overrides Object.toString
    public String toString() {
        return "QueryHandler: QUERY: " + QUERY;
    }

    /**
     * @return a new <tt>GUID</tt> wrapping the guid of the query this is
     *  handling
     */
    public GUID getGUID() {
        return new GUID(QUERY.getGUID());
    }
}