// QueryHandlerImpl.java example
//
// Explorer
// LimeWire-Pirate-Edition-master
package com.limegroup.gnutella.search;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.limewire.io.GUID;

import com.limegroup.gnutella.ConnectionManager;
import com.limegroup.gnutella.MessageRouter;
import com.limegroup.gnutella.ReplyHandler;
import com.limegroup.gnutella.connection.RoutedConnection;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.messages.QueryRequestFactory;
import com.limegroup.gnutella.routing.QueryRouteTable;

/**
 * This class is a factory for creating <tt>QueryRequest</tt> instances for
 * dynamic queries. Dynamic queries adjust to the varying conditions of a query,
 * such as the number of results received, the number of nodes hit or
 * theoretically hit, etc. This class makes it convenient to rapidly generate
 * <tt>QueryRequest</tt>s with similar characteristics, such as guids, the
 * query itself, the xml query, etc, but with customized settings, such as the
 * TTL.
 */
final class QueryHandlerImpl implements QueryHandler {

    private static final Log LOG = LogFactory.getLog(QueryHandlerImpl.class);

    /**
     * Constant for the max TTL for a query.
     */
    private static final byte MAX_QUERY_TTL = (byte) 6;

    /**
     * The number of results to try to get for queries by hash -- really small
     * since you need relatively few exact matches.
     */
    private static final int HASH_QUERY_RESULTS = 10;

    /**
     * If Leaf Guidance is in effect, the maximum number of hits to route.
     */
    private static final int MAXIMUM_ROUTED_FOR_LEAVES = 75;

    /**
     * Constant for the number of results to look for.
     */
    private final int RESULTS;

    /**
     * The number of milliseconds to wait per query hop. So, if we send out a
     * TTL=3 query, we will then wait TTL*_timeToWaitPerHop milliseconds. As the
     * query continues and we gather more data regarding the popularity of the
     * file, this number may decrease.
     */
    private volatile long _timeToWaitPerHop = 2400;

    /**
     * Variable for the number of milliseconds to shave off of the time to wait
     * per hop after a certain point in the query. As the query continues, the
     * time to shave may increase as well.
     */
    private volatile long _timeToDecreasePerHop = 10;

    /**
     * Variable for the number of times we've decremented the per hop wait time.
     * This is used to determine how much more we should decrement it on this
     * pass.
     */
    private volatile int _numDecrements = 0;

    /** List of times since start of query that results were updated */
    private final List<Long> times = new ArrayList<Long>();

    /** Number of results reported each update */
    private final List<Integer> results = new ArrayList<Integer>();

    /**
     * Variable for the number of results the leaf reports it has.
     */
    private volatile int _numResultsReportedByLeaf = 0;

    /**
     * Variable for the next time after which a query should be sent.
     */
    private volatile long _nextQueryTime = 0;

    /**
     * The theoretical number of hosts that have been reached by this query.
     */
    private volatile int _theoreticalHostsQueried = 1;

    /**
     * Constant for the <tt>ResultCounter</tt> for this query -- used to
     * access the number of replies returned.
     */
    private final ResultCounter RESULT_COUNTER;

    /**
     * Constant list of connections that have already been queried.
     */
    private final List<RoutedConnection> QUERIED_CONNECTIONS = new ArrayList<RoutedConnection>();

    /**
     * <tt>List</tt> of TTL=1 probe connections that we've already used.
     */
    private final List<RoutedConnection> QUERIED_PROBE_CONNECTIONS = new ArrayList<RoutedConnection>();

    /**
     * The time the query started.
     */
    private volatile long _queryStartTime = 0;

    /**
     * The current time, taken each time the query is initiated again.
     */
    private volatile long _curTime = 0;

    /**
     * <tt>ReplyHandler</tt> for replies received for this query.
     */
    private final ReplyHandler REPLY_HANDLER;

    /**
     * Constant for the <tt>QueryRequest</tt> used to build new queries.
     */
    private final QueryRequest QUERY;

    /**
     * Boolean for whether or not the query has been forwarded to leaves of this
     * ultrapeer.
     */
    private volatile boolean _forwardedToLeaves = false;

    /**
     * Boolean for whether or not we've sent the probe query.
     */
    private boolean _probeQuerySent;

    /**
     * Used to prioritize connections when searching: if the search comes from
     * a leaf with a certain locale preference, the connections (of this
     * ultrapeer) that match that locale are used before the other
     * connections.
     */
    private final String _prefLocale;

    private final QueryRequestFactory queryRequestFactory;

    private final ConnectionManager connectionManager;

    private final MessageRouter messageRouter;

    /**
     * Creates a handler for a single dynamic query.
     * 
     * @param query the <tt>QueryRequest</tt> to construct a handler for; used
     *        as the template for every query this handler sends
     * @param results the number of results to get -- this varies based on the
     *        type of servant sending the request and is respected unless the
     *        query carries URNs (a query for a specific hash), in which case
     *        <tt>HASH_QUERY_RESULTS</tt> is used and this parameter is ignored
     * @param handler the <tt>ReplyHandler</tt> for routing replies; its locale
     *        preference is also recorded
     * @param counter the <tt>ResultCounter</tt> that keeps track of how many
     *        results have been returned for this query
     * @param queryRequestFactory the factory used to build the outgoing
     *        queries
     * @param connectionManager the source of the connections to query
     * @param messageRouter the router used to send the queries
     * @throws IllegalArgumentException if <tt>query</tt>, <tt>handler</tt> or
     *         <tt>counter</tt> is <tt>null</tt>
     */
    QueryHandlerImpl(QueryRequest query, int results, ReplyHandler handler, ResultCounter counter,
            QueryRequestFactory queryRequestFactory, ConnectionManager connectionManager,
            MessageRouter messageRouter) {
        this.connectionManager = connectionManager;
        this.messageRouter = messageRouter;

        // validate the mandatory collaborators before touching any of them
        if (query == null) {
            throw new IllegalArgumentException("null query");
        }
        if (handler == null) {
            throw new IllegalArgumentException("null reply handler");
        }
        if (counter == null) {
            throw new IllegalArgumentException("null result counter");
        }

        this.queryRequestFactory = queryRequestFactory;

        // a query without URNs honors the caller's result target; a hash
        // query only needs a handful of exact matches
        this.QUERY = query;
        this.RESULTS = query.getQueryUrns().isEmpty() ? results : HASH_QUERY_RESULTS;

        this.REPLY_HANDLER = handler;
        this.RESULT_COUNTER = counter;
        this._prefLocale = handler.getLocalePref();
    }

    /**
     * Exposes the list of connections that this handler has already queried.
     * The handler's own list is returned, not a copy.
     */
    List<RoutedConnection> getQueriedConnections() {
        return this.QUERIED_CONNECTIONS;
    }

    /**
     * Factory method for creating a new <tt>QueryRequest</tt> from an existing
     * one with a different TTL. The actual construction is delegated to
     * <tt>queryRequestFactory</tt>.
     * 
     * @param query the query to base the new request on
     * @param ttl the time to live of the new query
     * @return the <tt>QueryRequest</tt> produced by the factory for the given
     *         query and TTL
     * @throws IllegalArgumentException if the ttl is lower than 1 or higher
     *         than <tt>MAX_QUERY_TTL</tt>
     * @throws NullPointerException if the <tt>query</tt> argument is
     *         <tt>null</tt>
     */
    private QueryRequest createQuery(QueryRequest query, byte ttl) {
        // the TTL is checked first, so an invalid TTL wins over a null query;
        // the message covers both ends of the range, not just "too high"
        if (ttl < 1 || ttl > MAX_QUERY_TTL) {
            throw new IllegalArgumentException("ttl out of range [1, " + MAX_QUERY_TTL + "]: "
                    + ttl);
        }
        if (query == null) {
            throw new NullPointerException("null query");
        }

        return queryRequestFactory.createQuery(query, ttl);
    }

    /**
     * Returns the <tt>QueryRequest</tt> this handler was constructed with,
     * which serves as the template for the queries it builds.
     */
    public QueryRequest getTemplateQueryRequest() {
        return this.QUERY;
    }

    /**
     * Builds a new query from this handler's template query using the given
     * TTL.
     * 
     * @param ttl the time to live of the new query
     * @return the new <tt>QueryRequest</tt>
     * @see com.limegroup.gnutella.search.QueryHandler#createQuery(byte)
     */
    public QueryRequest createQuery(byte ttl) {
        return this.createQuery(this.QUERY, ttl);
    }

    /**
     * Sends the next stage of this dynamic query, if one is due.
     * <p>
     * Nothing happens when {@link #hasEnoughResults()} is true or when the
     * current time is still before <tt>_nextQueryTime</tt>. Otherwise the
     * stages are tried in order: forwarding to leaves (once), the probe query
     * (once), and after that a regular query over one connection per call.
     * The leaf stage falls through to the probe stage within the same call
     * when the query route table is missing or has no hit for the query.
     * 
     * @see com.limegroup.gnutella.search.QueryHandler#sendQuery()
     */
    public void sendQuery() {
        if (hasEnoughResults())
            return;

        // too early for the next query -- wait for the previous one to play out
        _curTime = System.currentTimeMillis();
        if (_curTime < _nextQueryTime)
            return;

        if (LOG.isTraceEnabled())
            LOG.trace("Query = " + QUERY.getQuery() + ", numHostsQueried: "
                    + _theoreticalHostsQueried);

        // the first call that gets this far marks the start of the query
        if (_queryStartTime == 0) {
            _queryStartTime = _curTime;
        }

        // handle 3 query cases

        // 1) If we haven't sent the query to our leaves, send it
        if (!_forwardedToLeaves) {

            _forwardedToLeaves = true;
            QueryRouteTable qrt = messageRouter.getQueryRouteTable();

            QueryRequest query = createQuery(QUERY, (byte) 1);

            // the horizon is bumped whether or not the query is actually
            // forwarded below
            _theoreticalHostsQueried += 25;

            // send the query to our leaves if there's a hit and wait,
            // otherwise we'll move on to the probe
            if (qrt != null && qrt.contains(query)) {
                messageRouter.forwardQueryRequestToLeaves(query, REPLY_HANDLER);
                _nextQueryTime = System.currentTimeMillis() + _timeToWaitPerHop;
                return;
            }
        }

        // 2) If we haven't sent the probe query, send it
        if (!_probeQuerySent) {
            ProbeQuery pq = new ProbeQuery(connectionManager.getInitializedConnections(), this);
            // the wait time is read before the probe is sent
            long timeToWait = pq.getTimeToWait();
            _theoreticalHostsQueried += pq.sendProbe();
            _nextQueryTime = System.currentTimeMillis() + timeToWait;
            _probeQuerySent = true;
            return;
        }

        // 3) If we haven't yet satisfied the query, keep trying
        else {
            // Otherwise, just send a normal query -- make a copy of the
            // connections because we'll be modifying it.
            int newHosts = sendQuery(new ArrayList<RoutedConnection>(connectionManager
                    .getInitializedConnections()));
            if (newHosts == 0) {
                // if we didn't query any new hosts, wait awhile for new
                // connections to potentially appear
                _nextQueryTime = System.currentTimeMillis() + 6000;
            }
            _theoreticalHostsQueried += newHosts;

            // if we've already queried quite a few hosts, not gotten
            // many results, and have been querying for awhile, start
            // decreasing the per-hop wait time
            if (_timeToWaitPerHop > 100 && (System.currentTimeMillis() - _queryStartTime) > 6000) {
                _timeToWaitPerHop -= _timeToDecreasePerHop;

                // shrinks towards 1 as results come in (each result counts
                // for 30 against half the result target)
                int resultFactor = Math.max(1, (RESULTS / 2)
                        - (30 * RESULT_COUNTER.getNumResults()));

                // grows by one for every six decrements already applied
                int decrementFactor = Math.max(1, (_numDecrements / 6));

                // the current decrease is weighted based on the number
                // of results returned and on the number of connections
                // we've tried -- the fewer results and the more
                // connections, the more the decrease
                int currentDecrease = resultFactor * decrementFactor;

                // the step used on the next pass grows by at least 5 ms
                currentDecrease = Math.max(5, currentDecrease);
                _timeToDecreasePerHop += currentDecrease;

                _numDecrements++;
                // never wait less than 100 ms per hop
                if (_timeToWaitPerHop < 100) {
                    _timeToWaitPerHop = 100;
                }
            }
        }
    }

    /**
     * Sends a query to one of the specified <tt>List</tt> of connections.
     * This is the heart of the dynamic query. We dynamically calculate the
     * appropriate TTL to use based on our current estimate of how widely the
     * file is distributed, how many connections we have, etc.
     * <p>
     * Connections matching <tt>_prefLocale</tt> are tried first; only when
     * none of them is left unqueried does the method fall back to
     * <tt>ultrapeersAll</tt>. In that fallback the passed list itself is
     * modified (already-queried connections are removed and the list is
     * shuffled), so callers should pass a copy they own.
     * 
     * @param ultrapeersAll the <tt>List</tt> of Gnutella connections to send
     *        queries over; also used to drop connections that are no longer
     *        present from the queried and probe lists
     * @return the number of new hosts theoretically reached by this query
     *         iteration, or 0 if there was no connection to query
     * 
     * Default access for testing
     */
    int sendQuery(List<? extends RoutedConnection> ultrapeersAll) {

        // we want to try to use all connections in ultrapeersLocale first.
        List<? extends RoutedConnection> ultrapeers = // method returns a copy
        connectionManager.getInitializedConnectionsMatchLocale(_prefLocale);

        // forget about queried connections that are not in the given list
        QUERIED_CONNECTIONS.retainAll(ultrapeersAll);
        QUERIED_PROBE_CONNECTIONS.retainAll(ultrapeersAll);

        // if we did get a list of connections that matches the locale
        // of the query
        if (!ultrapeers.isEmpty()) {
            ultrapeers.removeAll(QUERIED_CONNECTIONS);
            ultrapeers.removeAll(QUERIED_PROBE_CONNECTIONS);
            // at this point ultrapeers could become empty
        }

        if (ultrapeers.isEmpty()) {
            ultrapeers = ultrapeersAll;
            // now, remove any connections we've used from our current list
            // of connections to try
            ultrapeers.removeAll(QUERIED_CONNECTIONS);
            ultrapeers.removeAll(QUERIED_PROBE_CONNECTIONS);
        }

        int length = ultrapeers.size();
        if (LOG.isTraceEnabled())
            LOG.trace("potential querier size: " + length);
        byte ttl = 0;
        RoutedConnection mc = null;

        // add randomization to who we send our queries to
        Collections.shuffle(ultrapeers);

        // weed out all connections that aren't yet stable
        for (int i = 0; i < length; i++) {
            RoutedConnection curConnection = ultrapeers.get(i);

            // if the connection hasn't been up for long, don't use it,
            // as the replies will never make it back to us if the
            // connection is dropped, wasting bandwidth
            if (!curConnection.isStable(_curTime))
                continue;
            mc = curConnection;
            break;
        }

        // counts every candidate (stable or not) plus the probe connections
        // that can still be re-queried
        int remainingConnections = Math.max(length + QUERIED_PROBE_CONNECTIONS.size(), 0);

        // return if we don't have any connections to query at this time
        if (remainingConnections == 0)
            return 0;

        // pretend we have fewer connections than we do in case we
        // lose some
        if (remainingConnections > 4)
            remainingConnections -= 4;

        boolean probeConnection = false;

        // mc can still be null if the list of connections was empty.
        if (mc == null) {
            // if we have no connections to query, simply return for now
            if (QUERIED_PROBE_CONNECTIONS.isEmpty()) {
                return 0;
            }

            // we actually remove this from the list to make sure that
            // QUERIED_CONNECTIONS and QUERIED_PROBE_CONNECTIONS do
            // not have any of the same entries, as this connection
            // will be added to QUERIED_CONNECTIONS
            mc = QUERIED_PROBE_CONNECTIONS.remove(0);
            probeConnection = true;
        }

        // prefer the leaf-reported result count; fall back to our own counter
        // when the leaf has not reported anything
        int reported = _numResultsReportedByLeaf;
        if(reported <= 0)
            reported = RESULT_COUNTER.getNumResults();
        double resultsPerHost = (double) reported / _theoreticalHostsQueried;

        // estimate how many more hosts are needed at the observed hit rate;
        // with no results so far, 40000 is used as the estimate
        int resultsNeeded = RESULTS - reported;
        int hostsToQuery = 40000;
        if (resultsPerHost != 0) {
            hostsToQuery = (int) (resultsNeeded / resultsPerHost);
        }

        int hostsToQueryPerConnection = hostsToQuery / remainingConnections;

        ttl = calculateNewTTL(hostsToQueryPerConnection, mc.getConnectionCapabilities()
                .getNumIntraUltrapeerConnections(), mc.getConnectionCapabilities().getHeadersRead()
                .getMaxTTL());

        // If we're sending the query down a probe connection and we've
        // already used that connection, or that connection doesn't have
        // a hit for the query, send it at TTL=2. In these cases,
        // sending the query at TTL=1 is pointless because we've either
        // already sent this query, or the Ultrapeer doesn't have a
        // match anyway
        if (ttl == 1
                && ((mc.isUltrapeerQueryRoutingConnection() && !mc.shouldForwardQuery(QUERY)) || probeConnection)) {
            ttl = 2;
        }
        QueryRequest query = createQuery(QUERY, ttl);

        // send out the query on the network, returning the number of new
        // hosts theoretically reached
        return sendQueryToHost(query, mc);
    }

    /**
     * Sends the given query over the given connection and records the
     * connection as queried.
     * <p>
     * A TTL=1 query sent to a connection that supports probe queries is
     * recorded in the probe list; every other successful send is recorded in
     * the list of queried connections. The time of the next query is pushed
     * out by the query's TTL times the current per-hop wait time.
     * 
     * @param query the query to send
     * @param mc the connection to send it over
     * @return the number of new hosts theoretically reached, or 0 if the
     *         router did not send the query
     * @see com.limegroup.gnutella.search.QueryHandler#sendQueryToHost(com.limegroup.gnutella.messages.QueryRequest,
     *      com.limegroup.gnutella.connection.RoutedConnection)
     */
    public int sendQueryToHost(QueryRequest query, RoutedConnection mc) {

        // a query that did not go out reaches nobody
        boolean sent = messageRouter.sendInitialQuery(query, mc);
        if (!sent) {
            return 0;
        }

        final byte hops = query.getTTL();

        // only TTL=1 queries to probe-capable connections count as probes
        boolean recordAsProbe = hops == 1
                && mc.getConnectionCapabilities().supportsProbeQueries();
        if (!recordAsProbe) {
            QUERIED_CONNECTIONS.add(mc);
            if (LOG.isTraceEnabled()) {
                LOG.trace("QUERIED_CONNECTIONS.size() = " + QUERIED_CONNECTIONS.size());
            }
        } else {
            QUERIED_PROBE_CONNECTIONS.add(mc);
        }

        if (LOG.isTraceEnabled()) {
            LOG.trace("Querying host " + mc.getAddress() + " with ttl " + query.getTTL());
        }

        // give the query one per-hop wait for every hop it may travel
        long now = System.currentTimeMillis();
        _nextQueryTime = now + (hops * _timeToWaitPerHop);

        return calculateNewHosts(mc, hops);
    }

    /**
     * Picks the TTL for the next query from the number of hosts each remaining
     * connection still has to cover.
     * <p>
     * The smallest TTL below <tt>MAX_QUERY_TTL</tt> whose estimated horizon
     * (scaled by 16) covers the requested number of hosts is chosen, capped at
     * the connection's maximum TTL. If no such TTL exists, the capped maximum
     * is returned.
     * 
     * @param hostsToQueryPerConnection the number of hosts we should reach on
     *        each remaining connection, to the best of our knowledge
     * @param degree the out-degree of the next connection
     * @param maxTTL the maximum TTL the connection will allow; values above
     *        <tt>MAX_QUERY_TTL</tt> are treated as <tt>MAX_QUERY_TTL</tt>
     * @return the TTL to use for the next connection
     */
    static byte calculateNewTTL(int hostsToQueryPerConnection, int degree, byte maxTTL) {

        // never exceed our own limit, whatever the connection advertises
        final byte ceiling = maxTTL > MAX_QUERY_TTL ? MAX_QUERY_TTL : maxTTL;

        // linear scan from the lowest TTL up -- biased towards lower TTLs
        // since the horizon expands so quickly
        byte candidate = 1;
        while (candidate < MAX_QUERY_TTL) {
            int reachable = (int) (16.0 * calculateNewHosts(degree, candidate));
            if (reachable >= hostsToQueryPerConnection) {
                return candidate <= ceiling ? candidate : ceiling;
            }
            candidate++;
        }
        return ceiling;
    }

    /**
     * Estimates how many new hosts would join the theoretical horizon if a
     * query with the given TTL were sent down the given connection. The
     * connection's number of intra-ultrapeer connections is used as its
     * degree.
     * 
     * @param conn the connection that will receive the query
     * @param ttl the TTL of the query to add
     * @return the estimated number of new hosts
     */
    static int calculateNewHosts(RoutedConnection conn, byte ttl) {
        int degree = conn.getConnectionCapabilities().getNumIntraUltrapeerConnections();
        return calculateNewHosts(degree, ttl);
    }

    /**
     * Estimates how many new hosts would join the theoretical horizon if a
     * query with the given TTL were sent to a node with the given degree.
     * <p>
     * The estimate is the sum of <tt>(degree - 1)^k</tt> for <tt>k</tt> from
     * <tt>ttl - 1</tt> down to 0, truncated to an <tt>int</tt>. It is not
     * precise because every node further out is assumed to have the same
     * degree as the node in question.
     * 
     * @param degree the degree of the node that will receive the query
     * @param ttl the TTL of the query to add; a value of 0 or less yields 0
     * @return the estimated number of new hosts
     */
    static int calculateNewHosts(int degree, byte ttl) {
        double horizon = 0;
        // walk the hops from the farthest one back to the node itself
        int exponent = ttl - 1;
        while (exponent >= 0) {
            horizon += Math.pow(degree - 1, exponent);
            exponent--;
        }
        return (int) horizon;
    }

    /**
     * Determines whether this query can stop: enough results have come in,
     * the theoretical horizon has grown too large, or the query has been
     * running for longer than <tt>QueryHandler.MAX_QUERY_TIME</tt>.
     * 
     * @return <tt>true</tt> if no further queries should be sent,
     *         <tt>false</tt> otherwise -- including when the query has not
     *         been started yet
     * @see com.limegroup.gnutella.search.QueryHandler#hasEnoughResults()
     */
    public boolean hasEnoughResults() {
        // return false if the query hasn't started yet
        if (_queryStartTime == 0)
            return false;

        // ----------------
        // NOTE: as agreed, _numResultsReportedByLeaf is the number of results
        // the leaf has received/consumed by a filter DIVIDED by 4 (4 being the
        // number of UPs connection it maintains). That is why we don't divide
        // it here or anything. We aren't sure if this mixes well with
        // BearShare's use but oh well....
        // ----------------
        // if leaf guidance is in effect, we have different criteria.
        if (_numResultsReportedByLeaf > 0) {
            // we shouldn't route too much regardless of what the leaf says
            if (RESULT_COUNTER.getNumResults() >= MAXIMUM_ROUTED_FOR_LEAVES)
                return true;
            // if the leaf is happy, so are we....
            if (_numResultsReportedByLeaf > RESULTS)
                return true;
        }
        // leaf guidance is not in effect or we are doing our own query
        else if (RESULT_COUNTER.getNumResults() >= RESULTS)
            return true;

        // if our theoretical horizon has gotten too high, consider
        // it enough results
        // precisely what this number should be is somewhat hard to determine
        // because, while connections have a specific degree, the degree of
        // the connections on subsequent hops cannot be determined
        if (_theoreticalHostsQueried > 110000) {
            return true;
        }

        // return true if we've been querying for longer than the specified
        // maximum. The elapsed time is kept as a long: narrowing it to an
        // int would wrap to a negative value once it exceeds
        // Integer.MAX_VALUE milliseconds and the timeout would not fire.
        long queryLength = System.currentTimeMillis() - _queryStartTime;
        if (queryLength > QueryHandler.MAX_QUERY_TIME) {
            return true;
        }

        return false;
    }

    /**
     * Records a new result count reported by the leaf. Reports that do not
     * exceed the count already recorded are ignored.
     * <p>
     * For the first 20 accepted updates, the time elapsed since
     * <tt>_queryStartTime</tt> and the reported count are also appended to
     * the update history.
     * 
     * @param numResults the number of results the leaf reports
     * @see com.limegroup.gnutella.search.QueryHandler#updateLeafResults(int)
     */
    public void updateLeafResults(int numResults) {
        // only an increase counts as an update
        if (numResults <= _numResultsReportedByLeaf) {
            return;
        }

        // record up to the first 20 updates
        if (times.size() < 20) {
            long elapsed = System.currentTimeMillis() - _queryStartTime;
            times.add(Long.valueOf(elapsed));
            results.add(Integer.valueOf(numResults));
        }
        _numResultsReportedByLeaf = numResults;
    }

    /**
     * Returns the number of results the leaf has most recently reported for
     * this query.
     * 
     * @see com.limegroup.gnutella.search.QueryHandler#getNumResultsReportedByLeaf()
     */
    public int getNumResultsReportedByLeaf() {
        return this._numResultsReportedByLeaf;
    }

    /**
     * Returns the <tt>ReplyHandler</tt> that replies for this query are
     * routed to.
     * 
     * @see com.limegroup.gnutella.search.QueryHandler#getReplyHandler()
     */
    public ReplyHandler getReplyHandler() {
        return this.REPLY_HANDLER;
    }

    /**
     * Returns the number of milliseconds currently waited per query hop.
     * 
     * @see com.limegroup.gnutella.search.QueryHandler#getTimeToWaitPerHop()
     */
    public long getTimeToWaitPerHop() {
        return this._timeToWaitPerHop;
    }

    /**
     * Describes this handler by the query it was created for.
     */
    @Override
    public String toString() {
        return "QueryHandler: QUERY: " + this.QUERY;
    }

    /**
     * Returns a new <tt>GUID</tt> built from the GUID of this handler's
     * template query.
     * 
     * @see com.limegroup.gnutella.search.QueryHandler#getGUID()
     */
    public GUID getGUID() {
        return new GUID(this.QUERY.getGUID());
    }
}