package com.limegroup.gnutella.search;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.limewire.io.GUID;
import com.limegroup.gnutella.ConnectionManager;
import com.limegroup.gnutella.MessageRouter;
import com.limegroup.gnutella.ReplyHandler;
import com.limegroup.gnutella.connection.RoutedConnection;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.messages.QueryRequestFactory;
import com.limegroup.gnutella.routing.QueryRouteTable;
/**
* Handles a single dynamic query on behalf of this ultrapeer. Dynamic queries
* adjust to the varying conditions of a query, such as the number of results
* received, the number of nodes hit or theoretically hit, etc. Besides
* deciding when, where and with which TTL the query is sent next, this class
* makes it convenient to rapidly generate <tt>QueryRequest</tt>s with similar
* characteristics, such as guids, the query itself, the xml query, etc, but
* with customized settings, such as the TTL.
*/
final class QueryHandlerImpl implements QueryHandler {
/** Logger used for the trace output of this class. */
private static final Log LOG = LogFactory.getLog(QueryHandlerImpl.class);
/**
* Constant for the max TTL for a query. Queries built by this class are
* rejected if their TTL exceeds this value, and TTLs calculated for a
* connection are capped at it.
*/
private static final byte MAX_QUERY_TTL = (byte) 6;
/**
* The number of results to try to get for queries by hash -- really small
* since you need relatively few exact matches.
*/
private static final int HASH_QUERY_RESULTS = 10;
/**
* If Leaf Guidance is in effect, the maximum number of hits to route.
*/
private static final int MAXIMUM_ROUTED_FOR_LEAVES = 75;
/**
* The number of results to look for. Fixed at construction time:
* <tt>HASH_QUERY_RESULTS</tt> if the query carries URNs, otherwise the
* value passed to the constructor.
*/
private final int RESULTS;
/**
* The number of milliseconds to wait per query hop. So, if we send out a
* TTL=3 query, we will then wait TTL*_timeToWaitPerHop milliseconds. As the
* query continues and we gather more data regarding the popularity of the
* file, this number may decrease, but never below 100.
*/
private volatile long _timeToWaitPerHop = 2400;
/**
* Variable for the number of milliseconds to shave off of the time to wait
* per hop after a certain point in the query. As the query continues, the
* time to shave may increase as well.
*/
private volatile long _timeToDecreasePerHop = 10;
/**
* Variable for the number of times we've decremented the per hop wait time.
* This is used to determine how much more we should decrement it on this
* pass.
*/
private volatile int _numDecrements = 0;
/**
* List of times since start of query (in milliseconds) that results were
* updated by the leaf. Only the first 20 updates are recorded.
*/
private final List<Long> times = new ArrayList<Long>();
/**
* Number of results reported by the leaf on each recorded update; entries
* line up with those in <tt>times</tt>.
*/
private final List<Integer> results = new ArrayList<Integer>();
/**
* Variable for the number of results the leaf reports it has. Zero until
* the leaf reports something, which is how the rest of the class tells
* whether leaf guidance is in effect.
*/
private volatile int _numResultsReportedByLeaf = 0;
/**
* Variable for the next time after which a query should be sent. While the
* current time is before this value, sendQuery() returns without sending.
*/
private volatile long _nextQueryTime = 0;
/**
* The theoretical number of hosts that have been reached by this query.
* Starts at 1.
*/
private volatile int _theoreticalHostsQueried = 1;
/**
* The <tt>ResultCounter</tt> for this query -- used to access the number
* of replies returned.
*/
private final ResultCounter RESULT_COUNTER;
/**
* List of connections that have already been queried, other than those
* tracked in <tt>QUERIED_PROBE_CONNECTIONS</tt>. The reference is constant,
* the contents change as the query proceeds.
*/
private final List<RoutedConnection> QUERIED_CONNECTIONS = new ArrayList<RoutedConnection>();
/**
* <tt>List</tt> of TTL=1 probe connections that we've already used. A
* connection is moved out of this list when it is queried again as a
* regular connection.
*/
private final List<RoutedConnection> QUERIED_PROBE_CONNECTIONS = new ArrayList<RoutedConnection>();
/**
* The time the query started, in milliseconds, or 0 if the query has not
* been started yet.
*/
private volatile long _queryStartTime = 0;
/**
* The current time, taken each time the query is initiated again. Also
* used as the reference time when checking whether a connection is stable.
*/
private volatile long _curTime = 0;
/**
* <tt>ReplyHandler</tt> for replies received for this query.
*/
private final ReplyHandler REPLY_HANDLER;
/**
* The <tt>QueryRequest</tt> used as the template to build new queries.
*/
private final QueryRequest QUERY;
/**
* Boolean for whether or not the query has been forwarded to leaves of this
* ultrapeer. Set on the first sendQuery() pass, whether or not the leaves'
* route table actually had a hit.
*/
private volatile boolean _forwardedToLeaves = false;
/**
* Boolean for whether or not we've sent the probe query.
*
* NOTE(review): unlike the other mutable state of this class, this flag is
* not volatile -- confirm that sendQuery() is only ever called from a
* single thread.
*/
private boolean _probeQuerySent;
/**
* used to preference which connections to use when searching if the search
* comes from a leaf with a certain locale preference then those connections
* (of this ultrapeer) which match the locale will be used before the other
* connections. Taken from the reply handler at construction time.
*/
private final String _prefLocale;
/** Factory used to build the copies of <tt>QUERY</tt> with a specific TTL. */
private final QueryRequestFactory queryRequestFactory;
/** Source of the initialized connections this query can be sent to. */
private final ConnectionManager connectionManager;
/**
* Router used to look up the query route table, forward the query to
* leaves and send it along individual connections.
*/
private final MessageRouter messageRouter;
/**
 * Creates a handler for a single dynamic query. The constructor has default
 * access, so handlers can only be created from within this package.
 *
 * @param query the <tt>QueryRequest</tt> to construct a handler for; it is
 *        kept as the template for every query this handler sends
 * @param results the number of results to get -- this varies based on the
 *        type of servant sending the request and is respected unless it's
 *        a query for a specific hash (the query carries URNs), in which
 *        case we try to get far fewer matches, ignoring this parameter
 * @param handler the <tt>ReplyHandler</tt> for routing replies; its locale
 *        preference is used to pick connections
 * @param counter the <tt>ResultCounter</tt> that keeps track of how many
 *        results have been returned for this query
 * @param queryRequestFactory the factory used to create the TTL-specific
 *        copies of <tt>query</tt>
 * @param connectionManager the source of the connections to query
 * @param messageRouter the router used to actually send the queries
 * @throws IllegalArgumentException if <tt>query</tt>, <tt>handler</tt> or
 *         <tt>counter</tt> is <tt>null</tt>
 */
QueryHandlerImpl(QueryRequest query, int results, ReplyHandler handler, ResultCounter counter,
        QueryRequestFactory queryRequestFactory, ConnectionManager connectionManager,
        MessageRouter messageRouter) {
    // validate the mandatory arguments before touching any state
    if (query == null) {
        throw new IllegalArgumentException("null query");
    }
    if (handler == null) {
        throw new IllegalArgumentException("null reply handler");
    }
    if (counter == null) {
        throw new IllegalArgumentException("null result counter");
    }

    this.queryRequestFactory = queryRequestFactory;
    this.connectionManager = connectionManager;
    this.messageRouter = messageRouter;

    QUERY = query;
    // a query that carries URNs is a hash query and needs far fewer results
    RESULTS = query.getQueryUrns().isEmpty() ? results : HASH_QUERY_RESULTS;
    REPLY_HANDLER = handler;
    RESULT_COUNTER = counter;
    _prefLocale = handler.getLocalePref();
}
/**
 * Returns the connections that have already been queried.
 *
 * @return the handler's own list (not a copy), so later queries show up in
 *         it and changes made by the caller affect this handler
 */
List<RoutedConnection> getQueriedConnections() {
    return this.QUERIED_CONNECTIONS;
}
/**
 * Factory method for creating new <tt>QueryRequest</tt> instances with
 * the same guid, query, xml query, urn types, etc. as the given query, but
 * with the given TTL.
 *
 * @param query the query to copy
 * @param ttl the time to live of the new query
 * @return a new <tt>QueryRequest</tt> instance with all of the
 *         pre-defined parameters and the specified TTL
 * @throws IllegalArgumentException if the ttl is not within what is
 *         considered reasonable bounds, i.e. below 1 or above
 *         <tt>MAX_QUERY_TTL</tt>
 * @throws NullPointerException if the <tt>query</tt> argument is
 *         <tt>null</tt>
 */
private QueryRequest createQuery(QueryRequest query, byte ttl) {
    // report which bound was violated instead of always claiming "too high"
    if (ttl < 1) {
        throw new IllegalArgumentException("ttl too low: " + ttl);
    }
    if (ttl > MAX_QUERY_TTL) {
        throw new IllegalArgumentException("ttl too high: " + ttl);
    }
    if (query == null) {
        throw new NullPointerException("null query");
    }
    return queryRequestFactory.createQuery(query, ttl);
}
/**
 * Returns the query this handler was created for, which serves as the
 * template for every query it sends.
 *
 * @return the template <tt>QueryRequest</tt>
 */
public QueryRequest getTemplateQueryRequest() {
    return this.QUERY;
}
/**
 * Creates a copy of this handler's template query with the given TTL.
 *
 * @param ttl the time to live of the new query
 * @return the new <tt>QueryRequest</tt>
 * @see com.limegroup.gnutella.search.QueryHandler#createQuery(byte)
 */
public QueryRequest createQuery(byte ttl) {
    QueryRequest template = QUERY;
    return createQuery(template, ttl);
}
/**
 * Performs the next step of the dynamic query, if one is due. Nothing
 * happens when the query already has enough results or when the wait time
 * set by the previous step has not elapsed yet. Otherwise exactly one of
 * the following is done, in this order of precedence:
 * <ol>
 * <li>forward the query to our leaves (first pass only, and only if the
 * leaves' route table has a hit),</li>
 * <li>send the probe query,</li>
 * <li>send a regular query down one more connection and, if the query has
 * been running for a while, shorten the per-hop wait time.</li>
 * </ol>
 *
 * @see com.limegroup.gnutella.search.QueryHandler#sendQuery()
 */
public void sendQuery() {
    if (hasEnoughResults()) {
        return;
    }

    _curTime = System.currentTimeMillis();
    if (_curTime < _nextQueryTime) {
        return;
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("Query = " + QUERY.getQuery() + ", numHostsQueried: "
                + _theoreticalHostsQueried);
    }

    if (_queryStartTime == 0) {
        _queryStartTime = _curTime;
    }

    // Step 1: the leaves are only ever tried once
    if (!_forwardedToLeaves) {
        _forwardedToLeaves = true;
        QueryRouteTable leafTable = messageRouter.getQueryRouteTable();
        QueryRequest leafQuery = createQuery(QUERY, (byte) 1);
        _theoreticalHostsQueried += 25;

        // only forward and wait if there is a hit in the route table;
        // without one we fall through to the probe right away
        if (leafTable != null && leafTable.contains(leafQuery)) {
            messageRouter.forwardQueryRequestToLeaves(leafQuery, REPLY_HANDLER);
            _nextQueryTime = System.currentTimeMillis() + _timeToWaitPerHop;
            return;
        }
    }

    // Step 2: the probe query is sent once as well
    if (!_probeQuerySent) {
        ProbeQuery probe = new ProbeQuery(connectionManager.getInitializedConnections(), this);
        long probeWait = probe.getTimeToWait();
        _theoreticalHostsQueried += probe.sendProbe();
        _nextQueryTime = System.currentTimeMillis() + probeWait;
        _probeQuerySent = true;
        return;
    }

    // Step 3: regular query -- work on a copy of the connections because
    // the list is modified while picking a connection
    List<RoutedConnection> connections = new ArrayList<RoutedConnection>(connectionManager
            .getInitializedConnections());
    int newHosts = sendQuery(connections);
    if (newHosts == 0) {
        // nothing new was reached, so give new connections some time to
        // show up before trying again
        _nextQueryTime = System.currentTimeMillis() + 6000;
    }
    _theoreticalHostsQueried += newHosts;

    // the per-hop wait is only reduced while it is above its floor and
    // once the query has been running for a while
    if (_timeToWaitPerHop <= 100 || (System.currentTimeMillis() - _queryStartTime) <= 6000) {
        return;
    }

    _timeToWaitPerHop -= _timeToDecreasePerHop;

    // the next decrease grows with the number of decrements so far and
    // shrinks with the number of results already returned, but is never
    // less than 5
    int resultFactor = Math.max(1, (RESULTS / 2) - (30 * RESULT_COUNTER.getNumResults()));
    int decrementFactor = Math.max(1, (_numDecrements / 6));
    int currentDecrease = Math.max(5, resultFactor * decrementFactor);
    _timeToDecreasePerHop += currentDecrease;
    _numDecrements++;

    if (_timeToWaitPerHop < 100) {
        _timeToWaitPerHop = 100;
    }
}
/**
 * Sends a query to one of the specified <tt>List</tt> of connections.
 * This is the heart of the dynamic query. We dynamically calculate the
 * appropriate TTL to use based on our current estimate of how widely the
 * file is distributed, how many connections we have, etc.
 * <p>
 * Connections matching the preferred locale are tried first; only when
 * none of those is left unqueried are the remaining connections
 * considered. If no stable, unqueried connection is available, a
 * connection that was previously used for a TTL=1 probe is queried again.
 *
 * @param ultrapeersAll the connections to send queries over; connections
 *        that are no longer in this list are forgotten, and the list
 *        itself may be modified (already queried connections removed,
 *        order shuffled), so callers should pass a copy
 * @return the number of new hosts theoretically reached by this query
 *         iteration, or 0 if no query was sent
 *
 * Default access for testing
 */
int sendQuery(List<? extends RoutedConnection> ultrapeersAll) {
    // we want to use the connections matching the locale first
    // (the method returns a copy)
    List<? extends RoutedConnection> candidates =
            connectionManager.getInitializedConnectionsMatchLocale(_prefLocale);

    // forget about queried connections that are not in the given list
    QUERIED_CONNECTIONS.retainAll(ultrapeersAll);
    QUERIED_PROBE_CONNECTIONS.retainAll(ultrapeersAll);

    if (!candidates.isEmpty()) {
        // drop the locale matches we've already used; this may leave
        // the list empty
        candidates.removeAll(QUERIED_CONNECTIONS);
        candidates.removeAll(QUERIED_PROBE_CONNECTIONS);
    }
    if (candidates.isEmpty()) {
        // no unused locale match -- fall back to all connections, again
        // without the ones we've already used
        candidates = ultrapeersAll;
        candidates.removeAll(QUERIED_CONNECTIONS);
        candidates.removeAll(QUERIED_PROBE_CONNECTIONS);
    }

    int candidateCount = candidates.size();
    if (LOG.isTraceEnabled()) {
        LOG.trace("potential querier size: " + candidateCount);
    }

    // add randomization to who we send our queries to
    Collections.shuffle(candidates);

    // pick the first stable connection: a connection that hasn't been up
    // for long is skipped, as the replies will never make it back to us
    // if it is dropped, wasting bandwidth
    RoutedConnection target = null;
    for (RoutedConnection candidate : candidates) {
        if (candidate.isStable(_curTime)) {
            target = candidate;
            break;
        }
    }

    int remainingConnections = Math.max(candidateCount + QUERIED_PROBE_CONNECTIONS.size(), 0);
    if (remainingConnections == 0) {
        // nothing to query at this time
        return 0;
    }
    if (remainingConnections > 4) {
        // pretend we have fewer connections than we do in case we lose
        // some
        remainingConnections -= 4;
    }

    boolean probeConnection = false;
    if (target == null) {
        // no stable unqueried connection; fall back to a used probe
        // connection, if there is one
        if (QUERIED_PROBE_CONNECTIONS.isEmpty()) {
            return 0;
        }
        // the connection is removed from the probe list so that it never
        // ends up in both lists once it is added to QUERIED_CONNECTIONS
        target = QUERIED_PROBE_CONNECTIONS.remove(0);
        probeConnection = true;
    }

    // prefer the count reported by the leaf, if it has reported any
    int leafReported = _numResultsReportedByLeaf;
    int reported = leafReported > 0 ? leafReported : RESULT_COUNTER.getNumResults();

    double resultsPerHost = (double) reported / _theoreticalHostsQueried;
    int resultsNeeded = RESULTS - reported;
    // without any results so far there is no rate to extrapolate from,
    // so a fixed number of hosts is used
    int hostsToQuery = resultsPerHost != 0 ? (int) (resultsNeeded / resultsPerHost) : 40000;
    int hostsToQueryPerConnection = hostsToQuery / remainingConnections;

    int degree = target.getConnectionCapabilities().getNumIntraUltrapeerConnections();
    byte maxTTL = target.getConnectionCapabilities().getHeadersRead().getMaxTTL();
    byte ttl = calculateNewTTL(hostsToQueryPerConnection, degree, maxTTL);

    // If we're sending the query down a probe connection that we've
    // already used, or the connection doesn't have a hit for the query,
    // send it at TTL=2. In these cases TTL=1 is pointless because we've
    // either already sent this query, or the Ultrapeer doesn't have a
    // match anyway.
    if (ttl == 1) {
        boolean noRouteTableHit = target.isUltrapeerQueryRoutingConnection()
                && !target.shouldForwardQuery(QUERY);
        if (noRouteTableHit || probeConnection) {
            ttl = 2;
        }
    }

    // send out the query on the network, returning the number of new
    // hosts theoretically reached
    QueryRequest outgoing = createQuery(QUERY, ttl);
    return sendQueryToHost(outgoing, target);
}
/**
 * Sends the given query along the given connection and, if it went
 * through, records the connection as queried and schedules the next query
 * for <tt>ttl * timeToWaitPerHop</tt> milliseconds from now.
 *
 * @param query the query to send
 * @param mc the connection to send it along
 * @return the number of new hosts theoretically reached, or 0 if the
 *         query was not sent
 * @see com.limegroup.gnutella.search.QueryHandler#sendQueryToHost(com.limegroup.gnutella.messages.QueryRequest,
 *      com.limegroup.gnutella.connection.RoutedConnection)
 */
public int sendQueryToHost(QueryRequest query, RoutedConnection mc) {
    boolean sent = messageRouter.sendInitialQuery(query, mc);
    if (!sent) {
        // the query didn't go through, so no new hosts were reached
        return 0;
    }

    byte ttl = query.getTTL();

    // a TTL=1 query to a connection that supports probe queries only
    // counts as a probe; everything else counts as a regular query
    boolean usedAsProbe = ttl == 1 && mc.getConnectionCapabilities().supportsProbeQueries();
    if (usedAsProbe) {
        QUERIED_PROBE_CONNECTIONS.add(mc);
    } else {
        QUERIED_CONNECTIONS.add(mc);
        if (LOG.isTraceEnabled()) {
            LOG.trace("QUERIED_CONNECTIONS.size() = " + QUERIED_CONNECTIONS.size());
        }
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("Querying host " + mc.getAddress() + " with ttl " + query.getTTL());
    }

    _nextQueryTime = System.currentTimeMillis() + (ttl * _timeToWaitPerHop);
    return calculateNewHosts(mc, ttl);
}
/**
 * Calculates the new TTL to use based on the number of hosts per connection
 * we still need to query. The result is the lowest TTL whose theoretical
 * horizon (weighted by a factor of 16, which biases the choice towards
 * lower TTLs) covers the requested number of hosts, limited to the
 * maximum the connection allows.
 *
 * @param hostsToQueryPerConnection the number of hosts we should reach on
 *        each remaining connections, to the best of our knowledge
 * @param degree the out-degree of the next connection
 * @param maxTTL the maximum TTL the connection will allow; values above
 *        <tt>MAX_QUERY_TTL</tt> are treated as <tt>MAX_QUERY_TTL</tt> and
 *        values below 1 are treated as 1
 * @return the TTL to use for the next connection, always between 1 and
 *         <tt>MAX_QUERY_TTL</tt>
 */
static byte calculateNewTTL(int hostsToQueryPerConnection, int degree, byte maxTTL) {
    if (maxTTL > MAX_QUERY_TTL) {
        maxTTL = MAX_QUERY_TTL;
    } else if (maxTTL < 1) {
        // a non-positive maximum would otherwise be returned as the TTL,
        // which is not a valid TTL for building a query
        maxTTL = 1;
    }

    // not the most efficient algorithm -- should use Math.log, but
    // that's ok. TTLs of maxTTL and above all map to maxTTL, so the search
    // can stop there.
    for (byte ttl = 1; ttl < maxTTL; ttl++) {
        // biased towards lower TTLs since the horizon expands so quickly
        int hosts = (int) (16.0 * calculateNewHosts(degree, ttl));
        if (hosts >= hostsToQueryPerConnection) {
            return ttl;
        }
    }
    return maxTTL;
}
/**
 * Calculate the number of new hosts that would be added to the theoretical
 * horizon if a query with the given ttl were sent down the given
 * connection. The connection's number of intra-Ultrapeer connections is
 * used as its degree.
 *
 * @param conn the connection that will receive the query
 * @param ttl the TTL of the query to add
 * @return the theoretical number of new hosts
 */
static int calculateNewHosts(RoutedConnection conn, byte ttl) {
    int degree = conn.getConnectionCapabilities().getNumIntraUltrapeerConnections();
    return calculateNewHosts(degree, ttl);
}
/**
 * Calculate the number of new hosts that would be added to the theoretical
 * horizon if a query with the given ttl were sent to a node with the given
 * degree: the sum of <tt>(degree - 1)^hop</tt> for every hop from
 * <tt>ttl - 1</tt> down to 0. This is not precise because we're assuming
 * that the nodes connected to the node in question also have the same
 * degree, but there's not much we can do about it!
 *
 * @param degree the degree of the node that will receive the query
 * @param ttl the TTL of the query to add
 * @return the theoretical number of new hosts; 0 if <tt>ttl</tt> is not
 *         positive
 */
static int calculateNewHosts(int degree, byte ttl) {
    final int fanOut = degree - 1;
    double total = 0;
    // largest hop first, the same order in which the terms were always added
    for (int hop = ttl - 1; hop >= 0; hop--) {
        total += Math.pow(fanOut, hop);
    }
    return (int) total;
}
/**
 * Determines whether this query should stop, either because it has
 * gathered enough results or because it has used up its horizon or time
 * budget.
 *
 * @return <tt>false</tt> if the query hasn't started yet; otherwise
 *         <tt>true</tt> if enough results have come in, if the theoretical
 *         horizon exceeds 110000 hosts, or if the query has been running
 *         for longer than <tt>QueryHandler.MAX_QUERY_TIME</tt>
 * @see com.limegroup.gnutella.search.QueryHandler#hasEnoughResults()
 */
public boolean hasEnoughResults() {
    // return false if the query hasn't started yet
    if (_queryStartTime == 0) {
        return false;
    }

    // ----------------
    // NOTE: as agreed, _numResultsReportedByLeaf is the number of results
    // the leaf has received/consumed by a filter DIVIDED by 4 (4 being the
    // number of UPs connection it maintains). That is why we don't divide
    // it here or anything. We aren't sure if this mixes well with
    // BearShare's use but oh well....
    // ----------------
    if (_numResultsReportedByLeaf > 0) {
        // leaf guidance is in effect, so we have different criteria:
        // we shouldn't route too much regardless of what the leaf says
        if (RESULT_COUNTER.getNumResults() >= MAXIMUM_ROUTED_FOR_LEAVES) {
            return true;
        }
        // if the leaf is happy, so are we....
        if (_numResultsReportedByLeaf > RESULTS) {
            return true;
        }
    } else if (RESULT_COUNTER.getNumResults() >= RESULTS) {
        // leaf guidance is not in effect or we are doing our own query
        return true;
    }

    // if our theoretical horizon has gotten too high, consider it enough
    // results. Precisely what this number should be is somewhat hard to
    // determine because, while connections have a specific degree, the
    // degree of the connections on subsequent hops cannot be determined
    if (_theoreticalHostsQueried > 110000) {
        return true;
    }

    // the elapsed time is kept as a long: narrowing it to an int would
    // wrap around for very large values and could turn the timeout off
    long queryLength = System.currentTimeMillis() - _queryStartTime;
    return queryLength > QueryHandler.MAX_QUERY_TIME;
}
/**
 * Records a new result count reported by the leaf. Reports that do not
 * exceed the count already known are ignored.
 *
 * @param numResults the number of results the leaf reports it has
 * @see com.limegroup.gnutella.search.QueryHandler#updateLeafResults(int)
 */
public void updateLeafResults(int numResults) {
    if (numResults <= _numResultsReportedByLeaf) {
        return;
    }

    // keep a history of up to the first 20 updates
    if (times.size() < 20) {
        long sinceQueryStart = System.currentTimeMillis() - _queryStartTime;
        times.add(sinceQueryStart);
        results.add(numResults);
    }
    _numResultsReportedByLeaf = numResults;
}
/**
 * Returns the highest result count the leaf has reported so far.
 *
 * @return the number of results reported by the leaf, 0 if it has not
 *         reported any
 * @see com.limegroup.gnutella.search.QueryHandler#getNumResultsReportedByLeaf()
 */
public int getNumResultsReportedByLeaf() {
    return this._numResultsReportedByLeaf;
}
/**
 * Returns the handler that replies to this query are routed to.
 *
 * @return the <tt>ReplyHandler</tt> passed to the constructor
 * @see com.limegroup.gnutella.search.QueryHandler#getReplyHandler()
 */
public ReplyHandler getReplyHandler() {
    return this.REPLY_HANDLER;
}
/**
 * Returns the time currently waited per query hop.
 *
 * @return the per-hop wait time in milliseconds
 * @see com.limegroup.gnutella.search.QueryHandler#getTimeToWaitPerHop()
 */
public long getTimeToWaitPerHop() {
    return this._timeToWaitPerHop;
}
/**
 * Overrides Object.toString to describe this handler by its template
 * query.
 */
@Override
public String toString() {
    StringBuilder description = new StringBuilder("QueryHandler: QUERY: ");
    description.append(QUERY);
    return description.toString();
}
/**
 * Returns the GUID of this query, wrapped in a new <tt>GUID</tt> instance
 * on every call.
 *
 * @return a new <tt>GUID</tt> built from the template query's guid
 * @see com.limegroup.gnutella.search.QueryHandler#getGUID()
 */
public GUID getGUID() {
    GUID queryGuid = new GUID(QUERY.getGUID());
    return queryGuid;
}
}