package net.i2p.router.networkdb.kademlia;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;

import net.i2p.data.Hash;
import net.i2p.data.i2np.DatabaseLookupMessage;
import net.i2p.kademlia.KBucketSet;
import net.i2p.router.Job;
import net.i2p.router.MessageSelector;
import net.i2p.router.OutNetMessage;
import net.i2p.router.ReplyJob;
import net.i2p.router.RouterContext;
import net.i2p.router.TunnelInfo;
import net.i2p.util.Log;

/**
 * Unused directly; replaced by IterativeSearchJob, but still extended by
 * SingleSearchJob.
 *
 * Try sending a search to some floodfill peers, failing completely if we don't get
 * a match from one of those peers, with no fallback to the Kademlia search.
 *
 * Exception (a semi-exception, since we still fail completely without fallback):
 * If we don't know any floodfill peers, we ask a couple of peers at random,
 * who will hopefully reply with some floodfill keys.
 * We still fail without fallback, but we then spin off a job to
 * ask those same random peers for the RouterInfos for those keys.
 * If that job succeeds, the next search should work better.
 *
 * In addition, we follow the floodfill keys in the DSRM
 * (DatabaseSearchReplyMessage) if we know fewer than 4 floodfills.
 *
 * These enhancements allow the router to bootstrap back into the network
 * after it loses (or never had) floodfill references, as long as it
 * knows one peer that is up.
 */
class FloodOnlySearchJob extends FloodSearchJob {
    private boolean _shouldProcessDSRM;
    private final HashSet<Hash> _unheardFrom;

    /** this is a marker to register with the MessageRegistry, it is never sent */
    private OutNetMessage _out;

    protected final MessageSelector _replySelector;
    protected final ReplyJob _onReply;
    protected final Job _onTimeout;

    private static final int MIN_FOR_NO_DSRM = 4;
    private static final long SINGLE_SEARCH_MSG_TIME = 10*1000;

    public FloodOnlySearchJob(RouterContext ctx, FloodfillNetworkDatabaseFacade facade, Hash key,
                              Job onFind, Job onFailed, int timeoutMs, boolean isLease) {
        super(ctx, facade, key, onFind, onFailed, timeoutMs, isLease);
        // these override the settings in super
        _timeoutMs = Math.min(timeoutMs, SearchJob.PER_FLOODFILL_PEER_TIMEOUT);
        _expiration = _timeoutMs + ctx.clock().now();
        _unheardFrom = new HashSet<Hash>(CONCURRENT_SEARCHES);
        _replySelector = new FloodOnlyLookupSelector(getContext(), this);
        _onReply = new FloodOnlyLookupMatchJob(getContext(), this);
        _onTimeout = new FloodOnlyLookupTimeoutJob(getContext(), this);
    }

    public boolean shouldProcessDSRM() { return _shouldProcessDSRM; }

    @Override
    public void runJob() {
        // pick some floodfill peers and send out the searches
        // old
        //List<Hash> floodfillPeers = _facade.getFloodfillPeers();
        // new
        List<Hash> floodfillPeers;
        KBucketSet<Hash> ks = _facade.getKBuckets();
        if (ks != null) {
            Hash rkey = getContext().routingKeyGenerator().getRoutingKey(_key);
            // Ideally we would add the key to an exclude list, so we don't try to query a ff peer for itself,
            // but we're passing the rkey not the key, so we do it below instead in certain cases.
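            // Ask the peer selector for the floodfills closest to the routing key;
            // MIN_FOR_NO_DSRM caps how many we take.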
            floodfillPeers = ((FloodfillPeerSelector)_facade.getPeerSelector()).selectFloodfillParticipants(rkey, MIN_FOR_NO_DSRM, ks);
        } else {
            floodfillPeers = Collections.emptyList();
        }

        // If we don't know enough floodfills,
        // or the global network routing key just changed (which is set at startup,
        // so this includes the first few minutes of uptime)
        _shouldProcessDSRM = floodfillPeers.size() < MIN_FOR_NO_DSRM ||
                             getContext().routingKeyGenerator().getLastChanged() > getContext().clock().now() - 60*60*1000;

        if (floodfillPeers.isEmpty()) {
            // ask anybody, they may not return the answer but they will return a few ff peers we can go look up,
            // so this situation should be temporary
            if (_log.shouldLog(Log.WARN))
                _log.warn("Running netDb searches against the floodfill peers, but we don't know any");
            floodfillPeers = new ArrayList<Hash>(_facade.getAllRouters());
            if (floodfillPeers.isEmpty()) {
                if (_log.shouldLog(Log.ERROR))
                    _log.error("We don't know any peers at all");
                failed();
                return;
            }
            Collections.shuffle(floodfillPeers, getContext().random());
        }

        // This OutNetMessage is never used or sent (setMessage() is never called), it's only
        // so we can register a reply selector.
        _out = getContext().messageRegistry().registerPending(_replySelector, _onReply, _onTimeout);

        /********
        // We need to randomize our ff selection, else we stay with the same ones since
        // getFloodfillPeers() is sorted by closest distance. Always using the same
        // ones didn't help reliability.
        // Also, query the unheard-from, unprofiled, failing, unreachable and banlisted ones last.
        // We should hear from floodfills pretty frequently so set a 30m time limit.
        // If unprofiled we haven't talked to them in a long time.
        // We aren't contacting the peer directly, so banlist doesn't strictly matter,
        // but it's a bad sign, and we often banlist a peer before we fail it...
        if (floodfillPeers.size() > CONCURRENT_SEARCHES) {
            Collections.shuffle(floodfillPeers, getContext().random());
            List ffp = new ArrayList(floodfillPeers.size());
            int failcount = 0;
            long before = getContext().clock().now() - 30*60*1000;
            for (int i = 0; i < floodfillPeers.size(); i++) {
                Hash peer = (Hash)floodfillPeers.get(i);
                PeerProfile profile = getContext().profileOrganizer().getProfile(peer);
                if (profile == null || profile.getLastHeardFrom() < before ||
                    profile.getIsFailing() || getContext().banlist().isBanlisted(peer) ||
                    getContext().commSystem().wasUnreachable(peer)) {
                    failcount++;
                    ffp.add(peer);
                } else
                    ffp.add(0, peer);
            }
            // This will help us recover if the router just started and all the floodfills
            // have changed since the last time we were running
            if (floodfillPeers.size() - failcount <= 2)
                _shouldProcessDSRM = true;
            if (_log.shouldLog(Log.INFO) && failcount > 0)
                _log.info(getJobId() + ": " + failcount + " of " + floodfillPeers.size() +
                          " floodfills are not heard from, unprofiled, failing, unreachable or banlisted");
            floodfillPeers = ffp;
        } else {
            _shouldProcessDSRM = true;
        }
        ********/

        int count = 0; // keep a separate count since _lookupsRemaining could be decremented elsewhere
        for (int i = 0; _lookupsRemaining.get() < CONCURRENT_SEARCHES && i < floodfillPeers.size(); i++) {
            Hash peer = floodfillPeers.get(i);
            if (peer.equals(getContext().routerHash()))
                continue;
            DatabaseLookupMessage dlm = new DatabaseLookupMessage(getContext(), true);
            TunnelInfo replyTunnel = getContext().tunnelManager().selectInboundTunnel();
            TunnelInfo outTunnel = getContext().tunnelManager().selectOutboundTunnel();
            if ( (replyTunnel == null) || (outTunnel == null) ) {
                failed();
                return;
            }
            // As explained above, it's hard to keep the key itself out of the ff list,
            // so let's just skip it for now if the outbound tunnel is zero-hop.
            // Yes, that means we aren't doing double-lookup for a floodfill
            // if it happens to be closest to itself and we are using zero-hop exploratory tunnels.
            // If we don't, the OutboundMessageDistributor ends up logging errors for
            // not being able to send to the floodfill, if we don't have an older netdb entry.
            if (outTunnel.getLength() <= 1 && peer.equals(_key) && floodfillPeers.size() > 1)
                continue;
            synchronized(_unheardFrom) {
                _unheardFrom.add(peer);
            }
            dlm.setFrom(replyTunnel.getPeer(0));
            dlm.setMessageExpiration(getContext().clock().now() + SINGLE_SEARCH_MSG_TIME);
            dlm.setReplyTunnel(replyTunnel.getReceiveTunnelId(0));
            dlm.setSearchKey(_key);
            if (_log.shouldLog(Log.INFO))
                _log.info(getJobId() + ": Floodfill search for " + _key + " to " + peer);
            getContext().tunnelDispatcher().dispatchOutbound(dlm, outTunnel.getSendTunnelId(0), peer);
            count++;
            _lookupsRemaining.incrementAndGet();
        }

        if (count <= 0) {
            if (_log.shouldLog(Log.INFO))
                _log.info(getJobId() + ": Floodfill search for " + _key + " had no peers to send to");
            // no floodfill peers, fail
            failed();
        }
    }

    @Override
    public String getName() { return "NetDb flood search"; }

    /**
     * Note that we heard from the peer
     *
     * @return number remaining after decrementing
     */
    int decrementRemaining(Hash peer) {
        synchronized(_unheardFrom) {
            _unheardFrom.remove(peer);
            return decrementRemaining();
        }
    }

    @Override
    void failed() {
        synchronized (this) {
            if (_dead)
                return;
            _dead = true;
        }
        getContext().messageRegistry().unregisterPending(_out);
        long time = System.currentTimeMillis() - _created;
        if (_log.shouldLog(Log.INFO)) {
            int timeRemaining = (int)(_expiration - getContext().clock().now());
            _log.info(getJobId() + ": Floodfill search for " + _key + " failed with " +
                      timeRemaining + " remaining after " + time);
        }
        synchronized(_unheardFrom) {
            for (Hash h : _unheardFrom)
                getContext().profileManager().dbLookupFailed(h);
        }
        _facade.complete(_key);
        getContext().statManager().addRateData("netDb.failedTime", time);
        for (Job j : _onFailed) {
            getContext().jobQueue().addJob(j);
        }
        _onFailed.clear();
    }

    @Override
    void success() {
        synchronized (this) {
            if (_dead)
                return;
            _dead = true;
            super.success();
        }
        if (_log.shouldLog(Log.INFO))
            _log.info(getJobId() + ": Floodfill search for " + _key + " successful");
        // Sadly, we don't know which of the two replied, unless the first one sent a DSRM
        // before the second one sent the answer, which isn't that likely.
        // Would be really nice to fix this, but it isn't clear how unless CONCURRENT_SEARCHES == 1.
        // Maybe don't unregister the msg from the Registry for a while and see if we get a 2nd reply?
        // Or delay the 2nd search for a few seconds?
        // We'll have to rely primarily on other searches (ExploreJob which calls SearchJob,
        // and FloodfillVerifyStoreJob) to record successful searches for now.
        // StoreJob also calls dbStoreSent() which updates the lastHeardFrom timer - this also helps.
        long time = System.currentTimeMillis() - _created;
        synchronized(_unheardFrom) {
            if (_unheardFrom.size() == 1) {
                Hash peer = _unheardFrom.iterator().next();
                getContext().profileManager().dbLookupSuccessful(peer, time);
            }
        }
        _facade.complete(_key);
        getContext().statManager().addRateData("netDb.successTime", time);
        for (Job j : _onFind) {
            getContext().jobQueue().addJob(j);
        }
    }
}
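/*
 * A minimal usage sketch, for illustration only (hypothetical caller; in the
 * router the usual entry point is the netDb facade's search() method, which
 * constructs and queues the appropriate FloodSearchJob subclass):
 *
 *   Job onFind = ...;    // run if a floodfill returns the entry
 *   Job onFailed = ...;  // run if all concurrent lookups fail or time out
 *   FloodOnlySearchJob job = new FloodOnlySearchJob(ctx, facade, key,
 *                                                   onFind, onFailed, 10*1000, false);
 *   ctx.jobQueue().addJob(job);
 */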