package net.i2p.router.networkdb.kademlia;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import net.i2p.data.DatabaseEntry;
import net.i2p.data.Destination;
import net.i2p.data.Hash;
import net.i2p.data.TunnelId;
import net.i2p.data.i2np.DatabaseLookupMessage;
import net.i2p.data.i2np.DatabaseStoreMessage;
import net.i2p.data.router.RouterInfo;
import net.i2p.data.router.RouterKeyGenerator;
import net.i2p.router.Job;
import net.i2p.router.JobImpl;
import net.i2p.router.OutNetMessage;
import net.i2p.router.Router;
import net.i2p.router.RouterContext;
import net.i2p.util.ConcurrentHashSet;
import net.i2p.util.Log;
import net.i2p.util.SystemVersion;

/**
 *  The network database
 */
public class FloodfillNetworkDatabaseFacade extends KademliaNetworkDatabaseFacade {
    public static final char CAPABILITY_FLOODFILL = 'f';
    private final Map<Hash, FloodSearchJob> _activeFloodQueries;
    private boolean _floodfillEnabled;
    private final Set<Hash> _verifiesInProgress;
    private FloodThrottler _floodThrottler;
    private LookupThrottler _lookupThrottler;

    /**
     *  This is the flood redundancy. Entries are
     *  sent to this many other floodfills.
     *  Was 7 through release 0.9; 5 for 0.9.1.
     *  4 as of 0.9.2; 3 as of 0.9.9
     */
    public static final int MAX_TO_FLOOD = 3;

    private static final int FLOOD_PRIORITY = OutNetMessage.PRIORITY_NETDB_FLOOD;
    private static final int FLOOD_TIMEOUT = 30*1000;
    private static final long NEXT_RKEY_RI_ADVANCE_TIME = 45*60*1000;
    private static final long NEXT_RKEY_LS_ADVANCE_TIME = 10*60*1000;
    private static final int NEXT_FLOOD_QTY = 2;

    public FloodfillNetworkDatabaseFacade(RouterContext context) {
        super(context);
        _activeFloodQueries = new HashMap<Hash, FloodSearchJob>();
        _verifiesInProgress = new ConcurrentHashSet<Hash>(8);

        _context.statManager().createRequiredRateStat("netDb.successTime", "Time for successful lookup (ms)", "NetworkDatabase", new long[] { 60*60*1000l, 24*60*60*1000l });
        _context.statManager().createRateStat("netDb.failedTime", "How long a failed search takes", "NetworkDatabase", new long[] { 60*60*1000l, 24*60*60*1000l });
        _context.statManager().createRateStat("netDb.failedRetries", "How many additional queries for an iterative search", "NetworkDatabase", new long[] { 60*60*1000l });
        _context.statManager().createRateStat("netDb.successRetries", "How many additional queries for an iterative search", "NetworkDatabase", new long[] { 60*60*1000l });
        _context.statManager().createRateStat("netDb.failedAttemptedPeers", "How many peers we sent a search to when the search fails", "NetworkDatabase", new long[] { 10*60*1000l });
        _context.statManager().createRateStat("netDb.successPeers", "How many peers are contacted in a successful search", "NetworkDatabase", new long[] { 60*60*1000l, 24*60*60*1000l });
        _context.statManager().createRateStat("netDb.failedPeers", "How many peers fail to respond to a lookup?", "NetworkDatabase", new long[] { 60*60*1000l, 24*60*60*1000l });
        _context.statManager().createRateStat("netDb.searchCount", "Overall number of searches sent", "NetworkDatabase", new long[] { 5*60*1000l, 10*60*1000l, 60*60*1000l, 3*60*60*1000l, 24*60*60*1000l });
        _context.statManager().createRateStat("netDb.searchMessageCount", "Overall number of messages for all searches sent", "NetworkDatabase", new long[] { 5*60*1000l, 10*60*1000l, 60*60*1000l, 3*60*60*1000l, 24*60*60*1000l });
        _context.statManager().createRateStat("netDb.searchReplyValidated", "How many search replies we get that we are able to validate (fetch)",
"NetworkDatabase", new long[] { 5*60*1000l, 10*60*1000l, 60*60*1000l, 3*60*60*1000l, 24*60*60*1000l }); _context.statManager().createRateStat("netDb.searchReplyNotValidated", "How many search replies we get that we are NOT able to validate (fetch)", "NetworkDatabase", new long[] { 5*60*1000l, 10*60*1000l, 60*60*1000l, 3*60*60*1000l, 24*60*60*1000l }); _context.statManager().createRateStat("netDb.searchReplyValidationSkipped", "How many search replies we get from unreliable peers that we skip?", "NetworkDatabase", new long[] { 5*60*1000l, 10*60*1000l, 60*60*1000l, 3*60*60*1000l, 24*60*60*1000l }); _context.statManager().createRateStat("netDb.republishQuantity", "How many peers do we need to send a found leaseSet to?", "NetworkDatabase", new long[] { 10*60*1000l, 60*60*1000l, 3*60*60*1000l, 24*60*60*1000l }); // for ISJ _context.statManager().createRateStat("netDb.RILookupDirect", "Was an iterative RI lookup sent directly?", "NetworkDatabase", new long[] { 60*60*1000 }); } @Override public synchronized void startup() { super.startup(); _context.jobQueue().addJob(new FloodfillMonitorJob(_context, this)); _lookupThrottler = new LookupThrottler(); // refresh old routers Job rrj = new RefreshRoutersJob(_context, this); rrj.getTiming().setStartAfter(_context.clock().now() + 5*60*1000); _context.jobQueue().addJob(rrj); } @Override protected void createHandlers() { _context.inNetMessagePool().registerHandlerJobBuilder(DatabaseLookupMessage.MESSAGE_TYPE, new FloodfillDatabaseLookupMessageHandler(_context, this)); _context.inNetMessagePool().registerHandlerJobBuilder(DatabaseStoreMessage.MESSAGE_TYPE, new FloodfillDatabaseStoreMessageHandler(_context, this)); } /** * If we are floodfill, turn it off and tell everybody. * @since 0.8.9 */ @Override public synchronized void shutdown() { // only if not forced ff or not restarting if (_floodfillEnabled && (!_context.getBooleanProperty(FloodfillMonitorJob.PROP_FLOODFILL_PARTICIPANT) || !(_context.router().scheduledGracefulExitCode() == Router.EXIT_HARD_RESTART || _context.router().scheduledGracefulExitCode() == Router.EXIT_GRACEFUL_RESTART))) { // turn off to build a new RI... _floodfillEnabled = false; // true -> publish inline // but job queue is already shut down, so sendStore() called by rebuildRouterInfo() won't work... _context.router().rebuildRouterInfo(true); // ...so force a flood here RouterInfo local = _context.router().getRouterInfo(); if (local != null && _context.router().getUptime() > PUBLISH_JOB_DELAY) { flood(local); // let the messages get out... try { Thread.sleep(3000); } catch (InterruptedException ie) {} } } super.shutdown(); } /** * This maybe could be shorter than RepublishLeaseSetJob.REPUBLISH_LEASESET_TIMEOUT, * because we are sending direct, but unresponsive floodfills may take a while due to timeouts. */ static final long PUBLISH_TIMEOUT = 90*1000; /** * Send our RI to the closest floodfill. * @throws IllegalArgumentException if the local router info is invalid */ @Override public void publish(RouterInfo localRouterInfo) throws IllegalArgumentException { if (localRouterInfo == null) throw new IllegalArgumentException("impossible: null localRouterInfo?"); // should this be after super? why not publish locally? if (_context.router().isHidden()) return; // DE-nied! super.publish(localRouterInfo); // wait until we've read in the RI's so we can find the closest floodfill if (!isInitialized()) return; // no use sending if we have no addresses // (unless maybe we used to have addresses? 
        //  not worth it)
        if (localRouterInfo.getAddresses().isEmpty())
            return;
        _log.info("Publishing our RI");
        // Don't delay, helps IB tunnel builds
        //if (_context.router().getUptime() > PUBLISH_JOB_DELAY)
        sendStore(localRouterInfo.getIdentity().calculateHash(), localRouterInfo, null, null, PUBLISH_TIMEOUT, null);
    }

    @Override
    public void sendStore(Hash key, DatabaseEntry ds, Job onSuccess, Job onFailure, long sendTimeout, Set<Hash> toIgnore) {
        // if we are a part of the floodfill netDb, don't send out our own leaseSets as part
        // of the flooding - instead, send them to a random floodfill peer so *they* can flood 'em out.
        // perhaps statistically adjust this so we are the source every 1/N times... or something.
        if (floodfillEnabled() && (ds.getType() == DatabaseEntry.KEY_TYPE_ROUTERINFO)) {
            flood(ds);
            if (onSuccess != null)
                _context.jobQueue().addJob(onSuccess);
        } else {
            _context.jobQueue().addJob(new FloodfillStoreJob(_context, this, key, ds, onSuccess, onFailure, sendTimeout, toIgnore));
        }
    }

    /**
     *  Increments and tests.
     *  @since 0.7.11
     */
    boolean shouldThrottleFlood(Hash key) {
        return _floodThrottler != null && _floodThrottler.shouldThrottle(key);
    }

    /**
     *  Increments and tests.
     *  @since 0.7.11
     */
    boolean shouldThrottleLookup(Hash from, TunnelId id) {
        // null before startup
        return _lookupThrottler == null || _lookupThrottler.shouldThrottle(from, id);
    }

    /**
     *  Send to a subset of all floodfill peers.
     *  We do this to implement Kademlia within the floodfills, i.e.
     *  we flood to those closest to the key.
     */
    public void flood(DatabaseEntry ds) {
        Hash key = ds.getHash();
        RouterKeyGenerator gen = _context.routerKeyGenerator();
        Hash rkey = gen.getRoutingKey(key);
        FloodfillPeerSelector sel = (FloodfillPeerSelector)getPeerSelector();
        List<Hash> peers = sel.selectFloodfillParticipants(rkey, MAX_TO_FLOOD, getKBuckets());
        // todo key cert skip?
        long until = gen.getTimeTillMidnight();
        if (until < NEXT_RKEY_LS_ADVANCE_TIME ||
            (ds.getType() == DatabaseEntry.KEY_TYPE_ROUTERINFO && until < NEXT_RKEY_RI_ADVANCE_TIME)) {
            // to avoid lookup failures after midnight, also flood to some closest to the
            // next routing key for a period of time before midnight.
            Hash nkey = gen.getNextRoutingKey(key);
            List<Hash> nextPeers = sel.selectFloodfillParticipants(nkey, NEXT_FLOOD_QTY, getKBuckets());
            int i = 0;
            for (Hash h : nextPeers) {
                // Don't flood an RI back to itself
                // Not necessary, a ff will do its own flooding (reply token == 0)
                // But other implementations may not...
                if (h.equals(key))
                    continue;
                // todo key cert skip?
                if (!peers.contains(h)) {
                    peers.add(h);
                    i++;
                }
            }
            if (i > 0 && _log.shouldLog(Log.INFO))
                _log.info("Flooding the entry for " + key + " to " + i + " more, just before midnight");
        }
        int flooded = 0;
        for (int i = 0; i < peers.size(); i++) {
            Hash peer = peers.get(i);
            RouterInfo target = lookupRouterInfoLocally(peer);
            if ( (target == null) || (_context.banlist().isBanlisted(peer)) )
                continue;
            // Don't flood an RI back to itself
            // Not necessary, a ff will do its own flooding (reply token == 0)
            // But other implementations may not...
if (ds.getType() == DatabaseEntry.KEY_TYPE_ROUTERINFO && peer.equals(key)) continue; if (peer.equals(_context.routerHash())) continue; DatabaseStoreMessage msg = new DatabaseStoreMessage(_context); msg.setEntry(ds); OutNetMessage m = new OutNetMessage(_context, msg, _context.clock().now()+FLOOD_TIMEOUT, FLOOD_PRIORITY, target); Job floodFail = new FloodFailedJob(_context, peer); m.setOnFailedSendJob(floodFail); // we want to give credit on success, even if we aren't sure, // because otherwise no use noting failure Job floodGood = new FloodSuccessJob(_context, peer); m.setOnSendJob(floodGood); _context.commSystem().processMessage(m); flooded++; if (_log.shouldLog(Log.INFO)) _log.info("Flooding the entry for " + key.toBase64() + " to " + peer.toBase64()); } if (_log.shouldLog(Log.INFO)) _log.info("Flooded the data to " + flooded + " of " + peers.size() + " peers"); } /** note in the profile that the store failed */ private static class FloodFailedJob extends JobImpl { private final Hash _peer; public FloodFailedJob(RouterContext ctx, Hash peer) { super(ctx); _peer = peer; } public String getName() { return "Flood failed"; } public void runJob() { getContext().profileManager().dbStoreFailed(_peer); } } /** * Note in the profile that the store succeeded * @since 0.9.19 */ private static class FloodSuccessJob extends JobImpl { private final Hash _peer; public FloodSuccessJob(RouterContext ctx, Hash peer) { super(ctx); _peer = peer; } public String getName() { return "Flood succeeded"; } public void runJob() { getContext().profileManager().dbStoreSuccessful(_peer); } } @Override protected PeerSelector createPeerSelector() { return new FloodfillPeerSelector(_context); } public synchronized void setFloodfillEnabled(boolean yes) { _floodfillEnabled = yes; if (yes && _floodThrottler == null) { _floodThrottler = new FloodThrottler(); _context.statManager().createRateStat("netDb.floodThrottled", "How often do we decline to flood?", "NetworkDatabase", new long[] { 60*60*1000l }); // following are for HFDSMJ _context.statManager().createRateStat("netDb.storeFloodNew", "How long it takes to flood out a newly received entry?", "NetworkDatabase", new long[] { 60*60*1000l }); _context.statManager().createRateStat("netDb.storeFloodOld", "How often we receive an old entry?", "NetworkDatabase", new long[] { 60*60*1000l }); } } @Override public boolean floodfillEnabled() { return _floodfillEnabled; } /** * @param peer may be null, returns false if null */ public static boolean isFloodfill(RouterInfo peer) { if (peer == null) return false; String caps = peer.getCapabilities(); return caps.indexOf(FloodfillNetworkDatabaseFacade.CAPABILITY_FLOODFILL) >= 0; } public List<RouterInfo> getKnownRouterData() { List<RouterInfo> rv = new ArrayList<RouterInfo>(); DataStore ds = getDataStore(); if (ds != null) { for (DatabaseEntry o : ds.getEntries()) { if (o.getType() == DatabaseEntry.KEY_TYPE_ROUTERINFO) rv.add((RouterInfo)o); } } return rv; } /** * Lookup using exploratory tunnels. * * Caller should check negative cache and/or banlist before calling. * * Begin a kademlia style search for the key specified, which can take up to timeoutMs and * will fire the appropriate jobs on success or timeout (or if the kademlia search completes * without any match) * * @return null always */ @Override SearchJob search(Hash key, Job onFindJob, Job onFailedLookupJob, long timeoutMs, boolean isLease) { return search(key, onFindJob, onFailedLookupJob, timeoutMs, isLease, null); } /** * Lookup using the client's tunnels. 
     *
     *  Caller should check negative cache and/or banlist before calling.
     *
     *  @param fromLocalDest use these tunnels for the lookup, or null for exploratory
     *  @return null always
     *  @since 0.9.10
     */
    SearchJob search(Hash key, Job onFindJob, Job onFailedLookupJob, long timeoutMs, boolean isLease, Hash fromLocalDest) {
        //if (true) return super.search(key, onFindJob, onFailedLookupJob, timeoutMs, isLease);
        if (key == null)
            throw new IllegalArgumentException("searchin for nothin, eh?");
        boolean isNew = false;
        FloodSearchJob searchJob;
        synchronized (_activeFloodQueries) {
            searchJob = _activeFloodQueries.get(key);
            if (searchJob == null) {
                //if (SearchJob.onlyQueryFloodfillPeers(_context)) {
                    //searchJob = new FloodOnlySearchJob(_context, this, key, onFindJob, onFailedLookupJob, (int)timeoutMs, isLease);
                    searchJob = new IterativeSearchJob(_context, this, key, onFindJob, onFailedLookupJob, (int)timeoutMs, isLease, fromLocalDest);
                //} else {
                //    searchJob = new FloodSearchJob(_context, this, key, onFindJob, onFailedLookupJob, (int)timeoutMs, isLease);
                //}
                _activeFloodQueries.put(key, searchJob);
                isNew = true;
            }
        }

        if (isNew) {
            if (_log.shouldLog(Log.DEBUG))
                _log.debug("this is the first search for that key, fire off the FloodSearchJob");
            _context.jobQueue().addJob(searchJob);
        } else {
            if (_log.shouldLog(Log.INFO))
                _log.info("Deferring flood search for " + key.toBase64() + " with " + _activeFloodQueries.size() + " in progress");
            searchJob.addDeferred(onFindJob, onFailedLookupJob, timeoutMs, isLease);
            // not necessarily LS
            _context.statManager().addRateData("netDb.lookupDeferred", 1, searchJob.getExpiration() - _context.clock().now());
        }
        return null;
    }

    /**
     *  Ok, the initial set of searches to the floodfill peers timed out, let's fall back on the
     *  wider kademlia-style searches
     *
     *  Unused - called only by FloodSearchJob which is overridden - don't use this.
     */
    /*****
    void searchFull(Hash key, List<Job> onFind, List<Job> onFailed, long timeoutMs, boolean isLease) {
        synchronized (_activeFloodQueries) { _activeFloodQueries.remove(key); }

        Job find = null;
        Job fail = null;
        if (onFind != null) {
            synchronized (onFind) {
                if (!onFind.isEmpty())
                    find = onFind.remove(0);
            }
        }
        if (onFailed != null) {
            synchronized (onFailed) {
                if (!onFailed.isEmpty())
                    fail = onFailed.remove(0);
            }
        }
        SearchJob job = super.search(key, find, fail, timeoutMs, isLease);
        if (job != null) {
            if (_log.shouldLog(Log.INFO))
                _log.info("Floodfill search timed out for " + key.toBase64() + ", falling back on normal search (#"
                          + job.getJobId() + ") with " + timeoutMs + " remaining");
            long expiration = timeoutMs + _context.clock().now();
            List<Job> removed = null;
            if (onFind != null) {
                synchronized (onFind) {
                    removed = new ArrayList(onFind);
                    onFind.clear();
                }
                for (int i = 0; i < removed.size(); i++)
                    job.addDeferred(removed.get(i), null, expiration, isLease);
                removed = null;
            }
            if (onFailed != null) {
                synchronized (onFailed) {
                    removed = new ArrayList(onFailed);
                    onFailed.clear();
                }
                for (int i = 0; i < removed.size(); i++)
                    job.addDeferred(null, removed.get(i), expiration, isLease);
                removed = null;
            }
        }
    }
    *****/

    /**
     *  Must be called by the search job queued by search() on success or failure
     */
    void complete(Hash key) {
        synchronized (_activeFloodQueries) { _activeFloodQueries.remove(key); }
    }

    /** list of the Hashes of currently known floodfill peers;
     *  Returned list will not include our own hash.
     *  List is not sorted and not shuffled.
     */
    public List<Hash> getFloodfillPeers() {
        FloodfillPeerSelector sel = (FloodfillPeerSelector)getPeerSelector();
        return sel.selectFloodfillParticipants(getKBuckets());
    }

    /** @since 0.7.10 */
    boolean isVerifyInProgress(Hash h) {
        return _verifiesInProgress.contains(h);
    }

    /** @since 0.7.10 */
    void verifyStarted(Hash h) {
        _verifiesInProgress.add(h);
    }

    /** @since 0.7.10 */
    void verifyFinished(Hash h) {
        _verifiesInProgress.remove(h);
    }

    /** NTCP cons drop quickly but SSU takes a while, so it's prudent to keep this
     *  a little higher than 1 or 2. */
    protected final static int MIN_ACTIVE_PEERS = 5;

    /** @since 0.8.7 */
    private static final int MAX_DB_BEFORE_SKIPPING_SEARCH;
    static {
        long maxMemory = SystemVersion.getMaxMemory();
        // 250 for every 32 MB, min of 250, max of 1250
        MAX_DB_BEFORE_SKIPPING_SEARCH = (int) Math.max(250l, Math.min(1250l, maxMemory / ((32 * 1024 * 1024l) / 250)));
    }

    /**
     *  Search for a newer router info, drop it from the db if the search fails,
     *  unless just started up or have bigger problems.
     */
    @Override
    protected void lookupBeforeDropping(Hash peer, RouterInfo info) {
        // following are some special situations, we don't want to
        // drop the peer in these cases
        // yikes don't do this - stack overflow
        //  getFloodfillPeers().size() == 0 ||
        // yikes2 don't do this either - deadlock!
        //  getKnownRouters() < MIN_REMAINING_ROUTERS ||
        if (info.getNetworkId() == _networkID &&
            (getKBucketSetSize() < MIN_REMAINING_ROUTERS ||
             _context.router().getUptime() < DONT_FAIL_PERIOD ||
             _context.commSystem().countActivePeers() <= MIN_ACTIVE_PEERS)) {
            if (_log.shouldLog(Log.WARN))
                _log.warn("Not failing " + peer.toBase64() + " as we are just starting up or have problems");
            return;
        }

        // should we skip the search?
        if (_floodfillEnabled ||
            _context.jobQueue().getMaxLag() > 500 ||
            _context.banlist().isBanlistedForever(peer) ||
            getKBucketSetSize() > MAX_DB_BEFORE_SKIPPING_SEARCH) {
            // don't try to overload ourselves (e.g. failing 3000 router refs at
            // once, and then firing off 3000 netDb lookup tasks)
            // Also don't queue a search if we have plenty of routerinfos
            // (KBucketSetSize() includes leasesets but avoids locking)
            super.lookupBeforeDropping(peer, info);
            return;
        }

        // this sends out the search to the floodfill peers even if we already have the
        // entry locally, firing no job if it gets a reply with an updated value (meaning
        // we shouldn't drop them but instead use the new data), or if they all time out,
        // firing the dropLookupFailedJob, which actually removes our local reference
        search(peer, new DropLookupFoundJob(_context, peer, info), new DropLookupFailedJob(_context, peer, info), 10*1000, false);
    }

    private class DropLookupFailedJob extends JobImpl {
        private final Hash _peer;
        private final RouterInfo _info;

        public DropLookupFailedJob(RouterContext ctx, Hash peer, RouterInfo info) {
            super(ctx);
            _peer = peer;
            _info = info;
        }
        public String getName() { return "Lookup on failure of netDb peer timed out"; }
        public void runJob() {
            dropAfterLookupFailed(_peer);
        }
    }

    private class DropLookupFoundJob extends JobImpl {
        private final Hash _peer;
        private final RouterInfo _info;

        public DropLookupFoundJob(RouterContext ctx, Hash peer, RouterInfo info) {
            super(ctx);
            _peer = peer;
            _info = info;
        }
        public String getName() { return "Lookup on failure of netDb peer matched"; }
        public void runJob() {
            RouterInfo updated = lookupRouterInfoLocally(_peer);
            if ( (updated != null) && (updated.getPublished() > _info.getPublished()) ) {
                // great, a legitimate update
            } else {
                // they just sent us what we already had.
                // kill 'em both
                dropAfterLookupFailed(_peer);
            }
        }
    }
}