// Copyright 2006 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.enterprise.connector.scheduler; import com.google.enterprise.connector.pusher.FeedConnection; import com.google.enterprise.connector.traversal.BatchResult; import com.google.enterprise.connector.traversal.BatchSize; import com.google.enterprise.connector.traversal.FileSizeLimitInfo; import com.google.enterprise.connector.traversal.TraversalDelayPolicy; import com.google.enterprise.connector.util.Clock; import java.util.logging.Level; import java.util.logging.Logger; /** * Keeps track of the load for each connector instance as well as supplies * batchHint to indicate how many docs to allow to be traversed by traverser. */ /* @NotThreadSafe */ /* @GuardedBy("ConnectorCoordinatorImpl") */ public class HostLoadManager implements LoadManager { private static final Logger LOGGER = Logger.getLogger(HostLoadManager.class.getName()); private static final long MINUTE_IN_MILLIS = 60 * 1000L; // TODO(bmj): Raise this to 1000 when the GSA moves there. public static final int DEFAULT_HOST_LOAD = 500; /** * The batch size as calculated by the last call to determineBatchSize(). */ private int lastBatchSize; /** * The last recorded {@link BatchResult} traversal result for the connector. * The result returned by the previous traversal is used to calculate the * optimum size for the next traversal batch to maintain the configured * host load. */ private BatchResult lastBatchResult; /** * The optimal number of documents for each Traversal to return. * Small batches (<100) incur significant per batch overhead, and may * backlog feed processing on the GSA. * Large batches may consume excessive local and Repository resources. */ private int batchSize = 1000; /** The smallest allowed batch size. */ private int minBatchSize; /** * Number of milliseconds used to measure the feed rate. * In particular, we try to constrain our feed rate to * loadFromSchedule docs per periodInMillis. Several * periods worth of results may be used to keep the load * on target. * * By default, the HostLoadManager will use a one minute period * for calculating the batchSize. */ private long periodInMillis = MINUTE_IN_MILLIS; /** * The load is the target number of documents per period to process. */ private int load = DEFAULT_HOST_LOAD; /** * The target traversal rate, based upon the load and the period. */ private float rate; /** * Used for timing throughput. */ private final Clock clock; /** * Used for determining feed backlog status. */ private final FeedConnection feedConnection; /** * Used when calculating low-memory conditions. */ private final FileSizeLimitInfo fileSizeLimit; private boolean gotLowMemory = false; private long lastLowMemMessage = 0L; /** * Constructor used by {@link HostLoadManagerFactory} to create a * {@link LoadManager} for a a connector instance. * * @param feedConnection a {@link FeedConnection}. * @param fileSizeLimit a {@link FileSizeLimitInfo}. * @param clock a {@link Clock}. */ public HostLoadManager(FeedConnection feedConnection, FileSizeLimitInfo fileSizeLimit, Clock clock) { this.feedConnection = feedConnection; this.fileSizeLimit = fileSizeLimit; this.clock = clock; seedLoad(); } /** * Sets the target load in documents per period. * * @param load target load in documents per period. */ @Override public void setLoad(int load) { if (load >= 0) { LOGGER.fine("Setting host load to " + load); this.load = load; seedLoad(); } else { LOGGER.warning("Invalid host load: " + load); } } /** * Sets the measurement period in seconds. * * @param periodInSeconds measurement period in seconds. */ @Override public void setPeriod(int periodInSeconds) { if (periodInSeconds > 0) { LOGGER.fine("Setting load measurement period to " + periodInSeconds + " seconds"); periodInMillis = periodInSeconds * 1000L; seedLoad(); } else { LOGGER.warning("Invalid load measurement period: " + periodInSeconds); } } /** * @param batchSize the target batchSize to set. */ @Override public void setBatchSize(int batchSize) { if (batchSize >= 0) { this.batchSize = batchSize; LOGGER.fine("Setting the maximum batch size to " + batchSize); seedLoad(); } else { LOGGER.warning("Invalid batch size: " + batchSize); } } /** * Sets the target traversal rate, based upon the configured load and period, * and seeds the lastBatchRequest(). */ private void seedLoad() { minBatchSize = Math.min(load, batchSize); LOGGER.fine("Setting the minimum batch size to " + batchSize); rate = ((float) load) / periodInMillis; lastBatchSize = Math.min(load, batchSize); lastBatchResult = new BatchResult(TraversalDelayPolicy.IMMEDIATE, lastBatchSize, 0L, periodInMillis); } /** * Lets HostLoadManager know how many documents have been traversed so that * it can properly enforce the host load. * * @param batchResult a traversal BatchResult */ @Override public void recordResult(BatchResult batchResult) { if (batchResult.getCountProcessed() > 0) { lastBatchResult = batchResult; } } /** * Determine how many documents to be recommended to be traversed. This * number is based on the max feed rate for the connector instance as well * as the load determined based on recently recorded results. * * @return BatchSize hint to the number of documents the traverser * should traverse */ @Override public BatchSize determineBatchSize() { BatchRequest batchReq = getBatchRequest(); if (LOGGER.isLoggable(Level.FINEST)) { LOGGER.finest(batchReq.toString()); } // If the delay time hasn't expired, batch size is 0. // However, if there is less that 100ms left, just let it go. if ((batchReq.delay == 0) || (lastBatchResult.getEndTime() + batchReq.delay < clock.getTimeMillis() + 100)) { lastBatchSize = batchReq.batchSize; return new BatchSize(batchReq.batchSize); } return new BatchSize(); } /** * Return true if this connector instance should not be scheduled * for traversal at this time. * * @return true if the connector should not run at this time */ @Override public boolean shouldDelay() { BatchRequest batchReq = getBatchRequest(); // If the delay time hasn't expired, continue delay. // However, if there is less that 100ms left, just let it go. if ((lastBatchResult.getEndTime() + batchReq.delay) >= clock.getTimeMillis() + 100) { return true; } // If the process is running low on memory, don't traverse. if (fileSizeLimit != null) { Runtime rt = Runtime.getRuntime(); long available = rt.maxMemory() - (rt.totalMemory() - rt.freeMemory()); if (available < fileSizeLimit.maxFeedSize()) { Level level = (gotLowMemory) ? Level.FINE : Level.WARNING; gotLowMemory = true; long now = clock.getTimeMillis(); // Log message no more than once every minute. if (now > (lastLowMemMessage + (60 * 1000))) { lastLowMemMessage = now; LOGGER.log(level, "Delaying traversal due to low memory condition: " + available / (1024 * 1024L) + " MB available"); } return true; } else if (gotLowMemory) { gotLowMemory = false; lastLowMemMessage = 0L; LOGGER.info("Resuming traversal after low memory condition abates: " + available / (1024 * 1024L) + " MB available"); } } // If the GSA this connector is feeding is backlogged handling feeds, // don't traverse. if ((feedConnection != null) && feedConnection.isBacklogged()) { return true; } return false; } /** * Calculate the batch size for the next traversal batch. * This uses the throughput of the previous traversal batch and * the previously determined batch size to determine a batch * size and a delay that will keep the traversal rate at or * below the configured load. */ private BatchRequest getBatchRequest() { int count = lastBatchResult.getCountProcessed(); int time = lastBatchResult.getElapsedTime(); float lastRate = ((float) count) / time; int newBatchSize = (int)(lastBatchSize * rate / lastRate); if (lastRate < 0.85F * rate) { return new BatchRequest(0, Math.min(batchSize, newBatchSize)); } else if (lastRate > 1.15F * rate) { int delay = (int) (count * periodInMillis / load - time); return new BatchRequest(delay, Math.max(minBatchSize, newBatchSize)); } else { return new BatchRequest(0, lastBatchSize); } } private static class BatchRequest { public final int delay; public final int batchSize; BatchRequest(int delay, int batchSize) { this.delay = delay; this.batchSize = batchSize; } public String toString() { return ((delay == 0) ? "no delay" : "delay = " + delay) + ", batchSize = " + batchSize; } } }