/* $Id: PerformanceStatistics.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.interfaces;

import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.system.Logging;
import java.util.*;

/** An instance of this class keeps a running average of how long it takes for every connection to process a document.
* This information is used to limit queuing per connection to something reasonable given the characteristics of the connection.
* The two public methods are synchronized on this instance; the nested helper classes do no locking of their own.
*/
public class PerformanceStatistics
{
  public static final String _rcsid = "@(#)$Id: PerformanceStatistics.java 988245 2010-08-23 18:39:35Z kwright $";

  /** This is the fetch rate (documents per minute) that will be returned in the complete absence of any other
  * information. This represents a 'wild guess' of a sort, used only at the very start of a job, and designed to not
  * hopelessly overload the queue with stuff from one connection only. */
  protected static double DEFAULT_FETCH_RATE = 900.0;
  /** Per-document time, in milliseconds, that corresponds to the default fetch rate (truncated to a whole millisecond).
  * Used to seed the history of a newly-created averaging queue. */
  protected static long DEFAULT_FETCH_TIME = (long)(((double)60000.0)/DEFAULT_FETCH_RATE);

  /** These are the weighting coefficients for the average, most recent record first. They should all add up to 1.0 */
  protected static double[] weights = new double[]{0.5,0.25,0.125,0.0625,0.0625};

  /** This hash is keyed by the connection name, and has elements of type AveragingQueue */
  protected HashMap<String,AveragingQueue> connectionHash = new HashMap<String,AveragingQueue>();

  /** Constructor */
  public PerformanceStatistics()
  {
  }

  /** Note the successful completion of a set of documents using a single connection, and record the statistics for them.
  *@param connectionName is the name of the connection the documents were processed with.
  *@param documentSetSize is the number of documents in the completed set.
  *@param elapsedTime is the time the set took, in milliseconds.
  */
  public synchronized void noteDocumentsCompleted(String connectionName, int documentSetSize, long elapsedTime)
  {
    AveragingQueue q = connectionHash.get(connectionName);
    if (q == null)
    {
      // First report for this connection: start a fresh history, pre-filled with default records
      q = new AveragingQueue();
      connectionHash.put(connectionName,q);
    }
    q.addRecord(documentSetSize,elapsedTime);
  }

  /** Obtain current average document fetch rate (in documents per minute per connection).
  *@param connectionName is the name of the connection.
  *@return the weighted-average rate, or DEFAULT_FETCH_RATE if nothing has been recorded for the connection yet.
  */
  public synchronized double calculateConnectionFetchRate(String connectionName)
  {
    AveragingQueue q = connectionHash.get(connectionName);
    if (q == null)
      // If there's no averaging queue, return a value that is consistent with wide-open performance
      return DEFAULT_FETCH_RATE;
    return q.calculateFetchRate();
  }

  /** This class keeps track of some depth of fetch history for an individual connection, and is used to calculate a
  * weighted average fetches-per-minute rate.
  */
  protected static class AveragingQueue
  {
    /** The internal structure of the averaging queue is a circular buffer, which gets initialized to the default value */
    protected AveragingRecord[] records;
    /** This is the current start pointer: the slot the next record will be written to */
    protected int startIndex;

    /** Constructor.  Creates one slot per weight, each holding a single-document record at the default fetch time. */
    public AveragingQueue()
    {
      records = new AveragingRecord[weights.length];
      for (int i = 0; i < records.length; i++)
      {
        records[i] = new AveragingRecord(1,DEFAULT_FETCH_TIME);
      }
      startIndex = 0;
    }

    /** Add a record, overwriting the oldest one in the buffer.
    *@param setSize is the number of documents in the set.
    *@param elapsedTime is the time the set took, in milliseconds.
    */
    public void addRecord(int setSize, long elapsedTime)
    {
      records[startIndex] = new AveragingRecord(setSize,elapsedTime);
      startIndex++;
      // Wrap around at the end of the circular buffer
      if (startIndex == records.length)
        startIndex = 0;
    }

    /** Calculate running-average fetch rate.
    *@return the weighted sum of the per-record rates, in documents per minute.
    */
    public double calculateFetchRate()
    {
      // The calculation involves calculating the fetch rate for each point in the history we keep, and then
      // multiplying it times the appropriate weight, and summing the whole thing.
      double rval = 0.0;
      int currentIndex = startIndex;
      for (int i = 0; i < weights.length; i++)
      {
        // Walk backwards from the write position, so the first weight is applied to the most recent record
        if (currentIndex == 0)
          currentIndex = records.length;
        currentIndex--;
        rval += weights[i] * records[currentIndex].calculateRate();
      }
      return rval;
    }
  }

  /** This class contains the data for a single document set against the given connection */
  protected static class AveragingRecord
  {
    /** Number of documents in the set */
    protected int documentCount;
    /** Time taken to process the set, in milliseconds */
    protected long elapsedTime;

    /** Constructor.
    *@param documentCount is the number of documents in the set.
    *@param elapsedTime is the time the set took, in milliseconds.
    */
    public AveragingRecord(int documentCount, long elapsedTime)
    {
      this.documentCount = documentCount;
      this.elapsedTime = elapsedTime;
    }

    /** Calculate the fetch rate this record represents.
    * An elapsed time below one millisecond (a set that completed within the clock's resolution, or a clock that
    * stepped backwards) is treated as one millisecond, so that the result is always a finite, non-negative number
    * rather than Infinity, NaN, or a negative rate that would corrupt the weighted average.
    *@return the rate in documents per minute.
    */
    public double calculateRate()
    {
      long effectiveTime = (elapsedTime < 1L) ? 1L : elapsedTime;
      return 60000.0 * ((double)documentCount)/((double)effectiveTime);
    }
  }

}