package org.commoncrawl.service.statscollector; import java.io.File; import java.io.IOException; import java.util.Collection; import java.util.Set; import java.util.Map.Entry; import java.util.concurrent.Semaphore; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.record.Buffer; import org.commoncrawl.async.CallbackWithResult; import org.commoncrawl.async.EventLoop; import org.commoncrawl.async.ConcurrentTask.CompletionCallback; import org.commoncrawl.rpc.base.shared.BinaryProtocol; import org.commoncrawl.service.statscollector.CrawlerStats; import org.commoncrawl.util.CCStringUtils; import org.commoncrawl.util.time.Hour; import com.google.common.collect.ImmutableSortedMap; public class CrawlerStatsCollection extends StatsCollection<CrawlerStats> { public static final String GROUP_KEY = "crawlerStats"; public CrawlerStatsCollection(StatsLogManager logFileManager,String uniqueKey) throws IOException { super(logFileManager, GROUP_KEY, uniqueKey); } @Override public CrawlerStats bufferToValueType(Buffer incomingBuffer) throws IOException { DataInputBuffer buffer = new DataInputBuffer(); buffer.reset(incomingBuffer.get(),0, incomingBuffer.getCount()); CrawlerStats recordOut = new CrawlerStats(); recordOut.deserialize(buffer, new BinaryProtocol()); return recordOut; } @Override public void combineHourlyValues(CrawlerStats sourceValue, CrawlerStats otherValue) { sourceValue.setUrlsProcessed(sourceValue.getUrlsProcessed() + otherValue.getUrlsProcessed()); sourceValue.setUrlsSucceeded(sourceValue.getUrlsSucceeded() + otherValue.getUrlsSucceeded()); sourceValue.setUrlsFailed(sourceValue.getUrlsFailed() + otherValue.getUrlsFailed()); sourceValue.setHttp200Count(sourceValue.getHttp200Count() + otherValue.getHttp200Count()); sourceValue.setHttp300Count(sourceValue.getHttp300Count() + otherValue.getHttp300Count()); sourceValue.setHttp301Count(sourceValue.getHttp301Count() + otherValue.getHttp301Count()); sourceValue.setHttp302Count(sourceValue.getHttp302Count() + otherValue.getHttp302Count()); sourceValue.setHttp304Count(sourceValue.getHttp304Count() + otherValue.getHttp304Count()); sourceValue.setHttp400Count(sourceValue.getHttp400Count() + otherValue.getHttp400Count()); sourceValue.setHttp403Count(sourceValue.getHttp403Count() + otherValue.getHttp403Count()); sourceValue.setHttp404Count(sourceValue.getHttp404Count() + otherValue.getHttp404Count()); sourceValue.setHttp500Count(sourceValue.getHttp500Count() + otherValue.getHttp500Count()); sourceValue.setHttpOtherCount(sourceValue.getHttpOtherCount() + otherValue.getHttpOtherCount()); sourceValue.setHttpErrorUNKNOWN(sourceValue.getHttpErrorUNKNOWN() + otherValue.getHttpErrorUNKNOWN()); sourceValue.setHttpErrorUnknownProtocol(sourceValue.getHttpErrorUnknownProtocol() + otherValue.getHttpErrorUnknownProtocol()); sourceValue.setHttpErrorMalformedURL(sourceValue.getHttpErrorMalformedURL() + otherValue.getHttpErrorMalformedURL()); sourceValue.setHttpErrorTimeout(sourceValue.getHttpErrorTimeout() + otherValue.getHttpErrorTimeout()); sourceValue.setHttpErrorDNSFailure(sourceValue.getHttpErrorDNSFailure() + otherValue.getHttpErrorDNSFailure()); sourceValue.setHttpErrorResolverFailure(sourceValue.getHttpErrorResolverFailure() + otherValue.getHttpErrorResolverFailure()); sourceValue.setHttpErrorIOException(sourceValue.getHttpErrorIOException() + otherValue.getHttpErrorIOException()); sourceValue.setHttpErrorRobotsExcluded(sourceValue.getHttpErrorRobotsExcluded() + otherValue.getHttpErrorRobotsExcluded()); sourceValue.setHttpErrorNoData(sourceValue.getHttpErrorNoData() + otherValue.getHttpErrorNoData()); sourceValue.setHttpErrorRobotsParseError(sourceValue.getHttpErrorRobotsParseError() + otherValue.getHttpErrorRobotsParseError()); sourceValue.setHttpErrorRedirectFailed(sourceValue.getHttpErrorRedirectFailed() + otherValue.getHttpErrorRedirectFailed()); sourceValue.setHttpErrorRuntimeError(sourceValue.getHttpErrorRuntimeError() + otherValue.getHttpErrorRuntimeError()); sourceValue.setHttpErrorConnectTimeout(sourceValue.getHttpErrorConnectTimeout() + otherValue.getHttpErrorConnectTimeout()); sourceValue.setHttpErrorBlackListedHost(sourceValue.getHttpErrorBlackListedHost() + otherValue.getHttpErrorBlackListedHost()); sourceValue.setHttpErrorBlackListedURL(sourceValue.getHttpErrorBlackListedURL() + otherValue.getHttpErrorBlackListedURL()); sourceValue.setHttpErrorTooManyErrors(sourceValue.getHttpErrorTooManyErrors() + otherValue.getHttpErrorTooManyErrors()); sourceValue.setHttpErrorInCache(sourceValue.getHttpErrorInCache() + otherValue.getHttpErrorInCache()); sourceValue.setHttpErrorInvalidResponseCode(sourceValue.getHttpErrorInvalidResponseCode() + otherValue.getHttpErrorInvalidResponseCode()); sourceValue.setHttpErrorBadRedirectData(sourceValue.getHttpErrorBadRedirectData() + otherValue.getHttpErrorBadRedirectData()); sourceValue.setAverageDownloadSize((sourceValue.getAverageDownloadSize() + otherValue.getAverageDownloadSize()) / 2.0f); sourceValue.setUrlsPerSecond((sourceValue.getUrlsPerSecond() + otherValue.getUrlsPerSecond())/2.0f); sourceValue.setMbytesDownPerSecond((sourceValue.getMbytesDownPerSecond() + otherValue.getMbytesDownPerSecond()) / 2.0f); sourceValue.setBytesDownloaded(sourceValue.getBytesDownloaded() + otherValue.getBytesDownloaded()); sourceValue.setCrawlerMemoryUsedRatio((sourceValue.getCrawlerMemoryUsedRatio() + otherValue.getCrawlerMemoryUsedRatio()) / 2.0f); sourceValue.setUrlsInFetcherQueue(otherValue.getUrlsInFetcherQueue()); sourceValue.setUrlsInLoaderQueue(otherValue.getUrlsInLoaderQueue()); sourceValue.setActvieRobotsRequests(otherValue.getActvieRobotsRequests()); sourceValue.setRobotsRequestsSucceeded(sourceValue.getRobotsRequestsSucceeded() + otherValue.getRobotsRequestsSucceeded()); sourceValue.setRobotsRequestsFailed(sourceValue.getRobotsRequestsFailed() + otherValue.getRobotsRequestsFailed()); sourceValue.setRedirectResultAfter1Hops(sourceValue.getRedirectResultAfter1Hops() + otherValue.getRedirectResultAfter1Hops()); sourceValue.setRedirectResultAfter2Hops(sourceValue.getRedirectResultAfter2Hops() + otherValue.getRedirectResultAfter2Hops()); sourceValue.setRedirectResultAfter3Hops(sourceValue.getRedirectResultAfter3Hops() + otherValue.getRedirectResultAfter3Hops()); sourceValue.setRedirectResultAfterGT3Hops(sourceValue.getRedirectResultAfterGT3Hops() + otherValue.getRedirectResultAfterGT3Hops()); sourceValue.setActiveHosts(otherValue.getActiveHosts()); sourceValue.setScheduledHosts(otherValue.getScheduledHosts()); sourceValue.setIdledHosts(otherValue.getIdledHosts()); sourceValue.setActiveDNSRequests(otherValue.getActiveDNSRequests()); sourceValue.setQueuedDNSRequests(otherValue.getQueuedDNSRequests()); sourceValue.setFailedDNSRequests(sourceValue.getFailedDNSRequests() + otherValue.getFailedDNSRequests()); sourceValue.setSuccessfullDNSRequests(sourceValue.getSuccessfullDNSRequests() + otherValue.getSuccessfullDNSRequests()); sourceValue.setRobotsRequestsQueuedForParse(otherValue.getRobotsRequestsQueuedForParse()); sourceValue.setRobotsRequestsSuccessfullParse(sourceValue.getRobotsRequestsSuccessfullParse() + otherValue.getRobotsRequestsSuccessfullParse()); sourceValue.setRobotsRequestsFailedParse(sourceValue.getRobotsRequestsFailedParse() + otherValue.getRobotsRequestsFailedParse()); sourceValue.setRobotsFileExcludesAllContent(sourceValue.getRobotsFileExcludesAllContent() + otherValue.getRobotsFileExcludesAllContent()); sourceValue.setRobotsFileHadCrawlDelay(sourceValue.getRobotsFileHadCrawlDelay() + otherValue.getRobotsFileHadCrawlDelay()); sourceValue.setRobotsFileHasExplicitMention(sourceValue.getRobotsFileHasExplicitMention() + otherValue.getRobotsFileHasExplicitMention()); sourceValue.setRobotsFileExplicitlyExcludesAll(sourceValue.getRobotsFileExplicitlyExcludesAll() + otherValue.getRobotsFileExplicitlyExcludesAll()); } public static CrawlerStats combineValues(Collection<CrawlerStats> collection) { CrawlerStats valueOut = new CrawlerStats(); // accumulate stats into daily value first ... for (CrawlerStats item : collection) { valueOut.setUrlsProcessed(valueOut.getUrlsProcessed() + item.getUrlsProcessed()); valueOut.setUrlsSucceeded(valueOut.getUrlsSucceeded() + item.getUrlsSucceeded()); valueOut.setUrlsFailed(valueOut.getUrlsFailed() + item.getUrlsFailed()); valueOut.setHttp200Count(valueOut.getHttp200Count() + item.getHttp200Count()); valueOut.setHttp300Count(valueOut.getHttp300Count() + item.getHttp300Count()); valueOut.setHttp301Count(valueOut.getHttp301Count() + item.getHttp301Count()); valueOut.setHttp302Count(valueOut.getHttp302Count() + item.getHttp302Count()); valueOut.setHttp304Count(valueOut.getHttp304Count() + item.getHttp304Count()); valueOut.setHttp400Count(valueOut.getHttp400Count() + item.getHttp400Count()); valueOut.setHttp403Count(valueOut.getHttp403Count() + item.getHttp403Count()); valueOut.setHttp404Count(valueOut.getHttp404Count() + item.getHttp404Count()); valueOut.setHttp500Count(valueOut.getHttp500Count() + item.getHttp500Count()); valueOut.setHttpOtherCount(valueOut.getHttpOtherCount() + item.getHttpOtherCount()); valueOut.setHttpErrorUNKNOWN(valueOut.getHttpErrorUNKNOWN() + item.getHttpErrorUNKNOWN()); valueOut.setHttpErrorUnknownProtocol(valueOut.getHttpErrorUnknownProtocol() + item.getHttpErrorUnknownProtocol()); valueOut.setHttpErrorMalformedURL(valueOut.getHttpErrorMalformedURL() + item.getHttpErrorMalformedURL()); valueOut.setHttpErrorTimeout(valueOut.getHttpErrorTimeout() + item.getHttpErrorTimeout()); valueOut.setHttpErrorDNSFailure(valueOut.getHttpErrorDNSFailure() + item.getHttpErrorDNSFailure()); valueOut.setHttpErrorResolverFailure(valueOut.getHttpErrorResolverFailure() + item.getHttpErrorResolverFailure()); valueOut.setHttpErrorIOException(valueOut.getHttpErrorIOException() + item.getHttpErrorIOException()); valueOut.setHttpErrorRobotsExcluded(valueOut.getHttpErrorRobotsExcluded() + item.getHttpErrorRobotsExcluded()); valueOut.setHttpErrorNoData(valueOut.getHttpErrorNoData() + item.getHttpErrorNoData()); valueOut.setHttpErrorRobotsParseError(valueOut.getHttpErrorRobotsParseError() + item.getHttpErrorRobotsParseError()); valueOut.setHttpErrorRedirectFailed(valueOut.getHttpErrorRedirectFailed() + item.getHttpErrorRedirectFailed()); valueOut.setHttpErrorRuntimeError(valueOut.getHttpErrorRuntimeError() + item.getHttpErrorRuntimeError()); valueOut.setHttpErrorConnectTimeout(valueOut.getHttpErrorConnectTimeout() + item.getHttpErrorConnectTimeout()); valueOut.setHttpErrorBlackListedHost(valueOut.getHttpErrorBlackListedHost() + item.getHttpErrorBlackListedHost()); valueOut.setHttpErrorBlackListedURL(valueOut.getHttpErrorBlackListedURL() + item.getHttpErrorBlackListedURL()); valueOut.setHttpErrorTooManyErrors(valueOut.getHttpErrorTooManyErrors() + item.getHttpErrorTooManyErrors()); valueOut.setHttpErrorInCache(valueOut.getHttpErrorInCache() + item.getHttpErrorInCache()); valueOut.setHttpErrorInvalidResponseCode(valueOut.getHttpErrorInvalidResponseCode() + item.getHttpErrorInvalidResponseCode()); valueOut.setHttpErrorBadRedirectData(valueOut.getHttpErrorBadRedirectData() + item.getHttpErrorBadRedirectData()); valueOut.setAverageDownloadSize(valueOut.getAverageDownloadSize() + item.getAverageDownloadSize()); valueOut.setUrlsPerSecond(valueOut.getUrlsPerSecond() + item.getUrlsPerSecond()); valueOut.setMbytesDownPerSecond(valueOut.getMbytesDownPerSecond() + item.getMbytesDownPerSecond()); valueOut.setBytesDownloaded(valueOut.getBytesDownloaded() + item.getBytesDownloaded()); valueOut.setCrawlerMemoryUsedRatio(valueOut.getCrawlerMemoryUsedRatio() + item.getCrawlerMemoryUsedRatio()); valueOut.setUrlsInFetcherQueue(valueOut.getUrlsInFetcherQueue() + item.getUrlsInFetcherQueue()); valueOut.setUrlsInLoaderQueue(valueOut.getUrlsInLoaderQueue() + item.getUrlsInLoaderQueue()); valueOut.setActvieRobotsRequests(valueOut.getActvieRobotsRequests() + item.getActvieRobotsRequests()); valueOut.setRobotsRequestsSucceeded(valueOut.getRobotsRequestsSucceeded() + item.getRobotsRequestsSucceeded()); valueOut.setRobotsRequestsFailed(valueOut.getRobotsRequestsFailed() + item.getRobotsRequestsFailed()); valueOut.setRedirectResultAfter1Hops(valueOut.getRedirectResultAfter1Hops() + item.getRedirectResultAfter1Hops()); valueOut.setRedirectResultAfter2Hops(valueOut.getRedirectResultAfter2Hops() + item.getRedirectResultAfter2Hops()); valueOut.setRedirectResultAfter3Hops(valueOut.getRedirectResultAfter3Hops() + item.getRedirectResultAfter3Hops()); valueOut.setRedirectResultAfterGT3Hops(valueOut.getRedirectResultAfterGT3Hops() + item.getRedirectResultAfterGT3Hops()); valueOut.setActiveHosts(valueOut.getActiveHosts() + item.getActiveHosts()); valueOut.setScheduledHosts(valueOut.getScheduledHosts() + item.getScheduledHosts()); valueOut.setIdledHosts(valueOut.getIdledHosts() + item.getIdledHosts()); valueOut.setActiveDNSRequests(valueOut.getActiveDNSRequests() + item.getActiveDNSRequests()); valueOut.setQueuedDNSRequests(valueOut.getQueuedDNSRequests() + item.getQueuedDNSRequests()); valueOut.setFailedDNSRequests(valueOut.getFailedDNSRequests() + item.getFailedDNSRequests()); valueOut.setSuccessfullDNSRequests(valueOut.getSuccessfullDNSRequests() + item.getSuccessfullDNSRequests()); valueOut.setRobotsRequestsQueuedForParse(valueOut.getRobotsRequestsQueuedForParse() + item.getRobotsRequestsQueuedForParse()); valueOut.setRobotsRequestsSuccessfullParse(valueOut.getRobotsRequestsSuccessfullParse() + item.getRobotsRequestsSuccessfullParse()); valueOut.setRobotsRequestsFailedParse(valueOut.getRobotsRequestsFailedParse() + item.getRobotsRequestsFailedParse()); valueOut.setRobotsFileExcludesAllContent(valueOut.getRobotsFileExcludesAllContent() + item.getRobotsFileExcludesAllContent()); valueOut.setRobotsFileHadCrawlDelay(valueOut.getRobotsFileHadCrawlDelay() + item.getRobotsFileHadCrawlDelay()); valueOut.setRobotsFileHasExplicitMention(valueOut.getRobotsFileHasExplicitMention() + item.getRobotsFileHasExplicitMention()); valueOut.setRobotsFileExplicitlyExcludesAll(valueOut.getRobotsFileExplicitlyExcludesAll() + item.getRobotsFileExplicitlyExcludesAll()); } // next average non-cumilative stats valueOut.setAverageDownloadSize(valueOut.getAverageDownloadSize() / collection.size()); valueOut.setUrlsPerSecond(valueOut.getUrlsPerSecond() / collection.size()); valueOut.setMbytesDownPerSecond(valueOut.getMbytesDownPerSecond() / collection.size()); valueOut.setCrawlerMemoryUsedRatio(valueOut.getCrawlerMemoryUsedRatio()/ collection.size()); valueOut.setUrlsInFetcherQueue(valueOut.getUrlsInFetcherQueue() / collection.size()); valueOut.setUrlsInLoaderQueue(valueOut.getUrlsInLoaderQueue() / collection.size()); valueOut.setActvieRobotsRequests(valueOut.getActvieRobotsRequests() / collection.size()); valueOut.setRobotsRequestsQueuedForParse(valueOut.getRobotsRequestsQueuedForParse() / collection.size()); return valueOut; } @Override public CrawlerStats createDailyValue(Set<Entry<Hour, CrawlerStats>> hourlyValueSet) { CrawlerStats dailyValueOut = new CrawlerStats(); // accumulate stats into daily value first ... for (Entry<Hour,CrawlerStats> entry : hourlyValueSet) { CrawlerStats hourlyValue = entry.getValue(); dailyValueOut.setUrlsProcessed(dailyValueOut.getUrlsProcessed() + hourlyValue.getUrlsProcessed()); dailyValueOut.setUrlsSucceeded(dailyValueOut.getUrlsSucceeded() + hourlyValue.getUrlsSucceeded()); dailyValueOut.setUrlsFailed(dailyValueOut.getUrlsFailed() + hourlyValue.getUrlsFailed()); dailyValueOut.setHttp200Count(dailyValueOut.getHttp200Count() + hourlyValue.getHttp200Count()); dailyValueOut.setHttp300Count(dailyValueOut.getHttp300Count() + hourlyValue.getHttp300Count()); dailyValueOut.setHttp301Count(dailyValueOut.getHttp301Count() + hourlyValue.getHttp301Count()); dailyValueOut.setHttp302Count(dailyValueOut.getHttp302Count() + hourlyValue.getHttp302Count()); dailyValueOut.setHttp304Count(dailyValueOut.getHttp304Count() + hourlyValue.getHttp304Count()); dailyValueOut.setHttp400Count(dailyValueOut.getHttp400Count() + hourlyValue.getHttp400Count()); dailyValueOut.setHttp403Count(dailyValueOut.getHttp403Count() + hourlyValue.getHttp403Count()); dailyValueOut.setHttp404Count(dailyValueOut.getHttp404Count() + hourlyValue.getHttp404Count()); dailyValueOut.setHttp500Count(dailyValueOut.getHttp500Count() + hourlyValue.getHttp500Count()); dailyValueOut.setHttpOtherCount(dailyValueOut.getHttpOtherCount() + hourlyValue.getHttpOtherCount()); dailyValueOut.setHttpErrorUNKNOWN(dailyValueOut.getHttpErrorUNKNOWN() + hourlyValue.getHttpErrorUNKNOWN()); dailyValueOut.setHttpErrorUnknownProtocol(dailyValueOut.getHttpErrorUnknownProtocol() + hourlyValue.getHttpErrorUnknownProtocol()); dailyValueOut.setHttpErrorMalformedURL(dailyValueOut.getHttpErrorMalformedURL() + hourlyValue.getHttpErrorMalformedURL()); dailyValueOut.setHttpErrorTimeout(dailyValueOut.getHttpErrorTimeout() + hourlyValue.getHttpErrorTimeout()); dailyValueOut.setHttpErrorDNSFailure(dailyValueOut.getHttpErrorDNSFailure() + hourlyValue.getHttpErrorDNSFailure()); dailyValueOut.setHttpErrorResolverFailure(dailyValueOut.getHttpErrorResolverFailure() + hourlyValue.getHttpErrorResolverFailure()); dailyValueOut.setHttpErrorIOException(dailyValueOut.getHttpErrorIOException() + hourlyValue.getHttpErrorIOException()); dailyValueOut.setHttpErrorRobotsExcluded(dailyValueOut.getHttpErrorRobotsExcluded() + hourlyValue.getHttpErrorRobotsExcluded()); dailyValueOut.setHttpErrorNoData(dailyValueOut.getHttpErrorNoData() + hourlyValue.getHttpErrorNoData()); dailyValueOut.setHttpErrorRobotsParseError(dailyValueOut.getHttpErrorRobotsParseError() + hourlyValue.getHttpErrorRobotsParseError()); dailyValueOut.setHttpErrorRedirectFailed(dailyValueOut.getHttpErrorRedirectFailed() + hourlyValue.getHttpErrorRedirectFailed()); dailyValueOut.setHttpErrorRuntimeError(dailyValueOut.getHttpErrorRuntimeError() + hourlyValue.getHttpErrorRuntimeError()); dailyValueOut.setHttpErrorConnectTimeout(dailyValueOut.getHttpErrorConnectTimeout() + hourlyValue.getHttpErrorConnectTimeout()); dailyValueOut.setHttpErrorBlackListedHost(dailyValueOut.getHttpErrorBlackListedHost() + hourlyValue.getHttpErrorBlackListedHost()); dailyValueOut.setHttpErrorBlackListedURL(dailyValueOut.getHttpErrorBlackListedURL() + hourlyValue.getHttpErrorBlackListedURL()); dailyValueOut.setHttpErrorTooManyErrors(dailyValueOut.getHttpErrorTooManyErrors() + hourlyValue.getHttpErrorTooManyErrors()); dailyValueOut.setHttpErrorInCache(dailyValueOut.getHttpErrorInCache() + hourlyValue.getHttpErrorInCache()); dailyValueOut.setHttpErrorInvalidResponseCode(dailyValueOut.getHttpErrorInvalidResponseCode() + hourlyValue.getHttpErrorInvalidResponseCode()); dailyValueOut.setHttpErrorBadRedirectData(dailyValueOut.getHttpErrorBadRedirectData() + hourlyValue.getHttpErrorBadRedirectData()); dailyValueOut.setAverageDownloadSize(dailyValueOut.getAverageDownloadSize() + hourlyValue.getAverageDownloadSize()); dailyValueOut.setUrlsPerSecond(dailyValueOut.getUrlsPerSecond() + hourlyValue.getUrlsPerSecond()); dailyValueOut.setMbytesDownPerSecond(dailyValueOut.getMbytesDownPerSecond() + hourlyValue.getMbytesDownPerSecond()); dailyValueOut.setBytesDownloaded(dailyValueOut.getBytesDownloaded() + hourlyValue.getBytesDownloaded()); dailyValueOut.setCrawlerMemoryUsedRatio(dailyValueOut.getCrawlerMemoryUsedRatio() + hourlyValue.getCrawlerMemoryUsedRatio()); dailyValueOut.setUrlsInFetcherQueue(dailyValueOut.getUrlsInFetcherQueue() + hourlyValue.getUrlsInFetcherQueue()); dailyValueOut.setUrlsInLoaderQueue(dailyValueOut.getUrlsInLoaderQueue() + hourlyValue.getUrlsInLoaderQueue()); dailyValueOut.setActvieRobotsRequests(dailyValueOut.getActvieRobotsRequests() + hourlyValue.getActvieRobotsRequests()); dailyValueOut.setRobotsRequestsSucceeded(dailyValueOut.getRobotsRequestsSucceeded() + hourlyValue.getRobotsRequestsSucceeded()); dailyValueOut.setRobotsRequestsFailed(dailyValueOut.getRobotsRequestsFailed() + hourlyValue.getRobotsRequestsFailed()); dailyValueOut.setRedirectResultAfter1Hops(dailyValueOut.getRedirectResultAfter1Hops() + hourlyValue.getRedirectResultAfter1Hops()); dailyValueOut.setRedirectResultAfter2Hops(dailyValueOut.getRedirectResultAfter2Hops() + hourlyValue.getRedirectResultAfter2Hops()); dailyValueOut.setRedirectResultAfter3Hops(dailyValueOut.getRedirectResultAfter3Hops() + hourlyValue.getRedirectResultAfter3Hops()); dailyValueOut.setRedirectResultAfterGT3Hops(dailyValueOut.getRedirectResultAfterGT3Hops() + hourlyValue.getRedirectResultAfterGT3Hops()); dailyValueOut.setActiveHosts(dailyValueOut.getActiveHosts() + hourlyValue.getActiveHosts()); dailyValueOut.setScheduledHosts(dailyValueOut.getScheduledHosts() + hourlyValue.getScheduledHosts()); dailyValueOut.setIdledHosts(dailyValueOut.getIdledHosts() + hourlyValue.getIdledHosts()); dailyValueOut.setActiveDNSRequests(dailyValueOut.getActiveDNSRequests() + hourlyValue.getActiveDNSRequests()); dailyValueOut.setQueuedDNSRequests(dailyValueOut.getQueuedDNSRequests() + hourlyValue.getQueuedDNSRequests()); dailyValueOut.setFailedDNSRequests(dailyValueOut.getFailedDNSRequests() + hourlyValue.getFailedDNSRequests()); dailyValueOut.setSuccessfullDNSRequests(dailyValueOut.getSuccessfullDNSRequests() + hourlyValue.getSuccessfullDNSRequests()); dailyValueOut.setRobotsRequestsQueuedForParse(dailyValueOut.getRobotsRequestsQueuedForParse() + hourlyValue.getRobotsRequestsQueuedForParse()); dailyValueOut.setRobotsRequestsSuccessfullParse(dailyValueOut.getRobotsRequestsSuccessfullParse() + hourlyValue.getRobotsRequestsSuccessfullParse()); dailyValueOut.setRobotsRequestsFailedParse(dailyValueOut.getRobotsRequestsFailedParse() + hourlyValue.getRobotsRequestsFailedParse()); dailyValueOut.setRobotsFileExcludesAllContent(dailyValueOut.getRobotsFileExcludesAllContent() + hourlyValue.getRobotsFileExcludesAllContent()); dailyValueOut.setRobotsFileHadCrawlDelay(dailyValueOut.getRobotsFileHadCrawlDelay() + hourlyValue.getRobotsFileHadCrawlDelay()); dailyValueOut.setRobotsFileHasExplicitMention(dailyValueOut.getRobotsFileHasExplicitMention() + hourlyValue.getRobotsFileHasExplicitMention()); dailyValueOut.setRobotsFileExplicitlyExcludesAll(dailyValueOut.getRobotsFileExplicitlyExcludesAll() + hourlyValue.getRobotsFileExplicitlyExcludesAll()); } // next average non-cumilative stats dailyValueOut.setAverageDownloadSize(dailyValueOut.getAverageDownloadSize() / hourlyValueSet.size()); dailyValueOut.setUrlsPerSecond(dailyValueOut.getUrlsPerSecond() / hourlyValueSet.size()); dailyValueOut.setMbytesDownPerSecond(dailyValueOut.getMbytesDownPerSecond() / hourlyValueSet.size()); dailyValueOut.setCrawlerMemoryUsedRatio(dailyValueOut.getCrawlerMemoryUsedRatio()/ hourlyValueSet.size()); dailyValueOut.setUrlsInFetcherQueue(dailyValueOut.getUrlsInFetcherQueue() / hourlyValueSet.size()); dailyValueOut.setUrlsInLoaderQueue(dailyValueOut.getUrlsInLoaderQueue() / hourlyValueSet.size()); dailyValueOut.setActvieRobotsRequests(dailyValueOut.getActvieRobotsRequests() / hourlyValueSet.size()); dailyValueOut.setRobotsRequestsQueuedForParse(dailyValueOut.getRobotsRequestsQueuedForParse() / hourlyValueSet.size()); return dailyValueOut; } @Override public Buffer valueTypeToBuffer(CrawlerStats value) throws IOException { DataOutputBuffer bufferOut = new DataOutputBuffer(); value.serialize(bufferOut,new BinaryProtocol()); return new Buffer(bufferOut.getData(),0,bufferOut.getLength()); } public static void main(String[] args) { EventLoop eventLoop = new EventLoop(); eventLoop.start(); try { StatsLogManager logManager = new StatsLogManager(eventLoop, new File("/Users/rana")); CrawlerStatsCollection collection = new CrawlerStatsCollection(logManager,"ccn01-PROXY-Prod"); collection.dumpHourlyToJSON(System.out); System.out.println(); final Semaphore blockingSemaphore = new Semaphore(0); collection.dumpDailyToJSON(System.out, new CallbackWithResult<Boolean>() { @Override public void execute(Boolean result) { blockingSemaphore.release(); } }); blockingSemaphore.acquireUninterruptibly(); logManager.shutdown(); } catch (IOException e) { e.printStackTrace(); } eventLoop.stop(); } @Override public void setUniqueKeyInValue(CrawlerStats value) { value.setCrawlerName(_uniqueKey); } @Override public CrawlerStats allocateValueType() { return new CrawlerStats(); } }