/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.crawler.framework;
import org.archive.crawler.event.StatSnapshotEvent;
import org.archive.crawler.reporting.CrawlStatSnapshot;
import org.archive.crawler.reporting.StatisticsTracker;
import org.archive.state.ModuleTestBase;
public class CrawlLimitEnforcerTest extends ModuleTestBase {
public static class MockCrawlController extends CrawlController {
private static final long serialVersionUID = 1l;
public CrawlStatus stopRequestedMessage = null;
@Override
public synchronized void requestCrawlStop(CrawlStatus message) {
stopRequestedMessage = message;
}
}
public void testMaxBytesDownload() {
StatisticsTracker stats = new StatisticsTracker();
MockCrawlController cc = new MockCrawlController();
CrawlLimitEnforcer enforcer = new CrawlLimitEnforcer();
enforcer.setCrawlController(cc);
enforcer.setMaxBytesDownload(1000000);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 1000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 600000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1000000;}}));
assertEquals(CrawlStatus.FINISHED_DATA_LIMIT, cc.stopRequestedMessage);
}
public void testMaxNovelBytes() {
StatisticsTracker stats = new StatisticsTracker();
MockCrawlController cc = new MockCrawlController();
CrawlLimitEnforcer enforcer = new CrawlLimitEnforcer();
enforcer.setCrawlController(cc);
enforcer.setMaxNovelBytes(1000000);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 1000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 600000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1000000;}}));
assertEquals(CrawlStatus.FINISHED_DATA_LIMIT, cc.stopRequestedMessage);
}
public void testMaxNovelUrls() {
StatisticsTracker stats = new StatisticsTracker();
MockCrawlController cc = new MockCrawlController();
CrawlLimitEnforcer enforcer = new CrawlLimitEnforcer();
enforcer.setCrawlController(cc);
enforcer.setMaxNovelUrls(100);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 1000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 600000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 100;}}));
assertEquals(CrawlStatus.FINISHED_DOCUMENT_LIMIT, cc.stopRequestedMessage);
}
public void testMaxDocumentsDownload() {
StatisticsTracker stats = new StatisticsTracker();
MockCrawlController cc = new MockCrawlController();
CrawlLimitEnforcer enforcer = new CrawlLimitEnforcer();
enforcer.setCrawlController(cc);
enforcer.setMaxDocumentsDownload(100);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 1000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 600000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 100;}}));
assertEquals(CrawlStatus.FINISHED_DOCUMENT_LIMIT, cc.stopRequestedMessage);
}
public void testMaxTimeSeconds() {
StatisticsTracker stats = new StatisticsTracker();
MockCrawlController cc = new MockCrawlController();
CrawlLimitEnforcer enforcer = new CrawlLimitEnforcer();
enforcer.setCrawlController(cc);
enforcer.setMaxTimeSeconds(600);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 1000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 600000;}}));
assertEquals(CrawlStatus.FINISHED_TIME_LIMIT, cc.stopRequestedMessage);
}
public void testMaxWarcNovelBytes() {
StatisticsTracker stats = new StatisticsTracker();
MockCrawlController cc = new MockCrawlController();
CrawlLimitEnforcer enforcer = new CrawlLimitEnforcer();
enforcer.setCrawlController(cc);
enforcer.setMaxWarcNovelBytes(1000000);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 1000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 600000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1000000;}}));
assertEquals(CrawlStatus.FINISHED_DATA_LIMIT, cc.stopRequestedMessage);
}
public void testMaxWarcNovelUrls() {
StatisticsTracker stats = new StatisticsTracker();
MockCrawlController cc = new MockCrawlController();
CrawlLimitEnforcer enforcer = new CrawlLimitEnforcer();
enforcer.setCrawlController(cc);
enforcer.setMaxWarcNovelUrls(100);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{downloadedUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 1000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{elapsedMilliseconds = 600000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{bytesProcessed = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelBytes = 1000000;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{novelUriCount = 100;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 1;}}));
assertNull(cc.stopRequestedMessage);
enforcer.onApplicationEvent(new StatSnapshotEvent(stats, new CrawlStatSnapshot() {{warcNovelUriCount = 100;}}));
assertEquals(CrawlStatus.FINISHED_DOCUMENT_LIMIT, cc.stopRequestedMessage);
}
}