/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.crawl; import junit.framework.TestCase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.nutch.util.NutchConfiguration; import org.junit.Before; import org.junit.Test; /** * Test cases for AdaptiveFetchSchedule. * */ public class TestAdaptiveFetchSchedule extends TestCase { private float inc_rate; private float dec_rate; private Configuration conf; private long curTime, lastModified; private int changed, interval, calculateInterval; @Before public void setUp() throws Exception { super.setUp(); conf = NutchConfiguration.create(); inc_rate = conf.getFloat("db.fetch.schedule.adaptive.inc_rate", 0.2f); dec_rate = conf.getFloat("db.fetch.schedule.adaptive.dec_rate", 0.2f); interval = 100; lastModified = 0; } /** * Test the core functionality of AdaptiveFetchSchedule. * */ @Test public void testAdaptiveFetchSchedule() { FetchSchedule fs = new AdaptiveFetchSchedule(); fs.setConf(conf); CrawlDatum p = prepareCrawlDatum(); Text url = new Text("http://www.example.com"); changed = FetchSchedule.STATUS_UNKNOWN; fs.setFetchSchedule(url, p, p.getFetchTime(), p.getModifiedTime(), curTime, lastModified, changed); validateFetchInterval(changed, p.getFetchInterval()); changed = FetchSchedule.STATUS_MODIFIED; fs.setFetchSchedule(url, p, p.getFetchTime(), p.getModifiedTime(), curTime, lastModified, changed); validateFetchInterval(changed, p.getFetchInterval()); p.setFetchInterval(interval); changed = FetchSchedule.STATUS_NOTMODIFIED; fs.setFetchSchedule(url, p, p.getFetchTime(), p.getModifiedTime(), curTime, lastModified, changed); validateFetchInterval(changed, p.getFetchInterval()); } /** * Prepare a CrawlDatum (STATUS_DB_UNFETCHED) to Test AdaptiveFetchSchedule. * * @return properly initialized CrawlDatum */ public CrawlDatum prepareCrawlDatum() { CrawlDatum p = new CrawlDatum(); p.setStatus(CrawlDatum.STATUS_DB_UNFETCHED); p.setFetchInterval(interval); p.setScore(1.0f); p.setFetchTime(0); return p; } /** * * The Method validates interval values according to changed parameter. * * @param changed * status value to check calculated interval value. * @param getInterval * to test IntervalValue from CrawlDatum which is calculated via * AdaptiveFetchSchedule algorithm. */ private void validateFetchInterval(int changed, int getInterval) { if (changed == FetchSchedule.STATUS_UNKNOWN) { assertEquals(getInterval, interval); } else if (changed == FetchSchedule.STATUS_MODIFIED) { calculateInterval = (int) (interval - (interval * dec_rate)); assertEquals(getInterval, calculateInterval); } else if (changed == FetchSchedule.STATUS_NOTMODIFIED) { calculateInterval = (int) (interval + (interval * inc_rate)); assertEquals(getInterval, calculateInterval); } } }