/*
* This file is part of the Wayback archival access software
* (http://archive-access.sourceforge.net/projects/wayback/).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.cdxserver.processor;
import junit.framework.TestCase;
import org.archive.format.cdx.CDXLine;
import org.archive.format.cdx.FieldSplitFormat;
import org.easymock.EasyMock;
/**
* Test for {@link DupeTimestampBestStatusFilter} and
* {@link DupeTimestampLastBestStatusFilter}.
*
*/
public class DupeTimestampBestStatusFilterTest extends TestCase {
DupeTimestampBestStatusFilter cut;
/* (non-Javadoc)
* @see junit.framework.TestCase#setUp()
*/
protected void setUp() throws Exception {
super.setUp();
}
static class TestCDXLine extends CDXLine {
static final FieldSplitFormat format = new FieldSplitFormat("timestamp,statuscode,robotflags,filename");
public TestCDXLine(String timestamp, int status) {
this(timestamp, status, "-", "NA");
}
public TestCDXLine(String timestamp, int status, String filename) {
this(timestamp, status, "-", filename);
}
public TestCDXLine(String timestamp, int status, String robotflags, String filename) {
super(timestamp + " " + status + " " + robotflags + " " + filename, format);
}
}
protected final BaseProcessor setupOutputMock(CDXLine[] cdxlines, int... expected) {
BaseProcessor output = EasyMock.createStrictMock(BaseProcessor.class);
output.begin();
EasyMock.expectLastCall().once();
for (int i : expected) {
EasyMock.expect(output.writeLine(cdxlines[i])).andReturn(1);
}
output.end();
EasyMock.expectLastCall().once();
return output;
}
protected final void process(CDXLine[] cdxlines) {
// simplified sequence - actual code also calls trackLine()
// and modifyOutputFormat, which are irrelevant to the class
// under test.
cut.begin();
for (CDXLine l : cdxlines) {
cut.writeLine(l);
}
cut.end();
}
static final CDXLine[] TEST_CASE_1 = {
new TestCDXLine("20140101022436", 200),
new TestCDXLine("20140101033526", 200),
new TestCDXLine("20140101033819", 200),
new TestCDXLine("20140101042648", 200)
};
public void testBasic_dedupLength10() {
BaseProcessor output = setupOutputMock(TEST_CASE_1, 0, 1, 3);
cut = new DupeTimestampBestStatusFilter(output, 10, null);
EasyMock.replay(output);
process(TEST_CASE_1);
EasyMock.verify();
}
public void testBasic_dedupLength8() {
BaseProcessor output = setupOutputMock(TEST_CASE_1, 0);
cut = new DupeTimestampBestStatusFilter(output, 8, null);
EasyMock.replay(output);
process(TEST_CASE_1);
EasyMock.verify();
}
/**
* dedupLength > 14 is assumed 0 (i.e. no collapsing) and does not cause any
* trouble.
*/
public void testBasic_dedupLength16() {
BaseProcessor output = setupOutputMock(TEST_CASE_1, 0, 1, 2, 3);
cut = new DupeTimestampBestStatusFilter(output, 16, null);
EasyMock.replay(output);
process(TEST_CASE_1);
EasyMock.verify();
}
static final CDXLine[] TEST_CASE_2 = {
new TestCDXLine("20140902201508", 200, "A"),
new TestCDXLine("20140903012025", 200, "A"),
new TestCDXLine("20140903020020", 301, "A"),
new TestCDXLine("20140903182258", 200, "L"),
new TestCDXLine("20140903192521", 200, "A"),
new TestCDXLine("20140903192732", 301, "L")
};
/**
* Picks the first CDX line with the best (smallest) {@code statuscode}
* within each group.
*/
public void testBasic_bestStatusCode() {
BaseProcessor output = setupOutputMock(TEST_CASE_2, 0, 1);
cut = new DupeTimestampBestStatusFilter(output, 8, null);
EasyMock.replay(output);
process(TEST_CASE_2);
EasyMock.verify();
}
/**
* soft-blocked captures are passed-through (they are supposed to be
* removed later).
*/
public void testBlockedPassesThrough() {
final CDXLine[] TEST_CASE = {
new TestCDXLine("20140902201508", 200, "-", "A"),
new TestCDXLine("20140903012025", 200, "X", "A"),
new TestCDXLine("20140903020020", 200, "-", "A"),
new TestCDXLine("20140903182258", 200, "-", "L"),
new TestCDXLine("20140903192521", 200, "-", "A"),
new TestCDXLine("20140903192732", 200, "-", "L")
};
// 20140903012025 capture is passed through. The next one
// 20140903020020 is picked up by the filter for 20140903
// period, and the rest is dropped.
BaseProcessor output = setupOutputMock(TEST_CASE, 0, 1, 2);
cut = new DupeTimestampBestStatusFilter(output, 8, null);
EasyMock.replay(output);
process(TEST_CASE);
EasyMock.verify();
}
// DupeTimestampLastBestStatusFilter
/**
* Picks the last CDX line with the best (smallest) {@code statuscode}
* within each group.
*/
public void testLastBestStatus() {
BaseProcessor output = setupOutputMock(TEST_CASE_2, 0, 4);
cut = new DupeTimestampLastBestStatusFilter(output, 8, null);
EasyMock.replay(output);
process(TEST_CASE_2);
EasyMock.verify();
}
public void testLastBestStatus_withNoCollapse() {
BaseProcessor output = setupOutputMock(TEST_CASE_2, 0, 3, 4, 5);
cut = new DupeTimestampLastBestStatusFilter(output, 8, new String[] { "L" });
EasyMock.replay(output);
process(TEST_CASE_2);
EasyMock.verify();
}
}