/**
* JHOVE2 - Next-generation architecture for format-aware characterization
* <p>
* Copyright (c) 2009 by The Regents of the University of California, Ithaka
* Harbors, Inc., and The Board of Trustees of the Leland Stanford Junior
* University. All rights reserved.
* </p>
* <p>
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* </p>
* <ul>
* <li>Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.</li>
* <li>Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.</li>
* <li>Neither the name of the University of California/California Digital
* Library, Ithaka Harbors/Portico, or Stanford University, nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.</li>
* </ul>
* <p>
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* </p>
*/
package org.jhove2.module.format.warc;
import org.jhove2.module.format.Validator.Validity;
import org.junit.Test;
/**
* Tests of WARC Module
* @see org.jhove2.module.format.warc.WarcModule
* @author nicl
*/
public class WarcModuleTest extends WarcModuleTestBase {

    /*
     * Note carried over from the original source, which named this file
     * without further explanation (it is not a row of the table below):
     *
     *     IAH-20080430204825-00000-blackbook.warc
     */

    /**
     * Table of expectations handed to the inherited {@code test_cases}.
     * <p>
     * Every row holds four elements, in this order:
     * </p>
     * <ol>
     * <li>the expected {@link Validity},</li>
     * <li>the name of the WARC test file,</li>
     * <li>a {@code String[][]} of expected message groups,</li>
     * <li>a {@code String[]} of further expected messages.</li>
     * </ol>
     * <p>
     * NOTE(review): the groups look like one entry per record of the file
     * (in file order) and the last array like file-level messages; confirm
     * against {@code WarcModuleTestBase.test_cases}.
     * </p>
     */
    Object[][] cases = {
        {
            Validity.False, "invalid-empty.warc",
            groups(),
            msgs(
                "[ERROR/OBJECT] Error in WARC file, expected 'One or more records'"
            )
        },
        {
            Validity.False, "invalid-warcfile-contenttype-recommended.warc",
            groups(
                msgs("[WARNING/OBJECT] Recommend 'Content-Type' header missing"),
                msgs("[WARNING/OBJECT] Recommend 'Content-Type' header missing"),
                msgs("[WARNING/OBJECT] Recommend 'Content-Type' header missing"),
                msgs("[WARNING/OBJECT] Recommend 'Content-Type' header missing"),
                msgs("[WARNING/OBJECT] Recommend 'Content-Type' header missing"),
                msgs("[WARNING/OBJECT] Recommend 'Content-Type' header missing"),
                msgs("[WARNING/OBJECT] Recommend 'Content-Type' header missing")
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-contenttype-warcinfo-recommended.warc",
            groups(
                msgs(
                    "[WARNING/OBJECT] Recommend 'Content-Type' value: value: 'application/warc-fields', instead of: 'application/octet'"
                )
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-digest-fields.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Invalid Incorrect block digest, value: 'D5817BF5B4B35A296823509DD754700A6AD522B5', expected: '1B9310B445384A0F1D3C45B90E2346D608BC73D7'",
                    "[ERROR/OBJECT] Invalid Incorrect payload digest, value: 'D5817BF5B4B35A296823509DD754700A6AD522B5', expected: '1DBBF1909F9142B639402A6241C3873C462DAB32'"
                ),
                msgs(
                    "[ERROR/OBJECT] Invalid 'WARC-Payload-Digest' value, value: 'sha1:', expected: '<digest-algorithm>:<digest-encoded>'",
                    "[ERROR/OBJECT] Invalid 'WARC-Block-Digest' value, value: 'sha1:', expected: '<digest-algorithm>:<digest-encoded>'"
                ),
                msgs(
                    "[ERROR/OBJECT] Invalid 'WARC-Payload-Digest' value, value: ':2WAXX5NUWNNCS2BDKCO5OVDQBJVNKIVV', expected: '<digest-algorithm>:<digest-encoded>'",
                    "[ERROR/OBJECT] Invalid 'WARC-Block-Digest' value, value: ':2WAXX5NUWNNCS2BDKCO5OVDQBJVNKIVV', expected: '<digest-algorithm>:<digest-encoded>'"
                ),
                msgs(
                    "[ERROR/OBJECT] Invalid 'WARC-Payload-Digest' value, value: 'monkeypowah!', expected: '<digest-algorithm>:<digest-encoded>'",
                    "[ERROR/OBJECT] Invalid 'WARC-Block-Digest' value, value: 'monkeypowah!', expected: '<digest-algorithm>:<digest-encoded>'"
                )
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-duplicate-fields.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Duplicate 'WARC-Type' header: 'warcinfo'",
                    "[ERROR/OBJECT] Duplicate 'WARC-Date' header: '2008-04-30T20:48:25Z'",
                    "[ERROR/OBJECT] Duplicate 'WARC-Filename' header: 'IAH-20080430204825-00000-blackbook.warc.gz'",
                    "[ERROR/OBJECT] Duplicate 'WARC-Record-ID' header: '<urn:uuid:35f02b38-eb19-4f0d-86e4-bfe95815069c-1>'",
                    "[ERROR/OBJECT] Duplicate 'Content-Type' header: 'application/warc-fields'",
                    "[ERROR/OBJECT] Duplicate 'Content-Length' header: '483'"
                )
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-fields-empty.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Type' header: value: ''",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: ''",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: ''",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: ''",
                    "[ERROR/OBJECT] Invalid Trailing newlines, value: '0', expected: '2'",
                    "[WARNING/OBJECT] Empty 'WARC-Type' field",
                    "[WARNING/OBJECT] Empty 'WARC-Target-URI' field",
                    "[WARNING/OBJECT] Empty 'WARC-IP-Address' field",
                    "[WARNING/OBJECT] Empty 'WARC-Date' field",
                    "[WARNING/OBJECT] Empty 'WARC-Record-ID' field",
                    "[WARNING/OBJECT] Empty 'Content-Length' field",
                    "[WARNING/OBJECT] Empty 'Content-Type' field",
                    "[WARNING/OBJECT] Empty 'WARC-Block-Digest' field",
                    "[WARNING/OBJECT] Empty 'WARC-Segment-Number' field"
                )
            ),
            msgs(
                "[ERROR/OBJECT] Invalid Data before WARC version",
                "[ERROR/OBJECT] Invalid Empty lines before WARC version"
            )
        },
        {
            Validity.False, "invalid-warcfile-fields-invalidformat.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Invalid 'WARC-Target-URI' value, value: 'bad_uri', expected: 'Absolute URI'",
                    "[ERROR/OBJECT] Invalid 'WARC-IP-Address' value, value: 'a.b.c.d', expected: 'IPv4 or IPv6 format'",
                    "[ERROR/OBJECT] Invalid 'WARC-Date' value, value: 'blue monday', expected: 'yyyy-MM-dd'T'HH:mm:ss'Z''",
                    "[ERROR/OBJECT] Invalid 'WARC-Record-ID' value, value: 'zaphod', expected: 'Absolute URI'",
                    "[ERROR/OBJECT] Invalid 'Content-Length' value, value: 'very lengthy', expected: 'Numeric format'",
                    "[ERROR/OBJECT] Invalid 'Content-Type' value, value: 'gif\\image', expected: '<type>/<sub-type>(; <argument>=<value>)*'",
                    "[ERROR/OBJECT] Invalid 'WARC-Block-Digest' value, value: 'sharif-1; omar', expected: '<digest-algorithm>:<digest-encoded>'",
                    "[ERROR/OBJECT] Invalid 'WARC-Segment-Number' value, value: 'one', expected: 'Numeric format'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: '<zaphod>'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'blue monday'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'very lengthy'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Target-URI' value: value: 'bad_uri'",
                    "[ERROR/OBJECT] Invalid Trailing newlines, value: '0', expected: '2'"
                )
            ),
            msgs(
                "[ERROR/OBJECT] Invalid Data before WARC version",
                "[ERROR/OBJECT] Invalid Empty lines before WARC version"
            )
        },
        {
            Validity.False, "invalid-warcfile-fields-missing.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Type' header: value: ''",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: ''",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: ''",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: ''",
                    "[ERROR/OBJECT] Invalid Trailing newlines, value: '0', expected: '2'",
                    "[WARNING/OBJECT] Empty 'WARC-Type' field",
                    "[WARNING/OBJECT] Empty 'WARC-Record-ID' field",
                    "[WARNING/OBJECT] Empty 'WARC-Date' field",
                    "[WARNING/OBJECT] Empty 'Content-Length' field",
                    "[WARNING/OBJECT] Empty 'Content-Type' field",
                    "[WARNING/OBJECT] Empty 'WARC-Concurrent-To' field",
                    "[WARNING/OBJECT] Empty 'WARC-Block-Digest' field",
                    "[WARNING/OBJECT] Empty 'WARC-Payload-Digest' field",
                    "[WARNING/OBJECT] Empty 'WARC-IP-Address' field",
                    "[WARNING/OBJECT] Empty 'WARC-Refers-To' field",
                    "[WARNING/OBJECT] Empty 'WARC-Target-URI' field",
                    "[WARNING/OBJECT] Empty 'WARC-Truncated' field",
                    "[WARNING/OBJECT] Empty 'WARC-Warcinfo-ID' field",
                    "[WARNING/OBJECT] Empty 'WARC-Filename' field",
                    "[WARNING/OBJECT] Empty 'WARC-Profile' field",
                    "[WARNING/OBJECT] Empty 'WARC-Identified-Payload-Type' field",
                    "[WARNING/OBJECT] Empty 'WARC-Segment-Origin-ID' field",
                    "[WARNING/OBJECT] Empty 'WARC-Segment-Number' field",
                    "[WARNING/OBJECT] Empty 'WARC-Segment-Total-Length' field"
                )
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-lonely-continuation.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Target-URI' value: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Segment-Number' value: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Segment-Origin-ID' value: value: 'null'"
                )
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-lonely-monkeys.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'null'",
                    "[WARNING/OBJECT] Unknown 'WARC-Type' value: 'monkeys'",
                    "[WARNING/OBJECT] Error in Missing CR, expected 'Sequence of CRLFs'"
                )
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-lonely-request-response-resource-conversion.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Target-URI' value: value: 'null'"
                ),
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Target-URI' value: value: 'null'"
                ),
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Target-URI' value: value: 'null'"
                ),
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Target-URI' value: value: 'null'"
                )
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-lonely-revisit.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Target-URI' value: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Profile' value: value: 'null'"
                )
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-lonely-warcinfo-metadata.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'null'"
                ),
                msgs(
                    "[ERROR/OBJECT] Required and invalid 'WARC-Record-ID' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'WARC-Date' header: value: 'null'",
                    "[ERROR/OBJECT] Required and invalid 'Content-Length' header: value: 'null'"
                )
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-segment-number-continuation.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Invalid 'WARC-Segment-Number' value, value: '1', expected: '>1'"
                ),
                msgs()
            ),
            msgs()
        },
        {
            Validity.False, "invalid-warcfile-segment-number-response.warc",
            groups(
                msgs(
                    "[ERROR/OBJECT] Invalid Incorrect payload digest, value: 'D5817BF5B4B35A296823509DD754700A6AD522B5', expected: '1DBBF1909F9142B639402A6241C3873C462DAB32'"
                ),
                msgs(
                    "[ERROR/OBJECT] Invalid 'WARC-Segment-Number' value, value: '2', expected: '1'",
                    "[ERROR/OBJECT] Invalid Incorrect payload digest, value: 'D5817BF5B4B35A296823509DD754700A6AD522B5', expected: '1DBBF1909F9142B639402A6241C3873C462DAB32'"
                )
            ),
            msgs()
        },
        {
            Validity.True, "valid-warcfile-contenttype-continuation.warc",
            groups(msgs()),
            msgs()
        },
        {
            Validity.True, "valid-warcfile-duplicate-concurrentto.warc",
            groups(msgs()),
            msgs()
        },
        {
            Validity.True, "valid-warcfile-fields-continuation.warc",
            groups(msgs()),
            msgs()
        },
        {
            Validity.True, "valid-warcfile-fields-metainfo.warc",
            groups(msgs()),
            msgs()
        },
        {
            Validity.True, "valid-warcfile-fields-warcinfo.warc",
            groups(msgs()),
            msgs()
        },
        {
            Validity.True, "valid-warcfile-non-warc-headers.warc",
            groups(msgs()),
            msgs()
        },
        {
            Validity.True, "valid-warcfile-upper-lower-case.warc",
            groups(msgs(), msgs(), msgs(), msgs(), msgs()),
            msgs()
        },
        {
            Validity.True, "valid-warcfile-utf8.warc",
            groups(msgs()),
            msgs()
        }
    };

    /**
     * Runs every row of {@link #cases} through the inherited
     * {@code test_cases} method.
     */
    @Test
    public void test_warcmodule() {
        // NOTE(review): the meaning of the second argument is defined by
        // WarcModuleTestBase and is not visible from this file.
        test_cases(cases, false);
    }

    /**
     * Packs the given messages into a {@code String[]}; calling it without
     * arguments yields an empty array.
     *
     * @param messages expected messages, in order
     * @return the varargs array holding exactly {@code messages}
     */
    private static String[] msgs(String... messages) {
        return messages;
    }

    /**
     * Packs the given message arrays into a {@code String[][]}; calling it
     * without arguments yields an empty array.
     *
     * @param messageGroups message arrays, in order
     * @return the varargs array holding exactly {@code messageGroups}
     */
    private static String[][] groups(String[]... messageGroups) {
        return messageGroups;
    }
}