/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual
 *  contributors.
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.util.zip;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Random;

import junit.framework.TestCase;

import org.apache.commons.io.IOUtils;
import org.archive.util.ArchiveUtils;
import org.archive.util.zip.GZIPMembersInputStream;

import com.google.common.io.NullOutputStream;
import com.google.common.primitives.Bytes;

/**
 * Tests for GZIPMembersInputStream
 *
 * <p>Fixtures are gzip members built with {@link ArchiveUtils#gzip(byte[])}:
 * 1 KiB and 32 KiB of seeded pseudo-random bytes, the text "a", and the text
 * "hello". {@code allfour_gz} concatenates those four members in that order;
 * {@code sixsmall_gz} alternates the "a" and "hello" members three times.
 *
 * @contributor gojomo
 * @version $ $
 */
public class GZIPMembersInputStreamTest extends TestCase {
    byte[] noise1k_gz;
    byte[] noise32k_gz;
    byte[] a_gz;
    byte[] hello_gz;
    byte[] allfour_gz;
    byte[] sixsmall_gz;

    /** Compressed offset in {@code allfour_gz} where the 32k member starts. */
    private int member1Offset;
    /** Compressed offset in {@code allfour_gz} where the "a" member starts. */
    private int member2Offset;
    /** Compressed offset in {@code allfour_gz} where the "hello" member starts. */
    private int member3Offset;
    /** Compressed offset just past the last member of {@code allfour_gz}. */
    private int allFourEndOffset;

    /** Stream handed out by {@link #open(byte[])}; closed in {@link #tearDown()}. */
    private GZIPMembersInputStream opened;

    {
        // Fixed seed so the noise members are identical on every run.
        Random rand = new Random(1);
        try {
            byte[] buf = new byte[1024];
            rand.nextBytes(buf);
            noise1k_gz = ArchiveUtils.gzip(buf);
            buf = new byte[32*1024];
            rand.nextBytes(buf);
            noise32k_gz = ArchiveUtils.gzip(buf);
            a_gz = ArchiveUtils.gzip("a".getBytes("ASCII"));
            hello_gz = ArchiveUtils.gzip("hello".getBytes("ASCII"));
            allfour_gz = Bytes.concat(noise1k_gz,noise32k_gz,a_gz,hello_gz);
            sixsmall_gz = Bytes.concat(a_gz,hello_gz,a_gz,hello_gz,a_gz,hello_gz);
        } catch (IOException e) {
            // Not expected for in-memory data, but fail loudly rather than
            // leave the fixtures null and hit an unrelated NPE in a test.
            throw new IllegalStateException("failed to build gzip test fixtures", e);
        }
        member1Offset = noise1k_gz.length;
        member2Offset = member1Offset + noise32k_gz.length;
        member3Offset = member2Offset + a_gz.length;
        allFourEndOffset = member3Offset + hello_gz.length;
    }

    public static void main(String [] args) {
        junit.textui.TestRunner.run(GZIPMembersInputStreamTest.class);
    }

    /**
     * Closes the stream opened by the test, if any, so it is released even
     * when the test fails part-way through.
     */
    @Override
    protected void tearDown() throws Exception {
        try {
            if (opened != null) {
                opened.close();
            }
        } finally {
            opened = null;
            super.tearDown();
        }
    }

    /**
     * Wraps the given gzip bytes in a GZIPMembersInputStream and remembers
     * the stream so {@link #tearDown()} can close it.
     *
     * @param gz concatenated gzip members to read
     * @return the newly opened stream
     * @throws IOException if the stream cannot be constructed
     */
    private GZIPMembersInputStream open(byte[] gz) throws IOException {
        opened = new GZIPMembersInputStream(new ByteArrayInputStream(gz));
        return opened;
    }

    /**
     * Copies the stream into a discarding sink until it reports end-of-file.
     *
     * @return the byte count reported by {@code IOUtils.copy}
     */
    private static int drain(GZIPMembersInputStream in) throws IOException {
        return IOUtils.copy(in, new NullOutputStream());
    }

    /**
     * Reads with the single-byte {@code read()} until it returns a negative
     * value.
     *
     * @return how many bytes were read before end-of-file
     */
    private static int drainByteAtATime(GZIPMembersInputStream in) throws IOException {
        int count = 0;
        while (in.read() > -1) {
            count++;
        }
        return count;
    }

    /** A plain read of the four-member stream should yield every member's data. */
    public void testFullReadAllFour() throws IOException {
        GZIPMembersInputStream gzin = open(allfour_gz);
        int count = drain(gzin);
        assertEquals("wrong length uncompressed data", 1024+(32*1024)+1+5, count);
    }

    /** A plain read of the six-member stream should yield every member's data. */
    public void testFullReadSixSmall() throws IOException {
        GZIPMembersInputStream gzin = open(sixsmall_gz);
        int count = drain(gzin);
        assertEquals("wrong length uncompressed data", 1+5+1+5+1+5, count);
    }

    /**
     * With eof-each-member enabled, each bulk read is expected to stop at the
     * member boundary, and the member number and compressed start/end offsets
     * are expected to track the member just read.
     */
    public void testReadPerMemberAllFour() throws IOException {
        GZIPMembersInputStream gzin = open(allfour_gz);
        gzin.setEofEachMember(true);

        int count0 = drain(gzin);
        assertEquals("wrong 1k member count", 1024, count0);
        assertEquals("wrong member number", 0, gzin.getMemberNumber());
        assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart());
        assertEquals("wrong member0 end", member1Offset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        int count1 = drain(gzin);
        assertEquals("wrong 32k member count", (32*1024), count1);
        assertEquals("wrong member number", 1, gzin.getMemberNumber());
        assertEquals("wrong member1 start", member1Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong member1 end", member2Offset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        int count2 = drain(gzin);
        assertEquals("wrong 1-byte member count", 1, count2);
        assertEquals("wrong member number", 2, gzin.getMemberNumber());
        assertEquals("wrong member2 start", member2Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong member2 end", member3Offset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        int count3 = drain(gzin);
        assertEquals("wrong 5-byte member count", 5, count3);
        assertEquals("wrong member number", 3, gzin.getMemberNumber());
        assertEquals("wrong member3 start", member3Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong member3 end", allFourEndOffset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        // No members remain, so nothing more should be readable.
        int countEnd = drain(gzin);
        assertEquals("wrong eof count", 0, countEnd);
    }

    /**
     * Per-member reads over the alternating "a"/"hello" stream should yield
     * 1 byte then 5 bytes, three times over, then nothing.
     */
    public void testReadPerMemberSixSmall() throws IOException {
        GZIPMembersInputStream gzin = open(sixsmall_gz);
        gzin.setEofEachMember(true);
        for (int pair = 0; pair < 3; pair++) {
            int oneByteCount = drain(gzin);
            assertEquals("wrong 1-byte member count", 1, oneByteCount);
            gzin.nextMember();
            int fiveByteCount = drain(gzin);
            assertEquals("wrong 5-byte member count", 5, fiveByteCount);
            gzin.nextMember();
        }
        int countEnd = drain(gzin);
        assertEquals("wrong eof count", 0, countEnd);
    }

    /**
     * Same expectations as {@link #testReadPerMemberAllFour()}, but driving
     * the stream through the single-byte {@code read()} method.
     */
    public void testByteReadPerMember() throws IOException {
        GZIPMembersInputStream gzin = open(allfour_gz);
        gzin.setEofEachMember(true);

        int count0 = drainByteAtATime(gzin);
        assertEquals("wrong 1k member count", 1024, count0);
        assertEquals("wrong member number", 0, gzin.getMemberNumber());
        assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart());
        assertEquals("wrong member0 end", member1Offset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        int count1 = drainByteAtATime(gzin);
        assertEquals("wrong 32k member count", (32*1024), count1);
        assertEquals("wrong member number", 1, gzin.getMemberNumber());
        assertEquals("wrong member1 start", member1Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong member1 end", member2Offset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        int count2 = drainByteAtATime(gzin);
        assertEquals("wrong 1-byte member count", 1, count2);
        assertEquals("wrong member number", 2, gzin.getMemberNumber());
        assertEquals("wrong member2 start", member2Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong member2 end", member3Offset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        int count3 = drainByteAtATime(gzin);
        assertEquals("wrong 5-byte member count", 5, count3);
        assertEquals("wrong member number", 3, gzin.getMemberNumber());
        assertEquals("wrong member3 start", member3Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong member3 end", allFourEndOffset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        int countEnd = drainByteAtATime(gzin);
        assertEquals("wrong eof count", 0, countEnd);
    }

    /**
     * Seeks by compressed offset straight to the third member ("a") and
     * checks that it and the following member read back with the expected
     * sizes and compressed start/end offsets.
     */
    public void testMemberSeek() throws IOException {
        GZIPMembersInputStream gzin = open(allfour_gz);
        gzin.setEofEachMember(true);
        gzin.compressedSeek(member2Offset);

        // NOTE(review): the member-number assertions below were already
        // disabled; presumably the member number is not meaningful after a
        // compressedSeek -- confirm against GZIPMembersInputStream.
        int count2 = drain(gzin);
        assertEquals("wrong 1-byte member count", 1, count2);
//        assertEquals("wrong Member number", 2, gzin.getMemberNumber());
        assertEquals("wrong Member2 start", member2Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong Member2 end", member3Offset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        int count3 = drain(gzin);
        assertEquals("wrong 5-byte member count", 5, count3);
//        assertEquals("wrong Member number", 3, gzin.getMemberNumber());
        assertEquals("wrong Member3 start", member3Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong Member3 end", allFourEndOffset, gzin.getCurrentMemberEnd());
        gzin.nextMember();

        int countEnd = drain(gzin);
        assertEquals("wrong eof count", 0, countEnd);
    }

    /**
     * Walks the four members through {@code memberIterator()}, draining each
     * returned member stream and checking the position bookkeeping reported
     * by the underlying stream after each one.
     */
    @SuppressWarnings("deprecation")
    public void testMemberIterator() throws IOException {
        GZIPMembersInputStream gzin = open(allfour_gz);
        Iterator<GZIPMembersInputStream> iter = gzin.memberIterator();

        assertTrue(iter.hasNext());
        GZIPMembersInputStream gzMember0 = iter.next();
        int count0 = drain(gzMember0);
        assertEquals("wrong 1k member count", 1024, count0);
        assertEquals("wrong member number", 0, gzin.getMemberNumber());
        assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart());
        assertEquals("wrong member0 end", member1Offset, gzin.getCurrentMemberEnd());

        assertTrue(iter.hasNext());
        GZIPMembersInputStream gzMember1 = iter.next();
        int count1 = drain(gzMember1);
        assertEquals("wrong 32k member count", (32*1024), count1);
        assertEquals("wrong member number", 1, gzin.getMemberNumber());
        assertEquals("wrong member1 start", member1Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong member1 end", member2Offset, gzin.getCurrentMemberEnd());

        assertTrue(iter.hasNext());
        GZIPMembersInputStream gzMember2 = iter.next();
        int count2 = drain(gzMember2);
        assertEquals("wrong 1-byte member count", 1, count2);
        assertEquals("wrong member number", 2, gzin.getMemberNumber());
        assertEquals("wrong member2 start", member2Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong member2 end", member3Offset, gzin.getCurrentMemberEnd());

        assertTrue(iter.hasNext());
        GZIPMembersInputStream gzMember3 = iter.next();
        int count3 = drain(gzMember3);
        assertEquals("wrong 5-byte member count", 5, count3);
        assertEquals("wrong member number", 3, gzin.getMemberNumber());
        assertEquals("wrong member3 start", member3Offset, gzin.getCurrentMemberStart());
        assertEquals("wrong member3 end", allFourEndOffset, gzin.getCurrentMemberEnd());

        assertFalse(iter.hasNext());
    }
}