/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import static org.apache.pig.ExecType.MAPREDUCE;
import java.io.File;
import java.io.FileOutputStream;
import java.util.HashMap;
import java.util.Iterator;
import junit.framework.TestCase;
import org.apache.pig.PigServer;
import org.apache.pig.backend.local.datastorage.LocalSeekableInputStream;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.tools.bzip2r.CBZip2InputStream;
import org.apache.tools.bzip2r.CBZip2OutputStream;
import org.junit.Test;
/**
 * Tests for bzip2 handling: direct use of {@link CBZip2OutputStream} and
 * {@link CBZip2InputStream}, and end-to-end load/store of bzip2 data
 * through a {@link PigServer} running in MAPREDUCE mode.
 */
public class TestBZip extends TestCase {

    // Built when the test instance is constructed, i.e. before any PigServer
    // below is created.
    // NOTE(review): presumably the MAPREDUCE PigServer instances run against
    // this mini cluster -- confirm against MiniCluster.
    MiniCluster cluster = MiniCluster.buildCluster();

    /**
     * Tests the end-to-end writing and reading of a BZip file.
     *
     * Writes the values 1..99 and -1..-99 (one per line) into a bzip2 input
     * file, stores only the positive values through Pig into a ".bz" output
     * location, then checks that the stored part file is readable as bzip2
     * and that loading it back yields exactly the values 1..99.
     *
     * @throws Exception if any file or Pig operation fails
     */
    @Test
    public void testBzipInPig() throws Exception {
        PigServer pig = new PigServer(MAPREDUCE);
        deleteQuietly(pig, "junit-out.bz");

        File in = File.createTempFile("junit", ".bz");
        in.deleteOnExit();

        File out = File.createTempFile("junit", ".bz");
        out.deleteOnExit();
        // Only the unique path is kept; the placeholder file is removed
        // before the store below writes to that location.
        out.delete();

        try {
            CBZip2OutputStream cos = new CBZip2OutputStream(
                    new FileOutputStream(in));
            try {
                for (int i = 1; i < 100; i++) {
                    cos.write((i + "\n").getBytes("UTF-8"));
                    cos.write((-i + "\n").getBytes("UTF-8"));
                }
            } finally {
                cos.close();
            }

            pig.registerQuery("AA=load '" + Util.generateURI(in.getAbsolutePath()) + "';");
            pig.registerQuery("A=foreach (group (filter AA by $0 > 0) all) generate flatten($1);");
            pig.store("A", Util.generateURI(out.getAbsolutePath()));

            CBZip2InputStream cis = new CBZip2InputStream(
                    new LocalSeekableInputStream(new File(out, "part-00000.bz")));
            try {
                // Just a sanity check, to make sure it was a bzip file; we
                // will do the value verification later
                assertEquals(100, cis.read(new byte[100]));
            } finally {
                cis.close();
            }

            pig.registerQuery("B=load '" + Util.generateURI(out.getAbsolutePath()) + "';");
            Iterator<Tuple> tuples = pig.openIterator("B");
            HashMap<Integer, Integer> seen = new HashMap<Integer, Integer>();
            while (tuples.hasNext()) {
                Integer val = DataType.toInteger(tuples.next().get(0));
                seen.put(val, val);
            }

            // Exactly the 99 distinct positive values must have survived.
            assertEquals(99, seen.size());
            for (int j = 1; j < 100; j++) {
                assertEquals(Integer.valueOf(j), seen.get(j));
            }
        } finally {
            // Clean up even when an assertion or Pig call above fails.
            in.delete();
            // The store leaves a directory at 'out'; a plain delete() would
            // fail on it while it still contains the part file.
            deleteRecursively(out);
        }
    }

    /**
     * Tests the end-to-end writing and reading of an empty BZip file.
     *
     * The single input row is expected to be rejected by the filter, so the
     * stored part file must decompress to no data at all, and loading that
     * output back through Pig must not fail.
     *
     * @throws Exception if any file or Pig operation fails
     */
    @Test
    public void testEmptyBzipInPig() throws Exception {
        PigServer pig = new PigServer(MAPREDUCE);
        deleteQuietly(pig, "junit-out.bz");

        File in = File.createTempFile("junit", ".tmp");
        in.deleteOnExit();

        File out = File.createTempFile("junit", ".bz");
        out.deleteOnExit();
        // Only the unique path is kept; the placeholder file is removed
        // before the store below writes to that location.
        out.delete();

        try {
            FileOutputStream fos = new FileOutputStream(in);
            try {
                fos.write("55\n".getBytes("UTF-8"));
            } finally {
                fos.close();
            }
            System.out.println(in.getAbsolutePath());

            pig.registerQuery("AA=load '" + Util.generateURI(in.getAbsolutePath()) + "';");
            pig.registerQuery("A=foreach (group (filter AA by $0 < '0') all) generate flatten($1);");
            pig.store("A", Util.generateURI(out.getAbsolutePath()));

            CBZip2InputStream cis = new CBZip2InputStream(
                    new LocalSeekableInputStream(new File(out, "part-00000.bz")));
            try {
                // End of stream right away: the part file holds no data.
                assertEquals(-1, cis.read(new byte[100]));
            } finally {
                cis.close();
            }

            pig.registerQuery("B=load '" + Util.generateURI(out.getAbsolutePath()) + "';");
            // Only checks that opening the empty output does not throw.
            pig.openIterator("B");
        } finally {
            // Clean up even when an assertion or Pig call above fails.
            in.delete();
            // The store leaves a directory at 'out'; a plain delete() would
            // fail on it while it still contains the part file.
            deleteRecursively(out);
        }
    }

    /**
     * Tests the writing and reading of an empty BZip file.
     *
     * Closing a {@link CBZip2OutputStream} without writing anything must
     * still produce a non-empty file, and reading that file back must report
     * end of stream immediately.
     *
     * @throws Exception if any file operation fails
     */
    @Test
    public void testEmptyBzip() throws Exception {
        File tmp = File.createTempFile("junit", ".tmp");
        tmp.deleteOnExit();
        try {
            CBZip2OutputStream cos = new CBZip2OutputStream(
                    new FileOutputStream(tmp));
            cos.close();

            // assertNotSame(0, tmp.length()) compared two freshly boxed
            // objects by identity and could never fail; compare the value.
            assertTrue("empty bzip stream should still produce output bytes",
                    tmp.length() > 0);

            CBZip2InputStream cis = new CBZip2InputStream(
                    new LocalSeekableInputStream(tmp));
            try {
                assertEquals(-1, cis.read(new byte[100]));
            } finally {
                cis.close();
            }
        } finally {
            tmp.delete();
        }
    }

    /**
     * Best-effort removal of a file through Pig before a test starts; any
     * failure is deliberately ignored.
     */
    private static void deleteQuietly(PigServer pig, String name) {
        try {
            pig.deleteFile(name);
        } catch (Exception ignored) {
            // Pre-clean only: a failure here must not fail the test.
        }
    }

    /**
     * Deletes the given path; if it is a directory, its contents are deleted
     * first. Failures are ignored, as with the plain delete() calls it
     * replaces.
     */
    private static void deleteRecursively(File path) {
        File[] children = path.listFiles(); // null when not a directory
        if (children != null) {
            for (File child : children) {
                deleteRecursively(child);
            }
        }
        path.delete();
    }
}