package com.yahoo.dtf.util; import java.io.InputStream; import java.io.IOException; import java.util.ArrayList; import java.util.zip.GZIPInputStream; import com.yahoo.dtf.actions.Action; import com.yahoo.dtf.exception.DTFException; import com.yahoo.dtf.exception.StorageException; import com.yahoo.dtf.storage.StorageIntf; /* * Using this class from bug id 4691425 in java... since the GZIPInputStream is * to special to handle the case of multiple files in the same gzip file. * * This was copied from the bug as a possible workaround and seems to solve * the problem quite well. * * Link to bug: http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4691425 * */ public class MultiMemberGZIPInputStream extends InputStream { private ArrayList<GZIPInputStream> entities = null; private String filename = null; private StorageIntf storage = null; private InputStream in = null; private GZIPInputStreamWrapper gzipin = null; public MultiMemberGZIPInputStream(String filename, StorageIntf storage) throws IOException, StorageException { this.filename = filename; this.storage = storage; in = storage.getInputStream(filename); gzipin = new GZIPInputStreamWrapper(in); } private void init() throws DTFException { Action.getLogger().debug("Calculating gzip boundaries..."); entities = new ArrayList<GZIPInputStream>(); int read = 0; int previous = 0; int now = 0; int count = 0; long start = System.currentTimeMillis(); try { byte[] mainBuffer = new byte[32*1024]; int mainRead = 0; InputStream in = storage.getInputStream(filename); while ( mainRead != -1 ) { // read a chunk of data then process the bytes for the right // gzip header signature. mainRead = in.read(mainBuffer); for (int i = 0; i < mainRead; i++) { byte aux = mainBuffer[i]; now = aux & 0xFF; // unsignedInt = signedByte & 0xFF; read++; int value = now << 8 | previous; previous = now; if (value == GZIPInputStream.GZIP_MAGIC) { InputStream auxin = storage.getInputStream(filename); int auxread = 0; try { /* * Figure out how many gzip files exist in the * multimember gzip file. First time is a test where * we parse the header and read 1KB of data to see * if there is anything completely messed up */ int skip = read-2; while ( (skip -= auxin.skip(skip)) != 0); GZIPInputStream gzipin = new GZIPInputStream(auxin); byte[] buffer = new byte[32*1024]; // read through the whole file and validate its really // a gzip file, horribly ineffecient but the only way of // being sure right now... XXX: ineffeciency. while ( (auxread = gzipin.read(buffer)) != -1 ); /* * Second time save the pointer to the file. */ gzipin.close(); auxin = storage.getInputStream(filename); skip = read-2; while ( (skip -= auxin.skip(skip)) != 0); gzipin = new GZIPInputStream(auxin); entities.add(gzipin); count++; } catch (IOException ignore) { if (auxread != 0) auxin.close(); } } } } } catch (IOException e) { throw new DTFException("Unable to handle multi part gzip file",e); } finally { if (in != null) { try { in.close(); } catch (IOException e ) { throw new DTFException("Unable to handle multi part gzip file",e); } } } long stop = System.currentTimeMillis(); if (Action.getLogger().isDebugEnabled()) { Action.getLogger().debug("Gzip preprocessing took " + (stop-start) + "ms"); } } public synchronized int getEntityCount() throws DTFException { if (entities == null) init(); return entities.size(); } public GZIPInputStream getEntity(int index) { return entities.get(index); } public int read(byte[] b) throws IOException { if ( entities != null ) { if (entities.size() == 0) { return -1; } int read = entities.get(0).read(b); if (read == -1) { entities.remove(0); return read(b); } return read; } else { return gzipin.read(b); } } public int read(byte[] b, int off, int len) throws IOException { if ( entities != null ) { if (entities.size() == 0) { return -1; } int read = entities.get(0).read(b, off, len); if (read == -1) { entities.remove(0); return read(b, off, len); } return read; } else { return gzipin.read(b, off, len); } } public int read() throws IOException { if ( entities != null ) { if (entities.size() == 0) { return -1; } int read = entities.get(0).read(); if (read == -1) { entities.remove(0); return read(); } return read; } else { return gzipin.read(); } } public void close() throws IOException { if ( entities != null ) { for (int i = 0; i < entities.size(); i++) { entities.get(i).close(); } } if ( gzipin != null ) gzipin.close(); } }