/*
* This file is part of the Wayback archival access software
* (http://archive-access.sourceforge.net/projects/wayback/).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.wayback.resourceindex.ziplines;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import org.archive.util.zip.OpenJDK7GZIPInputStream;
import org.archive.wayback.util.ByteOp;
/**
* @author brad
*
*/
public class ZiplinedBlock {
private static final Logger LOGGER = Logger.getLogger(
ZiplinedBlock.class.getName());
BlockLoader loader = null;
String urlOrPaths[] = null;
long offset = -1;
public final static int BLOCK_SIZE = 128 * 1024;
int count = BLOCK_SIZE;
public final static String RANGE_HEADER = "Range";
public final static String BYTES_HEADER = "bytes=";
public final static String BYTES_MINUS = "-";
/**
* @param urlOrPaths Array of URLs where this file can be downloaded.
* @param offset start of 128K block boundary.
*/
public ZiplinedBlock(String urlOrPaths[], long offset) {
this(urlOrPaths,offset,BLOCK_SIZE);
}
/**
* @param urlOrPaths Array of URLs where this file can be downloaded.
* @param offset start of 128K block boundary.
* @param count number of records in this block
*/
public ZiplinedBlock(String urlOrPaths[], long offset, int count) {
this.urlOrPaths = urlOrPaths;
this.offset = offset;
this.count = count;
}
/**
* @param loader the RemoteHttp11BlockLoader to use when fetching this block
*/
public void setLoader(BlockLoader loader) {
this.loader = loader;
}
/**
* @return a BufferedReader of the underlying compressed data in this block
* @throws IOException for usual reasons
*/
public BufferedReader readBlock() throws IOException {
if(loader != null) {
return readBlockEfficiently(loader);
}
return readBlockInefficiently();
}
protected byte[] attemptBlockLoad(BlockLoader remote) {
for(String urlOrPath : urlOrPaths) {
try {
return remote.getBlock(urlOrPath, offset, count);
} catch (IOException e) {
LOGGER.warning(String.format("FAILED to load(%s) (%d:%d)",
urlOrPath,offset,count));
}
}
return null;
}
protected BufferedReader readBlockEfficiently(BlockLoader remote)
throws IOException {
byte bytes[] = attemptBlockLoad(remote);
if(bytes == null) {
throw new IOException("Unable to load block!");
}
return new BufferedReader(new InputStreamReader(
new OpenJDK7GZIPInputStream(new ByteArrayInputStream(bytes)),
ByteOp.UTF8));
// return new BufferedReader(new InputStreamReader(
// new GZIPInputStream(new ByteArrayInputStream(bytes)),
// ByteOp.UTF8));
}
protected BufferedReader readBlockInefficiently() throws IOException {
StringBuilder sb = new StringBuilder(16);
sb.append(BYTES_HEADER).append(offset).append(BYTES_MINUS);
sb.append((offset + count)-1);
LOGGER.fine("Reading block:" + urlOrPaths[0] + "("+sb.toString()+")");
// TODO: timeouts
URL u = new URL(urlOrPaths[0]);
URLConnection uc = u.openConnection();
uc.setRequestProperty(RANGE_HEADER, sb.toString());
return new BufferedReader(new InputStreamReader(
new GZIPInputStream(uc.getInputStream()),ByteOp.UTF8));
}
}