package org.archive.server; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; import java.util.logging.Level; import java.util.logging.Logger; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.archive.format.gzip.GZIPFormatException; import org.archive.format.gzip.GZIPMemberSeries; import org.archive.format.gzip.GZIPSeriesMember; import org.archive.streamcontext.SimpleStream; import org.mortbay.jetty.Request; import org.mortbay.jetty.Server; import org.mortbay.jetty.handler.AbstractHandler; import com.google.common.io.ByteStreams; import com.google.common.io.LimitInputStream; public class GZRangeServer extends AbstractHandler implements Tool { public final static String TOOL_NAME = "gzrange-server"; public static final String TOOL_DESCRIPTION = "Run a special gzrange HTTP server."; private Configuration conf; public void setConf(Configuration conf) { this.conf = conf; } public Configuration getConf() { return conf; } private static int USAGE(int code) { System.err.println("USAGE"); System.err.println(TOOL_NAME + " [PORT]"); System.err.println("run a Jetty HTTP server listening on PORT (or 8009 if omitted)"); System.err.println("The server handles unbounded HTTP 1.1 Range requests for GZ members in a special"); System.err.println("fashion: it will scan ahead to determing the compressed length of the gzip member"); System.err.println("starting at the range start offset, and will return the exact number"); System.err.println("of compressed bytes in the member, including setting the Content-Length response header."); return code; } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new GZRangeServer(), args); System.exit(res); } public int run(String[] args) throws Exception { Logger.getLogger(GZIPMemberSeries.class.getName()).setLevel(Level.WARNING); int port = 8009; if(args.length > 0) { try { port = Integer.parseInt(args[0]); } catch(NumberFormatException e) { return USAGE(1); } } Server server = new Server(port); server.setHandler(new GZRangeServer()); server.start(); server.join(); return 0; } private long parseBytes(String range) { if(range == null) { return -1; } if(range.startsWith("bytes=")) { if(range.endsWith("-")) { String rem = range.substring(6,range.length()-1); try { return Long.parseLong(rem); } catch(NumberFormatException e) { } } } return -1; } private long getGZLength(InputStream is) throws IOException, GZIPFormatException { SimpleStream s = new SimpleStream(is); GZIPMemberSeries gzs = new GZIPMemberSeries(s,"range",0,true); GZIPSeriesMember m = gzs.getNextMember(); m.skipMember(); return m.getCompressedBytesRead(); } public void handle(String target, HttpServletRequest request, HttpServletResponse response, int dispatch) throws IOException, ServletException { File file = new File(target); if(file.isFile()) { if(file.canRead()) { String range = request.getHeader("Range"); long offset = parseBytes(range); if(offset == -1) { range = request.getParameter("offset"); if(range != null) { try { offset = Long.parseLong(range); } catch(NumberFormatException e) { } } } if(range == null) { range = "null"; } if(offset == -1) { response.setContentType("text/plain;charset=utf-8"); response.setStatus(HttpServletResponse.SC_BAD_REQUEST); response.getWriter().println("Require \"Range\" header " + "or \"offset\" GET parameter"); } else { long length = file.length(); if(offset > length) { response.setContentType("text/plain;charset=utf-8"); response.setStatus(HttpServletResponse.SC_REQUESTED_RANGE_NOT_SATISFIABLE); response.getWriter().println("range past EOF"); } else { // OK, find then end of the record: RandomAccessFile raf = new RandomAccessFile(file, "r"); raf.seek(offset); FileInputStream fis = new FileInputStream(raf.getFD()); long gzLength = -1; try { gzLength = getGZLength(fis); } catch(GZIPFormatException e) { } if(gzLength == -1) { response.setContentType("text/plain;charset=utf-8"); response.setStatus(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE); response.getWriter().println("corrupt range, or gzip alignment error"); } else { raf.seek(offset); fis = new FileInputStream(raf.getFD()); response.setContentType("application/octet-stream"); response.setStatus(HttpServletResponse.SC_PARTIAL_CONTENT); response.setContentLength((int)gzLength); LimitInputStream lis = new LimitInputStream(fis, gzLength); long copied = ByteStreams.copy(lis, response.getOutputStream()); if(copied != gzLength) { throw new IOException("Short copy Want(" + gzLength + ") copied(" + copied + ")"); } } } } } else { response.setContentType("text/plain;charset=utf-8"); response.setStatus(HttpServletResponse.SC_FORBIDDEN); response.getWriter().println("not readable\n"); } } else { response.setContentType("text/plain;charset=utf-8"); response.setStatus(HttpServletResponse.SC_NOT_FOUND); response.getWriter().println("not found\n"); } ((Request)request).setHandled(true); } }