/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package io.milton.zsync; import io.milton.common.BufferingOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.SequenceInputStream; import java.io.UnsupportedEncodingException; import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.text.ParseException; import java.util.ArrayList; import java.util.Enumeration; import java.util.Iterator; import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; /** * A container for the information transmitted in a ZSync PUT upload. The information currently consists of some * headers (file length, block size, etc...), an InputStream containing a list of RelocateRanges for relocating matching blocks, * and an InputStream containing a sequence of data chunks (along with their ranges). The Upload class also contains methods for * translating to/from a stream (getInputStream and parse, respectively). * * @author Nick * */ public class Upload { /** * The character encoding used to convert Strings to bytes. The default is US-ASCII. * The methods involved in parsing assume one byte per character. */ public final static String CHARSET = "US-ASCII"; /** * The character marking the end of a line. The default is '\n' */ public final static char LF = '\n'; /** * A String that marks the beginning of a range of uploaded bytes. Currently unused. */ public String DIV = "--DIVIDER"; public final static String VERSION = "zsync"; public final static String BLOCKSIZE = "Blocksize"; public final static String FILELENGTH = "Length"; /** * The total number of bytes of new data to be transmitted. Currently Unused. */ public final static String NEWDATA = "ContentLength"; public final static String SHA_1 = "SHA-1"; public final static String RELOCATE = "Relocate"; public final static String RANGE = "Range"; private String version; private String sha1; private long blocksize; private long filelength; private InputStream relocStream; private InputStream dataStream; /** * Returns the list of headers in String format, in the proper format for upload. The * list is terminated by the LF character. * * @return A String containing the headers */ public String getParams(){ StringBuilder sbr = new StringBuilder(); sbr.append( paramString( VERSION, version ) ); sbr.append( paramString( FILELENGTH, filelength ) ); sbr.append( paramString( BLOCKSIZE, blocksize ) ); sbr.append( paramString( SHA_1, sha1 ) ); return sbr.toString(); } public static String paramString( String key, Object value ){ return key + ": " + value + LF; } /** * Constructs an empty Upload object. Its fields need to be set individually. */ public Upload(){ //this.relocList = new ArrayList<RelocateRange>(); //this.dataList = new ArrayList<DataRange>(); } /** * Parses the InputStream into an Upload object.<p/> * * The method initially parses the headers from the InputStream by reading the sequence of keys (the String preceding the first colon in each line) * and values ( the String following the colon and terminated by the LF character ) and invoking {@link #parseParam} on each key value pair. * If the key is RELOCATE, then the value is not read, but is copied into a BufferingOutputStream and stored in the relocStream field. Parsing of headers * continues until a "blank" line is reached, ie a line that is null or contains only whitespace, which indicates the beginning of the data section. * A reference to the remaining InputStream is then stored in the dataStream field.<p/> * * @param in The InputStream containing the ZSync upload * @return A filled in Upload object */ public static Upload parse(InputStream in) { Upload um = new Upload(); int bytesRead = 0; //Enables a ParseException to specify the offset try{ //Maximum number of bytes to search for delimiters int MAX_SEARCH = 1024; String key; //Parse headers until a null/all-whitespace line is encountered while ( !StringUtils.isBlank( ( key = readKey( in, MAX_SEARCH ) ) ) ) { /* * Add one to bytesRead since the delimiter was read but omitted from the String. * The final value of bytesRead may end up off by one if the end of input is reached, since no * delimiter is read in that case. */ bytesRead += key.length() + 1; key = key.trim(); if ( key.equalsIgnoreCase( RELOCATE ) ) { /* * Copies the Relocate values to a BufferingOutputStream */ BufferingOutputStream relocOut = new BufferingOutputStream( 16384 ); bytesRead += copyLine( in, 1024*1024*64, relocOut ); relocOut.close(); um.setRelocStream( relocOut.getInputStream() ); } else { /* * Key is not "Relocate", so parse header */ String value = readValue( in, MAX_SEARCH ); bytesRead += value.length() + 1; value = value.trim(); um.parseParam( key, value ); } } /* * A blank line has been read, indicating the end of the headers, so the unread * portion of the InputStream is the byte range section. */ um.setDataStream( in ); } catch ( IOException e ) { throw new RuntimeException( "Couldn't parse upload, IOException.", e ); } catch( ParseException e ){ //Set the offset of the ParseException to bytesRead ParseException ex = new ParseException( e.getMessage(), bytesRead ); throw new RuntimeException( ex ); } return um; } /** * Returns the next String terminated by one of the specified delimiters or the end of the InputStream.<p/> * * This method simply reads from an InputStream one byte at a time, up to maxsearch bytes, until it reads a byte equal to one of the delimiters * or reaches the end of the stream. It uses the CHARSET encoding to translate the bytes read into a String, which it returns with delimiter excluded, * or it throws a ParseException if maxSearch bytes are read without reaching a delimiter or the end of the stream.<p/> * * A non-buffering method is used because a buffering reader would likely pull in part of the binary data * from the InputStream. An alternative is to use a BufferedReader with a given buffer size and use * mark and reset to get back binary data pulled into the buffer. * * @param in The InputStream to read from * @param delimiters A list of byte values, each of which indicates the end of a token * @param maxsearch The maximum number of bytes to search for a delimiter * @return The String containing the CHARSET decoded String with delimiter excluded * @throws IOException * @throws ParseException If a delimiter byte is not found within maxsearch reads */ public static String readToken( InputStream in, byte[] delimiters, int maxsearch ) throws ParseException, IOException { if ( maxsearch <= 0 ) { throw new RuntimeException( "readToken: Invalid maxsearch " + maxsearch ); } ByteBuffer bytes = ByteBuffer.allocate( maxsearch ); byte nextByte; try { read: while ( ( nextByte = (byte) in.read() ) > -1 ) { for ( byte delimiter : delimiters ) { if ( nextByte == delimiter ) { break read; } } bytes.put( nextByte ); } bytes.flip(); return Charset.forName( CHARSET ).decode( bytes ).toString(); } catch ( BufferOverflowException ex ) { throw new ParseException( "Could not find delimiter within " + maxsearch + " bytes.", 0 ); } } /** * Helper method that reads the String preceding the first colon or newline in the InputStream. * * @param in The InputStream to read from * @param maxsearch The maximum number of bytes allowed in the key * @return The CHARSET encoded String that was read * @throws ParseException If a colon, newline, or end of input is not reached within maxsearch reads * @throws IOException */ private static String readKey ( InputStream in, int maxsearch ) throws ParseException, IOException { byte NEWLINE = Character.toString( LF ).getBytes( CHARSET )[0]; byte COLON = ":".getBytes( CHARSET )[0]; byte[] delimiters = { NEWLINE, COLON }; return readToken( in, delimiters, maxsearch ); } /** * Helper method that reads the String preceding the first newline in the InputStream. * * @param in The InputStream to read from * @param maxsearch The maximum number of bytes allowed in the value * @return The CHARSET encoded String that was read * @throws ParseException If a newline or end of input is not reached within maxsearch reads * @throws IOException */ public static String readValue ( InputStream in, int maxsearch ) throws ParseException, IOException { byte NEWLINE = Character.toString( LF ).getBytes( CHARSET )[0]; byte[] delimiters = { NEWLINE }; return readToken( in, delimiters, maxsearch ); } /** * A helper method that reads from an InputStream and copies to an OutputStream until the LF character is read (The LF is not * copied to the OutputStream). An exception is thrown if maxsearch bytes are read without encountering LF. This is used by {@link #parse} * to copy the relocate values into a BufferingOutputStream. * * @param in The InputStream to read from * @param maxsearch The maximum number of bytes to search for a newline * @param out The OutputStream to copy into * @return The number of bytes read from in * @throws IOException * @throws ParseException If a newline is not found within maxsearch reads */ private static int copyLine( InputStream in, int maxsearch, OutputStream out ) throws IOException, ParseException { if ( maxsearch <= 0 ) { throw new RuntimeException( "copyLine: Invalid maxsearch " + maxsearch ); } byte nextByte, bytesRead = 0; byte NEWLINE = Character.toString( LF ).getBytes( CHARSET )[0]; while ( (nextByte = (byte) in.read()) > -1 ) { if ( ++bytesRead > maxsearch ) { throw new ParseException( "Could not find delimiter within " + maxsearch + " bytes.", 0 ); } if ( nextByte == NEWLINE ) { break; } out.write( nextByte ); } return bytesRead; } /** * Parses a String header by setting the appropriate field in upload if the key is recognized * and ignoring keys that are not recognized. * * @param key The key String with leading/trailing whitespace omitted * @param value The value String with leading/trailing whitespace omitted * @throws ParseException if the value of a recognized key cannot be properly parsed */ private void parseParam( String key, String value ) throws ParseException { if (StringUtils.isBlank( key ) || StringUtils.isBlank( value )) { return; } try{ if (key.equalsIgnoreCase(VERSION)){ this.setVersion(value); } else if (key.equalsIgnoreCase(FILELENGTH)){ this.setFilelength(Long.parseLong(value)); } else if (key.equalsIgnoreCase(BLOCKSIZE)){ this.setBlocksize(Long.parseLong(value)); } else if (key.equalsIgnoreCase(SHA_1)){ this.setSha1( value ); } } catch (NumberFormatException ex) { throw new ParseException( "Cannot parse " + value + " into a long.", -1 ); } } /** * Returns an InputStream containing a complete ZSync upload (Params, Relocate stream, and ByteRange stream), * ready to be sent as the body of a PUT request. <p/> * * Note: In this implementation, any temporary file used to store the RelocateRanges will be automatically deleted when this stream * is closed, so a second invocation of this method on the same Upload object is likely to throw an exception. * Therefore, this method should be used only once per Upload object. * * @return The complete ZSync upload * @throws UnsupportedEncodingException * @throws IOException */ public InputStream getInputStream() throws UnsupportedEncodingException, IOException{ List<InputStream> streamList = new ArrayList<InputStream>(); /* * The getParams and getRelocStream must be terminated by a single LF character. */ streamList.add( IOUtils.toInputStream( getParams() , CHARSET ) ); streamList.add( IOUtils.toInputStream( RELOCATE + ": ", CHARSET ) ); streamList.add( getRelocStream() ); /* Prepend the data portion with a blank line. */ streamList.add( IOUtils.toInputStream( Character.toString( LF ), CHARSET) ); streamList.add( getDataStream() ); return new SequenceInputStream( new IteratorEnum<InputStream>( streamList ) ); } /** * Gets the zsync version of the upload sender (client) */ public String getVersion() { return version; } /** * Sets the zsync version of the upload sender (client) */ public void setVersion(String version) { this.version = version; } /** * Gets the checksum for the entire source file */ public String getSha1() { return sha1; } /** * Sets the checksum for the entire source file, which allow the server to validate the new file * after assembling it. */ public void setSha1(String sha1) { this.sha1 = sha1; } /** * Gets the blocksize used in the upload. */ public long getBlocksize() { return blocksize; } /** * Sets the blocksize used in the upload. The server needs this to translate block ranges into byte ranges */ public void setBlocksize(long blocksize) { //System.out.println("Upload: setBlockSize: " + blocksize); this.blocksize = blocksize; } /** * Gets the length of the (assembled) source file being uploaded */ public long getFilelength() { return filelength; } /** * Sets the length of the (assembled) source file being uploaded */ public void setFilelength(long filelength) { this.filelength = filelength; } /** * * Gets the list of RelocateRanges, which tells the server which blocks of the previous * file to keep, and where to place them in the new file. The current format is a comma * separated list terminated by LF. * */ public InputStream getRelocStream() { return relocStream; } /** * * Sets the list of RelocateRanges, which tells the server which blocks of the previous * file to keep, and where to place them in the new file. The current format is a comma * separated list terminated by LF. * * @param relocStream */ public void setRelocStream(InputStream relocStream) { this.relocStream = relocStream; } /** * Gets the list of uploaded data chunks ( byte Ranges and their associated data ). */ public InputStream getDataStream() { return dataStream; } /** * Sets the list of data chunks to be uploaded ( byte Ranges and their associated data ). The stream * should contain no leading whitespace. * */ public void setDataStream(InputStream dataStream) { this.dataStream = dataStream; } /** * An <code>Enumeration</code> wrapper for an Iterator. This is needed in order to construct * a <code>SequenceInputStream</code> (used to concatenate upload sections), which takes an <code>Enumeration</code> argument. * * @author Nick * * @param <T> The type of object being enumerated */ public static class IteratorEnum <T> implements Enumeration<T>{ Iterator<T> iter; public IteratorEnum( List<T> list ) { this.iter = list.iterator(); } @Override public boolean hasMoreElements() { return iter.hasNext(); } @Override public T nextElement() { return iter.next(); } } /** * An object representing a (Key, Value) pair of Strings. Currently unused. * * @author Nick * */ public static class KeyValue { public String KEY; public String VALUE; public KeyValue ( String key, String value ) { this.KEY = key; this.VALUE = value; } /** * Parses a String of the form "foo: bar" into a KeyValue object whose KEY is the * String preceding the first colon and VALUE is the String following the first colon * ( leading and trailing whitespaces are removed from KEY and VALUE ). A ParseException is * thrown if the input String does not contain a colon. * * @param kv A String of the form "foo: bar" * @return A KeyValue object with a KEY of "foo" and a VALUE of "bar" * @throws ParseException If no colon is found in <b>kv</b> */ public static KeyValue parseKV( String kv ) throws ParseException { int colonIndex = kv.indexOf(':'); if (colonIndex == -1){ throw new ParseException("No colon found in \"" + kv + "\"", colonIndex); } String key = kv.substring(0, colonIndex).trim(); String value = kv.substring(colonIndex + 1).trim(); return new KeyValue( key, value ); } } }