// QueryRouteTable.java example
//
// Explorer
// learning-bittorrent-master
package com.limegroup.gnutella.routing;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

import com.limegroup.gnutella.Assert;
import com.limegroup.gnutella.URN;
import com.limegroup.gnutella.messages.BadPacketException;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.util.BitSet;
import com.limegroup.gnutella.util.Utilities;
import com.limegroup.gnutella.util.IOUtils;
import com.limegroup.gnutella.xml.LimeXMLDocument;
import com.limegroup.gnutella.settings.ConnectionSettings;

//Please note that &lt; and &gt; are the HTML escapes for '<' and '>'.

/**
 * A list of query keywords that a connection can respond to, as well as the
 * minimum TTL for a response.  More formally, a QueryRouteTable is a (possibly
 * infinite!) list of keyword TTL pairs, [ &lt;keyword_1, ttl_1&gt;, ...,
 * &lt;keyword_N, ttl_N&gt; ]  <p>
 *
 * 10/08/2002 - A day after Susheel's birthday, he decided to change this class
 * for the heck of it.  Kidding.  Functionality has been changed so that keyword
 * depth is 'constant' - meaning that if a keyword is added, then any contains
 * query regarding that keyword will return true.  This is because this general
 * idea of QRTs is only used in a specialized way in LW - namely, UPs use it for
 * their leaves ONLY, so the depth is always 1.  If you are looking for a keyword
 * and it is in the table, a leaf MAY have it, so return true.  This only
 * needed a one line change.
 *
 * 12/05/2003 - Two months after Susheel's birthday, this class was changed to
 * once again accept variable infinity values.  Over time, optimizations had
 * removed the ability for a QueryRouteTable to have an infinity that wasn't
 * 7.  However, nothing outright checked that, so patch messages that were
 * based on a non-7 infinity were silently failing (always stayed empty).
 * In practice, we could probably even change the infinity to 2, and change
 * the number of entryBits to 2, with the keywordPresent and
 * keywordAbsent values going to 1 and -1, cutting the size of our patch
 * messages further in half (a quarter of the original size).  This would
 * probably require upgrading the X-Query-Routing to another version.
 *
 * <b>This class is NOT synchronized.</b>
 */
public class QueryRouteTable {
    /**
     * The suggested default max table TTL.  Used as the infinity by the
     * constructors that do not take an explicit infinity.
     */
    public static final byte DEFAULT_INFINITY=(byte)7;
    /** What should come across the wire if a keyword status is unchanged. */
    public static final byte KEYWORD_NO_CHANGE=(byte)0;
    /** The maximum size of patch messages, in bytes. */
    public static final int MAX_PATCH_SIZE=1<<12;      //4 KB
    
    /**
     * The current infinity this table is using.  Necessary for creating
     * ResetTableMessages with the correct infinity.
     */
    private byte infinity;
    
    /**
     * What should come across the wire if a keyword is present.
     * The nature of this value is dependent on the infinity of the
     * ResetTableMessage; initialize() sets it to (1 - infinity).
     */
    private byte keywordPresent;
    
    /**
     * What should come across the wire if a keyword is absent.
     * The nature of this value is dependent on the infinity of the
     * ResetTableMessage; initialize() sets it to (infinity - 1).
     */
    private byte keywordAbsent;

    /** The *new* table implementation.  The table of keywords - each value in
     *  the BitSet is either 'true' or 'false' - 'true' signifies that a keyword
     *  match MAY be at a leaf 1 hop away, whereas 'false' signifies it isn't.
     *  QRP is really not used in full by the Gnutella Ultrapeer protocol, hence
     *  the easy optimization of only using BitSets.
     */
    private BitSet bitTable;
    
    /**
     * The cached resized QRT, or null if no scaled copy is cached.  Built
     * lazily by resize(int) and set back to null by the methods that change
     * bits in bitTable.
     */
    private QueryRouteTable resizedQRT = null;

    /** The 'logical' length of the BitSet.  Needed because the BitSet accessor
     *  methods don't seem to offer what is needed.
     */
    private int bitTableLength;

    /** The last message received of current sequence, or -1 if none. */
    private int sequenceNumber;
    /** The size of the current sequence, or -1 if none. */
    private int sequenceSize;

    /** The index of the next table entry to patch. */
    private int nextPatch;
    /** The uncompressor. This state must be maintained to implement chunked
     *  PATCH messages.  (You may need data from message N-1 to apply the patch
     *  in message N.)  Null when no compressed sequence is in progress. */
    private Inflater uncompressor;



    /////////////////////////////// Basic Methods ///////////////////////////


    /**
     * Creates a QueryRouteTable with the default size and infinity.  The
     * size is 1024 times the value of
     * <tt>ConnectionSettings.QRT_SIZE_IN_KIBI_ENTRIES</tt>, and the infinity
     * is <tt>DEFAULT_INFINITY</tt>.
     *
     * @throws IllegalArgumentException if the configured size does not fit
     *  in an <tt>int</tt>
     */
    public QueryRouteTable() {
        // Multiply as a long: an int multiplication would wrap around before
        // being widened, so the range check below could never trigger.
        long entryCount = 1024L * ConnectionSettings.QRT_SIZE_IN_KIBI_ENTRIES.getValue();
        if (entryCount > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Default QRT size cannot be expressed as an int.");
        }
        initialize((int)entryCount, DEFAULT_INFINITY);
    }

    /**
     * Creates a new <tt>QueryRouteTable</tt> instance with the specified
     * size and an infinity of <tt>DEFAULT_INFINITY</tt>.  This
     * <tt>QueryRouteTable</tt> will be completely empty with
     * no keywords -- no queries will have hits in this route table until
     * keywords are added or patch messages are received.
     *
     * @param size the size of the query routing table
     */
    public QueryRouteTable(int size) {
        this(size, DEFAULT_INFINITY);
    }
    
    /**
     * Creates a new <tt>QueryRouteTable</tt> instance with the specified
     * size and infinity.  This <tt>QueryRouteTable</tt> will be completely 
     * empty with no keywords -- no queries will have hits in this route 
     * table until keywords are added or patch messages are received.
     *
     * @param size the size of the query routing table
     * @param infinity the infinity to use; the present/absent wire values
     *  are derived from it
     */
    public QueryRouteTable(int size, byte infinity) {
        initialize(size, infinity);
    }    

    /**
     * Initializes (or re-initializes) this <tt>QueryRouteTable</tt> to an
     * empty table of the specified size and infinity.  All state belonging
     * to the previous contents is discarded: the bit table, the cached
     * resized copy, the patch sequence counters and any uncompressor left
     * over from an unfinished patch sequence.
     *
     * @param size the size of the query route table
     * @param infinity the infinity to use; the wire value for "present"
     *  becomes (1 - infinity) and the one for "absent" (infinity - 1)
     */
    private void initialize(int size, byte infinity) {
        this.bitTableLength = size;
        this.bitTable = new BitSet();
        // A cached scaled copy was derived from the old bits and must not
        // survive a reset.
        this.resizedQRT = null;
        this.sequenceNumber = -1;
        this.sequenceSize = -1;
        this.nextPatch = 0;
        // An interrupted compressed sequence cannot be continued after a
        // reset, so release the inflater instead of leaving it allocated.
        if (this.uncompressor != null) {
            this.uncompressor.end();
            this.uncompressor = null;
        }
        this.keywordPresent = (byte)(1 - infinity);
        this.keywordAbsent = (byte)(infinity - 1);
        this.infinity = infinity;
    }
    
    /**
     * Returns the size of this QueryRouteTable, i.e. the logical length of
     * its bit table as given at construction or at the last reset.
     *
     * @return the logical table length
     */
    public int getSize() {
        return bitTableLength;
    }    
    
    /**
     * Reports how much of this table's bit table is occupied: the number of
     * set bits divided by the logical table length, expressed as a
     * percentage.
     *
     * @return the fill level, from 0 to 100
     */
    public double getPercentFull() {
        final double usedSlots = bitTable.cardinality();
        final double fraction = usedSlots / bitTableLength;
        return fraction * 100.0;
    }
	
	/**
	 * Returns the number of empty elements in the table, as reported by
	 * <tt>BitSet.unusedUnits()</tt>.
	 * NOTE(review): presumably counts allocated storage units that hold no
	 * set bits -- confirm against the BitSet implementation.
	 *
	 * @return the value of <tt>bitTable.unusedUnits()</tt>
	 */
	public int getEmptyUnits() {
	    return bitTable.unusedUnits();
	}
	
	/**
	 * Returns the total number of units allocated for storage, as reported
	 * by <tt>BitSet.getUnitsInUse()</tt>.
	 *
	 * @return the value of <tt>bitTable.getUnitsInUse()</tt>
	 */
	public int getUnitsInUse() {
	    return bitTable.getUnitsInUse();
	}

    /**
     * Tells whether a response could be generated for qr.  A return value of
     * true does not mean that a response will be generated -- only that one
     * is possible.  It is assumed that qr's TTL has already been
     * decremented, i.e., is the outbound not inbound TTL.
     * <p>
     * The decision is made as follows:
     * <ol>
     * <li>A query with an empty keyword string, no rich query and no URNs
     *     never matches.</li>
     * <li>A query carrying URNs matches iff at least one URN hashes to a
     *     set entry; nothing else is looked at.</li>
     * <li>Otherwise every keyword of the plain query must be in the
     *     table.</li>
     * <li>If there is a rich query, its schema URI must be in the table, and
     *     "enough" of its keywords must be too: all of them when there are
     *     fewer than three, otherwise a matched fraction strictly greater
     *     than 0.67 (so two out of three is not sufficient).</li>
     * </ol>
     *
     * @param qr the query to test
     * @return true if this table does not rule out a hit for qr
     */
    public boolean contains(QueryRequest qr) {
        final byte hashBits = Utilities.log2(bitTableLength);

        final String keywordText = qr.getQuery();
        final LimeXMLDocument xmlQuery = qr.getRichQuery();

        // Nothing to look up at all: cannot match.
        if (keywordText.length() == 0
                && xmlQuery == null
                && !qr.hasQueryUrns()) {
            return false;
        }

        // URN queries are decided by the URNs alone; one matching hash is
        // enough.
        if (qr.hasQueryUrns()) {
            Set queryUrns = qr.getQueryUrns();
            for (Iterator urnIter = queryUrns.iterator(); urnIter.hasNext(); ) {
                URN candidate = (URN)urnIter.next();
                int urnSlot = HashFunction.hash(candidate.toString(), hashBits);
                if (contains(urnSlot)) {
                    return true;
                }
            }
            return false;
        }

        //1. Every plain keyword must be present.  The scan works directly on
        //   the query string (no substrings are created); HashFunction.hash()
        //   takes care of the capitalization.
        //       _ _ W O R D _ _ _ A B
        //           ^start  ^end
        int start = HashFunction.keywordStart(keywordText, 0);
        while (start >= 0) {
            int end = HashFunction.keywordEnd(keywordText, start);
            int keywordSlot = HashFunction.hash(keywordText, start, end, hashBits);
            if (!contains(keywordSlot)) {
                return false;
            }
            start = HashFunction.keywordStart(keywordText, end + 1);
        }

        //2. Without meta information we are done.  With it, the schema URI
        //   has to be known to the table.
        if (xmlQuery == null) {
            return true;
        }
        String schemaUri = xmlQuery.getSchemaURI();
        int schemaSlot = HashFunction.hash(schemaUri, hashBits);
        if (!contains(schemaSlot)) {
            return false;
        }

        //3. Count how many of the meta information keywords are in the
        //   table.  getKeyWords() yields whole fields, so each one is split
        //   into words the same way as in step 1 -- except that a miss does
        //   not end the scan, it is merely not counted as a match.
        int total = 0;
        int matched = 0;
        for (Iterator fieldIter = xmlQuery.getKeyWords().iterator();
             fieldIter.hasNext(); ) {
            String field = (String)fieldIter.next();
            int begin = HashFunction.keywordStart(field, 0);
            while (begin >= 0) {
                int stop = HashFunction.keywordEnd(field, begin);
                int wordSlot = HashFunction.hash(field, begin, stop, hashBits);
                if (contains(wordSlot)) {
                    matched++;
                }
                total++;
                begin = HashFunction.keywordStart(field, stop + 1);
            }
        }

        // Some parts of the query are indivisible and are hashed as a whole.
        for (Iterator wholeIter = xmlQuery.getKeyWordsIndivisible().iterator();
             wholeIter.hasNext(); ) {
            int wholeSlot = HashFunction.hash((String)wholeIter.next(), hashBits);
            if (contains(wholeSlot)) {
                matched++;
            }
            total++;
        }

        if (total >= 3) {
            // Three or more words: a partial match will do.
            return ((float)matched/(float)total) > 0.67;
        }
        // Fewer than three words: 100% match required.
        return total == matched;
    }
    
    /**
     * Returns true if the table entry at the given hash is set.
     * In the new version, we will not accept TTLs for methods.  Tables are
     * only 1 hop deep....
     *
     * @param hash the index into the bit table
     */
    private final boolean contains(int hash) {
        return bitTable.get(hash);
    }

    /**
     * For all keywords k in filePath, adds k to this table.  Delegates to
     * addBTInternal(String).
     *
     * @param filePath the path whose keywords should be added
     */
    public void add(String filePath) {
        addBTInternal(filePath);
    }


    /**
     * Sets the table entry for every string that HashFunction.getPrefixes
     * yields for the keywords of filePath.  The cached resized table is
     * dropped as soon as an entry that was not yet set gets set.
     *
     * @param filePath the path whose keywords should be added
     */
    private void addBTInternal(String filePath) {
        final String[] rawWords = HashFunction.keywords(filePath);
        final String[] prefixes = HashFunction.getPrefixes(rawWords);
        final byte hashBits = Utilities.log2(bitTableLength);
        for (int idx = 0; idx < prefixes.length; idx++) {
            final int slot = HashFunction.hash(prefixes[idx], hashBits);
            if (bitTable.get(slot)) {
                continue;   // already present, nothing changes
            }
            resizedQRT = null;
            bitTable.set(slot);
        }
    }


    /**
     * Adds iString to this table as one unit: the whole string is hashed
     * and its entry set, without splitting it into keywords first.  The
     * cached resized table is dropped if the entry was not set before.
     *
     * @param iString the string to add as a whole
     */
    public void addIndivisible(String iString) {
        final byte hashBits = Utilities.log2(bitTableLength);
        final int slot = HashFunction.hash(iString, hashBits);
        if (bitTable.get(slot)) {
            return;   // already present, nothing changes
        }
        resizedQRT = null;
        bitTable.set(slot);
    }


    /**
     * For all keywords in qrt, adds that keyword to this.  (This is useful
     * for unioning lots of route tables for propagation.)  If qrt has a
     * different size, its entries are scaled to this table's size first.
     *
     *    @modifies this
     *
     * @param qrt the table whose entries are OR-ed into this one
     */
    public void addAll(QueryRouteTable qrt) {
        this.bitTable.or( qrt.resize(this.bitTableLength) );
        // The bits of this table may have changed, so a previously cached
        // scaled copy of it is no longer trustworthy.
        this.resizedQRT = null;
    }
    
    /**
     * Returns the entries of this table scaled to a table of length
     * 'newSize'.  When newSize equals this table's own length the internal
     * BitSet itself is returned; otherwise the result comes from a cached
     * scaled table, which is rebuilt if it is missing or has another size.
     *
     * @param newSize the table length to scale to
     * @return a BitSet holding this table's entries at the requested scale
     */
    private BitSet resize(int newSize) {
        // Same size: our own bits are already the answer.
        if (newSize == bitTableLength) {
            return bitTable;
        }

        // A cached copy of the right size can be reused.
        final boolean cacheUsable =
            resizedQRT != null && resizedQRT.bitTableLength == newSize;
        if (cacheUsable) {
            return resizedQRT.bitTable;
        }

        // Otherwise build (and cache) a fresh table of the requested size.
        resizedQRT = new QueryRouteTable(newSize);

        //This algorithm scales between tables of different lengths.
        //Refer to the query routing paper for a full explanation.
        //(The algorithm, contributed by Philippe Verdy, uses integer
        // arithmetic instead of floating point, as both double & float can
        // cause precision problems on machines with odd setups, causing the
        // wrong values to be set in tables)
        final int sourceLen = this.bitTableLength;
        final int targetLen = resizedQRT.bitTableLength;
        int runStart = this.bitTable.nextSetBit(0);
        while (runStart >= 0) {
            // [runStart, runEnd) is a maximal run of set bits.
            final int runEnd = this.bitTable.nextClearBit(runStart + 1);
            // floor(runStart*targetLen/sourceLen)
            final int firstSet = (int)(((long)runStart * targetLen) / sourceLen);
            // ceil(runEnd*targetLen/sourceLen)
            final int lastNotSet =
                (int)(((long)runEnd * targetLen - 1) / sourceLen + 1);
            resizedQRT.bitTable.set(firstSet, lastNotSet);
            runStart = this.bitTable.nextSetBit(runEnd + 1);
        }

        return resizedQRT.bitTable;
    }

    /**
     * Compares this table with o.  Two tables are equal when both are
     * QueryRouteTables of the same logical length with equal bit tables.
     *
     * @param o the object to compare with
     * @return true if o is a QueryRouteTable with the same entries as this
     */
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (!(o instanceof QueryRouteTable)) {
            return false;
        }

        //TODO: two qrt's can be equal even if they have different TTL ranges.
        final QueryRouteTable that = (QueryRouteTable)o;
        return this.bitTableLength == that.bitTableLength
            && this.bitTable.equals(that.bitTable);
    }

    /**
     * Returns a hash code derived from the bit table only (its hash code
     * times 17).  The table length is not mixed in; tables that are equal
     * according to equals(Object) have equal bit tables and therefore the
     * same hash code.
     */
    public int hashCode() {
        return bitTable.hashCode() * 17;
    }


    /**
     * Returns "QueryRouteTable : " followed by the string form of the bit
     * table.
     */
    public String toString() {
        return "QueryRouteTable : " + bitTable.toString();
    }


    ////////////////////// Core Encoding and Decoding //////////////////////


    /**
     * Resets this <tt>QueryRouteTable</tt> to the specified size with
     * no data.  This is done when a RESET message is received.  The table
     * is re-initialized with the table size and the infinity carried by
     * the message.
     *
     * @param rtm the <tt>ResetTableMessage</tt> containing the size
     *  and infinity to reset the table to
     */
    public void reset(ResetTableMessage rtm) {
        initialize(rtm.getTableSize(), rtm.getInfinity());
    }

    /**
     * Adds the specified patch message to this query routing table.
     * Delegates to handlePatch(PatchTableMessage).
     *
     * @param patch the <tt>PatchTableMessage</tt> containing the new
     *  data to add
     * @throws BadPacketException if the sequence number or size
     *  is incorrect, or if the patch data cannot be applied
     */
    public void patch(PatchTableMessage patch) throws BadPacketException {
        handlePatch(patch);        
    }


    //All encoding/decoding works in a pipelined manner, by continually
    //modifying a byte array called 'data'.  TODO2: we could avoid a lot of
    //allocations here if memory is at a premium.

    /**
     * Applies one PATCH message of a sequence to this table.  The message
     * is checked against the sequence in progress, its payload is
     * uncompressed and/or expanded from nibbles as the message dictates,
     * and the resulting entries are applied to consecutive table positions
     * starting at nextPatch: negative values set an entry, positive values
     * clear it, zero leaves it untouched.
     * <p>
     * The message comes from a remote host, so every inconsistency is
     * reported as a <tt>BadPacketException</tt>.  Entries applied before an
     * inconsistency was detected are not rolled back.
     *
     * @param m the patch message to apply
     * @throws BadPacketException if the sequence size or number does not
     *  fit the sequence in progress, if a compressed message arrives
     *  without a compressed stream having been started, if the data cannot
     *  be uncompressed, if the compressor or entry size is unknown, or if
     *  the patch runs past the end of the table
     */
    private void handlePatch(PatchTableMessage m) throws BadPacketException {
        //0. Verify that m belongs in this sequence.  If we haven't just been
        //RESET, ensure that m's sequence size matches last message
        if (sequenceSize!=-1 && sequenceSize!=m.getSequenceSize())
            throw new BadPacketException("Inconsistent seq size: "
                                         +m.getSequenceSize()
                                         +" vs. "+sequenceSize);
        //If we were just reset, ensure that m's sequence number is one.
        //Otherwise it should be one greater than the last message received.
        if (sequenceNumber==-1 ? m.getSequenceNumber()!=1 //reset
                               : sequenceNumber+1!=m.getSequenceNumber())
            throw new BadPacketException("Inconsistent seq number: "
                                         +m.getSequenceNumber()
                                         +" vs. "+sequenceNumber);

        byte[] data=m.getData();

        //1. Start pipelined uncompression.
        //TODO: check that compression is same as last message.
        if (m.getCompressor()==PatchTableMessage.COMPRESSOR_DEFLATE) {
            //a) If first message, start a fresh uncompressor, releasing one
            //   that an earlier, unfinished sequence may have left behind.
            if (m.getSequenceNumber()==1) {
                if (uncompressor!=null)
                    uncompressor.end();
                uncompressor = new Inflater();
            }
            //b) A compressed message in the middle of a sequence whose first
            //   message was not compressed has no stream to continue.  That
            //   is the remote host's fault, not a local programming error.
            if (uncompressor==null)
                throw new BadPacketException(
                    "Null uncompressor.  Sequence: "+m.getSequenceNumber());
            try {
                data=uncompress(data);            
            } catch (IOException e) {
                throw new BadPacketException("Couldn't uncompress data: "+e);
            }
        } else if (m.getCompressor()!=PatchTableMessage.COMPRESSOR_NONE) {
            throw new BadPacketException("Unknown compressor");
        }
        
        //2. Expand nibbles if necessary.
        if (m.getEntryBits()==4) 
            data=unhalve(data);
        else if (m.getEntryBits()!=8)
            throw new BadPacketException("Unknown value for entry bits");

        //3. Add data[0...] to table[nextPatch...]            
        for (int i=0; i<data.length; i++) {
            if(nextPatch >= bitTableLength)
                throw new BadPacketException("Tried to patch "+nextPatch
                                             +" on a bitTable of size "
                                             + bitTableLength);
            // All negative values indicate presence
            if (data[i] < 0) {
                bitTable.set(nextPatch);
                resizedQRT = null;
            }
            // All positive values indicate absence
            else if (data[i] > 0) {
                bitTable.clear(nextPatch);
                resizedQRT = null;
            }
            nextPatch++;
        }
        bitTable.compact();

        //4. Update sequence numbers.
        this.sequenceSize=m.getSequenceSize();
        if (m.getSequenceNumber()!=m.getSequenceSize()) {            
            this.sequenceNumber=m.getSequenceNumber();
        } else {
            //Sequence complete.
            this.sequenceNumber=-1;
            this.sequenceSize=-1;
            this.nextPatch=0; //TODO: is this right?
            // if this last message was compressed, release the uncompressor.
            if( this.uncompressor != null ) {
                this.uncompressor.end();
                this.uncompressor = null;
            }
        }   
    }
    
    /**
     * Stub for calling encode(prev, true), i.e. encoding with compression
     * allowed.
     *
     * @param prev the table the receiver is assumed to hold, or null
     * @return the List of RouteTableMessage produced by
     *  encode(QueryRouteTable, boolean)
     */
    public List /* of RouteTableMessage */ encode(QueryRouteTable prev) {
        return encode(prev, true);
    }

    /**
     * Returns a List of RouteTableMessage that will convey the state of
     * this to a host whose table currently equals prev.  If prev is null,
     * the list starts with a ResetTableMessage and the patch describes the
     * whole table.  Otherwise the list only contains the patch messages
     * needed to convert prev to this; it is empty if the two tables hold
     * the same entries.  More formally, for any QueryRouteTable m and any
     * table prev of the same length, the following holds:
     *
     * <pre>
     * for (Iterator iter=m.encode(prev).iterator(); iter.hasNext(); )
     *    prev.patch((PatchTableMessage)iter.next());
     * Assert.that(prev.equals(m)); 
     * </pre> 
     *
     * The patch uses 4 bits per entry when both wire values fit in a signed
     * nibble and 8 bits otherwise, is deflated when that is allowed and
     * actually makes it smaller, and is split into messages of at most
     * MAX_PATCH_SIZE bytes.
     *
     * @param prev the table the receiver is assumed to hold, or null if the
     *  receiver should be reset first; must have the same length as this
     * @param allowCompression whether the patch data may be deflated, e.g.
     *  false if the outgoing stream is already compressed
     * @return the messages to send, in order
     */
    public List /* of RouteTableMessage */ encode(
      QueryRouteTable prev, boolean allowCompression) {
        List /* of RouteTableMessage */ buf=new LinkedList();
        if (prev==null)
            buf.add(new ResetTableMessage(bitTableLength, infinity));
        else
            Assert.that(prev.bitTableLength==this.bitTableLength,
                        "TODO: can't deal with tables of different lengths");

        //1. Calculate patch array
        byte[] data=new byte[bitTableLength];
        // Fill up data with KEYWORD_NO_CHANGE, since the majority
        // of elements will be that.
        // Because it is already filled, we do not need to iterate and
        // set it anywhere.
        Utilities.fill(data, 0, bitTableLength, KEYWORD_NO_CHANGE);
        boolean needsPatch=false;
        
        //1a. If there was a previous table, determine if it was the same one.
        //    If so, we can prevent BitTableLength calls to BitSet.get(int).
        if( prev != null ) {
            //1a-I. If they are not equal, xOr the tables and loop
            //      through the different bits.  This avoids
            //      bitTableLength*2 calls to BitSet.get
            //      at the cost of the xOr'd table's cardinality
            //      calls to both BitSet.nextSetBit and BitSet.get.
            //      Generally it is worth it, as our BitTables don't
            //      change very rapidly.
            //      With the xOr'd table, we know that all 'clear'
            //      values have not changed.  Thus, we can use
            //      nextSetBit on the xOr'd table & this.bitTable.get
            //      to determine whether or not we should set
            //      data[x] to keywordPresent or keywordAbsent.
            //      Because this is an xOr, we know that if 
            //      this.bitTable.get is true, prev.bitTable.get
            //      is false, and vice versa.            
            if(!this.bitTable.equals(prev.bitTable) ) {
                BitSet xOr = (BitSet)this.bitTable.clone();
                xOr.xor(prev.bitTable);
                for (int i=xOr.nextSetBit(0); i >= 0; i=xOr.nextSetBit(i+1)) {
                    data[i] = this.bitTable.get(i) ?
                        keywordPresent : keywordAbsent;
                    needsPatch = true;
                }
            }
            // Else the two tables are equal, and we don't need to do anything
            // because all elements already contain KEYWORD_NO_CHANGE.
        }
        //1b. If there was no previous table, scan through the table using
        //    nextSetBit, avoiding bitTableLength calls to BitSet.get(int).
        else {
            for (int i=bitTable.nextSetBit(0);i>=0;i=bitTable.nextSetBit(i+1)){
                data[i] = keywordPresent;
                needsPatch = true;
            }
        }
        //Optimization: there's nothing to report.  If prev=null, send a single
        //RESET.  Otherwise send nothing.
        if (!needsPatch) {
            return buf;
        }


        //2. Try compression.
        //TODO: Should this not be done if compression isn't allowed?
        byte bits=8;
        // Only halve if our values require 4 signed bits at most.
        // keywordPresent will always be negative and
        // keywordAbsent will always be positive.
        if( keywordPresent >= -8 && keywordAbsent <= 7 ) {
            bits = 4;
            data = halve(data);
        }

        byte compression=PatchTableMessage.COMPRESSOR_NONE;
        //Optimization: If we are told it is safe to compress the message,
        //then attempt to compress it.  Reasons it is not safe include
        //the outgoing stream already being compressed.
        if( allowCompression ) {
            byte[] patchCompressed = IOUtils.deflate(data);
            if (patchCompressed.length<data.length) {
                //...Hooray!  Compression was efficient.
                data=patchCompressed;
                compression=PatchTableMessage.COMPRESSOR_DEFLATE;
            }
        }
                   

        //3. Break into chunks of at most MAX_PATCH_SIZE (4 KB) and send.
        //   TODO: break size limits if needed.
        //   The chunk count is the ceiling of data.length/MAX_PATCH_SIZE,
        //   computed with integers: a float cannot represent every length
        //   above 2^24 exactly, which could make the announced count differ
        //   from the number of chunks the loop below really produces.
        final int chunks=data.length/MAX_PATCH_SIZE
                         + (data.length%MAX_PATCH_SIZE==0 ? 0 : 1);
        int chunk=1;
        for (int i=0; i<data.length; i+=MAX_PATCH_SIZE) {
            //Just past the last position of data to copy.
            //Note special case for last chunk.  
            int stop=Math.min(i+MAX_PATCH_SIZE, data.length);
            buf.add(new PatchTableMessage((short)chunk, (short)chunks,
                                          compression, bits,
                                          data, i, stop));
            chunk++;
        }        
        return buf;        
    }


    ///////////////// Helper Functions for Codec ////////////////////////

    /** Returns the uncompressed version of the given deflated bytes, using
     *  the state (and any dictionaries) already held by uncompressor, so
     *  that a deflate stream split over several messages can be continued
     *  chunk by chunk.  Reading stops as soon as the inflater produces no
     *  more output for this input.
     *      @requires uncompressor initialized 
     *      @modifies uncompressor
     *
     *  @param data the next piece of the deflate stream
     *  @return the bytes this piece expands to
     *  @throws IOException if the data is corrupt; the underlying
     *   DataFormatException is attached as the cause */
    private byte[] uncompress(byte[] data) throws IOException {
        ByteArrayOutputStream baos=new ByteArrayOutputStream();
        uncompressor.setInput(data);
        
        try {
            byte[] buf=new byte[1024];
            while (true) {
                int read=uncompressor.inflate(buf);
                //Needs input?
                if (read==0)
                    break;
                baos.write(buf, 0, read);                
            }
            return baos.toByteArray();
        } catch (DataFormatException e) {
            // Keep the original failure reachable for whoever debugs this.
            IOException wrapped=new IOException("Bad deflate format");
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    
    /** Packs pairs of entries into single bytes.  Returns an array R of
     *  length array.length/2, where R[i] consists of the low nibble of
     *  array[2i] (as the high nibble) concatenated with the low nibble of
     *  array[2i+1] (as the low nibble).  Note that
     *  unhalve(halve(array))=array if all elements of array fit in four
     *  signed bits.  A trailing unpaired element is not included in the
     *  result.
     *      @requires array.length is a multiple of two */
    static byte[] halve(byte[] array) {
        final byte[] packed = new byte[array.length/2];
        for (int pair = 0; pair < packed.length; pair++) {
            final int highNibble = array[2*pair] << 4;
            final int lowNibble = array[2*pair+1] & 0xF;
            packed[pair] = (byte)(highNibble | lowNibble);
        }
        return packed;
    }

    /** Unpacks the nibbles of each byte into separate entries.  Returns an
     *  array R of length array.length*2, where R[i] is the sign-extended
     *  high nibble of array[floor(i/2)] if i is even, or the sign-extended
     *  low nibble of array[floor(i/2)] if i is odd. */        
    static byte[] unhalve(byte[] array) {
        final byte[] expanded = new byte[array.length*2];
        int out = 0;
        for (int in = 0; in < array.length; in++) {
            final byte packed = array[in];
            //the arithmetic shift does the sign extension of the high nibble
            expanded[out++] = (byte)(packed >> 4);
            expanded[out++] = extendNibble((byte)(packed & 0xF));
        }
        return expanded;
    }    
    
    /** Sign-extends the low nibble of b, i.e., 
     *  returns (from MSB to LSB) b[3]b[3]b[3]b[3]b[3]b[2]b[1]b[0].
     *  If bit 3 of b is clear, b is returned unchanged. */
    static byte extendNibble(byte b) {
        final boolean negativeNibble = (b & 0x8) != 0;
        return negativeNibble ? (byte)(0xF0 | b) : b;
    }
}