package com.limegroup.gnutella.routing; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Set; import java.util.zip.DataFormatException; import java.util.zip.Inflater; import com.limegroup.gnutella.Assert; import com.limegroup.gnutella.URN; import com.limegroup.gnutella.messages.BadPacketException; import com.limegroup.gnutella.messages.QueryRequest; import com.limegroup.gnutella.util.BitSet; import com.limegroup.gnutella.util.Utilities; import com.limegroup.gnutella.util.IOUtils; import com.limegroup.gnutella.xml.LimeXMLDocument; import com.limegroup.gnutella.settings.ConnectionSettings; //Please note that < and > are the HTML escapes for '<' and '>'. /** * A list of query keywords that a connection can respond to, as well as the * minimum TTL for a response. More formally, a QueryRouteTable is a (possibly * infinite!) list of keyword TTL pairs, [ <keyword_1, ttl_1>, ..., * <keywordN, ttl_N> ] <p> * * 10/08/2002 - A day after Susheel's birthday, he decided to change this class * for the heck of it. Kidding. Functionality has been changed so that keyword * depth is 'constant' - meaning that if a keyword is added, then any contains * query regarding that keyword will return true. This is because this general * idea of QRTs is only used in a specialized way in LW - namely, UPs use it for * their leaves ONLY, so the depth is always 1. If you looking for a keyword * and it is in the table, a leaf MAY have it, so return true. This only * needed a one line change. * * 12/05/2003 - Two months after Susheel's birthday, this class was changed to * once again accept variable infinity values. Over time, optimizations had * removed the ability for a QueryRouteTable to have an infinity that wasn't * 7. However, nothing outright checked that, so patch messages that were * based on a non-7 infinity were silently failing (always stayed empty). * In practice, we could probably even change the infinity to 2, and change * change the number of entryBits to 2, with the keywordPresent and * keywordAbsent values going to 1 and -1, cutting the size of our patch * messages further in half (a quarter of the original size). This would * probably require upgrading the X-Query-Routing to another version. * * <b>This class is NOT synchronized.</b> */ public class QueryRouteTable { /** * The suggested default max table TTL. */ public static final byte DEFAULT_INFINITY=(byte)7; /** What should come across the wire if a keyword status is unchanged. */ public static final byte KEYWORD_NO_CHANGE=(byte)0; /** The maximum size of patch messages, in bytes. */ public static final int MAX_PATCH_SIZE=1<<12; //4 KB /** * The current infinity this table is using. Necessary for creating * ResetTableMessages with the correct infinity. */ private byte infinity; /** * What should come across the wire if a keyword is present. * The nature of this value is dependent on the infinity of the * ResetTableMessage. */ private byte keywordPresent; /** * What should come across the wire if a keyword is absent. * The nature of thsi value is dependent on the infinity of the * ResetTableMessage. */ private byte keywordAbsent; /** The *new* table implementation. The table of keywords - each value in * the BitSet is either 'true' or 'false' - 'true' signifies that a keyword * match MAY be at a leaf 1 hop away, whereas 'false' signifies it isn't. * QRP is really not used in full by the Gnutella Ultrapeer protocol, hence * the easy optimization of only using BitSets. */ private BitSet bitTable; /** * The cached resized QRT. */ private QueryRouteTable resizedQRT = null; /** The 'logical' length of the BitSet. Needed because the BitSet accessor * methods don't seem to offer what is needed. */ private int bitTableLength; /** The last message received of current sequence, or -1 if none. */ private int sequenceNumber; /** The size of the current sequence, or -1 if none. */ private int sequenceSize; /** The index of the next table entry to patch. */ private int nextPatch; /** The uncompressor. This state must be maintained to implement chunked * PATCH messages. (You may need data from message N-1 to apply the patch * in message N.) */ private Inflater uncompressor; /////////////////////////////// Basic Methods /////////////////////////// /** Creates a QueryRouteTable with default sizes. */ public QueryRouteTable() { long byteCount = 1024 * ConnectionSettings.QRT_SIZE_IN_KIBI_ENTRIES.getValue(); if (byteCount > Integer.MAX_VALUE) { throw new IllegalArgumentException("Default QRT size cannot be expressed as an int."); } initialize((int)byteCount, DEFAULT_INFINITY); } /** * Creates a new <tt>QueryRouteTable</tt> instance with the specified * size. This <tt>QueryRouteTable</tt> will be completely empty with * no keywords -- no queries will have hits in this route table until * patch messages are received. * * @param size the size of the query routing table */ public QueryRouteTable(int size) { this(size, DEFAULT_INFINITY); } /** * Creates a new <tt>QueryRouteTable</tt> instance with the specified * size and infinity. This <tt>QueryRouteTable</tt> will be completely * empty with no keywords -- no queries will have hits in this route * table until patch messages are received. * * @param size the size of the query routing table * @param infinity the infinity to use */ public QueryRouteTable(int size, byte infinity) { initialize(size, infinity); } /** * Initializes this <tt>QueryRouteTable</tt> to the specified size. * This table will be empty until patch messages are received. * * @param size the size of the query route table */ private void initialize(int size, byte infinity) { this.bitTableLength = size; this.bitTable = new BitSet(); this.sequenceNumber = -1; this.sequenceSize = -1; this.nextPatch = 0; this.keywordPresent = (byte)(1 - infinity); this.keywordAbsent = (byte)(infinity - 1); this.infinity = infinity; } /** * Returns the size of this QueryRouteTable. */ public int getSize() { return bitTableLength; } /** * Returns the percentage of slots used in this QueryRouteTable's BitTable. * The return value is from 0 to 100. */ public double getPercentFull() { double set = bitTable.cardinality(); return ( set / bitTableLength ) * 100.0; } /** * Returns the number of empty elements in the table. */ public int getEmptyUnits() { return bitTable.unusedUnits(); } /** * Returns the total number of units allocated for storage. */ public int getUnitsInUse() { return bitTable.getUnitsInUse(); } /** * Returns true if a response could be generated for qr. Note that a return * value of true does not necessarily mean that a response will be * generated--just that it could. It is assumed that qr's TTL has already * been decremented, i.e., is the outbound not inbound TTL. */ public boolean contains(QueryRequest qr) { byte bits=Utilities.log2(bitTableLength); //1. First we check that all the normal keywords of qr are in the route // table. Note that this is done with zero allocations! Also note // that HashFunction.hash() takes cares of the capitalization. String query = qr.getQuery(); LimeXMLDocument richQuery = qr.getRichQuery(); if(query.length() == 0 && richQuery == null && !qr.hasQueryUrns()) { return false; } if(qr.hasQueryUrns()) { Set urns = qr.getQueryUrns(); Iterator iter = urns.iterator(); while(iter.hasNext()) { URN qurn = (URN)iter.next(); int hash = HashFunction.hash(qurn.toString(), bits); if(contains(hash)) { // we note a match if any one of the hashes matches return true; } } return false; } for (int i=0 ; ; ) { //Find next keyword... // _ _ W O R D _ _ _ A B // i j k int j=HashFunction.keywordStart(query, i); if (j<0) break; int k=HashFunction.keywordEnd(query, j); //...and look up its hash. int hash=HashFunction.hash(query, j, k, bits); if (!contains(hash)) return false; i=k+1; } //2. Now we extract meta information in the query. If there isn't any, // declare success now. Otherwise ensure that the URI is in the // table. if (richQuery == null) //Normal case for matching query with no metadata. return true; String docSchemaURI = richQuery.getSchemaURI(); int hash = HashFunction.hash(docSchemaURI, bits); if (!contains(hash))//don't know the URI? can't answer query return false; //3. Finally check that "enough" of the metainformation keywords are in // the table: 2/3 or 3, whichever is more. int wordCount=0; int matchCount=0; Iterator iter=richQuery.getKeyWords().iterator(); while(iter.hasNext()) { //getKeyWords only returns all the fields, so we still need to split //the words. The code is copied from part (1) above. It could be //factored, but that's slightly tricky; the above code terminates if //a match fails--a nice optimization--while this code simply counts //the number of words and matches. String words = (String)iter.next(); for (int i=0 ; ; ) { //Find next keyword... // _ _ W O R D _ _ _ A B // i j k int j=HashFunction.keywordStart(words, i); if (j<0) break; int k=HashFunction.keywordEnd(words, j); //...and look up its hash. int wordHash = HashFunction.hash(words, j, k, bits); if (contains(wordHash)) matchCount++; wordCount++; i=k+1; } } // some parts of the query are indivisible, so do some nonstandard // matching iter=richQuery.getKeyWordsIndivisible().iterator(); while(iter.hasNext()) { hash = HashFunction.hash((String)iter.next(), bits); if (contains(hash)) matchCount++; wordCount++; } if (wordCount<3) //less than three word? 100% match required return wordCount==matchCount; else //a 67% match will do... return ((float)matchCount/(float)wordCount) > 0.67; } // In the new version, we will not accept TTLs for methods. Tables are only // 1 hop deep.... private final boolean contains(int hash) { return bitTable.get(hash); } /** * For all keywords k in filename, adds <k> to this. */ public void add(String filePath) { addBTInternal(filePath); } private void addBTInternal(String filePath) { String[] words = HashFunction.keywords(filePath); String[] keywords=HashFunction.getPrefixes(words); byte log2 = Utilities.log2(bitTableLength); for (int i=0; i<keywords.length; i++) { int hash=HashFunction.hash(keywords[i], log2); if (!bitTable.get(hash)) { resizedQRT = null; bitTable.set(hash); } } } public void addIndivisible(String iString) { final int hash = HashFunction.hash(iString, Utilities.log2(bitTableLength)); if (!bitTable.get(hash)) { resizedQRT = null; bitTable.set(hash); } } /** * For all <keyword_i> in qrt, adds <keyword_i> to this. * (This is useful for unioning lots of route tables for propoagation.) * * @modifies this */ public void addAll(QueryRouteTable qrt) { this.bitTable.or( qrt.resize(this.bitTableLength) ); } /** * Scales the internal cached BitSet to size 'newSize' */ private BitSet resize(int newSize) { // if this bitTable is already the correct size, // return it if ( bitTableLength == newSize ) return bitTable; // if we already have a cached resizedQRT and // it is the correct size, then use it. if ( resizedQRT != null && resizedQRT.bitTableLength == newSize ) return resizedQRT.bitTable; // we must construct a new QRT of this size. resizedQRT = new QueryRouteTable(newSize); //This algorithm scales between tables of different lengths. //Refer to the query routing paper for a full explanation. //(The below algorithm, contributed by Philippe Verdy, // uses integer values instead of decimal values // as both double & float can cause precision problems on machines // with odd setups, causing the wrong values to be set in tables) final int m = this.bitTableLength; final int m2 = resizedQRT.bitTableLength; for (int i = this.bitTable.nextSetBit(0); i >= 0; i = this.bitTable.nextSetBit(i + 1)) { // floor(i*m2/m) final int firstSet = (int)(((long)i * m2) / m); i = this.bitTable.nextClearBit(i + 1); // ceil(i*m2/m) final int lastNotSet = (int)(((long)i * m2 - 1) / m + 1); resizedQRT.bitTable.set(firstSet, lastNotSet); } return resizedQRT.bitTable; } /** True if o is a QueryRouteTable with the same entries of this. */ public boolean equals(Object o) { if ( this == o ) return true; if (! (o instanceof QueryRouteTable)) return false; //TODO: two qrt's can be equal even if they have different TTL ranges. QueryRouteTable other=(QueryRouteTable)o; if (this.bitTableLength!=other.bitTableLength) return false; if (!this.bitTable.equals(other.bitTable)) return false; return true; } public int hashCode() { return bitTable.hashCode() * 17; } public String toString() { return "QueryRouteTable : " + bitTable.toString(); } ////////////////////// Core Encoding and Decoding ////////////////////// /** * Resets this <tt>QueryRouteTable</tt> to the specified size with * no data. This is done when a RESET message is received. * * @param rtm the <tt>ResetTableMessage</tt> containing the size * to reset the table to */ public void reset(ResetTableMessage rtm) { initialize(rtm.getTableSize(), rtm.getInfinity()); } /** * Adds the specified patch message to this query routing table. * * @param patch the <tt>PatchTableMessage</tt> containing the new * data to add * @throws <tt>BadPacketException</tt> if the sequence number or size * is incorrect */ public void patch(PatchTableMessage patch) throws BadPacketException { handlePatch(patch); } //All encoding/decoding works in a pipelined manner, by continually //modifying a byte array called 'data'. TODO2: we could avoid a lot of //allocations here if memory is at a premium. private void handlePatch(PatchTableMessage m) throws BadPacketException { //0. Verify that m belongs in this sequence. If we haven't just been //RESET, ensure that m's sequence size matches last message if (sequenceSize!=-1 && sequenceSize!=m.getSequenceSize()) throw new BadPacketException("Inconsistent seq size: " +m.getSequenceSize() +" vs. "+sequenceSize); //If we were just reset, ensure that m's sequence number is one. //Otherwise it should be one greater than the last message received. if (sequenceNumber==-1 ? m.getSequenceNumber()!=1 //reset : sequenceNumber+1!=m.getSequenceNumber()) throw new BadPacketException("Inconsistent seq number: " +m.getSequenceNumber() +" vs. "+sequenceNumber); byte[] data=m.getData(); //1. Start pipelined uncompression. //TODO: check that compression is same as last message. if (m.getCompressor()==PatchTableMessage.COMPRESSOR_DEFLATE) { try { //a) If first message, create uncompressor (if needed). if (m.getSequenceNumber()==1) { uncompressor = new Inflater(); } Assert.that(uncompressor!=null, "Null uncompressor. Sequence: "+m.getSequenceNumber()); data=uncompress(data); } catch (IOException e) { throw new BadPacketException("Couldn't uncompress data: "+e); } } else if (m.getCompressor()!=PatchTableMessage.COMPRESSOR_NONE) { throw new BadPacketException("Unknown compressor"); } //2. Expand nibbles if necessary. if (m.getEntryBits()==4) data=unhalve(data); else if (m.getEntryBits()!=8) throw new BadPacketException("Unknown value for entry bits"); //3. Add data[0...] to table[nextPatch...] for (int i=0; i<data.length; i++) { if(nextPatch >= bitTableLength) throw new BadPacketException("Tried to patch "+nextPatch +" on a bitTable of size " + bitTableLength); // All negative values indicate presence if (data[i] < 0) { bitTable.set(nextPatch); resizedQRT = null; } // All positive values indicate absence else if (data[i] > 0) { bitTable.clear(nextPatch); resizedQRT = null; } nextPatch++; } bitTable.compact(); //4. Update sequence numbers. this.sequenceSize=m.getSequenceSize(); if (m.getSequenceNumber()!=m.getSequenceSize()) { this.sequenceNumber=m.getSequenceNumber(); } else { //Sequence complete. this.sequenceNumber=-1; this.sequenceSize=-1; this.nextPatch=0; //TODO: is this right? // if this last message was compressed, release the uncompressor. if( this.uncompressor != null ) { this.uncompressor.end(); this.uncompressor = null; } } } /** * Stub for calling encode(QueryRouteTable, true). */ public List /* of RouteTableMessage */ encode(QueryRouteTable prev) { return encode(prev, true); } /** * Returns an List of RouteTableMessage that will convey the state of * this. If that is null, this will include a reset. Otherwise it will * include only those messages needed to to convert that to this. More * formally, for any non-null QueryRouteTable's m and that, the following * holds: * * <pre> * for (Iterator iter=m.encode(); iter.hasNext(); ) * prev.update((RouteTableUpdate)iter.next()); * Assert.that(prev.equals(m)); * </pre> */ public List /* of RouteTableMessage */ encode( QueryRouteTable prev, boolean allowCompression) { List /* of RouteTableMessage */ buf=new LinkedList(); if (prev==null) buf.add(new ResetTableMessage(bitTableLength, infinity)); else Assert.that(prev.bitTableLength==this.bitTableLength, "TODO: can't deal with tables of different lengths"); //1. Calculate patch array byte[] data=new byte[bitTableLength]; // Fill up data with KEYWORD_NO_CHANGE, since the majority // of elements will be that. // Because it is already filled, we do not need to iterate and // set it anywhere. Utilities.fill(data, 0, bitTableLength, KEYWORD_NO_CHANGE); boolean needsPatch=false; //1a. If there was a previous table, determine if it was the same one. // If so, we can prevent BitTableLength calls to BitSet.get(int). if( prev != null ) { //1a-I. If they are not equal, xOr the tables and loop // through the different bits. This avoids // bitTableLength*2 calls to BitSet.get // at the cost of the xOr'd table's cardinality // calls to both BitSet.nextSetBit and BitSet.get. // Generally it is worth it, as our BitTables don't // change very rapidly. // With the xOr'd table, we know that all 'clear' // values have not changed. Thus, we can use // nextSetBit on the xOr'd table & this.bitTable.get // to determine whether or not we should set // data[x] to keywordPresent or keywordAbsent. // Because this is an xOr, we know that if // this.bitTable.get is true, prev.bitTable.get // is false, and vice versa. if(!this.bitTable.equals(prev.bitTable) ) { BitSet xOr = (BitSet)this.bitTable.clone(); xOr.xor(prev.bitTable); for (int i=xOr.nextSetBit(0); i >= 0; i=xOr.nextSetBit(i+1)) { data[i] = this.bitTable.get(i) ? keywordPresent : keywordAbsent; needsPatch = true; } } // Else the two tables are equal, and we don't need to do anything // because all elements already contain KEYWORD_NO_CHANGE. } //1b. If there was no previous table, scan through the table using // nextSetBit, avoiding bitTableLength calls to BitSet.get(int). else { for (int i=bitTable.nextSetBit(0);i>=0;i=bitTable.nextSetBit(i+1)){ data[i] = keywordPresent; needsPatch = true; } } //Optimization: there's nothing to report. If prev=null, send a single //RESET. Otherwise send nothing. if (!needsPatch) { return buf; } //2. Try compression. //TODO: Should this not be done if compression isn't allowed? byte bits=8; // Only halve if our values require 4 signed bits at most. // keywordPresent will always be negative and // keywordAbsent will always be positive. if( keywordPresent >= -8 && keywordAbsent <= 7 ) { bits = 4; data = halve(data); } byte compression=PatchTableMessage.COMPRESSOR_NONE; //Optimization: If we are told it is safe to compress the message, //then attempt to compress it. Reasons it is not safe include //the outgoing stream already being compressed. if( allowCompression ) { byte[] patchCompressed = IOUtils.deflate(data); if (patchCompressed.length<data.length) { //...Hooray! Compression was efficient. data=patchCompressed; compression=PatchTableMessage.COMPRESSOR_DEFLATE; } } //3. Break into 1KB chunks and send. TODO: break size limits if needed. final int chunks=(int)Math.ceil((float)data.length/(float)MAX_PATCH_SIZE); int chunk=1; for (int i=0; i<data.length; i+=MAX_PATCH_SIZE) { //Just past the last position of data to copy. //Note special case for last chunk. int stop=Math.min(i+MAX_PATCH_SIZE, data.length); buf.add(new PatchTableMessage((short)chunk, (short)chunks, compression, bits, data, i, stop)); chunk++; } return buf; } ///////////////// Helper Functions for Codec //////////////////////// /** Returns the uncompressed version of the given defalted bytes, using * any dictionaries in uncompressor. Throws IOException if the data is * corrupt. * @requires inflater initialized * @modifies inflater */ private byte[] uncompress(byte[] data) throws IOException { ByteArrayOutputStream baos=new ByteArrayOutputStream(); uncompressor.setInput(data); try { byte[] buf=new byte[1024]; while (true) { int read=uncompressor.inflate(buf); //Needs input? if (read==0) break; baos.write(buf, 0, read); } baos.flush(); return baos.toByteArray(); } catch (DataFormatException e) { throw new IOException("Bad deflate format"); } } /** Returns an array R of length array.length/2, where R[i] consists of the * low nibble of array[2i] concatentated with the low nibble of array[2i+1]. * Note that unhalve(halve(array))=array if all elements of array fit can * fit in four signed bits. * @requires array.length is a multiple of two */ static byte[] halve(byte[] array) { byte[] ret=new byte[array.length/2]; for (int i=0; i<ret.length; i++) ret[i]=(byte)((array[2*i]<<4) | (array[2*i+1]&0xF)); return ret; } /** Returns an array of R of length array.length*2, where R[i] is the the * sign-extended high nibble of floor(i/2) if i even, or the sign-extended * low nibble of floor(i/2) if i odd. */ static byte[] unhalve(byte[] array) { byte[] ret=new byte[array.length*2]; for (int i=0; i<array.length; i++) { ret[2*i]=(byte)(array[i]>>4); //sign extension ret[2*i+1]=extendNibble((byte)(array[i]&0xF)); } return ret; } /** Sign-extends the low nibble of b, i.e., * returns (from MSB to LSB) b[3]b[3]b[3]b[3]b[3]b[2]b[1]b[0]. */ static byte extendNibble(byte b) { if ((b&0x8)!=0) //negative nibble; sign-extend. return (byte)(0xF0 | b); else return b; } }