LimeXMLUtils.java example

Explorer
LimeWire-Pirate-Edition-master
/*
 * XMLUtils.java
 *
 * Created on April 30, 2001, 4:51 PM
 */
package com.limegroup.gnutella.xml;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.Locale;
import java.util.Map;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;

import org.limewire.io.IOUtils;
import org.limewire.logging.Log;
import org.limewire.logging.LogFactory;
import org.limewire.util.I18NConvert;
import org.limewire.util.StringUtils;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Contains utility methods.
 * @author  asingla
 */
public class LimeXMLUtils {
    
    private static final Log LOG = LogFactory.getLog(LimeXMLUtils.class);

    private static final double MATCHING_RATE = .9;

    private static final String C_HEADER_BEGIN = "{";
    private static final String C_HEADER_END   = "}";
    private static final String C_HEADER_NONE_VAL = "plaintext";
    private static final String C_HEADER_ZLIB_VAL = "deflate";
    private static final String C_HEADER_GZIP_VAL = "gzip";
    
    private static final String COMPRESS_HEADER_ZLIB = 
        C_HEADER_BEGIN + C_HEADER_ZLIB_VAL + C_HEADER_END;
    private static final String COMPRESS_HEADER_GZIP = 
        C_HEADER_BEGIN + C_HEADER_GZIP_VAL + C_HEADER_END;
    private static final String COMPRESS_HEADER_NONE = 
        C_HEADER_BEGIN + C_HEADER_END;

    
    private static final int NONE = 0;
    private static final int GZIP = 1;
    private static final int ZLIB = 2;

    
    /**
     * Gets the text content of the child nodes.
     * This is the same as Node.getTextContent(), but exists on all
     * JDKs.
     */
    public static String getTextContent(Node node) {
        return getText(node.getChildNodes());
    }
    
    /**
     * Collapses a list of CDATASection, Text, and predefined EntityReference
     * nodes into a single string.  If the list contains other types of nodes,
     * those other nodes are ignored.
     */
    public static String getText(NodeList nodeList) {
        StringBuilder buffer = new StringBuilder();
        for(int i = 0; i < nodeList.getLength(); i++) {
            Node node = nodeList.item(i);
            switch(node.getNodeType()) {
                case Node.CDATA_SECTION_NODE :
                case Node.TEXT_NODE :
                    buffer.append(node.getNodeValue());
                    break;
                case Node.ENTITY_REFERENCE_NODE :
                    if(node.getNodeName().equals("amp"))
                        buffer.append('&');
                    else if(node.getNodeName().equals("lt"))
                        buffer.append('<');
                    else if(node.getNodeName().equals("gt"))
                        buffer.append('>');
                    else if(node.getNodeName().equals("apos"))
                        buffer.append('\'');
                    else if(node.getNodeName().equals("quot"))
                        buffer.append('"');
                    // Any other entity references are ignored
                    break;
                default :
                    // All other nodes are ignored
             }
         }
         return buffer.toString();
    }

    /**
     * Writes <CODE>string</CODE> into writer, escaping &, ', ", <, and >
     * with the XML escape strings.
     */
    public static void writeEscapedString(Writer writer, String string)
        throws IOException {
        for(int i = 0; i < string.length(); i++) {
            char c = string.charAt(i);
            if(c == '<')
                writer.write("<");
            else if(c == '>')
                writer.write(">");
            else if(c == '&')
                writer.write("&");
            else if(c == '\'')
                writer.write("'");
            else if(c == '"')
                writer.write(""");
            else
		writer.write(c);
        }
    }
    
    /**
     * Reads all the bytes from the passed input stream till end of stream
     * reached.
     * @param in the input stream to read from
     * @return array of bytes read
     * @exception IOException If any I/O exception occurs while reading data
     */
    public static byte[] readFully(InputStream in) throws IOException {
        //create a new byte array stream to store the read data
        ByteArrayOutputStream byteArray = new ByteArrayOutputStream();
        
        //read the bytes till EOF
        byte[] buffer = new byte[1024];
        int bytesRead;
        while((bytesRead = in.read(buffer)) != -1)
        {
            //append the bytes read to the byteArray buffer
            byteArray.write(buffer,0,bytesRead);
        }
        
        //return the bytes read
        return byteArray.toByteArray();
    }
    
    
    /**
     * Compares the queryDoc with the replyDoc and finds out if the
     * replyDoc is a match for the queryDoc.
     * @param replyDoc potential reply Document
     * @param queryDoc the query Document
     * @return true if the replyDoc is a match for the queryDoc, false
     * otherwise
     */
    public static boolean match(LimeXMLDocument replyDoc,
                                LimeXMLDocument queryDoc,
                                boolean allowAllNulls) {
        if(queryDoc == null || replyDoc == null)
            throw new NullPointerException("querying with null doc.");

        //First find the names of all the fields in the query
        Collection<Map.Entry<String, String>> queryNameValues = queryDoc.getNameValueSet();
        int size = queryNameValues.size();
        int matchCount = 0; // number of matches
        int nullCount = 0; // number of fields in query not in replyDoc.
        boolean matchedBitrate = false;
        for(Map.Entry<String, String> entry : queryNameValues) {
            String currFieldName = entry.getKey();
            String queryValue = entry.getValue();
            assert queryValue != null : "null value";
            if (queryValue.equals(""))
                continue; // "" matches everything!!
            String replyDocValue = replyDoc.getValue(currFieldName);
            
            if (currFieldName.endsWith("license_type__") && queryValue.length() > 0) {
                if (replyDocValue == null || !replyDocValue.startsWith(queryValue))
                    return false;
                }

            if (replyDocValue == null || replyDocValue.equals(""))
                nullCount++;
            else {
                try {  
                    // if this is a parse-able numeric value, doing a prefix
                    // matching doesn't make sense.  cast it to a double and do
                    // a straight equals comparison
                    double rDVD = Double.parseDouble(replyDocValue);
                    double qVD  = Double.parseDouble(queryValue);
                    if (rDVD == qVD) {
                        matchCount++;
                        if (currFieldName.equals(LimeXMLNames.AUDIO_BITRATE))
                            matchedBitrate = true;
                    }
                    continue;
                } catch (NumberFormatException nfe) {
                    // just roll through and try to do a normal test...
                } 
                // we used to do a .equalsIgnoreCase, but that is a little too
                // rigid.  so do a ignore case prefix match.
                String queryValueLC = queryValue.toLowerCase(Locale.US);
                String replyDocValueLC = I18NConvert.instance().getNorm(replyDocValue);
                if (replyDocValueLC.startsWith(queryValueLC) ||
                        replyDocValueLC.indexOf(" " + queryValueLC) >= 0)
                    matchCount++;
            }
        }
        // The metric of a correct match is that whatever fields are specified
        // in the query must have prefix match* with the fields in the reply
        // unless the reply has a null for that field, in which case we are OK 
        // with letting it slide.  also, %MATCHING_RATE of the fields must
        // either be a prefix match or null.
        // We make an exception for queries of size 1 field. In this case, there
        // must be a 100% match (which is trivially >= %MATCHING_RATE)
        // * prefix match assumes a string; for numerics just do an equality test
        double sizeD = size;
        double matchCountD = matchCount;
        double nullCountD = nullCount;

        if (size > 1) {
            if (matchedBitrate) {
                // discount a bitrate match.  matching bitrate's shouldn't
                // influence the logic because where size is 2, a matching
                // bitrate will result in a lot of irrelevant results.
                sizeD--;
                matchCountD--;
                matchCount--;
            }
            if (((nullCountD + matchCountD)/sizeD) < MATCHING_RATE)
                return false;
            // ok, it passed rate test, now make sure it had SOME matches...
            if (allowAllNulls || matchCount > 0)
                return true;
            else
                return false;
        }
        else if (size == 1) {
            if(allowAllNulls && nullCount == 1)
                return true;
            if(matchCountD/sizeD < 1)
                return false;
            return true;
        }
        //this should never happen - size >0
        return false;
    }
    
    /**
     * Scans over the given String and returns a new String that contains
     * no invalid whitespace XML characters if any exist.  If none exist
     * the original string is returned.
     * <p>
     * This DOES NOT CONVERT entities such as & or <, it will only remove
     * invalid characters such as \u0002, \u0004, etc...
     */
    public static String scanForBadCharacters(String input) {
        if(input == null)
            return null;
        
        int length = input.length();
        //lazily create the buffer so that we can scan & return the string
        //itself w/o recreating it if we didn't have to.
        StringBuilder buffer = null;
        for (int i = 0; i < length; ) {
            int c = input.codePointAt(i);
            // TODO: do other types need to be blanked out?
            if(Character.getType(c) == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE) {
                if(buffer == null)
                    buffer = createBuffer(input, i);
                buffer.append(' ');
            } else {
                if(buffer != null)
                    buffer.appendCodePoint(c);
            }
            
            i += Character.charCount(c);
        }
        
        if(buffer == null)
            return input;
        else
            return buffer.toString();
    }
    
    /**
     * Attempts to unencode any leftover encoded entities in the XML.
     * This is generally caused by poor ID3 writers that write "&" instead of "&".
     */
    public static String unencodeXML(String input) {
        //return null, if null is passed as argument
        if(input == null)
            return null;
        
        int length = input.length();        
        
        //lazily create the buffer so that we can scan & return the string
        //itself w/o recreating it if we didn't have to.
        StringBuilder buffer = null;

        for (int i = 0; i < length; ) {
            int c = input.codePointAt(i);
            if(c == '&') {
                if(input.regionMatches(i+1, "amp;", 0, 4)) {
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append("&");
                    i += 4;
                } else if(input.regionMatches(i+1, "lt;", 0, 3)) {
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append("<");
                    i += 3;
                } else if(input.regionMatches(i+1, "gt;", 0, 3)) {
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append(">");
                    i += 3;
                } else if(input.regionMatches(i+1, "quot;", 0, 5)) {
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append("\"");
                    i += 5;
                } else if(input.regionMatches(i+1, "apos;", 0, 5)) {
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append("'");
                    i += 5;
                } else {
                    if(buffer != null)
                        buffer.appendCodePoint(c);
                }
            } else { 
                if(buffer != null)
                    buffer.appendCodePoint(c);
            }
            
            i += Character.charCount(c);
        }
        
        // If we never created the buffer, return the string itself.
        if(buffer == null)
            return input;
        else
            return buffer.toString();       
    }
    
    /**
     * Parses the passed string, and encodes the special characters (used in
     * xml for special purposes) with the appropriate codes.
     * e.g. '<' is changed to '<'
     * @return the encoded string. Returns null, if null is passed as argument
     */
    public static String encodeXML(String input) {
        //return null, if null is passed as argument
        if(input == null)
            return null;
        
        int length = input.length();        
        
        //lazily create the buffer so that we can scan & return the string
        //itself w/o recreating it if we didn't have to.
        StringBuilder buffer = null;

        for (int i = 0; i < length; ) {
            int c = input.codePointAt(i);
            // TODO: do other types need to be blanked out?
            if(Character.getType(c) == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE) {
                if(buffer == null)
                    buffer = createBuffer(input, i);
                buffer.append(' ');
            } else {
                switch (c) {
                case '&':
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append("&");
                    break;
                case '<':
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append("<");
                    break;
                case '>':
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append(">");
                    break;
                case '\"':
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append(""");
                    break;
                case '\'':
                    if(buffer == null)
                        buffer = createBuffer(input, i);
                    buffer.append("'");
                    break;
                default:
                    if(buffer != null)
                        buffer.appendCodePoint(c);
                }
            }
            
            i += Character.charCount(c);
        }
        
        // If we never created the buffer, return the string itself.
        if(buffer == null)
            return input;
        else
            return buffer.toString();
    }
    
    /** Creates a StringBuilder from the given data, up to the right length. */
    private static StringBuilder createBuffer(String data, int upTo) {
        StringBuilder sb = new StringBuilder(data.length() * 2);
        sb.append(data, 0, upTo);
        return sb;
    }

    /** @return A properly formatted version of the input data.
     */
    public static byte[] compress(byte[] data) {

        byte[] compressedData = null;
        if (shouldCompress(data)) 
                compressedData = compressZLIB(data);
        
        byte[] retBytes = null;
        if (compressedData != null) {
            retBytes = new byte[COMPRESS_HEADER_ZLIB.length() +
                               compressedData.length];
            System.arraycopy(StringUtils.toAsciiBytes(COMPRESS_HEADER_ZLIB),
                             0,
                             retBytes,
                             0,
                             COMPRESS_HEADER_ZLIB.length());
            System.arraycopy(compressedData, 0,
                             retBytes, COMPRESS_HEADER_ZLIB.length(),
                             compressedData.length);
        }
        else {  // essentially compress failed, just send prefixed raw data....
            retBytes = new byte[COMPRESS_HEADER_NONE.length() +
                                data.length];
            System.arraycopy(StringUtils.toAsciiBytes(COMPRESS_HEADER_NONE),
                             0,
                             retBytes,
                             0,
                             COMPRESS_HEADER_NONE.length());
            System.arraycopy(data, 0,
                             retBytes, COMPRESS_HEADER_NONE.length(),
                             data.length);

        }

        return retBytes;
    }


    /** Currently, all data is compressed.  In the future, this will handle
     *  heuristics about whether data should be compressed or not.
     */
    private static boolean shouldCompress(byte[] data) {
        if (data.length >= 1000)
            return true;
        else
            return false;
    }

    /** Returns a ZLIB'ed version of data. */
    private static byte[] compressZLIB(byte[] data) {
        DeflaterOutputStream gos = null;
        Deflater def = null;
        try {
            def = new Deflater();
            ByteArrayOutputStream baos=new ByteArrayOutputStream();
            gos=new DeflaterOutputStream(baos, def);
            gos.write(data, 0, data.length);
            gos.flush();
            gos.close(); // required to flush data  -- flush doesn't do it.
            //            System.out.println("compression savings: " + ((1-((double)baos.toByteArray().length/(double)data.length))*100) + "%");
            return baos.toByteArray();
        } catch (IOException e) {
            //This should REALLY never happen because no devices are involved.
            //But could we propagate it up.
            assert false : "Couldn't write to byte stream";
            return null;
        } finally {
            IOUtils.close(gos);
            IOUtils.close(def);
        }
    }


    /** Returns a GZIP'ed version of data. */
    /*
    private static byte[] compressGZIP(byte[] data) {
        try {
            ByteArrayOutputStream baos=new ByteArrayOutputStream();
            DeflaterOutputStream gos=new GZIPOutputStream(baos);
            gos.write(data, 0, data.length);
            gos.flush();
            gos.close();                      //flushes bytes
            //            System.out.println("compression savings: " + ((1-((double)baos.toByteArray().length/(double)data.length))*100) + "%");
            return baos.toByteArray();
        } catch (IOException e) {
            //This should REALLY never happen because no devices are involved.
            //But could we propogate it up.
            Assert.that(false, "Couldn't write to byte stream");
            return null;
        }
    } */

    /** @return Correctly uncompressed data (according to Content-Type header) 
     *  May return a byte[] of length 0 if something bad happens. 
     */
    public static byte[] uncompress(byte[] data) throws IOException {
        byte[] retBytes = new byte[0];
        String headerFragment = StringUtils.getASCIIString(data, 0, C_HEADER_BEGIN.length());
        if (headerFragment.equals(C_HEADER_BEGIN)) {
            // we have well formed input (so far)
            boolean found = false;
            int i=0;
            for(; i<data.length && !found; i++)
                if(data[i]==(byte)125)
                    found = true;
            //We know know that "{" is at 1 because we are in this if block
            headerFragment = StringUtils.getASCIIString(data,1,i-1-1);
            int comp = getCompressionType(headerFragment);
            if (comp == NONE) {
                retBytes = new byte[data.length-(headerFragment.length()+2)];
                System.arraycopy(data,
                                 i,
                                 retBytes,
                                 0,
                                 data.length-(headerFragment.length()+2));
            }
            else if (comp == GZIP) {
                retBytes = new byte[data.length-COMPRESS_HEADER_GZIP.length()];
                System.arraycopy(data,
                                 COMPRESS_HEADER_GZIP.length(),
                                 retBytes,
                                 0,
                                 data.length-COMPRESS_HEADER_GZIP.length());
                retBytes = uncompressGZIP(retBytes);                
            }
            else if (comp == ZLIB) {
                retBytes = new byte[data.length-COMPRESS_HEADER_ZLIB.length()];
                System.arraycopy(data,
                                 COMPRESS_HEADER_ZLIB.length(),
                                 retBytes,
                                 0,
                                 data.length-COMPRESS_HEADER_ZLIB.length());
                retBytes = uncompressZLIB(retBytes);                
            }
            else
                ; // uncompressible XML, just drop it on the floor....
        }
        else
            return data;  // the Content-Type header is optional, assumes PT
        return retBytes;
    }

    private static int getCompressionType(String header) {
        String s = header.trim();
        if(s.equals("") || s.equalsIgnoreCase(C_HEADER_NONE_VAL))
            return NONE;
        else if(s.equalsIgnoreCase(C_HEADER_GZIP_VAL))
            return GZIP;
        else if(s.equalsIgnoreCase(C_HEADER_ZLIB_VAL))
            return ZLIB;
        else
            return -1;
        
    }
    

    /** Returns the uncompressed version of the given ZLIB'ed bytes.  Throws
     *  IOException if the data is corrupt. */
    private static byte[] uncompressGZIP(byte[] data) throws IOException {
        ByteArrayInputStream bais=new ByteArrayInputStream(data);
        InflaterInputStream gis = null;
        try {
            gis =new GZIPInputStream(bais);
            ByteArrayOutputStream baos=new ByteArrayOutputStream();
            while (true) {
                int b=gis.read();
                if (b==-1)
                    break;
                baos.write(b);
            }
            return baos.toByteArray();
        } finally {
            IOUtils.close(gis);
        }
    }

        

    /** Returns the uncompressed version of the given ZLIB'ed bytes.  Throws
     *  IOException if the data is corrupt. */
    private static byte[] uncompressZLIB(byte[] data) throws IOException {
        ByteArrayInputStream bais=new ByteArrayInputStream(data);
        InflaterInputStream gis = null;
        Inflater inf = null;
        try {
            inf = new Inflater();
            gis =new InflaterInputStream(bais, inf);
            ByteArrayOutputStream baos=new ByteArrayOutputStream();
            while (true) {
                int b=gis.read();
                if (b==-1)
                    break;
                baos.write(b);
            }
            return baos.toByteArray();
        } finally {
            IOUtils.close(gis);
            IOUtils.close(inf);
        }
    }


    private static final int NUM_BYTES_TO_HASH = 100;
    private static final int NUM_TOTAL_HASH    = NUM_BYTES_TO_HASH*3;
    private static void clearHashBytes(byte[] hashBytes) {
        for (int i = 0; i < NUM_BYTES_TO_HASH; i++)
            hashBytes[i] = (byte)0;
    }

    /**
     * Hashes the file using bits and pieces of the file.
     * 
     * @return the SHA hash bytes of the input bytes.
     * @throws IOException if hashing failed for any reason.
     */
    public static byte[] hashFile(File toHash) throws IOException {
        byte[] retBytes = null;
        FileInputStream fis = null;
        byte[] hashBytes = new byte[NUM_BYTES_TO_HASH];
        
        try {        

            // setup
            fis = new FileInputStream(toHash);
            MessageDigest md = null;
           
            try {
                md = MessageDigest.getInstance("SHA");
            } catch(NoSuchAlgorithmException nsae) {
                throw new IllegalStateException(nsae);
            }

            long fileLength = toHash.length();            
            if (fileLength < NUM_TOTAL_HASH) {
                int numRead = 0;
                do {
                    clearHashBytes(hashBytes);
                    numRead = fis.read(hashBytes);
                    md.update(hashBytes);
                    // if the file changed underneath me, throw away...
                    if (toHash.length() != fileLength)
                        throw new IOException("invalid length");
                } while (numRead == NUM_BYTES_TO_HASH);
            }
            else { // need to do some mathy stuff.......

                long thirds = fileLength / 3;

                // beginning input....
                clearHashBytes(hashBytes);
                fis.read(hashBytes);
                md.update(hashBytes);

                // if the file changed underneath me, throw away...
                if (toHash.length() != fileLength)
                    throw new IOException("invalid length");

                // middle input...
                clearHashBytes(hashBytes);
                fis.skip(thirds - NUM_BYTES_TO_HASH);
                fis.read(hashBytes);
                md.update(hashBytes);

                // if the file changed underneath me, throw away...
                if (toHash.length() != fileLength)
                    throw new IOException("invalid length");
                
                // ending input....
                clearHashBytes(hashBytes);
                fis.skip(toHash.length() - 
                         (thirds + NUM_BYTES_TO_HASH) -
                         NUM_BYTES_TO_HASH);
                fis.read(hashBytes);
                md.update(hashBytes);

                // if the file changed underneath me, throw away...
                if (toHash.length() != fileLength)
                    throw new IOException("invalid length");

            }
                
            retBytes = md.digest();
        } finally {
            if (fis != null)
                fis.close();
        }
        return retBytes;
    }

    /**
     * Tries to parse <code>integer</code> to an int. If it fails, returns
     * <code>defaultValue</code>.
     */
    public static int parseInteger(String integer, int defaultValue) {
        try {
            return Integer.parseInt(integer);
        } catch(NumberFormatException nfx) {
            LOG.error("Unable to parse number: " + integer, nfx);
            return defaultValue;
        }
    }

    /**
     * Removes <pre><elementName>.*</elementName></pre> from <code>input</code>.
     * 
     * @return <code>input</code> if element not found in the input
     */
    public static String stripElement(String input, String elementName) {
        return input.replaceAll("<" + elementName + ">[^<]*</" + elementName +">", "");
    }
}