/* * XMLUtils.java * * Created on April 30, 2001, 4:51 PM */ package com.limegroup.gnutella.xml; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.StringReader; import java.io.Writer; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.Iterator; import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.zip.DeflaterOutputStream; import java.util.zip.GZIPInputStream; import java.util.zip.InflaterInputStream; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import com.limegroup.gnutella.Assert; import com.limegroup.gnutella.util.I18NConvert; import com.limegroup.gnutella.util.IOUtils; /** * Contains utility methods * @author asingla */ public class LimeXMLUtils { private static final double MATCHING_RATE = .9; private static final String C_HEADER_BEGIN = "{"; private static final String C_HEADER_END = "}"; private static final String C_HEADER_NONE_VAL = "plaintext"; private static final String C_HEADER_ZLIB_VAL = "deflate"; private static final String C_HEADER_GZIP_VAL = "gzip"; private static final String COMPRESS_HEADER_ZLIB = C_HEADER_BEGIN + C_HEADER_ZLIB_VAL + C_HEADER_END; private static final String COMPRESS_HEADER_GZIP = C_HEADER_BEGIN + C_HEADER_GZIP_VAL + C_HEADER_END; private static final String COMPRESS_HEADER_NONE = C_HEADER_BEGIN + C_HEADER_END; private static final int NONE = 0; private static final int GZIP = 1; private static final int ZLIB = 2; public static final String AUDIO_BITRATE_ATTR = "audios__audio__bitrate__"; /** * Returns an instance of InputSource after reading the file, and trimming * the extraneous white spaces. * @param file The file from where to read * @return The instance of InputSource created from the passed file * @exception IOException If file doesnt get opened or other I/O problems */ public static InputSource getInputSource(File file) throws IOException { //open the file, read it, and derive the structure, store internally StringBuffer sb = new StringBuffer(); String line = ""; //open the file BufferedReader br = null; try { br = new BufferedReader(new FileReader(file)); while(line != null) { //read a line from file line = br.readLine(); if(line != null) { //append the line (along with the newline that got removed) sb.append(line + "\n"); } } } finally { if( br != null) br.close(); } //get & return the input source return new InputSource(new StringReader(sb.toString())); } /** * Gets the text content of the child nodes. * This is the same as Node.getTextContent(), but exists on all * JDKs. */ public static String getTextContent(Node node) { return getText(node.getChildNodes()); } /** * Collapses a list of CDATASection, Text, and predefined EntityReference * nodes into a single string. If the list contains other types of nodes, * those other nodes are ignored. */ public static String getText(NodeList nodeList) { StringBuffer buffer = new StringBuffer(); for(int i = 0; i < nodeList.getLength(); i++) { Node node = nodeList.item(i); switch(node.getNodeType()) { case Node.CDATA_SECTION_NODE : case Node.TEXT_NODE : buffer.append(node.getNodeValue()); break; case Node.ENTITY_REFERENCE_NODE : if(node.getNodeName().equals("amp")) buffer.append('&'); else if(node.getNodeName().equals("lt")) buffer.append('<'); else if(node.getNodeName().equals("gt")) buffer.append('>'); else if(node.getNodeName().equals("apos")) buffer.append('\''); else if(node.getNodeName().equals("quot")) buffer.append('"'); // Any other entity references are ignored break; default : // All other nodes are ignored } } return buffer.toString(); } /** * Writes <CODE>string</CODE> into writer, escaping &, ', ", <, and > * with the XML excape strings. */ public static void writeEscapedString(Writer writer, String string) throws IOException { for(int i = 0; i < string.length(); i++) { char c = string.charAt(i); if(c == '<') writer.write("<"); else if(c == '>') writer.write(">"); else if(c == '&') writer.write("&"); else if(c == '\'') writer.write("'"); else if(c == '"') writer.write("""); else writer.write(c); } } /** * Reads all the bytes from the passed input stream till end of stream * reached. * @param in The input stream to read from * @return array of bytes read * @exception IOException If any I/O exception occurs while reading data */ public static byte[] readFully(InputStream in) throws IOException { //create a new byte array stream to store the read data ByteArrayOutputStream byteArray = new ByteArrayOutputStream(); //read the bytes till EOF byte[] buffer = new byte[1024]; int bytesRead; while((bytesRead = in.read(buffer)) != -1) { //append the bytes read to the byteArray buffer byteArray.write(buffer,0,bytesRead); } //return the bytes read return byteArray.toByteArray(); } /** * Compares the queryDoc with the replyDoc and finds out if the * replyDoc is a match for the queryDoc * @param queryDoc The query Document * @param replyDoc potential reply Document * @return true if the replyDoc is a match for the queryDoc, false * otherwise */ public static boolean match(LimeXMLDocument replyDoc, LimeXMLDocument queryDoc, boolean allowAllNulls) { if(queryDoc == null || replyDoc == null) throw new NullPointerException("querying with null doc."); //First find the names of all the fields in the query Set queryNameValues = queryDoc.getNameValueSet(); int size = queryNameValues.size(); int matchCount = 0; // number of matches int nullCount = 0; // number of fields in query not in replyDoc. boolean matchedBitrate = false; for (Iterator i = queryNameValues.iterator(); i.hasNext(); ) { Map.Entry entry = (Map.Entry)i.next(); String currFieldName = (String)entry.getKey(); String queryValue = (String)entry.getValue(); Assert.that(queryValue != null, "null value"); if (queryValue.equals("")) continue; // "" matches everything!! String replyDocValue = replyDoc.getValue(currFieldName); if (currFieldName.endsWith("license_type__") && queryValue.length() > 0) { if (replyDocValue == null || !replyDocValue.startsWith(queryValue)) return false; } if (replyDocValue == null || replyDocValue.equals("")) nullCount++; else { try { // if this is a parse-able numeric value, doing a prefix // matching doesn't make sense. cast it to a double and do // a straight equals comparison double rDVD = (new Double(replyDocValue)).doubleValue(); double qVD = (new Double(queryValue)).doubleValue(); if (rDVD == qVD) { matchCount++; if (currFieldName.equals(AUDIO_BITRATE_ATTR)) matchedBitrate = true; } continue; } catch (NumberFormatException nfe) { // just roll through and try to do a normal test... } // we used to do a .equalsIgnoreCase, but that is a little too // rigid. so do a ignore case prefix match. String queryValueLC = queryValue.toLowerCase(Locale.US); String replyDocValueLC = I18NConvert.instance().getNorm(replyDocValue); if (replyDocValueLC.startsWith(queryValueLC) || replyDocValueLC.indexOf(" " + queryValueLC) >= 0) matchCount++; } } // The metric of a correct match is that whatever fields are specified // in the query must have prefix match* with the fields in the reply // unless the reply has a null for that feild, in which case we are OK // with letting it slide. also, %MATCHING_RATE of the fields must // either be a prefix match or null. // We make an exception for queries of size 1 field. In this case, there // must be a 100% match (which is trivially >= %MATCHING_RATE) // * prefix match assumes a string; for numerics just do an equality test double sizeD = size; double matchCountD = matchCount; double nullCountD = nullCount; if (size > 1) { if (matchedBitrate) { // discount a bitrate match. matching bitrate's shouldn't // influence the logic because where size is 2, a matching // bitrate will result in a lot of irrelevant results. sizeD--; matchCountD--; matchCount--; } if (((nullCountD + matchCountD)/sizeD) < MATCHING_RATE) return false; // ok, it passed rate test, now make sure it had SOME matches... if (allowAllNulls || matchCount > 0) return true; else return false; } else if (size == 1) { if(allowAllNulls && nullCount == 1) return true; if(matchCountD/sizeD < 1) return false; return true; } //this should never happen - size >0 return false; } public static boolean isMP3File(File in) { return isMP3File(in.getName()); } public static boolean isMP3File(String in) { return in.toLowerCase(Locale.US).endsWith(".mp3"); } public static boolean isRIFFFile(File f) { return isRIFFFile(f.getName()); } public static boolean isRIFFFile(String in) { return in.toLowerCase(Locale.US).endsWith(".avi"); } public static boolean isOGMFile(File f) { return isOGMFile(f.getName()); } public static boolean isOGMFile(String in) { return in.toLowerCase(Locale.US).endsWith(".ogm"); } public static boolean isOGGFile(File in) { return isOGGFile(in.getName()); } public static boolean isOGGFile(String in) { return in.toLowerCase(Locale.US).endsWith(".ogg"); } public static boolean isFLACFile(File in) { return isFLACFile(in.getName()); } public static boolean isFLACFile(String in) { in = in.toLowerCase(Locale.US); return in.endsWith(".flac") || in.endsWith(".fla"); } public static boolean isM4AFile(File in) { return isM4AFile(in.getName()); } public static boolean isM4AFile(String in) { in = in.toLowerCase(Locale.US); return in.endsWith(".m4a")|| in.endsWith(".m4p"); } public static boolean isWMAFile(File f) { return isWMAFile(f.getName()); } public static boolean isWMAFile(String in) { return in.toLowerCase(Locale.US).endsWith(".wma"); } public static boolean isWMVFile(File f) { return isWMVFile(f.getName()); } public static boolean isWMVFile(String in) { return in.toLowerCase(Locale.US).endsWith(".wmv"); } public static boolean isASFFile(File f) { return isASFFile(f.getName()); } public static boolean isASFFile(String in) { in = in.toLowerCase(Locale.US); return in.endsWith(".asf") || in.endsWith(".wm"); } public static boolean isSupportedAudioFormat(File file) { return isSupportedAudioFormat(file.getName()); } public static boolean isSupportedAudioFormat(String file) { return isMP3File(file) || isOGGFile(file) || isM4AFile(file) || isWMAFile(file) || isFLACFile(file); } public static boolean isSupportedVideoFormat(File file) { return isSupportedVideoFormat(file.getName()); } public static boolean isSupportedVideoFormat(String file) { return isRIFFFile(file) || isOGMFile(file) || isWMVFile(file); } public static boolean isSupportedMultipleFormat(File file) { return isSupportedMultipleFormat(file.getName()); } public static boolean isSupportedMultipleFormat(String file) { return isASFFile(file); } public static boolean isSupportedFormat(File file) { return isSupportedFormat(file.getName()); } public static boolean isSupportedFormat(String file) { return isSupportedAudioFormat(file) || isSupportedVideoFormat(file) || isSupportedMultipleFormat(file); } /** * @return whether LimeWire supports writing metadata into the file of specific type. * (we may be able to parse the metadata, but not annotate it) */ public static boolean isEditableFormat(File file) { return isEditableFormat(file.getName()); } public static boolean isEditableFormat(String file) { return isMP3File(file) || isOGGFile(file); } public static boolean isSupportedFormatForSchema(File file, String schemaURI) { if(isSupportedMultipleFormat(file)) return true; else if("http://www.limewire.com/schemas/audio.xsd".equals(schemaURI)) return isSupportedAudioFormat(file); else if("http://www.limewire.com/schemas/video.xsd".equals(schemaURI)) return isSupportedVideoFormat(file); else return false; } public static boolean isFilePublishable(File file) { return isMP3File(file.getName()) || isOGGFile(file.getName()); } /** * Parses the passed string, and encodes the special characters (used in * xml for special purposes) with the appropriate codes. * e.g. '<' is changed to '<' * @return the encoded string. Returns null, if null is passed as argument */ public static String encodeXML(String inData) { //return null, if null is passed as argument if(inData == null) return null; //if no special characters, just return //(for optimization. Though may be an overhead, but for most of the //strings, this will save time) if((inData.indexOf('&') == -1) && (inData.indexOf('<') == -1) && (inData.indexOf('>') == -1) && (inData.indexOf('\'') == -1) && (inData.indexOf('\"') == -1)) { return inData; } //get the length of input String int length = inData.length(); //create a StringBuffer of double the size (size is just for guidance //so as to reduce increase-capacity operations. The actual size of //the resulting string may be even greater than we specified, but is //extremely rare) StringBuffer buffer = new StringBuffer(2 * length); char charToCompare; //iterate over the input String for(int i=0; i < length; i++) { charToCompare = inData.charAt(i); //if the ith character is special character, replace by code if(charToCompare == '&') { buffer.append("&"); } else if(charToCompare == '<') { buffer.append("<"); } else if(charToCompare == '>') { buffer.append(">"); } else if(charToCompare == '\"') { buffer.append("""); } else if(charToCompare == '\'') { buffer.append("'"); } else { buffer.append(charToCompare); } } //return the encoded string return buffer.toString(); } /** @return A properly formatted version of the input data. */ public static byte[] compress(byte[] data) { byte[] compressedData = null; if (shouldCompress(data)) compressedData = compressZLIB(data); byte[] retBytes = null; if (compressedData != null) { retBytes = new byte[COMPRESS_HEADER_ZLIB.length() + compressedData.length]; System.arraycopy(COMPRESS_HEADER_ZLIB.getBytes(), 0, retBytes, 0, COMPRESS_HEADER_ZLIB.length()); System.arraycopy(compressedData, 0, retBytes, COMPRESS_HEADER_ZLIB.length(), compressedData.length); } else { // essentially compress failed, just send prefixed raw data.... retBytes = new byte[COMPRESS_HEADER_NONE.length() + data.length]; System.arraycopy(COMPRESS_HEADER_NONE.getBytes(), 0, retBytes, 0, COMPRESS_HEADER_NONE.length()); System.arraycopy(data, 0, retBytes, COMPRESS_HEADER_NONE.length(), data.length); } return retBytes; } /** Currently, all data is compressed. In the future, this will handle * heuristics about whether data should be compressed or not. */ private static boolean shouldCompress(byte[] data) { if (data.length >= 1000) return true; else return false; } /** Returns a ZLIB'ed version of data. */ private static byte[] compressZLIB(byte[] data) { DeflaterOutputStream gos = null; try { ByteArrayOutputStream baos=new ByteArrayOutputStream(); gos=new DeflaterOutputStream(baos); gos.write(data, 0, data.length); gos.flush(); gos.close(); // required to flush data -- flush doesn't do it. // System.out.println("compression savings: " + ((1-((double)baos.toByteArray().length/(double)data.length))*100) + "%"); return baos.toByteArray(); } catch (IOException e) { //This should REALLY never happen because no devices are involved. //But could we propogate it up. Assert.that(false, "Couldn't write to byte stream"); return null; } finally { IOUtils.close(gos); } } /** Returns a GZIP'ed version of data. */ /* private static byte[] compressGZIP(byte[] data) { try { ByteArrayOutputStream baos=new ByteArrayOutputStream(); DeflaterOutputStream gos=new GZIPOutputStream(baos); gos.write(data, 0, data.length); gos.flush(); gos.close(); //flushes bytes // System.out.println("compression savings: " + ((1-((double)baos.toByteArray().length/(double)data.length))*100) + "%"); return baos.toByteArray(); } catch (IOException e) { //This should REALLY never happen because no devices are involved. //But could we propogate it up. Assert.that(false, "Couldn't write to byte stream"); return null; } } */ /** @return Correctly uncompressed data (according to Content-Type header) * May return a byte[] of length 0 if something bad happens. */ public static byte[] uncompress(byte[] data) throws IOException { byte[] retBytes = new byte[0]; String headerFragment = new String(data, 0, C_HEADER_BEGIN.length()); if (headerFragment.equals(C_HEADER_BEGIN)) { // we have well formed input (so far) boolean found = false; int i=0; for(; i<data.length && !found; i++) if(data[i]==(byte)125) found = true; //We know know that "{" is at 1 because we are in this if block headerFragment = new String(data,1,i-1-1); int comp = getCompressionType(headerFragment); if (comp == NONE) { retBytes = new byte[data.length-(headerFragment.length()+2)]; System.arraycopy(data, i, retBytes, 0, data.length-(headerFragment.length()+2)); } else if (comp == GZIP) { retBytes = new byte[data.length-COMPRESS_HEADER_GZIP.length()]; System.arraycopy(data, COMPRESS_HEADER_GZIP.length(), retBytes, 0, data.length-COMPRESS_HEADER_GZIP.length()); retBytes = uncompressGZIP(retBytes); } else if (comp == ZLIB) { retBytes = new byte[data.length-COMPRESS_HEADER_ZLIB.length()]; System.arraycopy(data, COMPRESS_HEADER_ZLIB.length(), retBytes, 0, data.length-COMPRESS_HEADER_ZLIB.length()); retBytes = uncompressZLIB(retBytes); } else ; // uncompressible XML, just drop it on the floor.... } else return data; // the Content-Type header is optional, assumes PT return retBytes; } private static int getCompressionType(String header) { String s = header.trim(); if(s.equals("") || s.equalsIgnoreCase(C_HEADER_NONE_VAL)) return NONE; else if(s.equalsIgnoreCase(C_HEADER_GZIP_VAL)) return GZIP; else if(s.equalsIgnoreCase(C_HEADER_ZLIB_VAL)) return ZLIB; else return -1; } /** Returns the uncompressed version of the given ZLIB'ed bytes. Throws * IOException if the data is corrupt. */ private static byte[] uncompressGZIP(byte[] data) throws IOException { ByteArrayInputStream bais=new ByteArrayInputStream(data); InflaterInputStream gis = null; try { gis =new GZIPInputStream(bais); ByteArrayOutputStream baos=new ByteArrayOutputStream(); while (true) { int b=gis.read(); if (b==-1) break; baos.write(b); } return baos.toByteArray(); } finally { IOUtils.close(gis); } } /** Returns the uncompressed version of the given ZLIB'ed bytes. Throws * IOException if the data is corrupt. */ private static byte[] uncompressZLIB(byte[] data) throws IOException { ByteArrayInputStream bais=new ByteArrayInputStream(data); InflaterInputStream gis = null; try { gis =new InflaterInputStream(bais); ByteArrayOutputStream baos=new ByteArrayOutputStream(); while (true) { int b=gis.read(); if (b==-1) break; baos.write(b); } return baos.toByteArray(); } finally { IOUtils.close(gis); } } private static final int NUM_BYTES_TO_HASH = 100; private static final int NUM_TOTAL_HASH = NUM_BYTES_TO_HASH*3; private static void clearHashBytes(byte[] hashBytes) { for (int i = 0; i < NUM_BYTES_TO_HASH; i++) hashBytes[i] = (byte)0; } /** * Hashes the file using bits and pieces of the file. * * @return The SHA hash bytes of the input bytes. * @throws IOException if hashing failed for any reason. */ public static byte[] hashFile(File toHash) throws IOException { byte[] retBytes = null; FileInputStream fis = null; byte[] hashBytes = new byte[NUM_BYTES_TO_HASH]; try { // setup fis = new FileInputStream(toHash); MessageDigest md = null; try { md = MessageDigest.getInstance("SHA"); } catch(NoSuchAlgorithmException nsae) { Assert.that(false, "no sha algorithm."); } long fileLength = toHash.length(); if (fileLength < NUM_TOTAL_HASH) { int numRead = 0; do { clearHashBytes(hashBytes); numRead = fis.read(hashBytes); md.update(hashBytes); // if the file changed underneath me, throw away... if (toHash.length() != fileLength) throw new IOException("invalid length"); } while (numRead == NUM_BYTES_TO_HASH); } else { // need to do some mathy stuff....... long thirds = fileLength / 3; // beginning input.... clearHashBytes(hashBytes); fis.read(hashBytes); md.update(hashBytes); // if the file changed underneath me, throw away... if (toHash.length() != fileLength) throw new IOException("invalid length"); // middle input... clearHashBytes(hashBytes); fis.skip(thirds - NUM_BYTES_TO_HASH); fis.read(hashBytes); md.update(hashBytes); // if the file changed underneath me, throw away... if (toHash.length() != fileLength) throw new IOException("invalid length"); // ending input.... clearHashBytes(hashBytes); fis.skip(toHash.length() - (thirds + NUM_BYTES_TO_HASH) - NUM_BYTES_TO_HASH); fis.read(hashBytes); md.update(hashBytes); // if the file changed underneath me, throw away... if (toHash.length() != fileLength) throw new IOException("invalid length"); } retBytes = md.digest(); } finally { if (fis != null) fis.close(); } return retBytes; } }