/*
* LimeXMLUtils.java
*
* Created on April 30, 2001, 4:51 PM
*/
package com.limegroup.gnutella.xml;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.Writer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.InflaterInputStream;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import com.limegroup.gnutella.Assert;
import com.limegroup.gnutella.util.I18NConvert;
import com.limegroup.gnutella.util.IOUtils;
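/**
* Static utility methods used by the XML metadata code: reading and escaping
* XML text, matching a query document against a reply document, classifying
* files by their extension, compressing/uncompressing byte payloads and
* hashing files.
* @author asingla
*/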
public class LimeXMLUtils {
/**
* Minimum value of (matched + null fields) / (query fields) that match()
* requires when the query has more than one field.
*/
private static final double MATCHING_RATE = .9;
// Pieces of the compression header that prefixes a payload, e.g. "{deflate}":
// the opening and closing delimiters ...
private static final String C_HEADER_BEGIN = "{";
private static final String C_HEADER_END = "}";
// ... and the header values recognised (case-insensitively) by
// getCompressionType().
private static final String C_HEADER_NONE_VAL = "plaintext";
private static final String C_HEADER_ZLIB_VAL = "deflate";
private static final String C_HEADER_GZIP_VAL = "gzip";
// Complete headers: "{deflate}", "{gzip}" and the empty header "{}" that
// compress() puts in front of data it leaves uncompressed.
private static final String COMPRESS_HEADER_ZLIB =
C_HEADER_BEGIN + C_HEADER_ZLIB_VAL + C_HEADER_END;
private static final String COMPRESS_HEADER_GZIP =
C_HEADER_BEGIN + C_HEADER_GZIP_VAL + C_HEADER_END;
private static final String COMPRESS_HEADER_NONE =
C_HEADER_BEGIN + C_HEADER_END;
// Compression type codes returned by getCompressionType(); it returns -1 for
// a header value it does not recognise.
private static final int NONE = 0;
private static final int GZIP = 1;
private static final int ZLIB = 2;
/**
* Name of the field that match() treats as the audio bitrate: a numeric match
* on this field is discounted when the query has more than one field.
*/
public static final String AUDIO_BITRATE_ATTR = "audios__audio__bitrate__";
/**
* Reads the whole file as text and wraps it in an InputSource.
* The file is read line by line and every line is re-terminated with a
* single "\n", so the original line separators are normalised and non-empty
* content always ends with a newline. No other whitespace is changed.
* @param file The file to read
* @return An InputSource backed by an in-memory copy of the file's text
* @exception IOException If the file cannot be opened or read
*/
public static InputSource getInputSource(File file) throws IOException {
    StringBuffer contents = new StringBuffer();
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new FileReader(file));
        // readLine() strips the line terminator, so put a newline back
        for (String current = reader.readLine();
             current != null;
             current = reader.readLine()) {
            contents.append(current).append("\n");
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    return new InputSource(new StringReader(contents.toString()));
}
/**
* Returns the text held by the direct children of the given node, as
* collected by getText(NodeList).
* Stands in for Node.getTextContent(), which is not available on every JDK.
*/
public static String getTextContent(Node node) {
    NodeList children = node.getChildNodes();
    return getText(children);
}
/**
* Concatenates the text of the nodes in the list into one string.
* Text and CDATASection nodes contribute their node value; an
* EntityReference contributes a character only when it names one of the five
* predefined XML entities (amp, lt, gt, apos, quot). Every other node, and
* every other entity reference, is skipped.
*/
public static String getText(NodeList nodeList) {
    StringBuffer text = new StringBuffer();
    for (int idx = 0; idx < nodeList.getLength(); idx++) {
        Node current = nodeList.item(idx);
        short type = current.getNodeType();
        if (type == Node.CDATA_SECTION_NODE || type == Node.TEXT_NODE) {
            text.append(current.getNodeValue());
        } else if (type == Node.ENTITY_REFERENCE_NODE) {
            String entity = current.getNodeName();
            if (entity.equals("amp")) {
                text.append('&');
            } else if (entity.equals("lt")) {
                text.append('<');
            } else if (entity.equals("gt")) {
                text.append('>');
            } else if (entity.equals("apos")) {
                text.append('\'');
            } else if (entity.equals("quot")) {
                text.append('"');
            }
            // any other entity reference contributes nothing
        }
        // nodes of any other type contribute nothing
    }
    return text.toString();
}
/**
* Writes <CODE>string</CODE> into writer, escaping &, ', ", <, and >
* with the XML excape strings.
*/
public static void writeEscapedString(Writer writer, String string)
throws IOException {
for(int i = 0; i < string.length(); i++) {
char c = string.charAt(i);
if(c == '<')
writer.write("<");
else if(c == '>')
writer.write(">");
else if(c == '&')
writer.write("&");
else if(c == '\'')
writer.write("'");
else if(c == '"')
writer.write(""");
else
writer.write(c);
}
}
/**
* Drains the given input stream, returning everything read up to end of
* stream. The stream is not closed.
* @param in The input stream to read from
* @return the bytes read, in order; an empty array if the stream was
* already at end of stream
* @exception IOException If any I/O exception occurs while reading data
*/
public static byte[] readFully(InputStream in) throws IOException {
    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    byte[] chunk = new byte[1024];
    // copy chunk by chunk until read() reports end of stream
    for (int count = in.read(chunk); count != -1; count = in.read(chunk)) {
        collected.write(chunk, 0, count);
    }
    return collected.toByteArray();
}
/**
* Decides whether replyDoc is an acceptable match for queryDoc.
* Every field of the query is compared with the reply's value for the same
* field and is counted as a match, as a "null" (the reply has no value, or
* an empty value, for that field) or as neither. The reply is accepted when
* a large enough share of the query's fields are matches or nulls; see the
* comment in front of the final checks for the exact rule.
* @param replyDoc potential reply Document
* @param queryDoc The query Document
* @param allowAllNulls if true, a reply can be accepted even though no query
* field actually matched, provided the fields were missing or empty in the
* reply
* @return true if the replyDoc is a match for the queryDoc, false
* otherwise
* @throws NullPointerException if either document is null
*/
public static boolean match(LimeXMLDocument replyDoc,
LimeXMLDocument queryDoc,
boolean allowAllNulls) {
if(queryDoc == null || replyDoc == null)
throw new NullPointerException("querying with null doc.");
//First find the names of all the fields in the query
Set queryNameValues = queryDoc.getNameValueSet();
// size counts every query field, including those whose value is "" and
// which are skipped in the loop below (they end up as neither match nor
// null).
int size = queryNameValues.size();
int matchCount = 0; // number of matches
int nullCount = 0; // number of query fields that are null or "" in replyDoc.
// set when the bitrate field produced a numeric match
boolean matchedBitrate = false;
for (Iterator i = queryNameValues.iterator(); i.hasNext(); ) {
Map.Entry entry = (Map.Entry)i.next();
String currFieldName = (String)entry.getKey();
String queryValue = (String)entry.getValue();
Assert.that(queryValue != null, "null value");
if (queryValue.equals(""))
continue; // "" matches everything!!
String replyDocValue = replyDoc.getValue(currFieldName);
// A license type in the query is mandatory: the reply must have a value
// for it that starts with the query value (case-sensitive), otherwise the
// reply is rejected outright. (queryValue is already known to be
// non-empty here because "" was skipped above.)
if (currFieldName.endsWith("license_type__") && queryValue.length() > 0) {
if (replyDocValue == null || !replyDocValue.startsWith(queryValue))
return false;
}
if (replyDocValue == null || replyDocValue.equals(""))
nullCount++;
else {
try {
// if this is a parse-able numeric value, doing a prefix
// matching doesn't make sense. cast it to a double and do
// a straight equals comparison
double rDVD = (new Double(replyDocValue)).doubleValue();
double qVD = (new Double(queryValue)).doubleValue();
if (rDVD == qVD) {
matchCount++;
if (currFieldName.equals(AUDIO_BITRATE_ATTR))
matchedBitrate = true;
}
// both values were numeric: unequal numbers count as neither a
// match nor a null, and the string comparison is skipped.
continue;
} catch (NumberFormatException nfe) {
// at least one value is not numeric:
// just roll through and try to do a normal test...
}
// we used to do a .equalsIgnoreCase, but that is a little too
// rigid. so do a ignore case prefix match.
String queryValueLC = queryValue.toLowerCase(Locale.US);
// NOTE(review): getNorm() presumably returns a normalised, lower-cased
// form of the reply value -- confirm against I18NConvert.
String replyDocValueLC = I18NConvert.instance().getNorm(replyDocValue);
// match if the reply value starts with the query value, or contains it
// directly after a space (i.e. at the start of a later word)
if (replyDocValueLC.startsWith(queryValueLC) ||
replyDocValueLC.indexOf(" " + queryValueLC) >= 0)
matchCount++;
}
}
// The metric of a correct match is that whatever fields are specified
// in the query must have prefix match* with the fields in the reply
// unless the reply has a null for that field, in which case we are OK
// with letting it slide. also, %MATCHING_RATE of the fields must
// either be a prefix match or null.
// We make an exception for queries of size 1 field. In this case, there
// must be a 100% match (which is trivially >= %MATCHING_RATE)
// * prefix match assumes a string; for numerics just do an equality test
double sizeD = size;
double matchCountD = matchCount;
double nullCountD = nullCount;
if (size > 1) {
if (matchedBitrate) {
// discount a bitrate match. matching bitrate's shouldn't
// influence the logic because where size is 2, a matching
// bitrate will result in a lot of irrelevant results.
sizeD--;
matchCountD--;
matchCount--;
}
if (((nullCountD + matchCountD)/sizeD) < MATCHING_RATE)
return false;
// ok, it passed rate test, now make sure it had SOME matches...
if (allowAllNulls || matchCount > 0)
return true;
else
return false;
}
else if (size == 1) {
// single-field query: accept a null only when allowAllNulls is set,
// otherwise the one field must have matched.
if(allowAllNulls && nullCount == 1)
return true;
if(matchCountD/sizeD < 1)
return false;
return true;
}
// reached only when size is 0, i.e. the query has no fields at all:
// that is never a match.
return false;
}
/** Tells whether the name of the given file ends with ".mp3" (case-insensitive). */
public static boolean isMP3File(File in) {
    String fileName = in.getName();
    return isMP3File(fileName);
}
/** Tells whether the given file name ends with ".mp3" (case-insensitive). */
public static boolean isMP3File(String in) {
    String lowered = in.toLowerCase(Locale.US);
    return lowered.endsWith(".mp3");
}
/** Tells whether the name of the given file ends with ".avi" (case-insensitive). */
public static boolean isRIFFFile(File f) {
    String fileName = f.getName();
    return isRIFFFile(fileName);
}
/** Tells whether the given file name ends with ".avi" (case-insensitive). */
public static boolean isRIFFFile(String in) {
    String lowered = in.toLowerCase(Locale.US);
    return lowered.endsWith(".avi");
}
/** Tells whether the name of the given file ends with ".ogm" (case-insensitive). */
public static boolean isOGMFile(File f) {
    String fileName = f.getName();
    return isOGMFile(fileName);
}
/** Tells whether the given file name ends with ".ogm" (case-insensitive). */
public static boolean isOGMFile(String in) {
    String lowered = in.toLowerCase(Locale.US);
    return lowered.endsWith(".ogm");
}
/** Tells whether the name of the given file ends with ".ogg" (case-insensitive). */
public static boolean isOGGFile(File in) {
    String fileName = in.getName();
    return isOGGFile(fileName);
}
/** Tells whether the given file name ends with ".ogg" (case-insensitive). */
public static boolean isOGGFile(String in) {
    String lowered = in.toLowerCase(Locale.US);
    return lowered.endsWith(".ogg");
}
/** Tells whether the name of the given file ends with ".flac" or ".fla" (case-insensitive). */
public static boolean isFLACFile(File in) {
    String fileName = in.getName();
    return isFLACFile(fileName);
}
/** Tells whether the given file name ends with ".flac" or ".fla" (case-insensitive). */
public static boolean isFLACFile(String in) {
    String lowered = in.toLowerCase(Locale.US);
    if (lowered.endsWith(".flac")) {
        return true;
    }
    return lowered.endsWith(".fla");
}
/** Tells whether the name of the given file ends with ".m4a" or ".m4p" (case-insensitive). */
public static boolean isM4AFile(File in) {
    String fileName = in.getName();
    return isM4AFile(fileName);
}
/** Tells whether the given file name ends with ".m4a" or ".m4p" (case-insensitive). */
public static boolean isM4AFile(String in) {
    String lowered = in.toLowerCase(Locale.US);
    if (lowered.endsWith(".m4a")) {
        return true;
    }
    return lowered.endsWith(".m4p");
}
/** Tells whether the name of the given file ends with ".wma" (case-insensitive). */
public static boolean isWMAFile(File f) {
    String fileName = f.getName();
    return isWMAFile(fileName);
}
/** Tells whether the given file name ends with ".wma" (case-insensitive). */
public static boolean isWMAFile(String in) {
    String lowered = in.toLowerCase(Locale.US);
    return lowered.endsWith(".wma");
}
/** Tells whether the name of the given file ends with ".wmv" (case-insensitive). */
public static boolean isWMVFile(File f) {
    String fileName = f.getName();
    return isWMVFile(fileName);
}
/** Tells whether the given file name ends with ".wmv" (case-insensitive). */
public static boolean isWMVFile(String in) {
    String lowered = in.toLowerCase(Locale.US);
    return lowered.endsWith(".wmv");
}
/** Tells whether the name of the given file ends with ".asf" or ".wm" (case-insensitive). */
public static boolean isASFFile(File f) {
    String fileName = f.getName();
    return isASFFile(fileName);
}
/** Tells whether the given file name ends with ".asf" or ".wm" (case-insensitive). */
public static boolean isASFFile(String in) {
    String lowered = in.toLowerCase(Locale.US);
    if (lowered.endsWith(".asf")) {
        return true;
    }
    return lowered.endsWith(".wm");
}
/** Tells whether the file's name identifies one of the supported audio formats. */
public static boolean isSupportedAudioFormat(File file) {
    String fileName = file.getName();
    return isSupportedAudioFormat(fileName);
}
/**
* Tells whether the given file name identifies one of the supported audio
* formats: MP3, OGG, M4A, WMA or FLAC.
*/
public static boolean isSupportedAudioFormat(String file) {
    if (isMP3File(file)) {
        return true;
    }
    if (isOGGFile(file)) {
        return true;
    }
    if (isM4AFile(file)) {
        return true;
    }
    if (isWMAFile(file)) {
        return true;
    }
    return isFLACFile(file);
}
/** Tells whether the file's name identifies one of the supported video formats. */
public static boolean isSupportedVideoFormat(File file) {
    String fileName = file.getName();
    return isSupportedVideoFormat(fileName);
}
/**
* Tells whether the given file name identifies one of the supported video
* formats: RIFF (.avi), OGM or WMV.
*/
public static boolean isSupportedVideoFormat(String file) {
    if (isRIFFFile(file)) {
        return true;
    }
    if (isOGMFile(file)) {
        return true;
    }
    return isWMVFile(file);
}
/**
* Tells whether the file's name identifies a format that
* isSupportedFormatForSchema accepts for every schema.
*/
public static boolean isSupportedMultipleFormat(File file) {
    String fileName = file.getName();
    return isSupportedMultipleFormat(fileName);
}
/**
* Tells whether the given file name identifies a "multiple" format;
* currently that is exactly the ASF check.
*/
public static boolean isSupportedMultipleFormat(String file) {
    boolean asf = isASFFile(file);
    return asf;
}
/** Tells whether the file's name identifies any supported format. */
public static boolean isSupportedFormat(File file) {
    String fileName = file.getName();
    return isSupportedFormat(fileName);
}
/**
* Tells whether the given file name identifies a supported audio, video or
* "multiple" format.
*/
public static boolean isSupportedFormat(String file) {
    if (isSupportedAudioFormat(file)) {
        return true;
    }
    if (isSupportedVideoFormat(file)) {
        return true;
    }
    return isSupportedMultipleFormat(file);
}
/**
* @return whether LimeWire supports writing metadata into a file of this
* type (some formats can be parsed for metadata but not annotated).
*/
public static boolean isEditableFormat(File file) {
    String fileName = file.getName();
    return isEditableFormat(fileName);
}
/**
* @return whether the given file name identifies an editable format, which
* is the case for MP3 and OGG names only.
*/
public static boolean isEditableFormat(String file) {
    if (isMP3File(file)) {
        return true;
    }
    return isOGGFile(file);
}
/**
* Tells whether the file's format can be described by the given schema.
* A "multiple" format is accepted for any schema; otherwise the audio schema
* accepts supported audio formats, the video schema accepts supported video
* formats, and every other schema URI (including null) is rejected.
*/
public static boolean isSupportedFormatForSchema(File file, String schemaURI) {
    if (isSupportedMultipleFormat(file)) {
        return true;
    }
    if ("http://www.limewire.com/schemas/audio.xsd".equals(schemaURI)) {
        return isSupportedAudioFormat(file);
    }
    if ("http://www.limewire.com/schemas/video.xsd".equals(schemaURI)) {
        return isSupportedVideoFormat(file);
    }
    return false;
}
/** Tells whether the file is publishable, which is the case for MP3 and OGG file names. */
public static boolean isFilePublishable(File file) {
    String fileName = file.getName();
    if (isMP3File(fileName)) {
        return true;
    }
    return isOGGFile(fileName);
}
/**
* Parses the passed string, and encodes the special characters (used in
* xml for special purposes) with the appropriate codes.
* e.g. '<' is changed to '<'
* @return the encoded string. Returns null, if null is passed as argument
*/
public static String encodeXML(String inData)
{
//return null, if null is passed as argument
if(inData == null)
return null;
//if no special characters, just return
//(for optimization. Though may be an overhead, but for most of the
//strings, this will save time)
if((inData.indexOf('&') == -1)
&& (inData.indexOf('<') == -1)
&& (inData.indexOf('>') == -1)
&& (inData.indexOf('\'') == -1)
&& (inData.indexOf('\"') == -1))
{
return inData;
}
//get the length of input String
int length = inData.length();
//create a StringBuffer of double the size (size is just for guidance
//so as to reduce increase-capacity operations. The actual size of
//the resulting string may be even greater than we specified, but is
//extremely rare)
StringBuffer buffer = new StringBuffer(2 * length);
char charToCompare;
//iterate over the input String
for(int i=0; i < length; i++)
{
charToCompare = inData.charAt(i);
//if the ith character is special character, replace by code
if(charToCompare == '&')
{
buffer.append("&");
}
else if(charToCompare == '<')
{
buffer.append("<");
}
else if(charToCompare == '>')
{
buffer.append(">");
}
else if(charToCompare == '\"')
{
buffer.append(""");
}
else if(charToCompare == '\'')
{
buffer.append("'");
}
else
{
buffer.append(charToCompare);
}
}
//return the encoded string
return buffer.toString();
}
/**
* Frames the data for sending: a compression header followed by the payload.
* When shouldCompress() allows it and compressZLIB() succeeds, the result is
* "{deflate}" followed by the deflated bytes; otherwise it is the empty
* header "{}" followed by the data unchanged.
* @param data the raw bytes to frame
* @return a new array holding the header followed by the payload
*/
public static byte[] compress(byte[] data) {
    byte[] deflated = shouldCompress(data) ? compressZLIB(data) : null;
    if (deflated == null) {
        // not compressed (or compression failed): send prefixed raw data
        return prependHeader(COMPRESS_HEADER_NONE, data);
    }
    return prependHeader(COMPRESS_HEADER_ZLIB, deflated);
}
/** Returns a new array made of the header's bytes followed by the payload. */
private static byte[] prependHeader(String header, byte[] payload) {
    int headerLength = header.length();
    byte[] framed = new byte[headerLength + payload.length];
    System.arraycopy(header.getBytes(), 0, framed, 0, headerLength);
    System.arraycopy(payload, 0, framed, headerLength, payload.length);
    return framed;
}
/**
* Decides whether compression should be attempted for the data: currently
* only payloads of at least 1000 bytes qualify. Further heuristics may be
* added here in the future.
*/
private static boolean shouldCompress(byte[] data) {
    return data.length >= 1000;
}
/**
* Returns the data compressed with a DeflaterOutputStream, or null if
* writing to the in-memory stream fails (after raising an assertion).
*/
private static byte[] compressZLIB(byte[] data) {
    DeflaterOutputStream deflaterOut = null;
    try {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        deflaterOut = new DeflaterOutputStream(sink);
        deflaterOut.write(data, 0, data.length);
        deflaterOut.flush();
        // flush() alone does not emit everything; close() completes the stream
        deflaterOut.close();
        byte[] deflated = sink.toByteArray();
        return deflated;
    } catch (IOException e) {
        // Not expected, since everything happens in memory.
        Assert.that(false, "Couldn't write to byte stream");
        return null;
    } finally {
        IOUtils.close(deflaterOut);
    }
}
/** Returns a GZIP'ed version of data. */
/*
private static byte[] compressGZIP(byte[] data) {
try {
ByteArrayOutputStream baos=new ByteArrayOutputStream();
DeflaterOutputStream gos=new GZIPOutputStream(baos);
gos.write(data, 0, data.length);
gos.flush();
gos.close(); //flushes bytes
// System.out.println("compression savings: " + ((1-((double)baos.toByteArray().length/(double)data.length))*100) + "%");
return baos.toByteArray();
} catch (IOException e) {
//This should REALLY never happen because no devices are involved.
//But could we propogate it up.
Assert.that(false, "Couldn't write to byte stream");
return null;
}
} */
/**
* Strips the optional compression header from the data and uncompresses the
* payload accordingly.
* The header has the form "{type}" at the very start of the data. Input that
* does not start with "{" (including empty input) has no header and is
* returned as is. "{}" and "{plaintext}" mean the payload is returned
* unchanged, "{gzip}" and "{deflate}" mean it is uncompressed first. The
* payload always starts right after the closing "}", whatever the exact
* spelling of the header (case, surrounding blanks).
* @param data the possibly prefixed, possibly compressed bytes
* @return the uncompressed payload; an array of length 0 if the header is
* not terminated by "}" or names an unknown compression type
* @exception IOException if the compressed payload is corrupt
*/
public static byte[] uncompress(byte[] data) throws IOException {
    // The header is optional: without a leading "{" the input is plain text.
    // This also covers empty input, which has no first byte to inspect.
    if (data.length == 0 || data[0] != (byte) C_HEADER_BEGIN.charAt(0))
        return data;

    // Locate the "}" that terminates the header.
    byte headerEnd = (byte) C_HEADER_END.charAt(0);
    int close = -1;
    for (int i = 1; i < data.length && close == -1; i++) {
        if (data[i] == headerEnd)
            close = i;
    }
    if (close == -1)
        return new byte[0]; // unterminated header, drop it on the floor

    // Header values are plain ASCII words; decode them independently of the
    // platform's default charset.
    String header = new String(data, 1, close - 1, "US-ASCII");
    int comp = getCompressionType(header);
    if (comp != NONE && comp != GZIP && comp != ZLIB)
        return new byte[0]; // uncompressible XML, just drop it on the floor

    // The payload is everything after the header actually found in the data.
    int payloadStart = close + 1;
    byte[] payload = new byte[data.length - payloadStart];
    System.arraycopy(data, payloadStart, payload, 0, payload.length);

    if (comp == GZIP)
        return uncompressGZIP(payload);
    if (comp == ZLIB)
        return uncompressZLIB(payload);
    return payload;
}
/**
* Maps the text between the header's braces to a compression type code.
* Surrounding whitespace is ignored and the comparison is case-insensitive.
* @return NONE for an empty value or "plaintext", GZIP for "gzip", ZLIB for
* "deflate", and -1 for anything else
*/
private static int getCompressionType(String header) {
    String value = header.trim();
    if (value.equals("")) {
        return NONE;
    }
    if (value.equalsIgnoreCase(C_HEADER_NONE_VAL)) {
        return NONE;
    }
    if (value.equalsIgnoreCase(C_HEADER_GZIP_VAL)) {
        return GZIP;
    }
    if (value.equalsIgnoreCase(C_HEADER_ZLIB_VAL)) {
        return ZLIB;
    }
    return -1;
}
/** Returns the uncompressed version of the given GZIP'ed bytes. Throws
* IOException if the data is corrupt. */
private static byte[] uncompressGZIP(byte[] data) throws IOException {
    ByteArrayInputStream compressed = new ByteArrayInputStream(data);
    InflaterInputStream inflating = null;
    try {
        inflating = new GZIPInputStream(compressed);
        ByteArrayOutputStream plain = new ByteArrayOutputStream();
        // copy byte by byte until the stream reports its end
        for (int next = inflating.read(); next != -1; next = inflating.read()) {
            plain.write(next);
        }
        return plain.toByteArray();
    } finally {
        IOUtils.close(inflating);
    }
}
/** Returns the uncompressed version of the given ZLIB'ed (deflated) bytes.
* Throws IOException if the data is corrupt. */
private static byte[] uncompressZLIB(byte[] data) throws IOException {
    ByteArrayInputStream compressed = new ByteArrayInputStream(data);
    InflaterInputStream inflating = null;
    try {
        inflating = new InflaterInputStream(compressed);
        ByteArrayOutputStream plain = new ByteArrayOutputStream();
        // copy byte by byte until the stream reports its end
        for (int next = inflating.read(); next != -1; next = inflating.read()) {
            plain.write(next);
        }
        return plain.toByteArray();
    } finally {
        IOUtils.close(inflating);
    }
}
/** Size, in bytes, of each chunk of the file that hashFile() feeds to the digest. */
private static final int NUM_BYTES_TO_HASH = 100;
/** Files shorter than this many bytes (three chunks) are hashed from start to end. */
private static final int NUM_TOTAL_HASH = NUM_BYTES_TO_HASH*3;
/** Sets the first NUM_BYTES_TO_HASH bytes of the given array to zero. */
private static void clearHashBytes(byte[] hashBytes) {
    int index = 0;
    while (index < NUM_BYTES_TO_HASH) {
        hashBytes[index] = (byte) 0;
        index++;
    }
}
/**
* Hashes the file using bits and pieces of the file rather than all of it.
* The file is fed to a SHA digest in zero-padded chunks of
* NUM_BYTES_TO_HASH bytes. A file shorter than NUM_TOTAL_HASH bytes is
* hashed chunk by chunk from start to end; a longer file contributes three
* chunks only: its first bytes, the bytes starting at one third of its
* length, and its last bytes. The file's length is re-checked after every
* chunk so that a file modified during hashing is rejected.
*
* @param toHash the file to hash
* @return The SHA hash bytes of the sampled input bytes.
* @throws IOException if hashing failed for any reason, including the file
* changing length while it is being hashed.
*/
public static byte[] hashFile(File toHash) throws IOException {
    byte[] retBytes = null;
    FileInputStream fis = null;
    byte[] hashBytes = new byte[NUM_BYTES_TO_HASH];

    try {
        // setup
        fis = new FileInputStream(toHash);
        MessageDigest md = null;
        try {
            md = MessageDigest.getInstance("SHA");
        } catch(NoSuchAlgorithmException nsae) {
            Assert.that(false, "no sha algorithm.");
            // only reached if the assertion above does not abort; fail
            // with the documented exception instead of a later NPE on md
            throw new IOException("no sha algorithm.");
        }

        long fileLength = toHash.length();
        if (fileLength < NUM_TOTAL_HASH) {
            // small file: hash every chunk; the last (short or empty) chunk
            // ends the loop
            int numRead = 0;
            do {
                numRead = hashNextChunk(fis, md, hashBytes);
                ensureLengthUnchanged(toHash, fileLength);
            } while (numRead == NUM_BYTES_TO_HASH);
        }
        else { // sample the beginning, the middle and the end
            long thirds = fileLength / 3;

            // beginning input....
            hashNextChunk(fis, md, hashBytes);
            ensureLengthUnchanged(toHash, fileLength);

            // middle input: one chunk has been consumed already, so this
            // moves the stream to offset 'thirds'
            skipFully(fis, thirds - NUM_BYTES_TO_HASH);
            hashNextChunk(fis, md, hashBytes);
            ensureLengthUnchanged(toHash, fileLength);

            // ending input: the stream is at thirds + NUM_BYTES_TO_HASH,
            // move it to the last NUM_BYTES_TO_HASH bytes of the file
            skipFully(fis, fileLength -
                           (thirds + NUM_BYTES_TO_HASH) -
                           NUM_BYTES_TO_HASH);
            hashNextChunk(fis, md, hashBytes);
            ensureLengthUnchanged(toHash, fileLength);
        }

        retBytes = md.digest();
    } finally {
        if (fis != null)
            fis.close();
    }
    return retBytes;
}
/**
* Zeroes the buffer, fills it from the stream as far as the stream allows
* and feeds the whole (zero-padded) buffer to the digest.
* @return the number of bytes actually read, 0 at end of stream
*/
private static int hashNextChunk(InputStream in, MessageDigest md,
                                 byte[] hashBytes) throws IOException {
    clearHashBytes(hashBytes);
    int total = 0;
    // a single read() may return fewer bytes than requested, keep going
    // until the buffer is full or the stream ends
    while (total < hashBytes.length) {
        int n = in.read(hashBytes, total, hashBytes.length - total);
        if (n == -1)
            break;
        total += n;
    }
    md.update(hashBytes);
    return total;
}
/**
* Skips exactly count bytes unless the stream ends first; a single skip()
* may skip fewer bytes than requested.
*/
private static void skipFully(InputStream in, long count) throws IOException {
    long remaining = count;
    while (remaining > 0) {
        long skipped = in.skip(remaining);
        if (skipped > 0) {
            remaining -= skipped;
        } else if (in.read() == -1) {
            return; // end of stream, nothing left to skip
        } else {
            remaining--;
        }
    }
}
/** Throws IOException if the file's length differs from the expected one. */
private static void ensureLengthUnchanged(File toHash, long expected)
  throws IOException {
    // if the file changed underneath me, throw away...
    if (toHash.length() != expected)
        throw new IOException("invalid length");
}
}