package com.limegroup.gnutella.xml; import java.io.File; import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import com.limegroup.gnutella.util.NameValue; import com.limegroup.gnutella.licenses.CCConstants; import com.limegroup.gnutella.licenses.License; import com.limegroup.gnutella.licenses.LicenseConstants; import com.limegroup.gnutella.licenses.LicenseFactory; import com.limegroup.gnutella.metadata.WeedInfo; import com.limegroup.gnutella.metadata.WRMXML; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.Log; /** * @author Sumeet Thadani * A LimeXMLDocument is basically a hashmap that maps a * Names of fields to the values as per a XML document. */ public class LimeXMLDocument implements Serializable { private static final Log LOG = LogFactory.getLog(LimeXMLDocument.class); public static final String XML_ID_ATTRIBUTE = "identifier__"; public static final String XML_ACTION_ATTRIBUTE = "action__"; public static final String XML_INDEX_ATTRIBUTE = "index__"; public static final String XML_LICENSE_ATTRIBUTE = "license__"; public static final String XML_LICENSE_TYPE_ATTRIBUTE = "licensetype__"; /** * The current version of LimeXMLDocuments. * * Increment this number as features are added which require * reparsing documents on disk. */ private static final int CURRENT_VERSION = 2; /** * Cached hash code for this instance. */ private volatile transient int hashCode = 0; /** For backwards compatibility with downloads.dat. */ private static final long serialVersionUID = 7396170507085078485L; //TODO2: Need to build in the ability to work with multiple instances //of some fields. /** * Map of canonical attribute name -> value. */ private Map fieldToValue = new HashMap(); /** * The schema of this LimeXMLDocument. */ private String schemaUri; /** * The cached string of attributes. */ private transient String attributeString; /** * The file this is related to. Can be null if pure meta-data. */ private transient File fileId; /** * The action that this doc has. */ private transient String action; /** * The version of this LimeXMLDocument. */ private int version = CURRENT_VERSION; boolean isCurrent() { return version == CURRENT_VERSION; } void setCurrent() { version = CURRENT_VERSION; } /** * Cached list of keywords. Because keywords are only filled up * upon construction, they can be cached upon retrieval. */ private transient List CACHED_KEYWORDS = null; /** The kind of license this has. */ private transient int licenseType = LicenseConstants.NO_LICENSE; /** * Constructs a LimeXMLDocument with the given string. */ public LimeXMLDocument(String xml) throws SAXException, SchemaNotFoundException, IOException { if(xml==null || xml.equals("")) throw new SAXException("null or empty string"); InputSource doc = new InputSource(new StringReader(xml)); XMLParsingUtils.ParseResult result = XMLParsingUtils.parse(doc); if (result.isEmpty()) throw new IOException("No element present"); if (result.schemaURI == null) throw new SchemaNotFoundException("no schema"); this.fieldToValue = (Map)result.get(0); this.schemaUri = result.schemaURI; setFields(result.canonicalKeyPrefix); if(!isValid()) throw new IOException("Invalid XML: " + xml); } /** * Constructs a new LimeXMLDocument * @param map Map with keys in canonicalized * form and corresponding values that will be used to create the * new instance * @param schemaURI The schema URI for the LimeXMLDocument to be * created */ LimeXMLDocument(Map map, String schemaURI, String keyPrefix) throws IOException { if(map.isEmpty()) throw new IllegalArgumentException("empty map"); this.schemaUri = schemaURI; this.fieldToValue = map; fieldToValue.remove(keyPrefix + XML_ID_ATTRIBUTE); // remove id. setFields(keyPrefix); if(!isValid()) throw new IOException("invalid doc! "+map+" \nschema uri: "+schemaURI); } /** * Constructs a new LimeXMLDocument * @param nameValueList List (of Map.Entry) of fieldnames (in canonicalized * form) and corresponding values that will be used to create the * new instance * @param schemaURI The schema URI for the LimeXMLDocument to be * created */ public LimeXMLDocument(Collection nameValueList, String schemaURI) { if(nameValueList.isEmpty()) throw new IllegalArgumentException("empty list"); //set the schema URI this.schemaUri = schemaURI; //iterate over the passed list of fieldnames & values for(Iterator i = nameValueList.iterator(); i.hasNext(); ) { Map.Entry next = (Map.Entry)i.next(); String name = (String)next.getKey(); Object value = next.getValue(); fieldToValue.put(name.trim(), value); } // scan for action/id/etc.. scanFields(); if(!isValid()) throw new IllegalArgumentException("Invalid Doc!"); } /** * Determines whether or not this LimeXMLDocument is valid. */ boolean isValid() { // no schemaURI or the schemaURI doesn't map to a LimeXMLSchema if(schemaUri == null || getSchema() == null) return false; // no valid attributes. if(getAttributeString().length() == 0) return false; return true; } /** * Reads the object and initializes transient fields. */ private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); scanFields(); } /** * Returns the number of fields this document has. */ public int getNumFields() { return fieldToValue.size(); } /** * Returns all the non-numeric fields in this. These are * not necessarily QRP keywords. For example, one of the * elements of the returned list may be "Some comment-blah". * QRP code may want to split this into the QRP keywords * "Some", "comment", and "blah". * * Indivisible keywords are not returned. To retrieve those, * use getIndivisibleKeywords(). Indivisible keywords are * those which QRP will not split up. */ public List getKeyWords() { if( CACHED_KEYWORDS != null ) return CACHED_KEYWORDS; List retList = new ArrayList(); Iterator iter = fieldToValue.keySet().iterator(); while(iter.hasNext()){ String currKey = (String) iter.next(); String val = (String) fieldToValue.get(currKey); if(val != null && !val.equals("") && !isIndivisible(currKey)) { try { Double.parseDouble(val); // will trigger NFE. } catch(NumberFormatException ignored) { retList.add(val); } } } CACHED_KEYWORDS = retList; return retList; } /** * Returns all the indivisible keywords for entry into QRP tables. */ public List getKeyWordsIndivisible() { return LicenseConstants.getIndivisible(licenseType); } /** * Determines if this keyword & value is indivisible * (thus making QRP not split it). */ private boolean isIndivisible(String currKey) { //the license-type is always indivisible. //note that for weed licenses, this works because getKeyWordsIndivisible //is returning a list of only 'WeedInfo.LAINFO'. the content-id & version-id //are essentially lost & ignored. return currKey.endsWith(XML_LICENSE_TYPE_ATTRIBUTE); } /** * Returns the unique identifier which identifies the schema this XML * document conforms to */ public String getSchemaURI() { return schemaUri; } /** * Returns the LimeXMLSchema associated with this XML document. */ public LimeXMLSchema getSchema() { return LimeXMLSchemaRepository.instance().getSchema(schemaUri); } /** * Returns the description of the schema URI. */ public String getSchemaDescription() { LimeXMLSchema schema = getSchema(); if(schema != null) return schema.getDescription(); else return LimeXMLSchema.getDisplayString(schemaUri); } /** * Returns the name of the file that the data in this XML document * corresponds to. If the meta-data does not correspond to any file * in the file system, this method will rerurn a null. */ public File getIdentifier() { return fileId; } /** * Sets the identifier. */ public void setIdentifier(File id) { fileId = id; } /** * Returns the action corresponding with this LimeXMLDocument. */ public String getAction() { if(action == null) return ""; else return action; } /** * Returns a Set of Map.Entry, where each key-value corresponds to a * Canonicalized field name (placeholder), and its corresponding value in * the XML Document. * <p> * Canonicalization: * <p> * So as to preserve the structure, Structure.Field will be represented as * Structure__Field (Double Underscore is being used as a delimiter to * represent the structure). *<p> * In case of multiple structured values with same name, * as might occur while using + or * in the regular expressions in schema, * those should be represented as using the array index using the __ * notation (withouth the square brackets) * for e.g. myarray[0].name ==> myarray__0__name * * attribute names for an element in the XML schema should be postfixed * with __ (double underscore). * So element.attribute ==> element__attribute__ * * @return a Set of Map.Entry, where each key-value corresponds to a * canonicalized field name (placeholder), and its corresponding value in * the XML Document. */ public Set getNameValueSet() { return fieldToValue.entrySet(); } /** * Returns a set of the names within this LimeXMLDocument. */ public Set getNameSet() { return fieldToValue.keySet(); } /** * Returns a collection of the values of this LimeXMLDocument. */ public Collection getValueList() { return fieldToValue.values(); } /** * Determines if a license exists that this LimeXMLDocument knows about. */ public boolean isLicenseAvailable() { return licenseType != LicenseConstants.NO_LICENSE; } /** * Returns a string that can be used to verify if this license is valid. */ public String getLicenseString() { if(isLicenseAvailable()) { String licenseStringSuffix = getVerifiableLicenseElement(licenseType); if (licenseStringSuffix == null) return null; for(Iterator i = fieldToValue.entrySet().iterator(); i.hasNext(); ) { Map.Entry next = (Map.Entry)i.next(); String key = (String)next.getKey(); if (key.endsWith(licenseStringSuffix)) return (String)next.getValue(); } } return null; } private static String getVerifiableLicenseElement(int type) { if (type == LicenseConstants.CC_LICENSE) return LimeXMLDocument.XML_LICENSE_ATTRIBUTE; if (LicenseConstants.isDRMLicense(type)) return LimeXMLDocument.XML_LICENSE_TYPE_ATTRIBUTE; return null; } /** * Returns the license. */ public License getLicense() { String license = getLicenseString(); if(license != null) return LicenseFactory.create(license); else return null; } /** * Returns a list of attributes and their values in the same order * as is in the schema. */ public List getOrderedNameValueList() { String[] fNames = getSchema().getCanonicalizedFieldNames(); List retList = new ArrayList(fNames.length); for (int i = 0; i < fNames.length; i++) { String name = fNames[i].trim(); Object value = fieldToValue.get(name); if (value != null) retList.add(new NameValue(name, value)); } return retList; } /** * Returns the value associated with this canonicalized fieldname. */ public String getValue(String fieldName) { return (String)fieldToValue.get(fieldName); } /** * Constructs an XML string from this document. */ public String getXMLString() { StringBuffer fullXML = new StringBuffer(); LimeXMLDocumentHelper.buildXML(fullXML, getSchema(), getAttributeString() + "/>"); return fullXML.toString(); } /** * Returns the attribute string with the given index. * * For example, this will return: * <thing att1="value1" att2="value2" att3="value3" index="4"/> */ public String getAttributeStringWithIndex(int i) { String attributes = getAttributeString(); return attributes + " index=\"" + i + "\"/>"; } /** * Returns the attribute string. THIS IS NOT A FULL XML ELEMENT. * It is purposely left unclosed so an index can easily be inserted. */ private String getAttributeString() { if(attributeString == null) attributeString = constructAttributeString(); return attributeString; } /** * Constructs the open-ended XML that contains the attributes. * This is purposely open-ended so that an index can easily be * inserted. * If no attributes exist, this returns an empty string, * to easily be marked as invalid. */ private String constructAttributeString() { List attributes = getOrderedNameValueList(); if(attributes.isEmpty()) return ""; // invalid. StringBuffer tag = new StringBuffer(); String root = getSchema().getRootXMLName(); String type = getSchema().getInnerXMLName(); String canonicalKey = root + "__" + type + "__"; tag.append("<"); tag.append(type); for(Iterator i = attributes.iterator(); i.hasNext(); ) { NameValue nv = (NameValue)i.next(); String name = XMLStringUtils.getLastField(canonicalKey, nv.getName()); if(name == null) continue; // Construct: ' attribute="value"' tag.append(" "); tag.append(name); tag.append("=\""); tag.append(LimeXMLUtils.encodeXML((String)nv.getValue())); tag.append("\""); } return tag.toString(); } /** * Overrides equals to check for equality of all xml document fields. * * @param o the object to compare * @return <tt>true</tt> if the objects are equal, <tt>false</tt> * otherwise */ public boolean equals(Object o) { if(o == this) return true; if(o == null) return false; if(!(o instanceof LimeXMLDocument)) return false; LimeXMLDocument xmlDoc = (LimeXMLDocument)o; return ((schemaUri == null ? xmlDoc.schemaUri == null : schemaUri.equals(xmlDoc.schemaUri)) && (fileId == null ? xmlDoc.fileId == null : fileId.equals(xmlDoc.fileId)) && (action == null ? xmlDoc.action == null : action.equals(xmlDoc.action)) && (fieldToValue == null ? xmlDoc.fieldToValue == null : fieldToValue.equals(xmlDoc.fieldToValue))); } /** * Overrides <tt>Object.hashCode</tt> to satisfy the contract for * hashCode, given that we're overriding equals. * * @return a hashcode for this object for use in hash-based collections */ public int hashCode() { if(hashCode == 0) { int result = 17; if(fieldToValue != null) result = 37*result + fieldToValue.hashCode(); if(schemaUri != null) result = 37*result + schemaUri.hashCode(); if(fileId != null) result = 37*result + fileId.hashCode(); if(action != null) result = 37*result + action.hashCode(); hashCode = result; } return hashCode; } /** * Returns the XML identifier for the string. */ public String toString() { return getXMLString(); } /** * Looks in the fields for the ACTION, IDENTIFIER, and INDEX, and a license. * Action is stored, index & identifier are removed. */ private void scanFields() { String canonicalKey = getCanonicalKey(getNameValueSet()); if(canonicalKey == null) return; setFields(canonicalKey); fieldToValue.remove(canonicalKey + XML_INDEX_ATTRIBUTE); fieldToValue.remove(canonicalKey + XML_ID_ATTRIBUTE); } /** * Stores whether or not an action or CC license are in this LimeXMLDocument. */ private void setFields(String prefix) { // store action. action = (String)fieldToValue.get(prefix + XML_ACTION_ATTRIBUTE); // deal with updating license_type based on the license String license = (String)fieldToValue.get(prefix + XML_LICENSE_ATTRIBUTE); String type = (String)fieldToValue.get(prefix + XML_LICENSE_TYPE_ATTRIBUTE); if(LOG.isDebugEnabled()) LOG.debug("type: " + type); // Do specific stuff on licenseType for various licenses. // CC licenses require that the 'license' field has the CC_URI_PREFIX & CC_URL_INDICATOR // somewhere. Weed licenses require that the 'license type' field has WeedInfo.LINFO, // a content id & a version id. licenseType = LicenseConstants.determineLicenseType(license, type); if (licenseType == LicenseConstants.CC_LICENSE) fieldToValue.put(prefix + XML_LICENSE_TYPE_ATTRIBUTE, CCConstants.CC_URI_PREFIX); if(LOG.isDebugEnabled()) LOG.debug("Fields after setting: " + fieldToValue); } /** * Derives a canonicalKey from a collection of Map.Entry's. */ private String getCanonicalKey(Collection entries) { if(entries.isEmpty()) return null; Map.Entry firstEntry = (Map.Entry)entries.iterator().next(); String firstKey = (String)firstEntry.getKey(); // The canonicalKey is always going to be x__x__<other stuff here> int idx = firstKey.indexOf(XMLStringUtils.DELIMITER); idx = firstKey.indexOf(XMLStringUtils.DELIMITER, idx+1); // not two delimiters? can't find the canonicalKey if(idx == -1) return null; // 2 == XMLStringUtils.DELIMITER.length() return firstKey.substring(0, idx + 2); } }