package com.limegroup.gnutella.xml;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import com.limegroup.gnutella.util.NameValue;
import com.limegroup.gnutella.licenses.CCConstants;
import com.limegroup.gnutella.licenses.License;
import com.limegroup.gnutella.licenses.LicenseConstants;
import com.limegroup.gnutella.licenses.LicenseFactory;
import com.limegroup.gnutella.metadata.WeedInfo;
import com.limegroup.gnutella.metadata.WRMXML;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
/**
 * A LimeXMLDocument is basically a hashmap that maps the
 * names of fields to their values, as per an XML document.
 *
 * @author Sumeet Thadani
 */
public class LimeXMLDocument implements Serializable {
/** Logger for this class; used for debug output in setFields(). */
private static final Log LOG = LogFactory.getLog(LimeXMLDocument.class);
/** Key suffix of the identifier attribute. It is removed from the field map, not stored. */
public static final String XML_ID_ATTRIBUTE = "identifier__";
/** Key suffix of the action attribute. Its value is copied into 'action' by setFields(). */
public static final String XML_ACTION_ATTRIBUTE = "action__";
/** Key suffix of the index attribute. It is removed from the field map by scanFields(). */
public static final String XML_INDEX_ATTRIBUTE = "index__";
/** Key suffix of the license attribute, read by setFields() to determine the license type. */
public static final String XML_LICENSE_ATTRIBUTE = "license__";
/** Key suffix of the license-type attribute; keys ending in it are treated as indivisible. */
public static final String XML_LICENSE_TYPE_ATTRIBUTE = "licensetype__";
/**
* The current version of LimeXMLDocuments.
*
* Increment this number as features are added which require
* reparsing documents on disk.
*/
private static final int CURRENT_VERSION = 2;
/**
* Cached hash code for this instance. Zero means "not computed yet";
* hashCode() fills it in on first use.
*/
private volatile transient int hashCode = 0;
/** For backwards compatibility with downloads.dat. */
private static final long serialVersionUID = 7396170507085078485L;
//TODO2: Need to build in the ability to work with multiple instances
//of some fields.
/**
* Map of canonical attribute name -> value.
* Two of the constructors replace this initial map with one supplied
* by the parser or by the caller.
*/
private Map fieldToValue = new HashMap();
/**
* The schema of this LimeXMLDocument.
*/
private String schemaUri;
/**
* The cached string of attributes, built lazily by getAttributeString().
* Transient, so it is rebuilt on demand after deserialization.
*/
private transient String attributeString;
/**
* The file this is related to. Can be null if pure meta-data.
*/
private transient File fileId;
/**
* The action that this doc has. Null when the field map carries no
* action attribute; getAction() turns null into an empty string.
*/
private transient String action;
/**
* The version of this LimeXMLDocument.
*/
private int version = CURRENT_VERSION;
/** Returns true if this document's version equals CURRENT_VERSION. */
boolean isCurrent() { return version == CURRENT_VERSION; }
/** Marks this document as being at CURRENT_VERSION. */
void setCurrent() { version = CURRENT_VERSION; }
/**
* Cached list of keywords. Because keywords are only filled up
* upon construction, they can be cached upon retrieval.
*/
private transient List CACHED_KEYWORDS = null;
/** The kind of license this has; recomputed by setFields(). */
private transient int licenseType = LicenseConstants.NO_LICENSE;
/**
 * Builds a LimeXMLDocument by parsing the supplied XML text.
 * The first parsed element becomes the field map of this document.
 *
 * @param xml the XML to parse
 * @throws SAXException if xml is null or empty
 * @throws SchemaNotFoundException if the parse result has no schema URI
 * @throws IOException if no element was parsed, or if the resulting
 *  document does not pass isValid()
 */
public LimeXMLDocument(String xml)
  throws SAXException, SchemaNotFoundException, IOException {
    if(xml == null || xml.length() == 0)
        throw new SAXException("null or empty string");

    XMLParsingUtils.ParseResult parsed =
        XMLParsingUtils.parse(new InputSource(new StringReader(xml)));
    if(parsed.isEmpty())
        throw new IOException("No element present");
    if(parsed.schemaURI == null)
        throw new SchemaNotFoundException("no schema");

    // adopt the first parsed element as our fields
    this.fieldToValue = (Map)parsed.get(0);
    this.schemaUri = parsed.schemaURI;
    setFields(parsed.canonicalKeyPrefix);

    if(!isValid())
        throw new IOException("Invalid XML: " + xml);
}
/**
 * Builds a LimeXMLDocument around an existing map of fields.
 * The map is stored as-is (not copied) and is modified: the identifier
 * attribute is removed from it, and setFields() may add a license type.
 *
 * @param map Map with keys in canonicalized form and their values;
 *  must not be empty
 * @param schemaURI the schema URI of the document being created
 * @param keyPrefix the canonical key prefix that precedes the attribute
 *  names in the keys of map
 * @throws IllegalArgumentException if map is empty
 * @throws IOException if the resulting document does not pass isValid()
 */
LimeXMLDocument(Map map, String schemaURI, String keyPrefix)
  throws IOException {
    if(map.isEmpty())
        throw new IllegalArgumentException("empty map");

    this.fieldToValue = map;
    this.schemaUri = schemaURI;

    // the identifier is not kept as a field
    map.remove(keyPrefix + XML_ID_ATTRIBUTE);
    setFields(keyPrefix);

    if(isValid())
        return;
    throw new IOException("invalid doc! "+map+" \nschema uri: "+schemaURI);
}
/**
 * Builds a LimeXMLDocument from a collection of name/value pairs.
 * Each name is trimmed before it is stored.
 *
 * @param nameValueList Collection (of Map.Entry) of field names (in
 *  canonicalized form) and their values; must not be empty
 * @param schemaURI the schema URI of the document being created
 * @throws IllegalArgumentException if nameValueList is empty or the
 *  resulting document does not pass isValid()
 */
public LimeXMLDocument(Collection nameValueList, String schemaURI) {
    if(nameValueList.isEmpty())
        throw new IllegalArgumentException("empty list");

    this.schemaUri = schemaURI;

    // copy every pair into our own map
    Iterator pairs = nameValueList.iterator();
    while(pairs.hasNext()) {
        Map.Entry pair = (Map.Entry)pairs.next();
        String rawName = (String)pair.getKey();
        Object fieldValue = pair.getValue();
        fieldToValue.put(rawName.trim(), fieldValue);
    }

    // pick up the action & license, drop the index & identifier
    scanFields();

    if(!isValid())
        throw new IllegalArgumentException("Invalid Doc!");
}
/**
 * Checks whether this LimeXMLDocument is usable.
 *
 * @return true only if there is a schema URI, that URI resolves to a
 *  LimeXMLSchema, and the attribute string is not empty
 */
boolean isValid() {
    // the schema must be named and must be resolvable
    boolean schemaKnown = schemaUri != null && getSchema() != null;
    // an empty attribute string means there are no valid attributes
    return schemaKnown && getAttributeString().length() != 0;
}
/**
 * Deserialization hook: restores the serialized fields, then re-scans
 * the field map so that the transient action & license type are set
 * again.
 *
 * @param stream the stream this object is being read from
 * @throws IOException if the default read fails
 * @throws ClassNotFoundException if the default read fails
 */
private void readObject(java.io.ObjectInputStream stream)
  throws IOException, ClassNotFoundException {
    stream.defaultReadObject();
    // transient state is not in the stream; derive it from the fields
    scanFields();
}
/**
 * Returns how many entries the field map of this document holds.
 */
public int getNumFields() {
    int fieldCount = fieldToValue.size();
    return fieldCount;
}
/**
 * Returns the values of all the non-numeric fields in this. These are
 * not necessarily QRP keywords. For example, one of the elements of
 * the returned list may be "Some comment-blah". QRP code may want to
 * split this into the QRP keywords "Some", "comment", and "blah".
 *
 * Null values, empty values, values that Double.parseDouble accepts,
 * and values of indivisible fields are left out. To retrieve the
 * indivisible keywords (those which QRP will not split up), use
 * getKeyWordsIndivisible().
 *
 * The list is built on the first call and the same list instance is
 * returned on later calls.
 */
public List getKeyWords() {
    List keywords = CACHED_KEYWORDS;
    if(keywords == null) {
        keywords = new ArrayList();
        for(Iterator fields = fieldToValue.entrySet().iterator(); fields.hasNext(); ) {
            Map.Entry field = (Map.Entry)fields.next();
            String name = (String)field.getKey();
            String text = (String)field.getValue();
            if(text == null || text.equals("") || isIndivisible(name))
                continue;
            try {
                // numbers are not keywords; a failed parse means 'text'
                Double.parseDouble(text);
            } catch(NumberFormatException notANumber) {
                keywords.add(text);
            }
        }
        CACHED_KEYWORDS = keywords;
    }
    return keywords;
}
/**
 * Returns the indivisible keywords for entry into QRP tables, as
 * given by LicenseConstants for the license type of this document.
 */
public List getKeyWordsIndivisible() {
    List indivisible = LicenseConstants.getIndivisible(licenseType);
    return indivisible;
}
/**
 * Tells whether the value stored under the given key is indivisible
 * (meaning QRP must not split it).
 *
 * @param currKey the canonicalized field name to check
 * @return true if the key ends with XML_LICENSE_TYPE_ATTRIBUTE
 */
private boolean isIndivisible(String currKey) {
    // Only the license-type is indivisible.
    // For weed licenses this works because getKeyWordsIndivisible
    // returns a list of only 'WeedInfo.LAINFO'; the content-id &
    // version-id are essentially lost & ignored.
    boolean isLicenseType = currKey.endsWith(XML_LICENSE_TYPE_ATTRIBUTE);
    return isLicenseType;
}
/**
 * Returns the URI that identifies the schema this XML document
 * conforms to.
 */
public String getSchemaURI() {
    return this.schemaUri;
}
/**
 * Looks up the LimeXMLSchema registered for this document's schema
 * URI in the LimeXMLSchemaRepository.
 *
 * @return the schema; callers in this class check the result for
 *  null, so it is presumably null when the URI is not known
 */
public LimeXMLSchema getSchema() {
    LimeXMLSchemaRepository repository = LimeXMLSchemaRepository.instance();
    return repository.getSchema(schemaUri);
}
/**
 * Returns a description of this document's schema: the schema's own
 * description if the schema can be found, otherwise the display
 * string derived from the schema URI.
 */
public String getSchemaDescription() {
    LimeXMLSchema known = getSchema();
    if(known == null)
        return LimeXMLSchema.getDisplayString(schemaUri);
    return known.getDescription();
}
/**
 * Returns the file that the data in this XML document corresponds to.
 * If the meta-data does not correspond to any file in the file
 * system, this method will return null.
 */
public File getIdentifier() {
    return this.fileId;
}
/**
 * Sets the identifier, i.e. the file this document relates to.
 *
 * The identifier takes part in both equals() and hashCode(), and
 * hashCode() caches its result. The cached value is therefore cleared
 * here, so that the next hashCode() call reflects the new identifier
 * and stays consistent with equals().
 *
 * @param id the file this document describes; may be null for pure
 *  meta-data
 */
public void setIdentifier(File id) {
    fileId = id;
    // 0 is the "not computed yet" marker used by hashCode()
    hashCode = 0;
}
/**
 * Returns the action of this LimeXMLDocument, or an empty string if
 * it has none. Never returns null.
 */
public String getAction() {
    return action != null ? action : "";
}
/**
 * Returns the entries of the field map: a Set of Map.Entry, where each
 * key is a canonicalized field name (placeholder) and each value is
 * the corresponding value in the XML document. The set is the entry
 * view of the underlying map, not a copy.
 * <p>
 * Canonicalization:
 * <p>
 * To preserve the structure, Structure.Field is represented as
 * Structure__Field (a double underscore is the delimiter that
 * represents the structure).
 * <p>
 * Multiple structured values with the same name, as might occur when
 * + or * is used in the regular expressions of the schema, are
 * represented with the array index in the __ notation (without the
 * square brackets),
 * e.g. myarray[0].name ==> myarray__0__name
 * <p>
 * Attribute names of an element in the XML schema are postfixed
 * with __ (double underscore).
 * So element.attribute ==> element__attribute__
 *
 * @return a Set of Map.Entry, one per canonicalized field name and
 *  its value in the XML document
 */
public Set getNameValueSet() {
    Set entries = fieldToValue.entrySet();
    return entries;
}
/**
 * Returns the canonicalized field names of this LimeXMLDocument, as
 * the key view of the underlying field map.
 */
public Set getNameSet() {
    Set names = fieldToValue.keySet();
    return names;
}
/**
 * Returns the field values of this LimeXMLDocument, as the value view
 * of the underlying field map.
 */
public Collection getValueList() {
    Collection values = fieldToValue.values();
    return values;
}
/**
 * Tells whether this LimeXMLDocument carries a license it knows
 * about, i.e. whether its license type is anything other than
 * LicenseConstants.NO_LICENSE.
 */
public boolean isLicenseAvailable() {
    boolean unlicensed = (licenseType == LicenseConstants.NO_LICENSE);
    return !unlicensed;
}
/**
 * Returns a string that can be used to verify if this license is
 * valid: the value of the first field whose name ends with the
 * verifiable element for this document's license type.
 *
 * @return the license string, or null if there is no license, the
 *  license type has no verifiable element, or no field matches
 */
public String getLicenseString() {
    if(!isLicenseAvailable())
        return null;

    String suffix = getVerifiableLicenseElement(licenseType);
    if(suffix == null)
        return null;

    Iterator fields = fieldToValue.entrySet().iterator();
    while(fields.hasNext()) {
        Map.Entry field = (Map.Entry)fields.next();
        String name = (String)field.getKey();
        if(name.endsWith(suffix))
            return (String)field.getValue();
    }
    return null;
}
/**
 * Picks the attribute suffix whose value is used to verify a license
 * of the given type.
 *
 * @param type a license type, as used by LicenseConstants
 * @return XML_LICENSE_ATTRIBUTE for a CC license,
 *  XML_LICENSE_TYPE_ATTRIBUTE for a DRM license, null otherwise
 */
private static String getVerifiableLicenseElement(int type) {
    String element = null;
    if(type == LicenseConstants.CC_LICENSE)
        element = LimeXMLDocument.XML_LICENSE_ATTRIBUTE;
    else if(LicenseConstants.isDRMLicense(type))
        element = LimeXMLDocument.XML_LICENSE_TYPE_ATTRIBUTE;
    return element;
}
/**
 * Returns the License created by LicenseFactory from this document's
 * license string, or null if there is no license string.
 */
public License getLicense() {
    String licenseString = getLicenseString();
    return licenseString == null ? null : LicenseFactory.create(licenseString);
}
/**
 * Returns the fields of this document as a list of NameValue, in the
 * order in which the schema lists its canonicalized field names.
 * Schema fields for which this document holds no value are skipped.
 */
public List getOrderedNameValueList() {
    String[] schemaFields = getSchema().getCanonicalizedFieldNames();
    List ordered = new ArrayList(schemaFields.length);
    for(int idx = 0; idx < schemaFields.length; idx++) {
        String fieldName = schemaFields[idx].trim();
        Object stored = fieldToValue.get(fieldName);
        if(stored == null)
            continue; // this document has no such field
        ordered.add(new NameValue(fieldName, stored));
    }
    return ordered;
}
/**
 * Returns the value stored under the given canonicalized field name,
 * or null if the field map holds nothing for it.
 */
public String getValue(String fieldName) {
    Object stored = fieldToValue.get(fieldName);
    return (String)stored;
}
/**
 * Builds the XML string of this document: the attribute string is
 * closed with "/>" and handed, together with the schema, to
 * LimeXMLDocumentHelper.buildXML.
 */
public String getXMLString() {
    StringBuffer xml = new StringBuffer();
    LimeXMLSchema schema = getSchema();
    String closedElement = getAttributeString() + "/>";
    LimeXMLDocumentHelper.buildXML(xml, schema, closedElement);
    return xml.toString();
}
/**
 * Returns the attribute string, closed off with the given index as
 * its last attribute.
 *
 * For example, with an index of 4 this will return:
 *  <thing att1="value1" att2="value2" att3="value3" index="4"/>
 *
 * @param i the index to append
 */
public String getAttributeStringWithIndex(int i) {
    return getAttributeString() + " index=\"" + i + "\"/>";
}
/**
 * Returns the attribute string, constructing and caching it on first
 * use. THIS IS NOT A FULL XML ELEMENT: it is purposely left unclosed
 * so an index can easily be inserted.
 */
private String getAttributeString() {
    String cached = attributeString;
    if(cached == null) {
        cached = constructAttributeString();
        attributeString = cached;
    }
    return cached;
}
/**
 * Constructs the open-ended XML element that contains the attributes,
 * in schema order. The element is purposely not closed, so that an
 * index can easily be inserted.
 *
 * @return the unclosed element, or an empty string if this document
 *  has no attributes (which callers treat as invalid)
 */
private String constructAttributeString() {
    List ordered = getOrderedNameValueList();
    if(ordered.isEmpty())
        return ""; // invalid.

    LimeXMLSchema schema = getSchema();
    String rootName = schema.getRootXMLName();
    String elementName = schema.getInnerXMLName();
    String keyPrefix = rootName + "__" + elementName + "__";

    StringBuffer element = new StringBuffer();
    element.append("<").append(elementName);

    for(int idx = 0; idx < ordered.size(); idx++) {
        NameValue pair = (NameValue)ordered.get(idx);
        String attribute = XMLStringUtils.getLastField(keyPrefix, pair.getName());
        if(attribute != null) {
            // Construct: ' attribute="value"'
            element.append(" ").append(attribute).append("=\"");
            element.append(LimeXMLUtils.encodeXML((String)pair.getValue()));
            element.append("\"");
        }
    }
    return element.toString();
}
/**
 * Overrides equals: two LimeXMLDocuments are equal when their schema
 * URI, file identifier, action and field map are all equal (or both
 * null).
 *
 * @param o the object to compare
 * @return <tt>true</tt> if the objects are equal, <tt>false</tt>
 *  otherwise
 */
public boolean equals(Object o) {
    if(this == o)
        return true;
    // instanceof is false for null as well
    if(!(o instanceof LimeXMLDocument))
        return false;

    LimeXMLDocument other = (LimeXMLDocument)o;

    if(schemaUri == null ? other.schemaUri != null
                         : !schemaUri.equals(other.schemaUri))
        return false;
    if(fileId == null ? other.fileId != null
                      : !fileId.equals(other.fileId))
        return false;
    if(action == null ? other.action != null
                      : !action.equals(other.action))
        return false;
    if(fieldToValue == null)
        return other.fieldToValue == null;
    return fieldToValue.equals(other.fieldToValue);
}
/**
 * Overrides <tt>Object.hashCode</tt> to satisfy the contract for
 * hashCode, given that we're overriding equals. The hash combines the
 * field map, schema URI, file identifier and action (skipping those
 * that are null), and is cached once computed.
 *
 * @return a hashcode for this object for use in hash-based collections
 */
public int hashCode() {
    int cached = hashCode;
    if(cached != 0)
        return cached;

    int computed = 17;
    if(fieldToValue != null)
        computed = 37*computed + fieldToValue.hashCode();
    if(schemaUri != null)
        computed = 37*computed + schemaUri.hashCode();
    if(fileId != null)
        computed = 37*computed + fileId.hashCode();
    if(action != null)
        computed = 37*computed + action.hashCode();

    hashCode = computed;
    return computed;
}
/**
 * Returns the XML string of this document, as built by
 * getXMLString().
 */
public String toString() {
    String xml = getXMLString();
    return xml;
}
/**
 * Looks in the fields for the ACTION, IDENTIFIER, and INDEX, and a
 * license. The action & license type are stored (via setFields); the
 * index & identifier are removed from the field map. Does nothing if
 * no canonical key can be derived from the fields.
 */
private void scanFields() {
    String prefix = getCanonicalKey(getNameValueSet());
    if(prefix != null) {
        setFields(prefix);
        fieldToValue.remove(prefix + XML_INDEX_ATTRIBUTE);
        fieldToValue.remove(prefix + XML_ID_ATTRIBUTE);
    }
}
/**
 * Reads the action and the license information out of the field map
 * and stores them in this LimeXMLDocument.
 *
 * @param prefix the canonical key prefix that precedes the attribute
 *  names in the keys of the field map
 */
private void setFields(String prefix) {
    final String licenseTypeKey = prefix + XML_LICENSE_TYPE_ATTRIBUTE;

    // remember the action.
    action = (String)fieldToValue.get(prefix + XML_ACTION_ATTRIBUTE);

    String license = (String)fieldToValue.get(prefix + XML_LICENSE_ATTRIBUTE);
    String type = (String)fieldToValue.get(licenseTypeKey);

    if(LOG.isDebugEnabled())
        LOG.debug("type: " + type);

    // The rules for classifying a license from its 'license' and
    // 'license type' fields are in LicenseConstants.determineLicenseType.
    licenseType = LicenseConstants.determineLicenseType(license, type);

    // a CC license gets its license type field set to the CC URI prefix
    if(licenseType == LicenseConstants.CC_LICENSE)
        fieldToValue.put(licenseTypeKey, CCConstants.CC_URI_PREFIX);

    if(LOG.isDebugEnabled())
        LOG.debug("Fields after setting: " + fieldToValue);
}
/**
 * Derives the canonical key from a collection of Map.Entry's, by
 * looking at the key of the first entry only. The canonical key is
 * everything up to and including the second delimiter of that key,
 * i.e. the x__x__ of x__x__<other stuff here>.
 *
 * @param entries the entries to inspect
 * @return the canonical key, or null if entries is empty or the first
 *  key does not contain two delimiters
 */
private String getCanonicalKey(Collection entries) {
    if(entries.isEmpty())
        return null;

    Map.Entry first = (Map.Entry)entries.iterator().next();
    String key = (String)first.getKey();

    int firstDelimiter = key.indexOf(XMLStringUtils.DELIMITER);
    int secondDelimiter = key.indexOf(XMLStringUtils.DELIMITER, firstDelimiter + 1);

    // 2 == XMLStringUtils.DELIMITER.length()
    return secondDelimiter == -1 ? null : key.substring(0, secondDelimiter + 2);
}
}