LimeXMLDocument.java example

Explorer
learning-bittorrent-master
package com.limegroup.gnutella.xml;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.limegroup.gnutella.util.NameValue;
import com.limegroup.gnutella.licenses.CCConstants;
import com.limegroup.gnutella.licenses.License;
import com.limegroup.gnutella.licenses.LicenseConstants;
import com.limegroup.gnutella.licenses.LicenseFactory;
import com.limegroup.gnutella.metadata.WeedInfo;
import com.limegroup.gnutella.metadata.WRMXML;

import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;


/**
 * @author  Sumeet Thadani
 * A LimeXMLDocument is basically a hashmap that maps a
 * Names of fields to the values as per a XML document.
 */
public class LimeXMLDocument implements Serializable {
    
    private static final Log LOG = LogFactory.getLog(LimeXMLDocument.class);

    public static final String XML_ID_ATTRIBUTE = "identifier__";
    public static final String XML_ACTION_ATTRIBUTE = "action__";
    public static final String XML_INDEX_ATTRIBUTE = "index__";
    public static final String XML_LICENSE_ATTRIBUTE = "license__";
    public static final String XML_LICENSE_TYPE_ATTRIBUTE = "licensetype__";
    
    /**
     * The current version of LimeXMLDocuments.
     *
     * Increment this number as features are added which require
     * reparsing documents on disk.
     */
    private static final int CURRENT_VERSION = 2;

	/**
	 * Cached hash code for this instance.
	 */
	private volatile transient int hashCode = 0;

    /** For backwards compatibility with downloads.dat. */
    private static final long serialVersionUID = 7396170507085078485L;

    //TODO2: Need to build in the ability to work with multiple instances
    //of some fields. 
    
    /**
     * Map of canonical attribute name -> value.
     */
    private Map fieldToValue = new HashMap();
    
    /**
     * The schema of this LimeXMLDocument.
     */
    private String schemaUri;
    
    /**
     * The cached string of attributes.
     */
    private transient String attributeString;

    /** 
     * The file this is related to.  Can be null if pure meta-data.
     */
    private transient File fileId;
    
    /**
     * The action that this doc has.
     */
    private transient String action;
    
    /**
     * The version of this LimeXMLDocument.
     */
    private int version = CURRENT_VERSION;
    boolean isCurrent() { return version == CURRENT_VERSION; }
    void setCurrent() { version = CURRENT_VERSION; }
    
    /**
     * Cached list of keywords.  Because keywords are only filled up
     * upon construction, they can be cached upon retrieval.
     */
    private transient List CACHED_KEYWORDS = null;
    
    /** The kind of license this has. */
    private transient int licenseType = LicenseConstants.NO_LICENSE;

    /**
     * Constructs a LimeXMLDocument with the given string.
     */
    public LimeXMLDocument(String xml)
      throws SAXException, SchemaNotFoundException, IOException {
        if(xml==null || xml.equals(""))
            throw new SAXException("null or empty string");

        InputSource doc = new InputSource(new StringReader(xml));
        XMLParsingUtils.ParseResult result = XMLParsingUtils.parse(doc);
        if (result.isEmpty())
            throw new IOException("No element present");
        if (result.schemaURI == null)
            throw new SchemaNotFoundException("no schema");

        this.fieldToValue = (Map)result.get(0);
        this.schemaUri = result.schemaURI;
        setFields(result.canonicalKeyPrefix);
        
        if(!isValid())
            throw new IOException("Invalid XML: " + xml);
    }

    /**
     * Constructs a new LimeXMLDocument
     * @param map Map with keys in canonicalized
     * form and corresponding values that will be used to create the 
     * new instance
     * @param schemaURI The schema URI for the LimeXMLDocument to be
     * created
     */    
    LimeXMLDocument(Map map, String schemaURI, String keyPrefix) 
      throws IOException {
        if(map.isEmpty())
            throw new IllegalArgumentException("empty map");

        this.schemaUri = schemaURI;
        this.fieldToValue = map;
        fieldToValue.remove(keyPrefix + XML_ID_ATTRIBUTE); // remove id.
        setFields(keyPrefix);
        
        if(!isValid())
            throw new IOException("invalid doc! "+map+" \nschema uri: "+schemaURI);
        
    }

    /**
     * Constructs a new LimeXMLDocument
     * @param nameValueList List (of Map.Entry) of fieldnames (in canonicalized
     * form) and corresponding values that will be used to create the 
     * new instance
     * @param schemaURI The schema URI for the LimeXMLDocument to be
     * created
     */
    public LimeXMLDocument(Collection nameValueList, String schemaURI) {
        if(nameValueList.isEmpty())
            throw new IllegalArgumentException("empty list");

        //set the schema URI
        this.schemaUri = schemaURI;
                
        //iterate over the passed list of fieldnames & values
        for(Iterator i = nameValueList.iterator(); i.hasNext(); ) {
            Map.Entry next = (Map.Entry)i.next();
            String name = (String)next.getKey();
            Object value = next.getValue();
            fieldToValue.put(name.trim(), value);
        }
        
        // scan for action/id/etc..
        scanFields();
        
        if(!isValid())
            throw new IllegalArgumentException("Invalid Doc!");
    }
    
    /**
     * Determines whether or not this LimeXMLDocument is valid.
     */
    boolean isValid() {
        // no schemaURI or the schemaURI doesn't map to a LimeXMLSchema
        if(schemaUri == null || getSchema() == null)
            return false;

        // no valid attributes.
        if(getAttributeString().length() == 0)
            return false;
            
        return true;
    }

    /**
     * Reads the object and initializes transient fields.
     */
    private void readObject(java.io.ObjectInputStream in)
      throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        scanFields();
    }

    /**
     * Returns the number of fields this document has.
     */
    public int getNumFields() {
        return fieldToValue.size();
    }

    /**
     * Returns all the non-numeric fields in this.  These are
     * not necessarily QRP keywords.  For example, one of the
     * elements of the returned list may be "Some comment-blah".
     * QRP code may want to split this into the QRP keywords
     * "Some", "comment", and "blah".
     *
     * Indivisible keywords are not returned.  To retrieve those,
     * use getIndivisibleKeywords().  Indivisible keywords are
     * those which QRP will not split up.
     */
    public List getKeyWords() {
        if( CACHED_KEYWORDS != null )
            return CACHED_KEYWORDS;

        List retList = new ArrayList();
        Iterator iter = fieldToValue.keySet().iterator();
        while(iter.hasNext()){
            String currKey = (String) iter.next();
            String val = (String) fieldToValue.get(currKey);
            if(val != null && !val.equals("") && !isIndivisible(currKey)) {
                try {
                    Double.parseDouble(val); // will trigger NFE.
                } catch(NumberFormatException ignored) {
                    retList.add(val);
                }
            }
        }
        CACHED_KEYWORDS = retList;
        return retList;
    }

    /**
     * Returns all the indivisible keywords for entry into QRP tables.
     */
    public List getKeyWordsIndivisible() {
        return LicenseConstants.getIndivisible(licenseType);
    }

    /**
     * Determines if this keyword & value is indivisible
     * (thus making QRP not split it).
     */
    private boolean isIndivisible(String currKey) {
        //the license-type is always indivisible.
        //note that for weed licenses, this works because getKeyWordsIndivisible
        //is returning a list of only 'WeedInfo.LAINFO'.  the content-id & version-id
        //are essentially lost & ignored.
        return currKey.endsWith(XML_LICENSE_TYPE_ATTRIBUTE);
    }    

    /**
     * Returns the unique identifier which identifies the schema this XML
     * document conforms to
     */
    public String getSchemaURI() {
        return schemaUri;
    }
    
    /**
     * Returns the LimeXMLSchema associated with this XML document.
     */
    public LimeXMLSchema getSchema() {
        return LimeXMLSchemaRepository.instance().getSchema(schemaUri);
    }
    
    /**
     * Returns the description of the schema URI.
     */
    public String getSchemaDescription() {
        LimeXMLSchema schema = getSchema();
        if(schema != null)
            return schema.getDescription();
        else
            return LimeXMLSchema.getDisplayString(schemaUri);
    }
    
    /**
     * Returns the name of the file that the data in this XML document 
     * corresponds to. If the meta-data does not correspond to any file
     * in the file system, this method will rerurn a null.
     */
    public File getIdentifier() {
        return fileId;
    }
    
    /**
     * Sets the identifier.
     */
    public void setIdentifier(File id) {
        fileId = id;
    }

    /**
     * Returns the action corresponding with this LimeXMLDocument.
     */
    public String getAction() {
        if(action == null)
            return "";
        else
            return action;
    }

    /**
     * Returns a Set of Map.Entry, where each key-value corresponds to a
     * Canonicalized field name (placeholder), and its corresponding value in
     * the XML Document.
     * <p>
     * Canonicalization:
     * <p>
     * So as to preserve the structure, Structure.Field will be represented as
     * Structure__Field (Double Underscore is being used as a delimiter to
     * represent the structure).
     *<p>
     * In case of multiple structured values with same name,
     * as might occur while using + or * in the regular expressions in schema,
     * those should be represented as using the array index using the __
     * notation (withouth the square brackets)
     * for e.g. myarray[0].name ==> myarray__0__name
     *
     * attribute names for an element in the XML schema should be postfixed 
     * with __ (double underscore).
     * So element.attribute ==> element__attribute__
     *
     * @return a Set of Map.Entry, where each key-value corresponds to a
     * canonicalized field name (placeholder), and its corresponding value in
     * the XML Document.
     */
    public Set getNameValueSet() {
        return fieldToValue.entrySet();
    }
    
    /**
     * Returns a set of the names within this LimeXMLDocument.
     */
    public Set getNameSet() {
        return fieldToValue.keySet();
    }
    
    /**
     * Returns a collection of the values of this LimeXMLDocument.
     */
    public Collection getValueList() {
        return fieldToValue.values();
    }
    
    /**
     * Determines if a license exists that this LimeXMLDocument knows about.
     */
    public boolean isLicenseAvailable() {
        return licenseType != LicenseConstants.NO_LICENSE;
    }
    
    /**
     * Returns a string that can be used to verify if this license is valid.
     */
    public String getLicenseString() {
        if(isLicenseAvailable()) {
            String licenseStringSuffix = getVerifiableLicenseElement(licenseType);
            if (licenseStringSuffix == null)
                return null;
            for(Iterator i = fieldToValue.entrySet().iterator(); i.hasNext(); ) {
                Map.Entry next = (Map.Entry)i.next();
                String key = (String)next.getKey();
                if (key.endsWith(licenseStringSuffix))
                    return (String)next.getValue();
            }
        }
        return null;
    }
    
    private static String getVerifiableLicenseElement(int type) {
        if (type == LicenseConstants.CC_LICENSE)
            return LimeXMLDocument.XML_LICENSE_ATTRIBUTE;
        if (LicenseConstants.isDRMLicense(type))
            return LimeXMLDocument.XML_LICENSE_TYPE_ATTRIBUTE;
        return null;
    }
    
    /**
     * Returns the license.
     */
    public License getLicense() {
        String license = getLicenseString();
        if(license != null)
            return LicenseFactory.create(license);
        else
            return null;
    }

    /**
     * Returns a list of attributes and their values in the same order
     * as is in the schema.
     */
    public List getOrderedNameValueList() {
        String[] fNames = getSchema().getCanonicalizedFieldNames();
        List retList = new ArrayList(fNames.length);
        for (int i = 0; i < fNames.length; i++) {
            String name = fNames[i].trim();
            Object value = fieldToValue.get(name);
            if (value != null)
                retList.add(new NameValue(name, value));
        }
            
        return retList;
    }
    
    /**
     * Returns the value associated with this canonicalized fieldname.
     */
    public String getValue(String fieldName) {
        return (String)fieldToValue.get(fieldName);
    }
    
    /**
     * Constructs an XML string from this document.
     */
    public String getXMLString() {
        StringBuffer fullXML = new StringBuffer();
        LimeXMLDocumentHelper.buildXML(fullXML, getSchema(), getAttributeString() + "/>");
        return fullXML.toString();
    }
    
    /**
     * Returns the attribute string with the given index.
     *
     * For example, this will return:
     *   <thing att1="value1" att2="value2" att3="value3" index="4"/>
     */
    public String getAttributeStringWithIndex(int i) {
        String attributes = getAttributeString();
        return attributes + " index=\"" + i + "\"/>";
    }
    
    /**
     * Returns the attribute string. THIS IS NOT A FULL XML ELEMENT.
     * It is purposely left unclosed so an index can easily be inserted.
     */
    private String getAttributeString() {
        if(attributeString == null)
            attributeString = constructAttributeString();
        return attributeString;
    }
    
    /**
     * Constructs the open-ended XML that contains the attributes.
     * This is purposely open-ended so that an index can easily be
     * inserted.
     * If no attributes exist, this returns an empty string,
     * to easily be marked as invalid.
     */
    private String constructAttributeString() {
        List attributes = getOrderedNameValueList();
        if(attributes.isEmpty())
            return ""; // invalid.
            
        StringBuffer tag = new StringBuffer();
        String root = getSchema().getRootXMLName();
        String type = getSchema().getInnerXMLName();
        String canonicalKey = root + "__" + type + "__";
        tag.append("<");
        tag.append(type);

        for(Iterator i = attributes.iterator(); i.hasNext(); ) {
            NameValue nv = (NameValue)i.next();
            String name = XMLStringUtils.getLastField(canonicalKey, nv.getName());
            if(name == null)
                continue;
            // Construct: ' attribute="value"'
            tag.append(" ");
            tag.append(name);
            tag.append("=\"");
            tag.append(LimeXMLUtils.encodeXML((String)nv.getValue()));
            tag.append("\"");
        }
        
        return tag.toString();
    }

	/**
	 * Overrides equals to check for equality of all xml document fields.
	 *
	 * @param o the object to compare
	 * @return <tt>true</tt> if the objects are equal, <tt>false</tt>
	 *  otherwise
	 */
	public boolean equals(Object o) {
		if(o == this)
		    return true;
		if(o == null)
		    return false;
		if(!(o instanceof LimeXMLDocument))
		    return false;

		LimeXMLDocument xmlDoc = (LimeXMLDocument)o;
		return ((schemaUri == null ? xmlDoc.schemaUri == null :
				 schemaUri.equals(xmlDoc.schemaUri)) &&
				(fileId == null ? xmlDoc.fileId == null :
				 fileId.equals(xmlDoc.fileId)) &&
				(action == null ? xmlDoc.action == null :
				 action.equals(xmlDoc.action)) &&
				(fieldToValue == null ? xmlDoc.fieldToValue == null : 
				 fieldToValue.equals(xmlDoc.fieldToValue)));
	}

	/**
	 * Overrides <tt>Object.hashCode</tt> to satisfy the contract for
	 * hashCode, given that we're overriding equals.
	 *
	 * @return a hashcode for this object for use in hash-based collections
	 */
	public int hashCode() {
		if(hashCode == 0) {
			int result = 17;
			if(fieldToValue != null)
				result = 37*result + fieldToValue.hashCode();
			if(schemaUri != null)
				result = 37*result + schemaUri.hashCode();
			if(fileId != null)
				result = 37*result + fileId.hashCode();
			if(action != null)
				result = 37*result + action.hashCode();
			hashCode = result;
		} 
		return hashCode;
	}
	
	/**
	 * Returns the XML identifier for the string.
	 */
	public String toString() {
	    return getXMLString();
    }
    
    /**
     * Looks in the fields for the ACTION, IDENTIFIER, and INDEX, and a license.
     * Action is stored, index & identifier are removed.
     */
    private void scanFields() {
        String canonicalKey = getCanonicalKey(getNameValueSet());
        if(canonicalKey == null)
            return;

        setFields(canonicalKey);
        fieldToValue.remove(canonicalKey + XML_INDEX_ATTRIBUTE);
        fieldToValue.remove(canonicalKey + XML_ID_ATTRIBUTE);
    }
    
    /**
     * Stores whether or not an action or CC license are in this LimeXMLDocument.
     */
    private void setFields(String prefix) {
        // store action.
        action = (String)fieldToValue.get(prefix + XML_ACTION_ATTRIBUTE);

        // deal with updating license_type based on the license
        String license = (String)fieldToValue.get(prefix + XML_LICENSE_ATTRIBUTE);
        String type = (String)fieldToValue.get(prefix + XML_LICENSE_TYPE_ATTRIBUTE);
        
        if(LOG.isDebugEnabled())
            LOG.debug("type: " + type);
        
        // Do specific stuff on licenseType for various licenses.
        // CC licenses require that the 'license' field has the CC_URI_PREFIX & CC_URL_INDICATOR
        // somewhere.  Weed licenses require that the 'license type' field has WeedInfo.LINFO,
        // a content id & a version id.
        licenseType = LicenseConstants.determineLicenseType(license, type);
        if (licenseType == LicenseConstants.CC_LICENSE)
            fieldToValue.put(prefix + XML_LICENSE_TYPE_ATTRIBUTE, CCConstants.CC_URI_PREFIX);

        if(LOG.isDebugEnabled())
            LOG.debug("Fields after setting: " + fieldToValue);
    }
    
    /**
     * Derives a canonicalKey from a collection of Map.Entry's.
     */
    private String getCanonicalKey(Collection entries) {
        if(entries.isEmpty())
            return null;
        Map.Entry firstEntry = (Map.Entry)entries.iterator().next();
        String firstKey = (String)firstEntry.getKey();
        
        // The canonicalKey is always going to be x__x__<other stuff here>
        int idx = firstKey.indexOf(XMLStringUtils.DELIMITER);
        idx = firstKey.indexOf(XMLStringUtils.DELIMITER, idx+1);
        // not two delimiters? can't find the canonicalKey
        if(idx == -1)
            return null;
            
        // 2 == XMLStringUtils.DELIMITER.length()
        return firstKey.substring(0, idx + 2);
    }
}