// LimeXMLReplyCollection.java example
//
// Explorer
// learning-bittorrent-master
package com.limegroup.gnutella.xml;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.limegroup.gnutella.Assert;
import com.limegroup.gnutella.FileDesc;
import com.limegroup.gnutella.RouterService;
import com.limegroup.gnutella.URN;
import com.limegroup.gnutella.metadata.AudioMetaData;
import com.limegroup.gnutella.metadata.MetaDataEditor;
import com.limegroup.gnutella.metadata.MetaDataReader;
import com.limegroup.gnutella.util.ConverterObjectInputStream;
import com.limegroup.gnutella.util.I18NConvert;
import com.limegroup.gnutella.util.Trie;
import com.limegroup.gnutella.util.IOUtils;
import com.limegroup.gnutella.util.NameValue;

/**
 * Maps LimeXMLDocuments for FileDescs in a specific schema.
 */

public class LimeXMLReplyCollection {
    
    /** Logger for this class. */
    private static final Log LOG = LogFactory.getLog(LimeXMLReplyCollection.class);
    

    /**
     * The schemaURI of this collection.
     */
    private final String schemaURI;
    
    /**
     * A map of URN -> LimeXMLDocument for each shared file that contains XML.
     *
     * SYNCHRONIZATION: Synchronize on mainMap when accessing, 
     *  adding or removing.
     */
    private final Map /* URN -> LimeXMLDocument */ mainMap;
    
    /**
     * The old map that was read off disk.
     *
     * Used while initially processing FileDescs to add: the constructor
     * fills it from readMapFromDisk(), initialize() looks documents up in
     * it, and loadFinished() empties it.
     */
    private final Map /* URN -> LimeXMLDocument */ oldMap;
    
    /**
     * A mapping of fields in the LimeXMLDocument to a Trie
     * that has a lookup table for the values of that field.
     *
     * The Trie value is a mapping of keywords in LimeXMLDocuments
     * to the list of documents that have that keyword.
     *
     * SYNCHRONIZATION: Synchronize on mainMap when accessing,
     *  adding or removing.
     */
    private final Map /* String -> Trie (String -> List) */ trieMap;
    
    /**
     * Whether or not data became dirty after we last wrote to disk.
     *
     * Every write to this flag in this class is made while holding the
     * mainMap lock.
     */
    private boolean dirty = false;
    
    /**
     * The location on disk that information is serialized to.
     *
     * Built in the constructor from the XML docs directory reported by
     * LimeXMLProperties and the schema's display string plus ".sxml".
     */
    private final File dataFile;

    // Status codes for writing metadata into a media file.
    // mediaFileToDisk() returns NORMAL when it adopts the on-disk document
    // instead of writing, and otherwise hands back whatever
    // MetaDataEditor.commitMetaData() returned.  commitMetaData() in this
    // class treats FILE_DEFECTIVE, RW_ERROR, BAD_ID3 and INCORRECT_FILETYPE
    // as "the file was not changed".
    // NOTE(review): the FAILED_* and HASH_FAILED codes are not interpreted
    // anywhere in this class; presumably they are produced by the editor
    // for individual tag fields -- confirm against MetaDataEditor.
    public static final int NORMAL = 0;
    public static final int FILE_DEFECTIVE = 1;
    public static final int RW_ERROR = 2;
    public static final int BAD_ID3  = 3;
    public static final int FAILED_TITLE  = 4;
    public static final int FAILED_ARTIST  = 5;
    public static final int FAILED_ALBUM  = 6;
    public static final int FAILED_YEAR  = 7;
    public static final int FAILED_COMMENT  = 8;
    public static final int FAILED_TRACK  = 9;
    public static final int FAILED_GENRE  = 10;
    public static final int HASH_FAILED  = 11;
    public static final int INCORRECT_FILETYPE = 12;

    /**
     * Builds the reply collection for a single schema.  The collection is
     * meant to hold only those XMLDocs whose schema URI matches the one
     * given here.
     *
     * The map previously serialized for this schema (if any) is read from
     * disk into oldMap; mainMap and trieMap start out empty.
     *
     * @param URI This collection's schema URI
     */
    public LimeXMLReplyCollection(String URI) {
        schemaURI = URI;
        mainMap = new HashMap();
        trieMap = new HashMap();
        // One data file per schema, named after the schema's display string.
        dataFile = new File(LimeXMLProperties.instance().getXMLDocsDir(),
                            LimeXMLSchema.getDisplayString(URI) + ".sxml");
        // dataFile has to be assigned before this call: the read uses it.
        oldMap = readMapFromDisk();
    }
    
    /**
     * Chooses the document to use for the given FileDesc and, if it
     * survives validation, registers it with this collection.
     *
     * The first document in <tt>potential</tt> whose schema URI equals this
     * collection's takes priority; failing that, the entry stored in oldMap
     * under the FileDesc's SHA1 URN is used.
     *
     * @param fd the FileDesc being loaded
     * @param potential candidate LimeXMLDocuments for that FileDesc
     * @return the validated document that was added, or null if no
     *  candidate existed or validation dropped it
     */
    LimeXMLDocument initialize(FileDesc fd, List potential) {
        URN urn = fd.getSHA1Urn();
        LimeXMLDocument candidate = null;

        // Candidates handed to us win over what was stored on disk.
        Iterator iter = potential.iterator();
        while(candidate == null && iter.hasNext()) {
            LimeXMLDocument current = (LimeXMLDocument)iter.next();
            if(current.getSchemaURI().equals(schemaURI)) {
                candidate = current;
            }
        }

        // Fall back to the document remembered from the previous session.
        if(candidate == null) {
            candidate = (LimeXMLDocument)oldMap.get(urn);
        }

        if(candidate == null) {
            return null;
        }

        // Validation may reparse, repair or reject the document.
        LimeXMLDocument validated = validate(candidate, fd.getFile(), fd);
        if(validated != null) {
            if(LOG.isDebugEnabled()) {
                LOG.debug("Adding old document for file: " + fd.getFile() + ", doc: " + validated);
            }
            addReply(fd, validated);
        }

        return validated;
    }
    
    /**
     * Creates a LimeXMLDocument for the given FileDesc if no XML already exists
     * for it in this collection.
     *
     * A document is only constructed when the FileDesc has no documents at
     * all, or when the file's format supports just a single kind of
     * metadata.  This avoids re-parsing, on every call, files whose format
     * could map to several schemas.
     *
     * @param fd the FileDesc to create a document for
     * @return the newly constructed and added document, or null if one
     *  already existed, none was needed, or construction failed
     */
    LimeXMLDocument createIfNecessary(FileDesc fd) {
        URN urn = fd.getSHA1Urn();

        // mainMap is a plain HashMap guarded by its own monitor, so even
        // this read-only lookup has to hold the lock.  The lock is released
        // before parsing so that file I/O is never done while holding it.
        boolean alreadyMapped;
        synchronized(mainMap) {
            alreadyMapped = mainMap.containsKey(urn);
        }
        if(alreadyMapped)
            return null;

        File file = fd.getFile();
        LimeXMLDocument doc = null;
        if(fd.getLimeXMLDocuments().size() == 0 || !LimeXMLUtils.isSupportedMultipleFormat(file)) {
            doc = constructDocument(file);
            if(doc != null) {
                if(LOG.isDebugEnabled())
                    LOG.debug("Adding newly constructed document for file: " + file + ", doc: " + doc);
                addReply(fd, doc);
            }
        }

        return doc;
    }
    
    /**
     * Notification that initial loading is done.
     *
     * If the map built during loading equals the one that was read from
     * disk, the dirty flag is cleared.  The old map is emptied either way.
     */
    void loadFinished() {
        synchronized(mainMap) {
            boolean sameAsOnDisk = oldMap.equals(mainMap);
            if(sameAsOnDisk)
                dirty = false;
            oldMap.clear();
        }
    }
    
    /**
     * Validates a LimeXMLDocument, in three steps:
     *
     * 1) Not current: reparse the file and merge the result with the given
     *    document; if reparsing fails, keep the document and mark it current.
     * 2) Not valid: rebuild it from the file; if that fails, drop it.
     * 3) Corrupted: fix it and write the fixed version to the media file.
     *
     * @param doc the document to check
     * @param file the file the document describes
     * @param fd the FileDesc of that file
     * @return the document to use, or null if it was invalid and
     *  could not be rebuilt
     */
    private LimeXMLDocument validate(LimeXMLDocument doc, File file, FileDesc fd) {
        LimeXMLDocument result = doc;

        // Step 1: bring an outdated document up to date.
        if(!result.isCurrent()) {
            if(LOG.isDebugEnabled())
                LOG.debug("reconstructing old document: " + file);
            LimeXMLDocument reparsed = constructDocument(file);
            if(reparsed == null)
                result.setCurrent();
            else
                result = update(result, reparsed);
        }

        // Step 2: a document without usable information is rebuilt or dropped.
        if(!result.isValid()) {
            result = constructDocument(file);
            if(result == null)
                return null;
        }

        // Step 3: repair corruption and push the repaired data to disk.
        if(AudioMetaData.isCorrupted(result)) {
            result = AudioMetaData.fixCorruption(result);
            mediaFileToDisk(fd, file.getPath(), result, false);
        }

        return result;
    }
    
    /**
     * Merges an older document into a newer one.  Every field of the newer
     * document is kept as is; fields that exist only in the older document
     * (for instance existing annotations) are carried over.
     *
     * @param older the document whose extra fields should be retained
     * @param newer the document whose fields and schema URI take priority
     * @return a new LimeXMLDocument holding the merged fields
     */
    private LimeXMLDocument update(LimeXMLDocument older, LimeXMLDocument newer) {
        Map merged = new HashMap();

        // Newer values go in first so they can never be overwritten.
        Iterator newerEntries = newer.getNameValueSet().iterator();
        while(newerEntries.hasNext()) {
            Map.Entry entry = (Map.Entry)newerEntries.next();
            merged.put(entry.getKey(), entry.getValue());
        }

        // Older values only fill the gaps.
        Iterator olderEntries = older.getNameValueSet().iterator();
        while(olderEntries.hasNext()) {
            Map.Entry entry = (Map.Entry)olderEntries.next();
            Object key = entry.getKey();
            if(!merged.containsKey(key))
                merged.put(key, entry.getValue());
        }

        List nameValues = new ArrayList(merged.size());
        Iterator mergedEntries = merged.entrySet().iterator();
        while(mergedEntries.hasNext()) {
            Map.Entry entry = (Map.Entry)mergedEntries.next();
            nameValues.add(new NameValue((String)entry.getKey(), entry.getValue()));
        }

        return new LimeXMLDocument(nameValues, newer.getSchemaURI());
    }
        
    
    /**
     * Parses the file's metadata into a LimeXMLDocument of this
     * collection's schema.
     *
     * @param file the file to read metadata from
     * @return null if the format is not supported for this schema, reading
     *  fails, or the parsed document belongs to another schema;
     *  <tt>LimeXMLDocument</tt> otherwise.
     */
    private LimeXMLDocument constructDocument(File file) {
        if(!LimeXMLUtils.isSupportedFormatForSchema(file, schemaURI))
            return null;

        LimeXMLDocument parsed;
        try {
            parsed = MetaDataReader.readDocument(file);
        } catch (IOException e) {
            LOG.warn("Error creating document", e);
            return null;
        }

        // Formats that can hold several kinds of media may yield a document
        // of a different schema than ours.
        return parsed.getSchemaURI().equals(schemaURI) ? parsed : null;
    }

    /**
     * Collects the keywords of every document in this collection.
     * <p>
     * Each document supplies its own keywords; the results are
     * concatenated into one list (duplicates are not removed).
     *
     * @return a new list with the keywords of all documents
     */
    protected List getKeyWords(){
        List keywords = new ArrayList();
        synchronized(mainMap){
            for(Iterator i = mainMap.values().iterator(); i.hasNext(); ) {
                LimeXMLDocument doc = (LimeXMLDocument)i.next();
                keywords.addAll(doc.getKeyWords());
            }
        }
        return keywords;
    }

    /**
     * Collects the indivisible keywords of every document in this
     * collection.
     * <p>
     * Each document supplies its own indivisible keywords; the results are
     * concatenated into one list (duplicates are not removed).
     *
     * @return a new list with the indivisible keywords of all documents
     */
    protected List getKeyWordsIndivisible(){
        List keywords = new ArrayList();
        synchronized(mainMap){
            for(Iterator i = mainMap.values().iterator(); i.hasNext(); ) {
                LimeXMLDocument doc = (LimeXMLDocument)i.next();
                keywords.addAll(doc.getKeyWordsIndivisible());
            }
        }
        return keywords;
    }
    
    /**
     * Gets the URI of the schema this collection was created for.
     *
     * @return the schema URI passed to the constructor
     */
    public String getSchemaURI(){
        return this.schemaURI;
    }
    
    /**
     * Indexes the given document: for every name/value pair, the normalized
     * value is registered in the Trie that belongs to the field name, and
     * the document is appended to the list stored under that value.
     * Missing Tries and lists are created on demand.
     *
     * @param doc the document whose fields should be indexed
     */
    private void addKeywords(LimeXMLDocument doc) {
        synchronized(mainMap) {
            Iterator fields = doc.getNameValueSet().iterator();
            while(fields.hasNext()) {
                Map.Entry field = (Map.Entry)fields.next();
                String fieldName = (String)field.getKey();
                String normalized =
                    I18NConvert.instance().getNorm((String)field.getValue());

                // One lookup table per field name, created lazily.
                Trie index = (Trie)trieMap.get(fieldName);
                if(index == null) {
                    index = new Trie(true); // ignore case.
                    trieMap.put(fieldName, index);
                }

                // One list of documents per distinct value, created lazily.
                List docsForValue = (List)index.get(normalized);
                if(docsForValue == null) {
                    docsForValue = new LinkedList();
                    index.add(normalized, docsForValue);
                }

                docsForValue.add(doc);
            }
        }
    }
    
    /**
     * Un-indexes the given document: for every name/value pair, the
     * document is taken out of the list stored under the normalized value
     * in the field's Trie.  A list that becomes empty is removed from the
     * Trie.  Fields without a Trie, and values without a list, are skipped.
     *
     * @param doc the document whose fields should be removed from the index
     */
    private void removeKeywords(LimeXMLDocument doc) {
        synchronized(mainMap) {
            Iterator fields = doc.getNameValueSet().iterator();
            while(fields.hasNext()) {
                Map.Entry field = (Map.Entry)fields.next();
                String fieldName = (String)field.getKey();

                Trie index = (Trie)trieMap.get(fieldName);
                if(index != null) {
                    String normalized =
                        I18NConvert.instance().getNorm((String)field.getValue());
                    List docsForValue = (List)index.get(normalized);
                    if(docsForValue != null) {
                        docsForValue.remove(doc);
                        // Do not keep empty lists around in the Trie.
                        if(docsForValue.isEmpty())
                            index.remove(normalized);
                    }
                }
            }
        }
    }

    /**
     * Stores the document in mainMap under the FileDesc's SHA1 URN, indexes
     * its keywords and marks the collection dirty.  Afterwards the document
     * is also added to the documents the FileDesc knows about.
     *
     * @param fd the FileDesc the document belongs to
     * @param replyDoc the document to add
     */
    public void addReply(FileDesc fd, LimeXMLDocument replyDoc) {
        URN urn = fd.getSHA1Urn();

        // Map and keyword index are updated under one lock acquisition.
        synchronized(mainMap) {
            dirty = true;
            mainMap.put(urn, replyDoc);
            addKeywords(replyDoc);
        }

        // The FileDesc is told outside of the lock.
        fd.addLimeXMLDocument(replyDoc);
    }

    /**
     * Reports how many documents this collection currently holds.
     *
     * @return the number of entries in mainMap
     */
    public int getCount(){
        int count;
        synchronized(mainMap) {
            count = mainMap.size();
        }
        return count;
    }
    
    /**
     * Looks up the LimeXMLDocument stored for the given hash.
     *
     * @param hash the URN to look up
     * @return the document mapped to the hash, or null if there is none
     */
    public LimeXMLDocument getDocForHash(URN hash){
        LimeXMLDocument found;
        synchronized(mainMap){
            found = (LimeXMLDocument)mainMap.get(hash);
        }
        return found;
    }
        
    /**
     * Finds all documents of this collection that match the query.
     *
     * The search runs in two phases:
     * 1) Coarse: for every field of the query that has an index Trie, all
     *    documents stored under values prefixed by the query's value are
     *    gathered into a set (a set, so that a document matching on
     *    several fields is only considered once).
     * 2) Fine: each gathered document is compared against the query with
     *    the XML-specific matcher, and only real matches are kept.
     *
     * @param query the query document
     * @return a list of the matching documents, or an empty list if
     *  nothing matched
     */    
    List getMatchingReplies(LimeXMLDocument query) {
        // Phase 1: gather everything that could possibly match.
        Set candidates = null;
        synchronized(mainMap) {
            Iterator fields = query.getNameValueSet().iterator();
            while(fields.hasNext()) {
                Map.Entry field = (Map.Entry)fields.next();

                // Fields that were never indexed cannot contribute.
                String fieldName = (String)field.getKey();
                Trie index = (Trie)trieMap.get(fieldName);
                if(index == null)
                    continue;

                // The query came from the network and is therefore already
                // normalized -- it MUST NOT be normalized a second time.
                String prefix = (String)field.getValue();

                // The Trie does prefix matching, so it hands back one List
                // of documents per stored value that starts with the prefix.
                Iterator lists = index.getPrefixedBy(prefix);
                while(lists.hasNext()) {
                    if(candidates == null) // allocate on first hit only
                        candidates = new HashSet();
                    candidates.addAll((List)lists.next());
                }
            }
        }

        if(candidates == null || candidates.isEmpty())
            return Collections.EMPTY_LIST;

        // Phase 2: keep only what the real XML matcher accepts.
        List confirmed = null;
        for(Iterator i = candidates.iterator(); i.hasNext(); ) {
            LimeXMLDocument candidate = (LimeXMLDocument)i.next();
            if(!LimeXMLUtils.match(candidate, query, false))
                continue;
            if(confirmed == null) // allocate on first hit only
                confirmed = new LinkedList();
            confirmed.add(candidate);
        }

        return confirmed == null ? Collections.EMPTY_LIST : confirmed;
    }
    
    /**
     * Swaps the document stored for the FileDesc's SHA1 URN with a newer
     * one, re-indexes the keywords accordingly and marks the collection
     * dirty.  The FileDesc is told about the swap as well.
     *
     * An assertion is raised if no document was stored for the FileDesc.
     *
     * @param fd the FileDesc whose document is replaced
     * @param newDoc the document to store from now on
     * @return the older document, which is being replaced
     */
    public LimeXMLDocument replaceDoc(FileDesc fd, LimeXMLDocument newDoc) {
        if(LOG.isTraceEnabled())
            LOG.trace("Replacing doc in FD (" + fd + ") with new doc (" + newDoc + ")");

        URN urn = fd.getSHA1Urn();
        LimeXMLDocument previous;
        synchronized(mainMap) {
            dirty = true;
            previous = (LimeXMLDocument)mainMap.put(urn, newDoc);
            Assert.that(previous != null, "attempted to replace doc that did not exist!!");
            // Keep the keyword index in step with the map.
            removeKeywords(previous);
            addKeywords(newDoc);
        }

        fd.replaceLimeXMLDocument(previous, newDoc);
        return previous;
    }

    /**
     * Removes the document associated with this FileDesc
     * from this collection (both the map and the keyword index),
     * as well as removing it from the FileDesc.
     *
     * @param fd the FileDesc whose document should be removed
     * @return true if a document was stored for the FileDesc and has
     *  been removed, false if there was nothing to remove
     */
    public boolean removeDoc(FileDesc fd) {
        LimeXMLDocument removed;
        synchronized(mainMap) {
            removed = (LimeXMLDocument)mainMap.remove(fd.getSHA1Urn());
            if(removed != null) {
                dirty = true;
                // Un-index within the same critical section as the map
                // removal (as addReply and replaceDoc do), so that no other
                // thread can observe a document that is gone from the map
                // but still reachable through the keyword index.
                removeKeywords(removed);
            }
        }

        // The FileDesc is told outside of the lock.
        if(removed != null)
            fd.removeLimeXMLDocument(removed);

        if(LOG.isDebugEnabled())
            LOG.debug("removed: " + removed);

        return removed != null;
    }
    
    /**
     * Writes the XML in the doc into the media file, if that is needed.
     *
     * @param fd the FileDesc of the media file
     * @param fileName the path of the media file
     * @param doc the document holding the metadata to write
     * @param checkBetter whether the data already in the file should be
     *  compared with the doc and preferred where it is better
     * @return -1 if no editor was needed, NORMAL if the data on disk was
     *  better and the collection was updated to it instead of writing,
     *  otherwise the result of committing the metadata
     */
    public int mediaFileToDisk(FileDesc fd, String fileName, LimeXMLDocument doc,  boolean checkBetter) {
        if(LOG.isDebugEnabled())
            LOG.debug("writing: " + fileName + " to disk.");

        int writeState = -1;

        // An editor is only returned when the file's tags differ from the doc.
        MetaDataEditor editor = getEditorIfNeeded(fileName, doc, checkBetter);
        if(editor != null) {
            LimeXMLDocument correct = editor.getCorrectDocument();
            if(correct != null) {
                // The data on disk is better than what we were given, so
                // the stored document is swapped for the on-disk one and
                // nothing has to be written.
                removeDoc(fd);
                addReply(fd, correct);
                writeState = NORMAL;
            } else {
                writeState = commitMetaData(fileName, editor);
            }
        }

        Assert.that(writeState != INCORRECT_FILETYPE, "trying to write data to unwritable file");

        return writeState;
    }

    /**
     * Determines whether or not this LimeXMLDocument can or should be
     * committed to disk to replace the tags in the media file.
     * If the tags in the file are the same as those in the document,
     * this returns null (indicating no changes required).
     *
     * @param mp3File the path of the media file
     * @param doc the document whose values might be written
     * @param checkBetter whether to compare the file's existing tags with
     *  the document and prefer whichever is better
     * @return a MetaDataEditor to use when committing, or null if nothing
     *  should be edited (no editor exists for the file, the existing
     *  tags could not be read, or they already equal the document)
     */
    private MetaDataEditor getEditorIfNeeded(String mp3File, LimeXMLDocument doc, 
                                                        boolean checkBetter) {
        
        MetaDataEditor newValues = MetaDataEditor.getEditorForFile(mp3File);
        //if this call returned null, we should store the data in our
        //xml repository only.
        if (newValues == null)
            return null;
        newValues.populate(doc);
        
        // Now see if the file already has the same info ...
        MetaDataEditor existing = MetaDataEditor.getEditorForFile(mp3File);
        LimeXMLDocument existingDoc = null;
        try {
            existingDoc = MetaDataReader.readDocument(new File(mp3File));
        } catch(IOException e) {
            // Without the current tags there is nothing to compare against,
            // so no edit is attempted -- but the failure is logged instead
            // of vanishing, as constructDocument does for the same error.
            LOG.warn("Unable to read existing metadata from: " + mp3File, e);
            return null;
        }
        existing.populate(existingDoc);
        
        //We are supposed to pick and chose the better set of tags
        if( newValues.equals(existing) ) {
            LOG.debug("tag read from disk is same as XML doc.");
            return null;
        } else if(checkBetter) {
            if(existing.betterThan(newValues)) {
                LOG.debug("Data on disk is better, using disk data.");
                //Note: In this case we are going to discard the LimeXMLDocument we
                //got off the network, because the data on the file is better than
                //the data in the query reply. Only in this case, we set the
                //"correctDocument" variable of the editor.
                existing.setCorrectDocument(existingDoc);
                return existing;
            } else {
                LOG.debug("Retrieving better fields from disk.");
                newValues.pickBetterFields(existing);        
            }
        }
            
        // Commit using this Meta data editor ... 
        return newValues;
    }


    /**
     * Commits the editor's changes to the media file.
     * If the file was changed on disk, the FileManager is notified.
     *
     * @param fileName the path of the media file
     * @param editor the editor holding the values to write
     * @return the status code reported by the editor
     */
    private int commitMetaData(String fileName, MetaDataEditor editor) {
        // write to the media file...
        int result = editor.commitMetaData(fileName);
        if(LOG.isDebugEnabled())
            LOG.debug("wrote data: " + result);

        // These codes mean the file was left untouched, so there is
        // nothing to announce.
        switch(result) {
        case FILE_DEFECTIVE:
        case RW_ERROR:
        case BAD_ID3:
        case INCORRECT_FILETYPE:
            return result;
        default:
            break;
        }

        // The hash is deliberately not removed from the map here:
        // MetaFileManager needs to look it up to get the doc.
        //
        // Writing changed the file's hash, so metadata kept for other
        // schemas would be lost unless those tables learn the new hash.
        // NOTE: This is the only time the hash will change (mp3 and audio).
        RouterService.getFileManager().fileChanged(new File(fileName));
        return result;
    }
    
    /**
     * Serializes the current map to disk, if it changed since the last
     * successful write.
     *
     * The dirty flag is only cleared when the write succeeded, so a failed
     * write is attempted again on the next call instead of being reported
     * as up to date.
     *
     * @return true if the map did not need writing or was written
     *  successfully, false if writing failed
     */
    public boolean writeMapToDisk() {
        synchronized(mainMap) {
            if(!dirty)
                return true;

            boolean wrote = false;
            ObjectOutputStream out = null;
            try {
                out = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(dataFile)));
                out.writeObject(mainMap);
                out.flush();
                wrote = true;
            } catch(Throwable t) {
                // Best effort: never propagate, but make the failure
                // visible, as readMapFromDisk does for read failures.
                LOG.error("Unable to write", t);
            } finally {
                IOUtils.close(out);
            }

            // Stay dirty after a failure so that the data is not silently
            // dropped from all future writes.
            if(wrote)
                dirty = false;

            return wrote;
        }
    }
    
    /**
     * Reads the serialized map off of the disk.
     *
     * Any failure (including a missing data file) is logged and results
     * in an empty map.
     *
     * @return the map stored in the data file, or a new empty HashMap if
     *  nothing could be read
     */
    private Map readMapFromDisk() {
        ObjectInputStream stream = null;
        try {
            stream = new ConverterObjectInputStream(new BufferedInputStream(new FileInputStream(dataFile)));
            Map loaded = (Map)stream.readObject();
            if(loaded != null)
                return loaded;
        } catch(Throwable t) {
            LOG.error("Unable to read LimeXMLCollection", t);
        } finally {
            IOUtils.close(stream);
        }

        return new HashMap();
    }
}