package com.limegroup.gnutella.xml;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.limegroup.gnutella.Assert;
import com.limegroup.gnutella.FileDesc;
import com.limegroup.gnutella.RouterService;
import com.limegroup.gnutella.URN;
import com.limegroup.gnutella.metadata.AudioMetaData;
import com.limegroup.gnutella.metadata.MetaDataEditor;
import com.limegroup.gnutella.metadata.MetaDataReader;
import com.limegroup.gnutella.util.ConverterObjectInputStream;
import com.limegroup.gnutella.util.I18NConvert;
import com.limegroup.gnutella.util.Trie;
import com.limegroup.gnutella.util.IOUtils;
import com.limegroup.gnutella.util.NameValue;
/**
* Maps LimeXMLDocuments for FileDescs in a specific schema.
*/
public class LimeXMLReplyCollection {
private static final Log LOG = LogFactory.getLog(LimeXMLReplyCollection.class);
/**
 * The schemaURI of this collection.
 */
private final String schemaURI;
/**
 * A map of URN -> LimeXMLDocument for each shared file that contains XML.
 *
 * SYNCHRONIZATION: Synchronize on mainMap when accessing,
 * adding or removing.
 */
private final Map /* URN -> LimeXMLDocument */ mainMap;
/**
 * The old map that was read off disk.
 *
 * Used while initially processing FileDescs to add.
 */
private final Map /* URN -> LimeXMLDocument */ oldMap;
/**
 * A mapping of fields in the LimeXMLDocument to a Trie
 * that has a lookup table for the values of that field.
 *
 * The Trie value is a mapping of keywords in LimeXMLDocuments
 * to the list of documents that have that keyword.
 *
 * SYNCHRONIZATION: Synchronize on mainMap when accessing,
 * adding or removing.
 */
private final Map /* String -> Trie (String -> List) */ trieMap;
/**
 * Whether or not data became dirty after we last wrote to disk.
 */
private boolean dirty = false;
/**
 * The location on disk that information is serialized to.
 */
private final File dataFile;
// Status codes reported when committing metadata back into a media file
// (see mediaFileToDisk / commitMetaData).  NORMAL indicates success.
// NOTE(review): the FAILED_* codes presumably identify which ID3 field
// failed to commit -- confirm against MetaDataEditor.commitMetaData.
public static final int NORMAL = 0;
public static final int FILE_DEFECTIVE = 1;
public static final int RW_ERROR = 2;
public static final int BAD_ID3 = 3;
public static final int FAILED_TITLE = 4;
public static final int FAILED_ARTIST = 5;
public static final int FAILED_ALBUM = 6;
public static final int FAILED_YEAR = 7;
public static final int FAILED_COMMENT = 8;
public static final int FAILED_TRACK = 9;
public static final int FAILED_GENRE = 10;
public static final int HASH_FAILED = 11;
public static final int INCORRECT_FILETYPE = 12;
/**
 * Creates a new LimeXMLReplyCollection. The reply collection
 * will retain only those XMLDocs that match the given schema URI.
 *
 * @param URI this collection's schema URI
 */
public LimeXMLReplyCollection(String URI) {
this.schemaURI = URI;
this.trieMap = new HashMap();
// Serialized docs live at <xml-docs-dir>/<schema-display-name>.sxml
this.dataFile = new File(LimeXMLProperties.instance().getXMLDocsDir(),
LimeXMLSchema.getDisplayString(schemaURI)+ ".sxml");
this.mainMap = new HashMap();
// Pull any previously-serialized docs off disk; consumed by initialize()
// and released by loadFinished().
this.oldMap = readMapFromDisk();
}
/**
 * Initializes the map entry for the given FileDesc, preferring a document
 * from the supplied list of potential documents and falling back to the
 * document that was previously serialized to disk.
 *
 * @param fd the shared file being initialized
 * @param potential candidate LimeXMLDocuments; the first one matching this
 *        collection's schema wins
 * @return the (validated) document that was added, or null if none applied
 */
LimeXMLDocument initialize(FileDesc fd, List potential) {
    LimeXMLDocument match = null;
    // Scan the candidate list for the first doc belonging to our schema.
    for(Iterator i = potential.iterator(); i.hasNext() && match == null; ) {
        LimeXMLDocument candidate = (LimeXMLDocument)i.next();
        if(candidate.getSchemaURI().equals(schemaURI))
            match = candidate;
    }
    // Nothing in the candidate list -- fall back to the serialized map.
    if(match == null)
        match = (LimeXMLDocument)oldMap.get(fd.getSHA1Urn());
    if(match == null)
        return null;
    // Validate (possibly reparsing / repairing) before adding.
    match = validate(match, fd.getFile(), fd);
    if(match != null) {
        if(LOG.isDebugEnabled())
            LOG.debug("Adding old document for file: " + fd.getFile() + ", doc: " + match);
        addReply(fd, match);
    }
    return match;
}
/**
 * Creates a LimeXMLDocument for the given FileDesc if no XML already exists
 * for it.
 *
 * @param fd the shared file to (possibly) construct a document for
 * @return the newly constructed document, or null if a document already
 *         existed or none could be constructed
 */
LimeXMLDocument createIfNecessary(FileDesc fd) {
    LimeXMLDocument doc = null;
    URN urn = fd.getSHA1Urn();
    // FIX: mainMap's contract requires holding its lock for all accesses;
    // the original read containsKey without synchronizing.
    boolean exists;
    synchronized(mainMap) {
        exists = mainMap.containsKey(urn);
    }
    if(!exists) {
        File file = fd.getFile();
        // If we have no documents for this FD, or the file-format only supports
        // a single kind of metadata, construct a document.
        // This is necessary so that we don't keep trying to parse formats that could
        // be multiple kinds of files every time.
        if(fd.getLimeXMLDocuments().size() == 0 || !LimeXMLUtils.isSupportedMultipleFormat(file)) {
            doc = constructDocument(file);
            if(doc != null) {
                if(LOG.isDebugEnabled())
                    LOG.debug("Adding newly constructed document for file: " + file + ", doc: " + doc);
                addReply(fd, doc);
            }
        }
    }
    return doc;
}
/**
 * Notification that the initial load of shared files has completed.
 * If the documents we ended up with are identical to the snapshot read
 * from disk, nothing new needs persisting, so the dirty flag is cleared.
 * The disk snapshot is then released.
 */
void loadFinished() {
    synchronized(mainMap) {
        // Map.equals is symmetric, so comparing either direction is fine.
        if(mainMap.equals(oldMap))
            dirty = false;
        oldMap.clear();
    }
}
/**
 * Validates a LimeXMLDocument.
 *
 * This checks:
 * 1) If it's current (if not, it attempts to reparse. If it can't, keeps the old one).
 * 2) If it's valid (if not, attempts to reparse it. If it can't, drops it).
 * 3) If it's corrupted (if so, fixes & writes the fixed one to disk).
 *
 * @param doc the document to validate (non-null)
 * @param file the on-disk file the document describes
 * @param fd the FileDesc for the file
 * @return the validated (possibly replaced or repaired) document,
 *         or null if the document was invalid and could not be rebuilt
 */
private LimeXMLDocument validate(LimeXMLDocument doc, File file, FileDesc fd) {
if(!doc.isCurrent()) {
if(LOG.isDebugEnabled())
LOG.debug("reconstructing old document: " + file);
LimeXMLDocument tempDoc = constructDocument(file);
if (tempDoc != null)
// Merge: new fields win, but old-only fields (annotations) survive.
doc = update(doc, tempDoc);
else
// Could not reparse -- keep the stale doc but mark it current so
// we don't retry on every validation.
doc.setCurrent();
}
// Verify the doc has information in it.
if(!doc.isValid()) {
//If it is invalid, try and rebuild it.
doc = constructDocument(file);
if(doc == null)
return null;
}
// check to see if it's corrupted and if so, fix it.
if( AudioMetaData.isCorrupted(doc) ) {
doc = AudioMetaData.fixCorruption(doc);
// Persist the repaired metadata back into the media file itself.
mediaFileToDisk(fd, file.getPath(), doc, false);
}
return doc;
}
/**
 * Updates an existing old document to be a newer document, but retains all
 * fields that may have been in the old one that are not in the newer (for
 * the case of existing annotations).
 *
 * @param older the stale document whose extra fields should be preserved
 * @param newer the freshly parsed document whose fields take priority
 * @return a new LimeXMLDocument combining both, using the newer schema URI
 */
private LimeXMLDocument update(LimeXMLDocument older, LimeXMLDocument newer) {
    Map merged = new HashMap();
    // Seed with the old fields, then let the new ones overwrite -- the
    // newer document always wins on conflicts.
    for(Iterator i = older.getNameValueSet().iterator(); i.hasNext(); ) {
        Map.Entry entry = (Map.Entry)i.next();
        merged.put(entry.getKey(), entry.getValue());
    }
    for(Iterator i = newer.getNameValueSet().iterator(); i.hasNext(); ) {
        Map.Entry entry = (Map.Entry)i.next();
        merged.put(entry.getKey(), entry.getValue());
    }
    // Flatten into the NameValue list the LimeXMLDocument ctor expects.
    List nameValues = new ArrayList(merged.size());
    for(Iterator i = merged.entrySet().iterator(); i.hasNext(); ) {
        Map.Entry entry = (Map.Entry)i.next();
        nameValues.add(new NameValue((String)entry.getKey(), entry.getValue()));
    }
    return new LimeXMLDocument(nameValues, newer.getSchemaURI());
}
/**
 * Creates a LimeXMLDocument from the file.
 *
 * @param file the file to parse metadata from
 * @return null if the format is not supported for this schema, parsing
 *         fails, or the parsed document belongs to a different schema;
 *         the parsed <tt>LimeXMLDocument</tt> otherwise
 */
private LimeXMLDocument constructDocument(File file) {
    // Guard clause: bail immediately for unsupported formats.
    if(!LimeXMLUtils.isSupportedFormatForSchema(file, schemaURI))
        return null;
    try {
        // Documents with multiple file formats may be the wrong type.
        LimeXMLDocument parsed = MetaDataReader.readDocument(file);
        if(parsed.getSchemaURI().equals(schemaURI))
            return parsed;
    } catch (IOException ignored) {
        LOG.warn("Error creating document", ignored);
    }
    return null;
}
/**
 * Gets a list of keywords from all the documents in this collection.
 * <p>
 * Delegates to the individual documents and collates the list.
 *
 * @return a new list containing every document's keywords
 */
protected List getKeyWords(){
    List keywords = new ArrayList();
    synchronized(mainMap){
        for(Iterator i = mainMap.values().iterator(); i.hasNext(); ) {
            LimeXMLDocument doc = (LimeXMLDocument)i.next();
            keywords.addAll(doc.getKeyWords());
        }
    }
    return keywords;
}
/**
 * Gets a list of indivisible keywords from all the documents in this
 * collection.
 * <p>
 * Delegates to the individual documents and collates the list.
 *
 * @return a new list containing every document's indivisible keywords
 */
protected List getKeyWordsIndivisible(){
    List keywords = new ArrayList();
    synchronized(mainMap){
        for(Iterator i = mainMap.values().iterator(); i.hasNext(); ) {
            LimeXMLDocument doc = (LimeXMLDocument)i.next();
            keywords.addAll(doc.getKeyWordsIndivisible());
        }
    }
    return keywords;
}
/**
 * Returns the schema URI of this collection.
 *
 * @return the URI set at construction time; never null
 */
public String getSchemaURI(){
return schemaURI;
}
/**
 * Adds the keywords of this LimeXMLDocument into the correct Trie
 * for the field of the value.
 *
 * @param doc the document whose name/value pairs should be indexed
 */
private void addKeywords(LimeXMLDocument doc) {
    synchronized(mainMap) {
        for(Iterator i = doc.getNameValueSet().iterator(); i.hasNext(); ) {
            Map.Entry field = (Map.Entry)i.next();
            String fieldName = (String)field.getKey();
            String normalized =
                I18NConvert.instance().getNorm((String)field.getValue());
            // Lazily create the per-field lookup table (case-insensitive).
            Trie lookup = (Trie)trieMap.get(fieldName);
            if(lookup == null) {
                lookup = new Trie(true); //ignore case.
                trieMap.put(fieldName, lookup);
            }
            // Lazily create the doc-list for this normalized value.
            List docsForValue = (List)lookup.get(normalized);
            if(docsForValue == null) {
                docsForValue = new LinkedList();
                lookup.add(normalized, docsForValue);
            }
            // Record that this document carries this value.
            docsForValue.add(doc);
        }
    }
}
/**
 * Removes the keywords of this LimeXMLDocument from the appropriate Trie.
 * If the list is emptied, it is removed from the Trie.
 *
 * @param doc the document whose indexed name/value pairs should be removed
 */
private void removeKeywords(LimeXMLDocument doc) {
synchronized(mainMap) {
for(Iterator i = doc.getNameValueSet().iterator(); i.hasNext(); ) {
Map.Entry entry = (Map.Entry)i.next();
final String name = (String)entry.getKey();
Trie trie = (Trie)trieMap.get(name);
// if no trie, ignore.
if(trie == null)
continue;
// Normalize the same way addKeywords did so the lookup key matches.
final String value =
I18NConvert.instance().getNorm((String)entry.getValue());
List allDocs = (List)trie.get(value);
// if no list, ignore.
if( allDocs == null )
continue;
allDocs.remove(doc);
// if we emptied the doc, remove from trie...
if( allDocs.size() == 0 )
trie.remove(value);
}
}
}
/**
 * Adds a reply into the mainMap of this collection.
 * Also adds this LimeXMLDocument to the list of documents the
 * FileDesc knows about.
 *
 * @param fd the shared file this document describes
 * @param replyDoc the document to store and index
 */
public void addReply(FileDesc fd, LimeXMLDocument replyDoc) {
    URN sha1 = fd.getSHA1Urn();
    synchronized(mainMap){
        // New content means the serialized snapshot is stale.
        dirty = true;
        mainMap.put(sha1, replyDoc);
        addKeywords(replyDoc);
    }
    fd.addLimeXMLDocument(replyDoc);
}
/**
 * Returns the amount of items in this collection.
 *
 * @return the number of URN -> document mappings currently held
 */
public int getCount(){
synchronized(mainMap) {
return mainMap.size();
}
}
/**
 * Returns the LimeXMLDocument associated with this hash.
 * May return null if the hash is not found.
 *
 * @param hash the SHA1 URN of the shared file
 * @return the document for that file, or null if none is stored
 */
public LimeXMLDocument getDocForHash(URN hash){
synchronized(mainMap){
return (LimeXMLDocument)mainMap.get(hash);
}
}
/**
 * Returns all documents that match the particular query.
 * If no documents match, this returns an empty list.
 *
 * This goes through the following methodology:
 * 1) Looks in the index trie to determine if ANY
 * of the values in the query's document match.
 * If they do, adds the document to a set of
 * possible matches. A set is used so the same
 * document is not added multiple times.
 * 2) If no documents matched, returns an empty list.
 * 3) Iterates through the possible matching documents
 * and does a fine-grained matchup, using XML-specific
 * matching techniques.
 * 4) Returns an empty list if nothing matched or
 * a list of the matching documents.
 *
 * @param query the XML document describing the search criteria
 * @return the matching documents, or Collections.EMPTY_LIST if none
 */
List getMatchingReplies(LimeXMLDocument query) {
// First get a list of anything that could possibly match.
// This uses a set so we don't add the same doc twice ...
Set matching = null;
synchronized(mainMap) {
for(Iterator i = query.getNameValueSet().iterator(); i.hasNext(); ) {
Map.Entry entry = (Map.Entry)i.next();
// Get the name of the particular field being queried for.
final String name = (String)entry.getKey();
// Lookup the matching Trie for that field.
Trie trie = (Trie)trieMap.get(name);
// No matching trie?.. Ignore.
if(trie == null)
continue;
// Get the value of that field being queried for.
final String value = (String)entry.getValue();
// Get our shared XML docs that match this value.
// This query is from the network, and is therefore already
// normalized -- SHOULD NOT NORMALIZE AGAIN!!
Iterator /* of List */ iter = trie.getPrefixedBy(value);
// If some matches and 'matching' not allocated yet,
// allocate a new Set for storing matches
if(iter.hasNext()) {
if (matching == null)
matching = new HashSet();
// Iterate through each set of matches the Trie found
// and add those matching-lists to our set of matches.
// Note that the trie.getPrefixedBy returned
// an Iterator of Lists -- this is because the Trie
// does prefix matching, so there are many Lists of XML
// docs that could match.
while(iter.hasNext()) {
List matchesVal = (List)iter.next();
matching.addAll(matchesVal);
}
}
}
}
// no matches?... exit.
if( matching == null || matching.size() == 0)
return Collections.EMPTY_LIST;
// Now filter that list using the real XML matching tool...
// (allocation of the result list is deferred until the first hit)
List actualMatches = null;
for(Iterator i = matching.iterator(); i.hasNext(); ) {
LimeXMLDocument currReplyDoc = (LimeXMLDocument)i.next();
if (LimeXMLUtils.match(currReplyDoc, query, false)) {
if( actualMatches == null ) // delayed allocation of the list..
actualMatches = new LinkedList();
actualMatches.add(currReplyDoc);
}
}
// No actual matches?... exit.
if( actualMatches == null || actualMatches.size() == 0 )
return Collections.EMPTY_LIST;
return actualMatches;
}
/**
 * Replaces the document in the map with a newer LimeXMLDocument.
 *
 * @param fd the shared file whose document is being replaced
 * @param newDoc the replacement document
 * @return the older document, which is being replaced. Can be null.
 */
public LimeXMLDocument replaceDoc(FileDesc fd, LimeXMLDocument newDoc) {
    if(LOG.isTraceEnabled())
        LOG.trace("Replacing doc in FD (" + fd + ") with new doc (" + newDoc + ")");
    URN hash = fd.getSHA1Urn();
    LimeXMLDocument oldDoc;
    synchronized(mainMap) {
        dirty = true;
        oldDoc = (LimeXMLDocument)mainMap.put(hash, newDoc);
        // Replacing implies an entry must already have existed.
        Assert.that(oldDoc != null, "attempted to replace doc that did not exist!!");
        removeKeywords(oldDoc);
        addKeywords(newDoc);
    }
    fd.replaceLimeXMLDocument(oldDoc, newDoc);
    return oldDoc;
}
/**
 * Removes the document associated with this FileDesc
 * from this collection, as well as removing it from
 * the FileDesc.
 *
 * @param fd the shared file whose document should be dropped
 * @return true if a document was actually removed
 */
public boolean removeDoc(FileDesc fd) {
    LimeXMLDocument removed;
    synchronized(mainMap) {
        removed = (LimeXMLDocument)mainMap.remove(fd.getSHA1Urn());
        // Only a real removal makes the serialized snapshot stale.
        if(removed != null)
            dirty = true;
    }
    if(removed != null) {
        fd.removeLimeXMLDocument(removed);
        removeKeywords(removed);
    }
    if(LOG.isDebugEnabled())
        LOG.debug("removed: " + removed);
    return removed != null;
}
/**
 * Writes this media file to disk, using the XML in the doc.
 *
 * @param fd the shared file being written
 * @param fileName path of the media file on disk
 * @param doc the XML metadata to commit into the file
 * @param checkBetter if true, prefer on-disk tag data when it is better
 * @return one of the status-code constants, or -1 when no commit was
 *         needed. NOTE(review): -1 is not a defined constant; callers
 *         apparently must treat non-error values as success -- confirm.
 */
public int mediaFileToDisk(FileDesc fd, String fileName, LimeXMLDocument doc, boolean checkBetter) {
int writeState = -1;
if(LOG.isDebugEnabled())
LOG.debug("writing: " + fileName + " to disk.");
// see if you need to change a hash for a file due to a write...
// if so, we need to commit the ID3 data to disk....
MetaDataEditor commitWith = getEditorIfNeeded(fileName, doc, checkBetter);
if (commitWith != null) {
if(commitWith.getCorrectDocument() == null) {
writeState = commitMetaData(fileName, commitWith);
} else {
//The data on disk is better than the data we got in the
//query reply. So we should update the Document we added
removeDoc(fd);
addReply(fd, commitWith.getCorrectDocument());
writeState = NORMAL;//no need to write anything
}
}
Assert.that(writeState != INCORRECT_FILETYPE, "trying to write data to unwritable file");
return writeState;
}
/**
 * Determines whether or not this LimeXMLDocument can or should be
 * commited to disk to replace the ID3 tags in the mp3File.
 * If the ID3 tags in the file are the same as those in document,
 * this returns null (indicating no changes required).
 *
 * @param mp3File path of the media file to inspect
 * @param doc the document whose values may be committed
 * @param checkBetter if true, compare against the on-disk tags and keep
 *        whichever set of fields is better
 * @return An ID3Editor to use when committing or null if nothing
 * should be editted.
 */
private MetaDataEditor getEditorIfNeeded(String mp3File, LimeXMLDocument doc,
boolean checkBetter) {
MetaDataEditor newValues = MetaDataEditor.getEditorForFile(mp3File);
//if this call returned null, we should store the data in our
//xml repository only.
if (newValues == null)
return null;
newValues.populate(doc);
// Now see if the file already has the same info ...
// (a second editor instance holds the tags currently on disk)
MetaDataEditor existing = MetaDataEditor.getEditorForFile(mp3File);
LimeXMLDocument existingDoc = null;
try {
existingDoc = MetaDataReader.readDocument(new File(mp3File));
} catch(IOException e) {
// Can't read the file's current tags -- don't risk a commit.
return null;
}
existing.populate(existingDoc);
//We are supposed to pick and chose the better set of tags
if( newValues.equals(existing) ) {
LOG.debug("tag read from disk is same as XML doc.");
return null;
} else if(checkBetter) {
if(existing.betterThan(newValues)) {
LOG.debug("Data on disk is better, using disk data.");
//Note: In this case we are going to discard the LimeXMLDocument we
//got off the network, because the data on the file is better than
//the data in the query reply. Only in this case, we set the
//"correctDocument variable of the ID3Editor.
existing.setCorrectDocument(existingDoc);
return existing;
} else {
LOG.debug("Retrieving better fields from disk.");
newValues.pickBetterFields(existing);
}
}
// Commit using this Meta data editor ...
return newValues;
}
/**
 * Commits the changes to disk.
 * If anything was changed on disk, notifies the FileManager of a change.
 *
 * @param fileName path of the media file to commit into
 * @param editor the populated editor that performs the write
 * @return the status code reported by the editor's commit
 */
private int commitMetaData(String fileName, MetaDataEditor editor) {
//write to mp3 file...
int retVal = editor.commitMetaData(fileName);
if(LOG.isDebugEnabled())
LOG.debug("wrote data: " + retVal);
// any error where the file wasn't changed ...
// (nothing on disk moved, so no FileManager notification is needed)
if( retVal == FILE_DEFECTIVE ||
retVal == RW_ERROR ||
retVal == BAD_ID3 ||
retVal == INCORRECT_FILETYPE)
return retVal;
// We do not remove the hash from the hashMap because
// MetaFileManager needs to look it up to get the doc.
//Since the hash of the file has changed, the metadata pertaining
//to other schemas will be lost unless we update those tables
//with the new hashValue.
//NOTE:This is the only time the hash will change-(mp3 and audio)
RouterService.getFileManager().fileChanged(new File(fileName));
return retVal;
}
/**
 * Serializes the current map to disk.
 *
 * @return true if the map was already clean or the write succeeded;
 *         false if the write failed (the map stays dirty for retry)
 */
public boolean writeMapToDisk() {
    boolean wrote = false;
    synchronized(mainMap) {
        if(!dirty)
            return true;
        ObjectOutputStream out = null;
        try {
            out = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(dataFile)));
            out.writeObject(mainMap);
            out.flush();
            wrote = true;
        } catch(Throwable ignored) {
            LOG.trace("Unable to write", ignored);
        } finally {
            IOUtils.close(out);
        }
        // FIX: only mark clean on success.  The original cleared the dirty
        // flag unconditionally, so a failed write was never retried and the
        // in-memory changes were silently lost on shutdown.
        if(wrote)
            dirty = false;
    }
    return wrote;
}
/**
 * Reads the serialized map off of the disk.
 *
 * @return the deserialized map, or a new empty HashMap if the data file
 *         does not exist yet (normal first run) or cannot be read
 */
private Map readMapFromDisk() {
    // FIX: a missing file is the expected first-run case -- don't open it
    // just to log a misleading ERROR for the FileNotFoundException.
    if(!dataFile.isFile())
        return new HashMap();
    ObjectInputStream in = null;
    Map read = null;
    try {
        in = new ConverterObjectInputStream(new BufferedInputStream(new FileInputStream(dataFile)));
        read = (Map)in.readObject();
    } catch(Throwable t) {
        // Corrupt or incompatible data -- fall through to an empty map.
        LOG.error("Unable to read LimeXMLCollection", t);
    } finally {
        IOUtils.close(in);
    }
    return read == null ? new HashMap() : read;
}
}