package ecologylab.bigsemantics.html.documentstructure;
import java.util.Iterator;
import java.util.concurrent.ConcurrentHashMap;
import ecologylab.bigsemantics.metadata.builtins.Document;
import ecologylab.bigsemantics.model.text.CompositeTermVector;
import ecologylab.bigsemantics.model.text.ITermVector;
import ecologylab.net.ParsedURL;
//TODO -- make serializable!
/**
 * The set of {@link SemanticAnchor}s that link into a document, keyed by the {@link ParsedURL}
 * each anchor reports through {@code sourcePurl()}.
 * <p>
 * Alongside the anchors themselves, this collection maintains:
 * <ul>
 * <li>a lazily created {@link CompositeTermVector} to which the term vector of every newly
 * stored anchor is added, weighted by that anchor's significance;</li>
 * <li>the {@link Document} ancestor and generation derived from the source documents passed to
 * the {@code add} methods;</li>
 * <li>a flag recording whether any stored anchor reported {@code fromSemanticAction()}.</li>
 * </ul>
 * Iterating over this object iterates over the stored anchors (the map's values).
 */
@SuppressWarnings("serial")
public class SemanticInLinks extends ConcurrentHashMap<ParsedURL, SemanticAnchor> implements Iterable<SemanticAnchor>
{
  /** Aggregate of the term vectors of all stored anchors; created on first use. */
  private CompositeTermVector compositeTermVector;

  /** Source document currently selected by {@link #add(Document)}; null until one is supplied. */
  private Document ancestor;

  /** Generation of {@link #ancestor} plus one, as assigned by {@link #add(Document)}. */
  private int generation;

  /** Set to true once any stored anchor reports {@code fromSemanticAction()}. */
  private boolean fromSemanticAction;

  /**
   * Creates an empty collection of semantic inlinks.
   */
  public SemanticInLinks()
  {
  }

  /**
   * Recycles every stored anchor, empties the map, recycles and drops the composite term vector,
   * and forgets the ancestor document.
   * <p>
   * NOTE(review): {@code generation} and {@code fromSemanticAction} are not reset here; confirm
   * whether instances are expected to be reused after recycling.
   */
  public synchronized void recycle()
  {
    // Entries are keyed by ParsedURL, so they cannot be removed by integer position:
    // remove(int) would box the int and look it up as a key, never matching anything.
    // Walk the stored anchors directly instead, then drop them all at once.
    for (SemanticAnchor semanticAnchor : values())
    {
      semanticAnchor.recycle();
    }
    clear();

    if (compositeTermVector != null)
    {
      compositeTermVector.recycle();
      compositeTermVector = null;
    }
    ancestor = null;
  }

  /**
   * Returns the composite term vector for this collection, creating it on first access.
   *
   * @return the (never null) composite term vector aggregating the stored anchors
   */
  public CompositeTermVector semanticInlinkCollection()
  {
    CompositeTermVector result = compositeTermVector;
    if (result == null)
    {
      compositeTermVector = new CompositeTermVector();
      result = compositeTermVector;
    }
    return result;
  }

  /**
   * Stores an anchor under its source location unless one is already stored for that location,
   * then updates the ancestor / generation from the given source document.
   * <p>
   * When the anchor is newly stored, its term vector is added to the composite term vector with
   * the anchor's significance, and the semantic-action flag is raised if the anchor reports
   * {@code fromSemanticAction()}. When an anchor already exists for the same source location,
   * the new anchor is ignored.
   *
   * @param newAnchor the anchor to store; must not be null
   * @param source    the document the anchor was found in; may be null, in which case the
   *                  ancestor and generation are left untouched
   * @return always true
   */
  public synchronized boolean add(SemanticAnchor newAnchor, Document source)
  {
    SemanticAnchor oldAnchor = putIfAbsent(newAnchor.sourcePurl(), newAnchor);
    if (oldAnchor == null)
    {
      semanticInlinkCollection().add(newAnchor.getSignificance(), newAnchor.termVector());
      if (newAnchor.fromSemanticAction())
        fromSemanticAction = true;
    }
    // else: an anchor from this source already exists.
    //TODO -- should we count and incorporate new terms?!

    add(source);
    return true;
  }

  /**
   * Considers a source document as the ancestor of this collection. The source is adopted when
   * no ancestor is set yet, or when its generation plus one is lower than the current
   * generation. A null source is ignored.
   *
   * @param source candidate ancestor document; may be null
   */
  public void add(Document source)
  {
    if (source != null)
    {
      int sourceBasedGeneration = source.getGeneration() + 1;
      if (sourceBasedGeneration < generation || ancestor == null)
      {
        generation = sourceBasedGeneration;
        ancestor = source;
      }
    }
  }

  /**
   * Returns a weight for this collection of semantic inlinks, giving a reference ITermVector to
   * weigh against. Usually this is the participant interest TermVector.
   * <p>
   * The weight is the {@code idfDot} of the composite term vector with the weighting vector,
   * plus the significance of every stored anchor.
   *
   * @param weightingVector ITermVector to weigh the semantic inlinks with.
   * @return the computed weight
   */
  public double getWeight(ITermVector weightingVector)
  {
    double idfDot = this.semanticInlinkCollection().idfDot(weightingVector);
    for (SemanticAnchor anchor : this)
    {
      idfDot += anchor.getSignificance();
    }
    return idfDot;
  }

  /**
   * Returns 1 if no links exist, else the mean of the significances of the stored anchors.
   *
   * @return the mean significance, or 1 when the collection is empty
   */
  public float meanSignificance()
  {
    // Sum and count in a single pass so the divisor always matches the elements actually
    // visited, even if the map is modified by another thread in the meantime.
    float totalSig = 0;
    int count = 0;
    for (SemanticAnchor a : this)
    {
      totalSig += a.getSignificance();
      count++;
    }
    if (count == 0)
      return 1;
    return totalSig / count;
  }

  /**
   * Iterates over the stored anchors.
   *
   * @return an iterator over the values of this map
   */
  @Override
  public Iterator<SemanticAnchor> iterator()
  {
    return values().iterator();
  }

  /**
   * @return the ancestor document, or null if none has been set
   */
  public Document getAncestor()
  {
    return ancestor;
  }

  /**
   * @return the generation
   */
  public int getGeneration()
  {
    return generation;
  }

  /**
   * Returns the generation to use for this collection. If any stored anchor came from a semantic
   * action, this is the ancestor's own generation (0 when there is no ancestor); otherwise it is
   * {@link #getGeneration()}.
   *
   * @return the effective generation
   */
  public int getEffectiveGeneration()
  {
    return fromSemanticAction ? (ancestor != null ? ancestor.getGeneration() : 0) : generation;
  }

  /**
   * Adds every anchor of another collection to this one. Anchors whose source location is
   * already present here are ignored. No source document is passed along, so the ancestor and
   * generation of this collection are not changed by the merge.
   *
   * @param oldInlinks the collection whose anchors are merged in; must not be null
   */
  public void merge(SemanticInLinks oldInlinks)
  {
    for (SemanticAnchor inlink : oldInlinks)
    {
      add(inlink, null);
    }
  }
}