package arkref.data;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
/**
 * Progressively add pairwise coreference equivalences to this data structure;
 * internally, it maintains the transitive closure of those equivalences.
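 *
 * A rough usage sketch (doc, m1, m2, m3 are placeholder objects whose construction
 * is omitted; only methods defined in this class are called):
 * <pre>
 *   EntityGraph graph = new EntityGraph(doc);
 *   graph.addPair(m1, m2);
 *   graph.addPair(m2, m3);   // transitively links m1 and m3 as well
 *   graph.freezeEntities();
 *   for (EntityGraph.Entity e : graph.sortedEntities()) { ... }
 * </pre>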
**/
public class EntityGraph implements Serializable {
private static final long serialVersionUID = -3407431672317709104L;
public Map<Mention, HashSet<Mention>> mention2corefs;
public Set<Entity> entities = null;
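	/** A coreference cluster: a stable string id plus the set of mentions it groups together. **/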
public class Entity implements Serializable{
private static final long serialVersionUID = -6664324222677207904L;
public String id;
public Set<Mention> mentions;
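		/** Entity identity is determined entirely by the id string. **/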
		@Override
		public int hashCode() { return id.hashCode(); }
		@Override
		public boolean equals(Object _e2) {
			if (!(_e2 instanceof Entity)) return false;   // also covers null
			Entity e2 = (Entity) _e2;
			assert this.id != null && e2.id != null;
			return this.id.equals(e2.id);
		}
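		/** This entity's mentions, ordered by mention ID. **/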
public List<Mention> sortedMentions() {
			List<Mention> ms = new ArrayList<Mention>(mentions);
Collections.sort(ms, new Comparator<Mention>() {
public int compare(Mention m1, Mention m2) {
					return Integer.valueOf(m1.ID()).compareTo(Integer.valueOf(m2.ID()));
}
});
return ms;
}
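		/** Human-readable label: "singleton_<ids>" for one-mention entities, "entity_<ids>" otherwise. **/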
		@Override
		public String toString() {
			String name = entName(mentions);
			if (mentions.size() == 1)
				return "singleton_" + name;
			return "entity_" + name;
		}
}
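	/** Start with every mention of the document in its own coreference set, containing just itself. **/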
public EntityGraph(Document d) {
mention2corefs = new HashMap<Mention, HashSet<Mention>>();
for (Mention m : d.mentions()) {
mention2corefs.put(m, new HashSet<Mention>());
mention2corefs.get(m).add(m);
}
}
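	/** Declare m1 and m2 coreferent; merges their coreference sets so the transitive
	 *  closure stays complete. Only legal before freezeEntities(). **/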
public void addPair(Mention m1, Mention m2) {
assert entities==null : "we're frozen, please don't addPair() anymore";
		// Strategy: mention2corefs always holds the complete set of coreferents for each mention,
		// so all we have to do here is merge the two sets.
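		// For example, if m1's set is {A, B} and m2's set is {C}, then after this call
		// A, B, and C each map to {A, B, C}.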
		Set<Mention> corefs1 = new HashSet<Mention>(mention2corefs.get(m1));
		Set<Mention> corefs2 = new HashSet<Mention>(mention2corefs.get(m2));
for (Mention n1 : corefs1) {
for (Mention n2 : corefs2) {
mention2corefs.get(n1).add(n2);
mention2corefs.get(n2).add(n1);
}
}
}
/** Call this only once, and only after all addPair()ing is done. **/
public void freezeEntities() {
assert entities == null : "call freezeEntities() only once please";
entities = new HashSet<Entity>();
for (Mention m : mention2corefs.keySet()) {
Entity e = makeEntity(m);
entities.add(e);
}
}
/** helper for freezeEntities() **/
private Entity makeEntity(Mention m) {
Entity e = new Entity();
e.id = entName(m);
e.mentions = mention2corefs.get(m);
return e;
}
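	/** All mentions currently known to corefer with m, including m itself. **/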
public Set<Mention> getLinkedMentions(Mention m){
return mention2corefs.get(m);
}
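	/** True iff m corefers with nothing but itself. **/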
public boolean isSingleton(Mention m) {
return mention2corefs.get(m).size()==1;
}
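	/** Canonical name for m's coreference set: its mention IDs, sorted and joined with "_". **/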
public String entName(Mention m) {
return entName(mention2corefs.get(m));
}
public String entName(Set<Mention> corefs) {
List<Integer> L = new ArrayList<Integer>();
for (Mention m : corefs) {
L.add(m.ID());
}
Collections.sort(L);
return StringUtils.join(L, "_");
}
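	/** The frozen entities, sorted by id; only meaningful after freezeEntities(). **/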
public List<Entity> sortedEntities() {
		List<Entity> ents = new ArrayList<Entity>(entities);
Collections.sort(ents, new Comparator<Entity>() {
public int compare(Entity e1, Entity e2) {
return e1.id.compareTo(e2.id);
}});
return ents;
}
}