import edu.cmu.minorthird.util.*;
import edu.cmu.minorthird.text.*;
import edu.cmu.minorthird.text.gui.*;
import edu.cmu.minorthird.text.learn.*;
import edu.cmu.minorthird.text.learn.experiments.*;
import edu.cmu.minorthird.text.mixup.*;
import edu.cmu.minorthird.classify.*;
import edu.cmu.minorthird.classify.experiments.*;
import edu.cmu.minorthird.classify.algorithms.linear.*;
import edu.cmu.minorthird.classify.algorithms.trees.*;
import java.util.*;
import java.util.regex.*;
import java.io.*;
import org.apache.log4j.*;
// 'from' => ["Y-caption.txt"],
// 'produces' => ["proteinInPanelLabeled Y","cellInPanelLabeled Y", "Y-label.txt"],
/**
 * Annotator that links image pointers (panel labels such as "A", "b-d")
 * to the text scopes they govern and records which panel letters each
 * pointer denotes.
 *
 * <p>Reads these span types from the incoming labels: "local", "regional",
 * "localScope", "regionalScope" and "globalScope".  Writes these span types:
 * "citationStyle", "bulletStyle", "imagePointer", "citationScope",
 * "bulletScope", "globalScope" and "scope", plus the span property
 * "semantics" (a tab-separated list of single-letter panel labels).
 *
 * <p>NOTE(review): the four working fields below are rebuilt on every call
 * to {@link #doAnnotate}, so one instance should presumably not be used
 * from several threads at once -- confirm against callers.
 */
public class ImagePointerAnnotator extends AbstractAnnotator implements Serializable
{
    /**
     * Returns a fixed placeholder; no real explanation is produced.
     *
     * @param labels ignored
     * @param documentSpan ignored
     * @return the constant string "no explanation"
     */
    public String explainAnnotation(TextLabels labels,Span documentSpan)
    {
        return "no explanation";
    }

    /**
     * Computes pointer/scope ownership and pointer definitions against a
     * nested view of <code>labels</code>, copies the results into
     * <code>labels</code>, and marks <code>labels</code> as annotated by
     * "imagePointer".
     *
     * @param labels the labels to read from and add annotations to
     */
    protected void doAnnotate(MonotonicTextLabels labels)
    {
        MonotonicTextLabels tmpLabels = new NestedTextLabels(labels);
        determineScopes(tmpLabels);
        importScopes(labels,tmpLabels);
        labels.setAnnotatedBy("imagePointer");
    }

    // Working state, rebuilt by determineScopes() on every doAnnotate() call.
    // The fields are intentionally left non-static and non-transient: changing
    // that would alter the default serialVersionUID of this Serializable class.
    private Map imgPtrForScope;      // scope span -> image pointer span that owns it
    private List imagePtrList;       // all image pointer spans, local or regional
    private Map imagePtrDefinition;  // image pointer span -> TreeSet of label strings it denotes
    private Set allLabels;           // every label string denoted by any image pointer

    // Defined in the output labels:
    //   bulletStyle   = regional
    //   citationStyle = local
    //   imagePointer  = bulletStyle union citationStyle
    //   scope         = bulletScope union citationScope union globalScope
    //   property "semantics" of an image pointer = its definition
    //   property "semantics" of a scope          = definition of its owning pointer
    //   property "semantics" of a document       = allLabels

    /**
     * Copies the image pointer and scope spans found in <code>tmpLabels</code>
     * into <code>labels</code> under their output type names, then sets the
     * "semantics" property of every document span to the full label list.
     */
    private void importScopes(MonotonicTextLabels labels,TextLabels tmpLabels)
    {
        defImgPtr("bulletStyle","regional",labels,tmpLabels);
        defImgPtr("citationStyle","local",labels,tmpLabels);

        defScope("bulletScope","regionalScope",labels,tmpLabels);
        defScope("citationScope","localScope",labels,tmpLabels);
        defScope("globalScope","globalScope",labels,tmpLabels);

        // The same label list is attached to every document in the text base.
        String sem = asString(allLabels);
        for (Span.Looper i=labels.getTextBase().documentSpanIterator(); i.hasNext(); ) {
            Span doc = i.nextSpan();
            labels.setProperty(doc,"semantics",sem);
        }
    }

    /**
     * Adds every <code>tmpType</code> span to <code>labels</code> as both
     * <code>type</code> and "scope".  When an image pointer owns the scope and
     * has a definition, that definition becomes the scope's "semantics".
     */
    private void defScope(String type,String tmpType,MonotonicTextLabels labels,TextLabels tmpLabels)
    {
        for (Span.Looper i=tmpLabels.instanceIterator(tmpType); i.hasNext(); ) {
            Span s = i.nextSpan();
            labels.addToType(s, type);
            labels.addToType(s, "scope");
            Span owner = (Span)imgPtrForScope.get(s);
            if (owner!=null) {
                String sem = semanticsOf(owner);
                if (sem!=null) labels.setProperty(s,"semantics",sem);
            }
        }
    }

    /**
     * @return the tab-separated labels defined for image pointer
     *         <code>s</code>, or null if none were defined
     */
    private String semanticsOf(Span s)
    {
        Set set = (Set)imagePtrDefinition.get(s);
        return set==null ? null : asString(set);
    }

    /**
     * Adds every <code>tmpType</code> span to <code>labels</code> as both
     * <code>type</code> and "imagePointer", and sets its "semantics" property
     * when a definition exists for it.
     */
    private void defImgPtr(String type,String tmpType,MonotonicTextLabels labels,TextLabels tmpLabels)
    {
        for (Span.Looper i=tmpLabels.instanceIterator(tmpType); i.hasNext(); ) {
            Span s = i.nextSpan();
            labels.addToType(s, type);
            labels.addToType(s, "imagePointer");
            String sem = semanticsOf(s);
            if (sem!=null) labels.setProperty(s,"semantics",sem);
        }
    }

    /**
     * Joins the strings of <code>set</code>, in iteration order, with a tab
     * between consecutive elements.
     *
     * @param set a set whose elements are all Strings
     * @return the joined string; empty if the set is empty
     */
    private String asString(Set set)
    {
        StringBuffer buf = new StringBuffer("");
        boolean first = true;
        for (Iterator i=set.iterator(); i.hasNext(); ) {
            String s = (String)i.next();
            // Separator goes BETWEEN elements.  The previous test
            // (buf.length()==0) emitted one leading tab and then ran all
            // labels together.
            if (!first) buf.append("\t");
            buf.append(s);
            first = false;
        }
        return buf.toString();
    }

    /**
     * Rebuilds the working state: finds which image pointer owns which scope,
     * then expands each image pointer's text into the set of labels it denotes.
     */
    private void determineScopes(MonotonicTextLabels labels)
    {
        // Figure out which image pointer 'owns' which scope.  A pointer of type
        // T is matched to the first span of type T+"Scope" that contains it.
        // If several pointers share a scope, the one visited last wins.
        imgPtrForScope = new TreeMap();
        imagePtrList = new ArrayList();
        String[] ptrTypes = new String[] { "local", "regional" };
        for (int i=0; i<ptrTypes.length; i++) {
            // NOTE(review): the counter total is the text base size, but
            // progress() is called once per image pointer -- confirm intended.
            ProgressCounter pc = new ProgressCounter("finding "+ptrTypes[i]+" scopes","image pointer",labels.getTextBase().size());
            for (Span.Looper j=labels.instanceIterator(ptrTypes[i]); j.hasNext(); ) {
                Span imgPtrSpan = j.nextSpan();
                imagePtrList.add(imgPtrSpan);
                Span scopeSpan = findContainingSpan(imgPtrSpan, labels, ptrTypes[i]+"Scope");
                if (scopeSpan!=null) imgPtrForScope.put( scopeSpan, imgPtrSpan );
                pc.progress();
            }
            pc.finished();
        }

        // Expand out the 'definition' of the image pointers.
        imagePtrDefinition = new TreeMap();
        allLabels = new TreeSet();
        // The patterns are unanchored so that repeated find() calls visit every
        // range/letter in the pointer text.  They were previously wrapped in
        // greedy ".*", which made each find() swallow the rest of the line, so
        // only the last occurrence per line was ever seen.
        // They are kept local (not static final): a static initializer would
        // change this Serializable class's default serialVersionUID.
        Pattern upperRange = Pattern.compile("\\b([A-Z])\\s*-\\s*([A-Z])\\b");
        Pattern lowerRange = Pattern.compile("\\b([a-z])\\s*-\\s*([a-z])\\b");
        Pattern singleLetter = Pattern.compile("\\b([A-Za-z])\\b");
        for (Iterator i=imagePtrList.iterator(); i.hasNext(); ) {
            Span span = (Span)i.next();
            String string = span.asString();
            Matcher m1 = upperRange.matcher(string);
            while (m1.find()) defineRange(span,string,m1);
            Matcher m2 = lowerRange.matcher(string);
            while (m2.find()) defineRange(span,string,m2);
            Matcher m3 = singleLetter.matcher(string);
            while (m3.find()) defineLetter(span,string,m3);
        }
    }

    /**
     * Returns the definition set for <code>span</code>, creating and
     * registering an empty one if the span has none yet.
     */
    private TreeSet definitionFor(Span span)
    {
        TreeSet set = (TreeSet)imagePtrDefinition.get(span);
        if (set==null) imagePtrDefinition.put( span, (set=new TreeSet()) );
        return set;
    }

    /**
     * Records one single-character label both in the pointer's own
     * definition and in the global label set.
     */
    private void addLabel(Set definition, char ch)
    {
        String label = String.valueOf(ch);
        definition.add( label );
        allLabels.add( label );
    }

    /**
     * Defines the semantics of an image pointer of the form 'b-c': every
     * letter from group 1 through group 2 of the match, inclusive.  A reversed
     * range (e.g. 'c-a') contributes no letters.
     */
    private void defineRange(Span span, String string, Matcher matcher)
    {
        char lo = string.charAt(matcher.start(1));
        char hi = string.charAt(matcher.start(2));
        TreeSet set = definitionFor(span);
        for (char ch=lo; ch<=hi; ch++) {
            addLabel(set, ch);
        }
    }

    /**
     * Defines the semantics of an image pointer of the form 'a': the single
     * letter captured by group 1.  The letter is also added to the global
     * label set, consistently with defineRange; it was previously omitted.
     */
    private void defineLetter(Span span, String string, Matcher matcher)
    {
        char ch = string.charAt(matcher.start(1));
        addLabel(definitionFor(span), ch);
    }

    /**
     * Finds a span of the given type, in the same document as <code>s</code>,
     * that contains <code>s</code>.
     *
     * @return the first such span in iteration order, or null if there is none
     */
    private Span findContainingSpan(Span s,TextLabels labels,String type)
    {
        String id = s.getDocumentId();
        for (Span.Looper j=labels.instanceIterator(type,id); j.hasNext(); ) {
            Span t = j.nextSpan();
            if (t.contains(s)) return t;
        }
        return null;
    }
}