package edu.cmu.minorthird.text.learn; import java.util.Iterator; import edu.cmu.minorthird.text.*; import edu.cmu.minorthird.classify.*; /** * Feedback for an annotation learner. * * @author William Cohen */ public class AnnotationExample { public static String OUTSIDE = "outside"; public static String INSIDE = "inside"; public static String START = "start"; public static String END = "end"; public static String NOT_START_OR_END = "notStartOrEnd"; private String inputSpanType; private String inputSpanProp; private Span document; private TextLabels labels; /** * @param document give feedback to learner about this document * @param labels feedback information is in these labels * @param inputSpanType learner will learn how to extract spans of this type * @param inputSpanProp learner will classify extracted spans according to this type */ public AnnotationExample(Span document,TextLabels labels,String inputSpanType,String inputSpanProp) { this.document = document; this.labels = labels; this.inputSpanType = inputSpanType; this.inputSpanProp = inputSpanProp; } public Span getDocumentSpan() { return document; } public TextLabels getLabels() { return labels; } public String getInputType() { return inputSpanType; } public String getInputProp() { return inputSpanProp; } @Override public String toString() { return "[AnnEx: document="+document+"]"; } /** Return the name of the class associated with this span. If * inputSpanType is defined, the class name will be POS or NEG; * otherwise, if inputSpanProp is defined, the class name will be * the property value assigned, or NEG. */ public String getClassName(Span span) { String className = ExampleSchema.NEG_CLASS_NAME; if (getInputType()!=null) { if (getLabels().hasType(span,getInputType())) className = ExampleSchema.POS_CLASS_NAME; } else if (getInputProp()!=null) { String propValue = getLabels().getProperty(span, getInputProp()); if (propValue!=null) className = propValue; } else { throw new IllegalStateException("inputType && inputProp undefined for answeredQuery: "+this); } return className; } // // convenience methods // public TextLabels labelTokensInsideOutside(String prop) { MonotonicTextLabels result = new NestedTextLabels(labels); String documentId = document.getDocumentId(); labelTokens(result,result.closureIterator(inputSpanType,documentId),prop,OUTSIDE); labelTokens(result,result.instanceIterator(inputSpanType,documentId),prop,INSIDE); return result; } public TextLabels labelTokensStartEnd(String prop) { MonotonicTextLabels result = new NestedTextLabels(labels); String documentId = document.getDocumentId(); labelTokens(result,result.closureIterator(inputSpanType,documentId),prop,NOT_START_OR_END); for (Iterator<Span> i=result.instanceIterator(inputSpanType,documentId); i.hasNext(); ) { Span s = i.next(); if (s.size()>0) { result.setProperty( s.getToken(0), prop, START ); result.setProperty( s.getToken(s.size()-1), prop, END ); } } return result; } private void labelTokens(MonotonicTextLabels labels,Iterator<Span> i,String prop,String value) { while (i.hasNext()) { Span s = i.next(); for (int j=0; j<s.size(); j++) { labels.setProperty( s.getToken(j), prop, value); } } } }