package edu.stanford.nlp.ie;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.ie.pascal.AcronymModel;
import edu.stanford.nlp.ling.CoreAnnotations;
import java.util.*;
/**
 * Sequence prior for seminar announcements that lowers the score of label sequences whose
 * extracted entities look mutually inconsistent.
 * <p>
 * {@link #scoreOf(int[])} starts from zero and subtracts penalties for:
 * <ul>
 * <li>STIME entities whose digits differ from those of other extracted STIME phrases,</li>
 * <li>ETIME entities, in the same way,</li>
 * <li>tokens that equal (ignoring case) the last word of a SPEAKER entity but are not
 *     themselves labeled SPEAKER.</li>
 * </ul>
 * Loading this class constructs an {@link AcronymModel} in a static initializer.
 *
 * @author Jenny Finkel
 */
public class SeminarsPrior<IN extends CoreMap> extends EntityCachingAbstractSequencePrior<IN> {

  // Previously tried values, kept for reference:
  //double penalty = 4.0;
  //double penalty1 = 3.0;
  //double penalty2 = 4.0;
  /**
   * Amount subtracted from the score per violation. Time mismatches multiply it by the
   * number of words in the offending entity; speaker mismatches subtract it once per token.
   */
  double penalty = 2.3;

  /** Lower-cased, interned text of each document token, indexed by token position. */
  private String[] interned;

  /**
   * Creates the prior and caches the lower-cased, interned text of every token in {@code doc}.
   *
   * @param backgroundSymbol passed through to the superclass
   * @param classIndex       passed through to the superclass; also used here to map entity
   *                         type ids to label names
   * @param doc              the document tokens; each must carry a
   *                         {@code CoreAnnotations.TextAnnotation}
   */
  public SeminarsPrior(String backgroundSymbol, Index<String> classIndex, List<IN> doc) {
    super(backgroundSymbol, classIndex, doc);
    init(doc);
  }

  /**
   * Fills {@link #interned} with the lower-cased, interned text of each token.
   * Interning makes the identity comparison in {@link #scoreOf(int[])} valid.
   */
  private void init(List<IN> doc) {
    interned = new String[doc.size()];
    int i = 0;
    for (IN wi : doc) {
      // Fixed locale so that matching does not depend on the JVM's default locale.
      interned[i++] = wi.get(CoreAnnotations.TextAnnotation.class).toLowerCase(Locale.ENGLISH).intern();
    }
  }

  /**
   * Scores the current entity configuration against the given label sequence.
   * <p>
   * Entities are read from the inherited {@code entities} array
   * (NOTE(review): presumably {@code entities[i]} is the entity covering position {@code i},
   * or {@code null} for none; confirm against the superclass). An entity is processed once,
   * at the first index where it differs from the entry before it.
   *
   * @param sequence label index per token position; only consulted for the speaker check
   * @return zero minus all accumulated penalties
   * @throws IllegalStateException if an entity has a type other than SPEAKER, LOCATION,
   *                               STIME or ETIME
   */
  public double scoreOf(int[] sequence) {
    Set<String> stimes = Generics.newHashSet();
    Set<String> etimes = Generics.newHashSet();
    List<Entity> speakersL = new ArrayList<Entity>();
    List<Entity> stimesL = new ArrayList<Entity>();
    List<Entity> etimesL = new ArrayList<Entity>();

    for (int i = 0; i < entities.length; i++) {
      Entity entity = entities[i];
      // Skip empty slots and continuation positions of an entity already handled.
      if (entity == null || (i > 0 && entities[i - 1] == entity)) {
        continue;
      }
      String type = classIndex.get(entity.type);
      // SPEAKER/LOCATION are matched ignoring case, STIME/ETIME exactly (as in the original).
      if (type.equalsIgnoreCase("SPEAKER")) {
        speakersL.add(entity);
      } else if (type.equalsIgnoreCase("LOCATION")) {
        // Locations are recognized but not scored; see the disabled check below.
      } else if (type.equals("STIME")) {
        stimes.add(lowerCasedPhrase(entity));
        stimesL.add(entity);
      } else if (type.equals("ETIME")) {
        etimes.add(lowerCasedPhrase(entity));
        etimesL.add(entity);
      } else {
        // Fail loudly instead of terminating the JVM with a success exit status.
        throw new IllegalStateException("unknown entity type: " + type);
      }
    }

    double p = 0.0;
    p = subtractTimeMismatches(p, stimesL, stimes);
    p = subtractTimeMismatches(p, etimesL, etimes);

    // Disabled location-alias check, kept for reference. It is the only user of
    // aliasLocCache and acronymModel. Re-enabling it also requires collecting LOCATION
    // entities (locationsL) and their lower-cased phrases (locations) in the loop above.
    //    for (Entity locationE : locationsL) {
    //      String location = StringUtils.join(locationE.words, " ");
    //      for (String location1 : locations) {
    //        String s1 = location;
    //        String s2 = location1;
    //        if (s2.length() > s1.length()) {
    //          String tmp = s2;
    //          s2 = s1;
    //          s1 = tmp;
    //        }
    //        Pair<String,String> pair = new Pair(s1, s2);
    //        Boolean b = aliasLocCache.get(pair);
    //        if (b == null) {
    //          double d = acronymModel.HearstSimilarity(s1, s2);
    //          b = (d >= 0.7);
    //          aliasLocCache.put(pair, b);
    //        }
    //        if (!b) {
    //          p -= locationE.words.size() * penalty;
    //        }
    //      }
    //    }

    int speakerIndex = classIndex.indexOf("SPEAKER");
    for (Entity speakerE : speakersL) {
      // Last word of the speaker entity, taken from the interned token array.
      String lastName = interned[speakerE.startPosition + speakerE.words.size() - 1];
      for (int i = 0; i < interned.length; i++) {
        // Reference comparison is intentional: both strings come from the interned array,
        // so equal text implies the same object.
        if (interned[i] == lastName && sequence[i] != speakerIndex) {
          p -= penalty;
        }
      }
    }
    return p;
  }

  /** Joins the entity's words with single spaces and lower-cases the result. */
  private static String lowerCasedPhrase(Entity entity) {
    return StringUtils.join(entity.words, " ").toLowerCase(Locale.ENGLISH);
  }

  /**
   * Subtracts time-mismatch penalties from {@code score} and returns the result.
   * For every entity and every distinct phrase whose normalized digits differ from the
   * entity's, {@code entity.words.size() * penalty} is subtracted. Nothing is subtracted
   * when exactly one distinct phrase exists.
   * The running score is threaded through so that the floating-point accumulation order
   * is the same as subtracting directly from a single variable.
   */
  private double subtractTimeMismatches(double score, List<Entity> timeEntities, Set<String> timePhrases) {
    if (timePhrases.size() == 1) {
      return score;
    }
    // Normalize each distinct phrase once rather than once per entity.
    List<String> phraseTimes = new ArrayList<String>(timePhrases.size());
    for (String phrase : timePhrases) {
      phraseTimes.add(normalizeTime(phrase));
    }
    for (Entity timeEntity : timeEntities) {
      String entityTime = normalizeTime(StringUtils.join(timeEntity.words, " "));
      for (String phraseTime : phraseTimes) {
        if (!entityTime.equals(phraseTime)) {
          score -= timeEntity.words.size() * penalty;
          //System.err.println(entityTime+" vs "+phraseTime+" "+timePhrases);
        }
      }
    }
    return score;
  }

  /**
   * Reduces a time phrase to its ASCII digits; a result of one or two digits gets
   * {@code "00"} appended (so {@code "3 pm"} and {@code "3:00"} both become {@code "300"}).
   */
  private static String normalizeTime(String phrase) {
    StringBuilder digits = new StringBuilder();
    for (int i = 0; i < phrase.length(); i++) {
      char c = phrase.charAt(i);
      if (c >= '0' && c <= '9') {
        digits.append(c);
      }
    }
    if (digits.length() == 1 || digits.length() == 2) {
      digits.append("00");
    }
    return digits.toString();
  }

  /** Cache for the disabled location-alias check in {@link #scoreOf(int[])}; currently unused. */
  private static final Map<Pair<String, String>, Boolean> aliasLocCache = Generics.newHashMap();

  /** Model for the disabled location-alias check; constructed when the class is loaded. */
  private static AcronymModel acronymModel;

  static {
    try {
      acronymModel = new AcronymModel();
    } catch (Exception e) {
      // Keep the original message but chain the cause so the stack trace is not lost.
      throw new RuntimeException(e.getMessage(), e);
    }
  }
}