package edu.stanford.nlp.ie; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.Generics; import edu.stanford.nlp.util.Index; import edu.stanford.nlp.util.StringUtils; import java.util.Set; import java.util.List; import java.util.ArrayList; /** * @author Jenny Finkel */ public class AcquisitionsPrior<IN extends CoreMap> extends EntityCachingAbstractSequencePrior<IN> { double penalty = 4.0; double penalty1 = 3.0; double penalty2 = 4.0; public AcquisitionsPrior(String backgroundSymbol, Index<String> classIndex, List<IN> doc) { super(backgroundSymbol, classIndex, doc); } public double scoreOf(int[] sequence) { Set<String> purchasers = Generics.newHashSet(); Set<String> purchabrs = Generics.newHashSet(); Set<String> sellers = Generics.newHashSet(); Set<String> sellerabrs = Generics.newHashSet(); Set<String> acquireds = Generics.newHashSet(); Set<String> acqabrs = Generics.newHashSet(); List<Entity> purchasersL = new ArrayList<Entity>(); List<Entity> purchabrsL = new ArrayList<Entity>(); List<Entity> sellersL = new ArrayList<Entity>(); List<Entity> sellerabrsL = new ArrayList<Entity>(); List<Entity> acquiredsL = new ArrayList<Entity>(); List<Entity> acqabrsL = new ArrayList<Entity>(); double p = 0.0; for (int i = 0; i < entities.length; i++) { Entity entity = entities[i]; if ((i == 0 || entities[i-1] != entity) && entity != null) { String type = classIndex.get(entity.type); String phrase = StringUtils.join(entity.words, " ").toLowerCase(); if (type.equals("purchaser")) { purchasers.add(phrase); purchasersL.add(entity); } else if (type.equals("purchabr")) { purchabrs.add(phrase); purchabrsL.add(entity); } else if (type.equals("seller")) { sellers.add(phrase); sellersL.add(entity); } else if (type.equals("sellerabr")) { sellerabrs.add(phrase); sellerabrsL.add(entity); } else if (type.equals("acquired")) { acquireds.add(phrase); acquiredsL.add(entity); } else if (type.equals("acqabr")) { acqabrs.add(phrase); acqabrsL.add(entity); } else { System.err.println("unknown entity type: "+type); System.exit(0); } } } for (Entity purchaser : purchasersL) { if (purchasers.size() > 1) { p -= purchaser.words.size() * penalty; } String s = StringUtils.join(purchaser.words, "").toLowerCase(); boolean match = false; for (Entity purchabr : purchabrsL) { String s1 = StringUtils.join(purchabr.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s1.length() - 2) { if (s.indexOf(s1) >= 0) { match = true; break; } } if (!match && purchabrs.size() > 0) { p -= purchaser.words.size() * penalty; } } for (Entity seller : sellersL) { if (sellers.size() > 1) { p -= seller.words.size() * penalty; } String s = StringUtils.join(seller.words, "").toLowerCase(); boolean match = false; for (Entity sellerabr : sellerabrsL) { String s1 = StringUtils.join(sellerabr.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s1.length() - 2) { if (s.indexOf(s1) >= 0) { match = true; break; } } if (!match && sellerabrs.size() > 0) { p -= seller.words.size() * penalty; } } for (Entity acquired : acquiredsL) { if (acquireds.size() > 1) { p -= acquired.words.size() * penalty; } String s = StringUtils.join(acquired.words, "").toLowerCase(); boolean match = false; for (Entity acqabr : acqabrsL) { String s1 = StringUtils.join(acqabr.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s1.length() - 2) { if (s.indexOf(s1) >= 0) { match = true; break; } } if (!match && acqabrs.size() > 0) { p -= acquired.words.size() * penalty; } } for (Entity purchabr : purchabrsL) { //p -= purchabr.words.size() * penalty; String s = StringUtils.join(purchabr.words, "").toLowerCase(); boolean match = false; for (Entity purchaser : purchasersL) { String s1 = StringUtils.join(purchaser.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s1.length() - 2) { if (s1.indexOf(s) >= 0) { match = true; break; } } if (!match) { p -= purchabr.words.size() * penalty2; } match = false; for (Entity acquired : acquiredsL) { String s1 = StringUtils.join(acquired.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s.length() - 2) { if (s1.indexOf(s) >= 0) { match = true; break; } } for (Entity seller : sellersL) { String s1 = StringUtils.join(seller.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s.length() - 2) { if (s1.indexOf(s) >= 0) { match = true; break; } } if (match) { p -= purchabr.words.size() * penalty1; } } for (Entity sellerabr : sellerabrsL) { //p -= sellerabr.words.size() * penalty; String s = StringUtils.join(sellerabr.words, "").toLowerCase(); boolean match = false; for (Entity seller : sellersL) { String s1 = StringUtils.join(seller.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s1.length() - 2) { if (s1.indexOf(s) >= 0) { match = true; break; } } if (!match) { p -= sellerabr.words.size() * penalty2; } match = false; for (Entity acquired : acquiredsL) { String s1 = StringUtils.join(acquired.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s.length() - 2) { if (s1.indexOf(s) >= 0) { match = true; break; } } for (Entity purchaser : purchasersL) { String s1 = StringUtils.join(purchaser.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s.length() - 2) { if (s1.indexOf(s) >= 0) { match = true; break; } } if (match) { p -= sellerabr.words.size() * penalty1; } } for (Entity acqabr : acqabrsL) { //p -= acqabr.words.size() * penalty; String s = StringUtils.join(acqabr.words, "").toLowerCase(); boolean match = false; for (Entity acquired : acquiredsL) { String s1 = StringUtils.join(acquired.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s1.length() - 2) { if (s1.indexOf(s) >= 0) { match = true; break; } } if (!match) { p -= acqabr.words.size() * penalty2; } match = false; for (Entity seller : sellersL) { String s1 = StringUtils.join(seller.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s.length() - 2) { if (s1.indexOf(s) >= 0) { //System.err.println(acqabr.toString(classIndex)+"\n"+seller.toString(classIndex)+"\n"); match = true; break; } } for (Entity purchaser : purchasersL) { String s1 = StringUtils.join(purchaser.words, "").toLowerCase(); //int dist = StringUtils.longestCommonSubstring(s, s1); //if (dist > s.length() - 2) { if (s1.indexOf(s) >= 0) { match = true; break; } } if (match) { p -= acqabr.words.size() * penalty1; } } return p; } }