package edu.stanford.nlp.ie;

import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.sequences.SeqClassifierFlags;

import java.util.List;

/**
 * A sequence prior that scores a labeling by comparing every cached entity
 * with the entity found at each of its recorded other occurrences. Each
 * comparison contributes a factor looked up by the two entities' tag indices
 * in one of two matrices (one for "exact" matches, one for "sub" matches),
 * plus a length-mismatch penalty for exact matches.
 *
 * @author Mengqiu Wang
 */
public class EmpiricalNERPriorBIO<IN extends CoreMap> extends EntityCachingAbstractSequencePriorBIO<IN> {

  /** A logger for this class */
  private static Redwood.RedwoodChannels log = Redwood.channels(EmpiricalNERPriorBIO.class);

  /** Factor table indexed as {@code [tag1][tag2]}, consulted for exact matches. */
  private double[][] entityMatrix;

  /** Factor table indexed as {@code [tag1][tag2]}, consulted for non-exact (sub) matches. */
  private double[][] subEntityMatrix;

  /** Classifier flags; only {@code matchNERIncentive} is read by this class. */
  private SeqClassifierFlags flags;

  /** Per-token weight applied to length mismatches between exactly matching entities. */
  protected double p1 = Math.log(0.01);

  /** Factor used for same-tag pairs when {@code flags.matchNERIncentive} is set. */
  protected double p2 = Math.log(2.0);

  /** Index of the "ORG" entry in the tag index. */
  protected int ORGIndex;

  /** Index of the "LOC" entry in the tag index. */
  protected int LOCIndex;

  /** When true, every pairwise comparison is written to the log. */
  public static boolean DEBUG = false;

  /**
   * Builds the prior over a document.
   *
   * @param backgroundSymbol passed through to the superclass
   * @param classIndex passed through to the superclass
   * @param tagIndex passed through to the superclass; also used here to
   *                 resolve the "ORG" and "LOC" indices
   * @param doc passed through to the superclass
   * @param matrices first element is the exact-match matrix, second element
   *                 is the sub-match matrix
   * @param flags classifier flags ({@code matchNERIncentive} is consulted
   *              during scoring)
   */
  public EmpiricalNERPriorBIO(String backgroundSymbol, Index<String> classIndex, Index<String> tagIndex,
                              List<IN> doc, Pair<double[][], double[][]> matrices, SeqClassifierFlags flags) {
    super(backgroundSymbol, classIndex, tagIndex, doc);
    this.entityMatrix = matrices.first();
    this.subEntityMatrix = matrices.second();
    this.flags = flags;
    this.ORGIndex = tagIndex.indexOf("ORG");
    this.LOCIndex = tagIndex.indexOf("LOC");
  }

  /**
   * Computes the prior score from the cached {@code entities} array.
   *
   * @param sequence not read by this implementation; the score is derived
   *                 entirely from the cached {@code entities} array
   * @return the accumulated score over all (entity, other occurrence) pairs
   */
  @Override
  public double scoreOf(int[] sequence) {
    // NOTE: contributions are added to the running total one at a time, in a
    // fixed order; floating-point addition is order-sensitive, so keep it so.
    double total = 0.0;

    for (int start = 0; start < entities.length; start++) {
      EntityBIO current = entities[start];

      // Visit each entity once: skip slots that repeat the previous slot's
      // entity object, and slots that hold no entity at all.
      boolean repeatsPrevious = start != 0 && entities[start - 1] == current;
      if (repeatsPrevious || current == null) {
        continue;
      }

      int span = current.words.size();
      int lastCovered = start + span - 1;
      int tag1 = current.type;

      for (int occurrence : current.otherOccurrences) {
        EntityBIO counterpart = firstEntityWithin(occurrence, span);
        if (counterpart == null) {
          // nothing is labeled as an entity at the other occurrence
          continue;
        }

        int counterpartSpan = counterpart.words.size();
        int tag2 = counterpart.type;
        boolean sameTag = tag1 == tag2;

        // "Exact" means the counterpart lists an occurrence that falls
        // inside the positions covered by the current entity.
        boolean exact = anyIndexBetween(counterpart.otherOccurrences, start, lastCovered);

        double factor;
        if (exact) {
          if (DEBUG) {
            log.info("Exact match of tag1=" + tagIndex.get(tag1) + ", tag2=" + tagIndex.get(tag2));
          }
          if (span != counterpartSpan) {
            // the two entities cover different numbers of words
            if (sameTag) {
              total += Math.abs(counterpartSpan - span) * p1;
            } else if (!isOrgLocPair(tag1, tag2)) {
              total += (counterpartSpan + span) * p1;
            }
          }
          factor = entityMatrix[tag1][tag2];
        } else {
          if (DEBUG) {
            log.info("Sub match of tag1=" + tagIndex.get(tag1) + ", tag2=" + tagIndex.get(tag2));
          }
          factor = subEntityMatrix[tag1][tag2];
        }

        // Agreeing tags override whatever the matrix lookup produced.
        if (sameTag) {
          factor = flags.matchNERIncentive ? p2 : 0.0;
        }

        if (DEBUG) {
          log.info(" of factor=" + factor + ", p += " + (span * factor));
        }
        total += span * factor;
      }
    }

    return total;
  }

  /**
   * Returns the first non-null entry of {@code entities} in the window
   * {@code [from, from + width)}, clipped to the end of the array, or
   * {@code null} when every slot in the window is empty.
   */
  private EntityBIO firstEntityWithin(int from, int width) {
    for (int k = from; k < from + width && k < entities.length; k++) {
      EntityBIO candidate = entities[k];
      if (candidate != null) {
        return candidate;
      }
    }
    return null;
  }

  /** Reports whether any element of {@code indices} lies in the closed range {@code [low, high]}. */
  private static boolean anyIndexBetween(int[] indices, int low, int high) {
    for (int index : indices) {
      if (index >= low && index <= high) {
        return true;
      }
    }
    return false;
  }

  /** Reports whether the two tags are ORG and LOC, in either order. */
  private boolean isOrgLocPair(int tagA, int tagB) {
    return (tagA == ORGIndex && tagB == LOCIndex) || (tagA == LOCIndex && tagB == ORGIndex);
  }

}