package edu.stanford.nlp.ie.pascal; import edu.stanford.nlp.util.logging.Redwood; import edu.stanford.nlp.stats.Counter; import edu.stanford.nlp.util.Index; import edu.stanford.nlp.util.HashIndex; import java.util.HashMap; import java.util.HashSet; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Maps non-background Pascal fields to strings. * * @author Chris Cox */ public class PascalTemplate { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(PascalTemplate.class); public static final String[] fields = { //dates "workshoppapersubmissiondate", "workshopnotificationofacceptancedate", "workshopcamerareadycopydate", "workshopdate", //location "workshoplocation", //workshop info "workshopacronym", "workshophomepage", "workshopname", //conference info "conferenceacronym", "conferencehomepage", "conferencename", //background symbol "0" }; public static final String BACKGROUND_SYMBOL = "0"; private static final Index<String> fieldIndices; static { fieldIndices = new HashIndex<>(); for (String field : fields) { fieldIndices.add(field); } } private final String[] values; public PascalTemplate() { values = new String[fields.length]; for (int i = 0; i < values.length; i++) { values[i] = null; } } //copy constructor public PascalTemplate(PascalTemplate pt) { this.values = new String[fields.length]; for (int i = 0; i < values.length; i++) { if (pt.values[i] == null) { this.values[i] = null; } else { this.values[i] = pt.values[i]; } } } /* * Acronym stemming and matching fields */ private static Pattern acronymPattern = Pattern.compile("([ \r-/a-zA-Z]+?)(?:[ -'*\t\r\n\f0-9]*)", Pattern.DOTALL); /** * */ public static boolean acronymMatch(String s1, String s2, HashMap stemmedAcronymIndex) { log.info("Testing match:" + s1 + " : " + s2); String stem1 = (String) stemmedAcronymIndex.get(s1); String stem2 = (String) stemmedAcronymIndex.get(s2); log.info("Got stems:" + s1 + " : " + s2); return stem1.equals(stem2); } /** * */ public static String stemAcronym(String s, CliqueTemplates ct) { if (ct.stemmedAcronymIndex.containsKey(s)) { return (String) ct.stemmedAcronymIndex.get(s); } Matcher matcher = acronymPattern.matcher(s); if (!matcher.matches() || s.equalsIgnoreCase("www")) { log.info("Not a valid acronym: " + s); return "null"; } String stemmed = matcher.group(1).toLowerCase(); if (stemmed.endsWith("-")) { stemmed = stemmed.substring(0, stemmed.length() - 1); } ct.stemmedAcronymIndex.put(s, stemmed); log.info("Stemmed: " + s + " to: " + stemmed); if (ct.inverseAcronymMap.containsKey(stemmed)) { HashSet set = (HashSet) ct.inverseAcronymMap.get(stemmed); set.add(s); } else { HashSet set = new HashSet(); set.add(s); ct.inverseAcronymMap.put(stemmed, set); } return stemmed; } /** * Merges partial (clique) templates into a full one. * * @param dt date template * @param location location * @param wi workshop/conference info template * @return the {@link PascalTemplate} resulting from this merge. */ public static PascalTemplate mergeCliqueTemplates(DateTemplate dt, String location, InfoTemplate wi) { PascalTemplate pt = new PascalTemplate(); pt.setValue("workshopnotificationofacceptancedate", dt.noadate); pt.setValue("workshopcamerareadycopydate", dt.crcdate); pt.setValue("workshopdate", dt.workdate); pt.setValue("workshoppapersubmissiondate", dt.subdate); pt.setValue("workshoplocation", location); pt.setValue("workshopacronym", wi.wacronym); pt.setValue("workshophomepage", wi.whomepage); pt.setValue("workshopname", wi.wname); pt.setValue("conferenceacronym", wi.cacronym); pt.setValue("conferencehomepage", wi.chomepage); pt.setValue("conferencename", wi.cname); return pt; } /** * Sets template values. * @param fieldName (i.e. workshopname, workshopdate) */ public void setValue(String fieldName, String value) { int index = getFieldIndex(fieldName); assert(index != -1); values[index] = value; } public void setValue(int index, String value) { if (index != values.length - 1) { values[index] = value; } } public String getValue(String fieldName) { int i = getFieldIndex(fieldName); if (i == -1 || i == values.length - 1) { return null; } else { return values[i]; } } @Override public boolean equals(Object obj) { if (obj == null) { return false; } if (!(obj instanceof PascalTemplate)) { return false; } PascalTemplate pt = (PascalTemplate) obj; String[] values2 = pt.values; if (values.length != values2.length) { return false; } for (int i = 0; i < values.length - 1; i++) { if (values[i] == null) { if (values2[i] != null) { return false; } } else { if (values2[i] == null) { return false; } if (!values2[i].equals(values[i])) { return false; } } } return true; } @Override public int hashCode() { int tally = 37; for (int i = 0; i < values.length - 1; i++) { int n; if (values[i] == null) { n = 11; } else { n = values[i].hashCode(); } tally = 17 * tally + n; } return tally; } /** * * @param tag field name (i.e. workshopdate, workshoplocation) * @return the reference of that field in the underlying {@link edu.stanford.nlp.util.Index} */ public static int getFieldIndex(String tag) { return (fieldIndices.indexOf(tag)); } /** * Should be passed a <code>Counter[]</code>, each entry of which * keeps scores for possibilities in that template slot. The counter * for each template value is incremented by the corresponding score of * this PascalTemplate. * * @param fieldValueCounter an array of counters, each of which holds label possibilities for one field * @param score increment counts by this much. */ public void writeToFieldValueCounter(Counter<String>[] fieldValueCounter, double score) { for (int i = 0; i < fields.length; i++) { if ((values[i] != null) && !values[i].equals("NULL")) { fieldValueCounter[i].incrementCount(values[i], score); } } } /** * Divides this template into partial templates, and updates the counts of these * partial templates in the {@link CliqueTemplates} object. * * @param ct the partial templates counter object * @param score increment counts by this much */ public void unpackToCliqueTemplates(CliqueTemplates ct, double score) { ct.dateCliqueCounter.incrementCount(new DateTemplate(values[0], values[1], values[2], values[3]), score); if (values[4] != null) { ct.locationCliqueCounter.incrementCount(values[4], score); } ct.workshopInfoCliqueCounter.incrementCount(new InfoTemplate(values[6], values[5], values[7], values[9], values[8], values[10], ct), score); } public void print() { log.info("PascalTemplate: "); log.info(this.toString()); } @Override public String toString() { String str = "\n====================\n"; for (int i = 0; i < values.length; i++) { if (values[i] != null) { if (!(values[i].equalsIgnoreCase("NULL"))) { str = str.concat(fields[i] + " : " + values[i] + "\n"); } } } return str; } }