package edu.stanford.nlp.ie.machinereading.domains.ace.reader; import edu.stanford.nlp.util.logging.Redwood; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import edu.stanford.nlp.ie.machinereading.common.DomReader; /** * DOM reader for an ACE specification. * * @author David McClosky */ public class AceDomReader extends DomReader { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(AceDomReader.class); private static AceCharSeq parseCharSeq(Node node) { Node child = getChildByName(node, "charseq"); String start = getAttributeValue(child, "START"); String end = getAttributeValue(child, "END"); String text = child.getFirstChild().getNodeValue(); return new AceCharSeq(text, Integer.parseInt(start), Integer.parseInt(end)); } /** * Extracts one entity mention */ private static AceEntityMention parseEntityMention(Node node) { String id = getAttributeValue(node, "ID"); String type = getAttributeValue(node, "TYPE"); String ldctype = getAttributeValue(node, "LDCTYPE"); AceCharSeq extent = parseCharSeq(getChildByName(node, "extent")); AceCharSeq head = parseCharSeq(getChildByName(node, "head")); return (new AceEntityMention(id, type, ldctype, extent, head)); } /** * Extracts info about one relation mention */ private static AceRelationMention parseRelationMention(Node node, AceDocument doc) { String id = getAttributeValue(node, "ID"); AceCharSeq extent = parseCharSeq(getChildByName(node, "extent")); String lc = getAttributeValue(node, "LEXICALCONDITION"); // create the mention AceRelationMention mention = new AceRelationMention(id, extent, lc); // find the mention args List<Node> args = getChildrenByName(node, "relation_mention_argument"); for(Node arg: args){ String role = getAttributeValue(arg, "ROLE"); String refid = getAttributeValue(arg, "REFID"); AceEntityMention am = doc.getEntityMention(refid); if(am != null){ am.addRelationMention(mention); if(role.equalsIgnoreCase("arg-1")){ mention.getArgs()[0] = new AceRelationMentionArgument(role, am); } else if(role.equalsIgnoreCase("arg-2")){ mention.getArgs()[1] = new AceRelationMentionArgument(role, am); } else { throw new RuntimeException("Invalid relation mention argument role: " + role); } } } return mention; } /** * Extracts info about one relation mention */ private static AceEventMention parseEventMention(Node node, AceDocument doc) { String id = getAttributeValue(node, "ID"); AceCharSeq extent = parseCharSeq(getChildByName(node, "extent")); AceCharSeq anchor = parseCharSeq(getChildByName(node, "anchor")); // create the mention AceEventMention mention = new AceEventMention(id, extent, anchor); // find the mention args List<Node> args = getChildrenByName(node, "event_mention_argument"); for (Node arg : args) { String role = getAttributeValue(arg, "ROLE"); String refid = getAttributeValue(arg, "REFID"); AceEntityMention am = doc.getEntityMention(refid); if(am != null){ am.addEventMention(mention); mention.addArg(am, role); } } return mention; } /** * Parses one ACE specification * @return Simply displays the events to stdout */ public static AceDocument parseDocument(File f) throws IOException, SAXException, ParserConfigurationException { // parse the Dom document Document document = readDocument(f); // // create the ACE document object // Node docElement = document.getElementsByTagName("document").item(0); AceDocument aceDoc = new AceDocument(getAttributeValue(docElement, "DOCID")); // // read all entities // NodeList entities = document.getElementsByTagName("entity"); int entityCount = 0; for(int i = 0; i < entities.getLength(); i ++){ Node node = entities.item(i); // // the entity type and subtype // String id = getAttributeValue(node, "ID"); String type = getAttributeValue(node, "TYPE"); String subtype = getAttributeValue(node, "SUBTYPE"); String cls = getAttributeValue(node, "CLASS"); // create the entity AceEntity entity = new AceEntity(id, type, subtype, cls); aceDoc.addEntity(entity); // fetch all mentions of this event List<Node> mentions = getChildrenByName(node, "entity_mention"); // parse all its mentions for (Node mention1 : mentions) { AceEntityMention mention = parseEntityMention(mention1); entity.addMention(mention); aceDoc.addEntityMention(mention); } entityCount++; } //log.info("Parsed " + entityCount + " XML entities."); // // read all relations // NodeList relations = document.getElementsByTagName("relation"); for(int i = 0; i < relations.getLength(); i ++){ Node node = relations.item(i); // // the relation type, subtype, tense, and modality // String id = getAttributeValue(node, "ID"); String type = getAttributeValue(node, "TYPE"); String subtype = getAttributeValue(node, "SUBTYPE"); String modality = getAttributeValue(node, "MODALITY"); String tense = getAttributeValue(node, "TENSE"); // create the relation AceRelation relation = new AceRelation(id, type, subtype, modality, tense); aceDoc.addRelation(relation); // XXX: fetch relation_arguments here! // fetch all mentions of this relation List<Node> mentions = getChildrenByName(node, "relation_mention"); // traverse all mentions for (Node mention1 : mentions) { AceRelationMention mention = parseRelationMention(mention1, aceDoc); relation.addMention(mention); aceDoc.addRelationMention(mention); } } // // read all events // NodeList events = document.getElementsByTagName("event"); for(int i = 0; i < events.getLength(); i ++){ Node node = events.item(i); // // the event type, subtype, tense, and modality // String id = getAttributeValue(node, "ID"); String type = getAttributeValue(node, "TYPE"); String subtype = getAttributeValue(node, "SUBTYPE"); String modality = getAttributeValue(node, "MODALITY"); String polarity = getAttributeValue(node, "POLARITY"); String genericity = getAttributeValue(node, "GENERICITY"); String tense = getAttributeValue(node, "TENSE"); // create the event AceEvent event = new AceEvent(id, type, subtype, modality, polarity, genericity, tense); aceDoc.addEvent(event); // fetch all mentions of this relation List<Node> mentions = getChildrenByName(node, "event_mention"); // traverse all mentions for (Node mention1 : mentions) { AceEventMention mention = parseEventMention(mention1, aceDoc); event.addMention(mention); aceDoc.addEventMention(mention); } } return aceDoc; } public static void main(String [] argv) throws Exception { if (argv.length != 1) { log.info("Usage: java AceDomReader <APF file>"); System.exit(1); } File f = new File(argv[0]); AceDocument doc = parseDocument(f); System.out.println("Processed ACE document:\n" + doc); ArrayList<ArrayList<AceRelationMention>> r = doc.getAllRelationMentions(); System.out.println("size: " + r.size()); } }