/* * Copyright (c) 2015 University of Illinois Board of Trustees, All rights reserved. * Developed at GSLIS/ the iSchool, by Dr. Jana Diesner, Amirhossein Aleyasen, * Chieh-Li Chin, Shubhanshu Mishra, Kiumars Soltani, and Liang Tao. * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation; either version 2 of the License, or any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along with * this program; if not, see <http://www.gnu.org/licenses>. * */ package context.core.tokenizer; // //import edu.stanford.nlp.trees.semgraph.SemanticGraph; //import edu.stanford.nlp.trees.semgraph.SemanticGraphCoreAnnotations; //import edu.stanford.nlp.trees.semgraph.SemanticGraphEdge; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; import edu.stanford.nlp.semgraph.SemanticGraphEdge; import edu.stanford.nlp.util.CoreMap; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; /** * * @author Aale */ public class SPOExtractor { static List<SPOStructure> extractSPOs(CoreMap sentence, String docId, int sentIndex) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods int index = 0; Map<String, CustomEdge> customEdges = new LinkedHashMap<>(); SemanticGraph dependencies = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); for (SemanticGraphEdge edge : dependencies.edgeListSorted()) { CustomEdge cedge = new CustomEdge(); cedge.setDocId(docId); cedge.setSentenceIndex(sentIndex); cedge.setIndex(index); cedge.setWord1(edge.getSource().originalText()); cedge.setWord2(edge.getTarget().originalText()); cedge.setType(edge.getRelation() + ""); customEdges.put(cedge.getWord1() + "/" + cedge.getWord2() + "/" + cedge.getDocId() + "/" + cedge.getSentenceIndex(), cedge); } Collection<String> verbs = extractVerbs(customEdges.values()); List<SPOStructure> spos_list = new ArrayList<>(); for (String v : verbs) { SPOStructure spo = new SPOStructure(); for (CustomEdge cedge : customEdges.values()) { if (cedge.getType().equals("nsubj") && cedge.getWord1().equals(v)) { CustomToken subject = new CustomToken(); String expandedSubject = expandNoun(cedge.getWord2(), customEdges.values()); subject.setWord(expandedSubject); spo.addSubject(subject); } else if (cedge.getType().equals("dobj") && cedge.getWord1().equals(v)) { CustomToken object = new CustomToken(); String expandedObject = expandNoun(cedge.getWord2(), customEdges.values()); object.setWord(expandedObject); spo.addObject(object); } } if (spo.getObjects().isEmpty()) { for (CustomEdge cedge : customEdges.values()) { if (cedge.getType().contains("prep") && cedge.getWord1().equals(v)) { CustomToken object = new CustomToken(); String expandedObject = expandNoun(cedge.getWord2(), customEdges.values()); object.setWord(expandedObject); spo.addObject(object); break; } } } if (spo.getObjects().size() > 0 && spo.getSubjects().size() > 0) { CustomToken predicate = new CustomToken(); predicate.setWord(v); spo.setPredicate(predicate); spos_list.add(spo); } } return spos_list; } private static Collection<String> extractVerbs(Collection<CustomEdge> relations) { Set<String> verbs = new LinkedHashSet<>(); for (CustomEdge rel : relations) { if (rel.getType().equals("nsubj")) { verbs.add(rel.getWord1()); } } return verbs; } private static String expandNoun(String word, Collection<CustomEdge> relations) { String expanded = word; for (CustomEdge rel : relations) { if (rel.getType().equals("amod") && rel.getWord1().equals(word)) { expanded = rel.getWord2() + " " + expanded; } if (rel.getType().equals("nn") && rel.getWord1().equals(word)) { expanded = rel.getWord2() + " " + expanded; } } return expanded; } }