/******************************************************************************/ /* Copyright (C) 2010-2011, Sebastian Hellmann */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ /* You may obtain a copy of the License at */ /* */ /* http://www.apache.org/licenses/LICENSE-2.0 */ /* */ /* Unless required by applicable law or agreed to in writing, software */ /* distributed under the License is distributed on an "AS IS" BASIS, */ /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ /* See the License for the specific language governing permissions and */ /* limitations under the License. */ /******************************************************************************/ package org.nlp2rdf.implementation.lexo; import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; import com.hp.hpl.jena.datatypes.xsd.XSDDateTime; import com.hp.hpl.jena.ontology.*; import com.hp.hpl.jena.query.*; import com.hp.hpl.jena.rdf.model.*; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import com.hp.hpl.jena.vocabulary.DC; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; import org.nlp2rdf.core.NIFParameters; import org.nlp2rdf.core.vocab.*; import org.nlp2rdf.implementation.stanfordcorenlp.StanfordWrapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.InputStream; import java.text.NumberFormat; import java.util.*; /** * Debug with echo -n "This is a sentence." | mvn compile exec:java -e -Dexec.mainClass="org.nlp2rdf.implementation.stanfordcore.StanfordCoreCLI" -Dexec.args="-f text -i -" | less * * @author Sebastian Hellmann - http://bis.informatik.uni-leipzig.de/SebastianHellmann */ public class LExO { private static Logger log = LoggerFactory.getLogger(LExO.class); private static StanfordWrapper stanfordWrapper = new StanfordWrapper(); private static OntModel nifModel = null; private static OntModel lexoModel = null; private static OntModel rlogModel = null; private static Map<String, String> queries = null; private static final String sparqlPrefix = "PREFIX lexo: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/vm/lexo#> \n" + "PREFIX stanford: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/vm/dep/stanford#> \n" + "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> \n" + "PREFIX owl: <http://www.w3.org/2002/07/owl#> \n" + "PREFIX olia: <http://purl.org/olia/olia.owl#> \n" + "PREFIX nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> \n"; private static String lexotypens = "http://example.org/type#"; { init(); } private int axiomCreationCount = 0; private void init() { String nif_core_owl = "org/uni-leipzig/persistence/nlp2rdf/ontologies/nif-core/nif-core.owl"; String nif_core_inf_owl = "org/uni-leipzig/persistence/nlp2rdf/ontologies/nif-core/nif-core-inf.owl"; InputStream is1 = LExO.class.getClassLoader().getResourceAsStream(nif_core_owl); InputStream is2 = LExO.class.getClassLoader().getResourceAsStream(nif_core_inf_owl); nifModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM, ModelFactory.createDefaultModel()); nifModel.createAnnotationProperty(DC.description.getURI()); nifModel.read(is1, "", "RDF/XML"); //nifModel.read(is2, "", "RDF/XML"); lexoModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM, ModelFactory.createDefaultModel()); //InputStream aa = LExO.class.getClassLoader().getResourceAsStream("org/uni-leipzig/persistence/nlp2rdf/ontologies/vm/lexo/lexo.ttl"); InputStream aa = LExO.class.getClassLoader().getResourceAsStream("lexo.ttl"); lexoModel.read(aa, "", "N3"); rlogModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM, ModelFactory.createDefaultModel()); aa = LExO.class.getClassLoader().getResourceAsStream("org/uni-leipzig/persistence/nlp2rdf/ontologies/rlog/rlog.ttl"); rlogModel.read(aa, "", "N3"); queries = new HashMap<>(); ExtendedIterator<Individual> eit = lexoModel.listIndividuals(LExOOntClasses.GenRule.getOntClass(lexoModel)); Individual current = null; while (eit.hasNext()) { current = eit.next(); String query = sparqlPrefix + current.getPropertyValue(LExODatatypeProperties.construct.getDatatypeProperty(lexoModel)).toString(); try { QueryFactory.create(query); } catch (Exception qe) { System.out.println(current); System.out.println(query); qe.printStackTrace(); System.exit(0); } queries.put(current.getURI(), query); } System.err.println("Parsing of " + queries.size() + " queries was successful"); } public void processText(Individual context, OntModel inputModel, OntModel outputModel, NIFParameters nifParameters) { NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH); nf.setMinimumFractionDigits(2); //prepare the outputModel outputModel.addSubModel(rlogModel); outputModel.addSubModel(lexoModel); //the logging event Resource logRes = outputModel.createResource(nifParameters.getLogPrefix() + UUID.randomUUID()); logRes.addProperty(RDF.type, outputModel.createResource(RLOGOntClasses.Entry.getUri())); logRes.addProperty(RLOGObjectProperties.level.getObjectProperty(outputModel), outputModel.createResource(RLOGIndividuals.INFO.getUri())); XSDDateTime date = new XSDDateTime(Calendar.getInstance()); logRes.addProperty(RLOGDatatypeProperties.date.getDatatypeProperty(outputModel), date.toString(), date.getNarrowedDatatype()); StringBuilder logmessage = new StringBuilder(); /* * Stanford * */ Monitor stanford = MonitorFactory.getTimeMonitor("stanford").start(); inputModel.addSubModel(nifModel); stanfordWrapper.process(context, inputModel, inputModel, nifParameters); logmessage.append("Total stanford time: ").append(nf.format(stanford.stop().getLastValue())).append("\n"); /* * Rule processing * stored in model * */ System.err.println("Starting execution of rules"); Monitor querytimetotal = MonitorFactory.getTimeMonitor("querytimetotal").start(); for (Object key : queries.keySet()) { Monitor mon = MonitorFactory.getTimeMonitor((String) key).start(); String query = queries.get(key); QueryExecution qe = QueryExecutionFactory.create(query, inputModel); qe.execConstruct(outputModel); logRes.addProperty(outputModel.createProperty((String) key + "_time"), outputModel.createTypedLiteral(mon.stop().getLastValue(), XSDDatatype.XSDdouble)); System.err.println(key + " needed: " + nf.format(mon.getLastValue())); } logmessage.append("Total rule time: ").append(nf.format(querytimetotal.stop().getLastValue())).append("\n"); /* * Calculate coverage among nodes * */ /* * get all available nodes * */ Set<Resource> uncoveredNodes = new HashSet<>(); String avnq = sparqlPrefix + "SELECT ?s ?anchorOf {" + "{ ?s nif:dependencyTrans [] ; nif:anchorOf ?anchorOf . }" + "UNION " + "{ [] nif:dependencyTrans ?s . ?s nif:anchorOf ?anchorOf . }}"; QueryExecution getUncoveredNodes = QueryExecutionFactory.create(avnq, inputModel); ResultSet rsavng = getUncoveredNodes.execSelect(); Map<Resource, String> anchorOf = new HashMap<>(); while (rsavng.hasNext()) { QuerySolution qs = rsavng.next(); Resource s = qs.getResource("s"); String uncovanch = qs.getLiteral("anchorOf").toString(); uncoveredNodes.add(s); anchorOf.put(s, uncovanch); } int totalNodes = uncoveredNodes.size(); ResIterator rit = outputModel.listSubjects(); while (rit.hasNext()) { uncoveredNodes.remove(rit.nextResource()); } int covered = totalNodes - uncoveredNodes.size(); //print all uncovered nodes for debugging if (!uncoveredNodes.isEmpty()) { System.err.println("Uncovered nodes found:"); for (Resource r : uncoveredNodes) { System.err.println("- UNCOV: " + anchorOf.get(r) + " [" + r + "]"); Resource un = outputModel.getResource(r.getURI()); un.addProperty(LExODatatypeProperties.uncovered.getDatatypeProperty(outputModel), "uncovered"); } } // print all "skipped" statements once List<Statement> skipped = outputModel.listStatements(null, LExODatatypeProperties.skipped.getDatatypeProperty(outputModel), (String) null).toList(); if (!skipped.isEmpty()) { System.err.println(skipped.size() + " skipped nodes found."); for (Statement s : skipped) { Resource r = s.getSubject(); //System.err.println("- SKIP: " + anchorOf.get(r) + ", Reason: " + s.getObject().asLiteral().toString() + " [" + r + "]"); outputModel.remove(s); } } /******* * merging of axioms ******/ Map<String, Resource> nif2classUri = new HashMap<>(); determine_name(outputModel, nif2classUri); boolean repeat = true; Set<String> finished = new HashSet<>(); List<Resource> nifResources = outputModel.listSubjectsWithProperty(LExOObjectProperties.axDesc.getObjectProperty(outputModel)).toList(); while (repeat) { repeat = false; for (Resource current : nifResources) { if (finished.contains(current.getURI())) { continue; } repeat = repeat || build_axioms(current, outputModel, nif2classUri, finished); } } /********* * quality check: * any bnodes, that are nowhere object ********/ String bnodesQuery = sparqlPrefix + "SELECT ?bn ?p ?o {" + " ?bn ?p ?o " + "FILTER (isBlank(?bn) ) " + "FILTER (NOT EXISTS { [] ?in ?bn } )" + "FILTER (NOT EXISTS { ?bn rdfs:subClassOf [] } )" + " }"; QueryExecution bnodes = QueryExecutionFactory.create(bnodesQuery, outputModel); ResultSet bnodesrs = bnodes.execSelect(); int bnodesrsSize = 0; while (bnodesrs.hasNext()) { QuerySolution qs = bnodesrs.next(); //Resource s = qs.getResource("bn"); System.err.println("unconnected blank nodes found: " + qs); bnodesrsSize++; } System.err.println(axiomCreationCount + " axioms created."); System.err.println(bnodesrsSize + " unconnected blank nodes found."); logRes.addProperty(RLOGDatatypeProperties.message.getDatatypeProperty(outputModel), outputModel.createLiteral(logmessage.toString())); System.err.println("Coverage: " + " " + covered + " of " + totalNodes + " (" + nf.format(100 * (double) covered / totalNodes) + "%)"); } public boolean build_axioms(Resource currentNIFResource, OntModel model, Map<String, Resource> nif2classUri, Set<String> finished) { //split all axiomdescriptions of resource StmtIterator sit = currentNIFResource.listProperties(LExOObjectProperties.axDesc.getObjectProperty(model)); Set<Resource> axioms = new HashSet<>(); Set<Resource> axiomParts = new HashSet<>(); Resource className = nif2classUri.get(currentNIFResource.getURI()); while (sit.hasNext()) { Statement stmt = sit.nextStatement(); Resource currentAxDescriptor = stmt.getObject().asResource(); //this is null, in some cases. Resource axTarget = currentAxDescriptor.getPropertyResourceValue(LExOObjectProperties.axTarget.getObjectProperty(model)); if (axTarget != null) { if (finished.contains(axTarget.getURI())) { if (currentAxDescriptor.hasProperty(RDF.type, LExOOntClasses.Axiom.getOntClass(model))) { axioms.add(currentAxDescriptor); } else if (currentAxDescriptor.hasProperty(RDF.type, LExOOntClasses.AxiomPart.getOntClass(model))) { axiomParts.add(currentAxDescriptor); } } else { return false; } } } Resource result = null; List<Resource> intersections = new ArrayList<Resource>(); if (className != null) { intersections.add(className); } for (Resource part : axiomParts) { Resource realTarget = nif2classUri.get(part.getPropertyResourceValue(LExOObjectProperties.axTarget.getObjectProperty(model)).getURI()); Resource axSemantic = part.getPropertyResourceValue(LExOAnnotationProperties.axSemantic.getAnnotationProperty(model)); if (axSemantic.getURI().equals(OWL.someValuesFrom.getURI())) { ObjectProperty axProperty = model.createObjectProperty(part.getPropertyResourceValue(LExOAnnotationProperties.axProperty.getAnnotationProperty(model)).getURI()); intersections.add(model.createSomeValuesFromRestriction(null, axProperty, realTarget)); } else { intersections.add(realTarget); } } if (intersections.size() == 1) { result = intersections.get(0); } else if (intersections.isEmpty()) { //not sure System.err.println("isempty"); } else { RDFList list = model.createList(); for (Resource r : intersections) { list = list.with(r); } result = model.createIntersectionClass(null, list); } for (Resource axiom : axioms) { Resource realTarget = nif2classUri.get(axiom.getPropertyResourceValue(LExOObjectProperties.axTarget.getObjectProperty(model)).getURI()); Resource axSemantic = axiom.getPropertyResourceValue(LExOAnnotationProperties.axSemantic.getAnnotationProperty(model)); if (axSemantic.getURI().equals(OWL.equivalentClass.getURI())) { ((OntClass) result).addEquivalentClass(realTarget); axiomCreationCount++; } else if (axSemantic.getURI().equals(RDFS.subClassOf.getURI())) { ((OntClass) result).addSuperClass(realTarget); axiomCreationCount++; } else { System.out.println("Please implement: " + axSemantic); System.exit(0); } } nif2classUri.put(currentNIFResource.getURI(), result); finished.add(currentNIFResource.getURI()); return true; } public void determine_name(OntModel model, Map<String, Resource> nif2classUri) { ResIterator rit = model.listSubjectsWithProperty(LExOObjectProperties.axDesc.getObjectProperty(model)); List<Resource> bns = new ArrayList<>(); while (rit.hasNext()) { Resource currentAxDescriptor = null; SortedSet<Sorter> sortedNames = new TreeSet<Sorter>(); Resource currentResource = rit.nextResource(); //get all axiomdescriptions of resource StmtIterator sit = currentResource.listProperties(LExOObjectProperties.axDesc.getObjectProperty(model)); boolean hasOneClassPart = false; while (sit.hasNext()) { Statement stmt = sit.nextStatement(); if (stmt.getObject().asResource().hasProperty(RDF.type, LExOOntClasses.ClassPart.getOntClass(model))) { currentAxDescriptor = stmt.getObject().asResource(); bns.add(currentAxDescriptor); sortedNames.add(new Sorter(currentAxDescriptor.getProperty(LExODatatypeProperties.cnOrder.getDatatypeProperty(model)). getObject().asLiteral().getInt(), currentAxDescriptor.getProperty(LExODatatypeProperties.cnPart.getDatatypeProperty(model)).getObject().asLiteral().toString())); hasOneClassPart = true; } } if (hasOneClassPart) { StringBuilder sb = new StringBuilder(lexotypens); for (Sorter s : sortedNames) { sb.append(s.getName()); sb.append("_"); } String classUri = sb.substring(0, sb.length() - 1); currentResource.addProperty(LExODatatypeProperties.className.getDatatypeProperty(model), model.createResource(classUri)); nif2classUri.put(currentResource.getURI(), model.createClass(classUri)); } } //delete? /*for (Resource r : bns) { r.removeProperties(); } */ } class Sorter implements Comparable<Sorter> { private final int order; private final String name; Sorter(int order, String name) { this.order = order; this.name = name; } public String getName() { return name; } @Override public int compareTo(Sorter s) { return this.order - s.order; } } /** while (it1.hasNext()) { res1.add(it1.nextStatement().getObject().asResource()); } ResIterator it2 = intermediate.listSubjects(); Set<Resource> res2 = new HashSet<>(); while (it2.hasNext()) { Resource r = it2.nextResource(); if (r.isURIResource()) { res2.add(r); } } **/ //Set<Resource> difference = new HashSet<Resource>(res1); //System.out.println(res1); //System.out.println(res2); //System.out.println(difference.toString()); // //model.listResourcesWithProperty(NIFObjectProperties.dependencyTrans.getObjectProperty(model)); /** // // System.out.println().toList()); ObjectProperty deptrans = NIFObjectProperties.dependencyTrans.getObjectProperty(model); DatatypeProperty beginIndex = NIFDatatypeProperties.beginIndex.getDatatypeProperty(model); AnnotationProperty oliaCategory = NIFAnnotationProperties.oliaCategory.getAnnotationProperty(model); DatatypeProperty lexo = model.createDatatypeProperty("http://lexo.le/def"); DatatypeProperty lemma = NIFDatatypeProperties.lemma.getDatatypeProperty(model); WordComparator wc = new WordComparator(beginIndex); ResIterator resIterator = model.listResourcesWithProperty(deptrans); Set<String> forbidden = new HashSet<>(Arrays.asList(new String[]{"http://purl.org/olia/olia.owl#Determiner"})); while (resIterator.hasNext()) { SortedSet<Individual> words = new TreeSet<>(wc); Resource current = resIterator.nextResource(); NodeIterator nt = model.listObjectsOfProperty(current, deptrans); words.add(model.getIndividual(current.getURI())); while (nt.hasNext()) { Individual word = model.getIndividual(nt.nextNode().asResource().getURI()); NodeIterator nt2 = word.listPropertyValues(oliaCategory); boolean allow = true; while (nt2.hasNext()) { String cat = nt2.nextNode().asResource().getURI(); if (forbidden.contains(cat)) { allow = false; } } if (allow) { words.add(word); } } StringBuilder sb = new StringBuilder(); for (Individual word : words) { sb.append(word.getPropertyValue(lemma).asLiteral().getString()); sb.append(" "); } current.addProperty(lexo, sb.toString().trim()); System.out.println(current+" - > "+sb.toString().trim()); } // add properties for each word ExtendedIterator<Individual> itw = model.listIndividuals(NIFOntClasses.Word.getOntClass(model)); for (; itw.hasNext(); ) { Individual current = itw.next(); if (!current.hasProperty(lexo)) { current.addProperty(lexo, current.getPropertyValue(lemma).asLiteral().getString()); } } System.exit(0); QueryExecution qe = QueryExecutionFactory.create("SELECT ?a ?b ?c {?s <" + oliaCategory.getURI() + "> ?o }", model); ResultSet rs = qe.execSelect(); for (; rs.hasNext(); ) { QuerySolution qs = rs.next(); Resource s = qs.getResource("s"); Resource o = qs.getResource("o"); System.out.println(s + " " + o); } System.exit(0); QueryExecution qe2 = QueryExecutionFactory.create("SELECT ?s ?o {?s <" + oliaCategory.getURI() + "> ?o }", model); ResultSet rs2 = qe.execSelect(); for (; rs2.hasNext(); ) { QuerySolution qs = rs2.next(); Resource s = qs.getResource("s"); Resource o = qs.getResource("o"); System.out.println(s + " " + o); } System.exit(0); **/ }