/* * Licensed to the Technische Universität Darmstadt under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Technische Universität Darmstadt * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.dkpro.core.io.rdf.internal; import java.util.Iterator; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.jena.datatypes.xsd.XSDDatatype; import org.apache.jena.ontology.Individual; import org.apache.jena.ontology.OntModel; import org.apache.jena.rdf.model.Property; import org.apache.jena.rdf.model.Resource; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; import org.apache.uima.cas.Feature; import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.Type; import org.apache.uima.cas.TypeSystem; import org.apache.uima.jcas.JCas; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import de.tudarmstadt.ukp.dkpro.core.testing.validation.CasAnalysisUtils; public class Uima2Rdf { private static Pattern DKPRO_CORE_SCHEME = Pattern .compile("(?<LONG>de\\.tudarmstadt\\.ukp\\.dkpro\\.core\\.api\\.(?<MODULE>[^.]+)\\.type(\\.(?<INMODULE>.*))?\\.)[^.]+"); public static void convert(JCas aJCas, OntModel aTarget) throws CASException { // Set up prefix mappings TypeSystem ts = aJCas.getTypeSystem(); aTarget.setNsPrefix("cas", RdfCas.NS_UIMA + "uima.cas."); aTarget.setNsPrefix("tcas", RdfCas.NS_UIMA + "uima.tcas."); aTarget.setNsPrefix(RdfCas.PREFIX_RDFCAS, RdfCas.NS_RDFCAS); // Additional prefix mappings for DKPro Core typesystems for (Type t : ts.getProperlySubsumedTypes(ts.getTopType())) { Matcher nameMatcher = DKPRO_CORE_SCHEME.matcher(""); String typeName = t.getName(); if (typeName.endsWith("[]")) { typeName = typeName.substring(0, typeName.length() - 2); } nameMatcher.reset(typeName); if (nameMatcher.matches()) { String prefix = nameMatcher.group("MODULE"); if (nameMatcher.group("INMODULE") != null) { prefix = prefix + "-" + nameMatcher.group("INMODULE"); } aTarget.setNsPrefix(prefix, RdfCas.NS_UIMA + nameMatcher.group("LONG")); } } Iterator<JCas> viewIterator = aJCas.getViewIterator(); while (viewIterator.hasNext()) { convertView(viewIterator.next(), aTarget); } } private static void convertView(JCas aJCas, OntModel aTarget) { // Shorten down variable name for model OntModel m = aTarget; // Set up names Resource tView = m.createResource(RdfCas.TYPE_VIEW); Resource tFeatureStructure = m.createResource(RdfCas.TYPE_FEATURE_STRUCTURE); Property pIndexedIn = m.createProperty(RdfCas.PROP_INDEXED_IN); // Get a URI for the document DocumentMetaData dmd = DocumentMetaData.get(aJCas); String docuri = dmd.getDocumentUri() != null ? dmd.getDocumentUri() : "urn:" + dmd.getDocumentId(); // These only collect a single view... Set<FeatureStructure> reachable = CasAnalysisUtils.collectReachable(aJCas.getCas()); Set<FeatureStructure> indexed = CasAnalysisUtils.collectIndexed(aJCas.getCas()); // ... they do not collect the SOFA, so we add that explicitly reachable.add(aJCas.getSofa()); // Set up the view itself String viewUri = String.format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(aJCas.getSofa())); Individual rdfView = m.createIndividual(viewUri, tView); for (FeatureStructure uimaFS : reachable) { String uri = String.format("%s#%d", docuri, aJCas.getLowLevelCas().ll_getFSRef(uimaFS)); Individual rdfFS = m.createIndividual(uri, m.createResource(rdfType(uimaFS.getType()))); // The SoFa is not a regular FS - do not mark it as such if (uimaFS != aJCas.getSofa()) { rdfFS.addOntClass(tFeatureStructure); } // Internal UIMA information if (indexed.contains(uimaFS)) { rdfFS.addProperty(pIndexedIn, rdfView); } // Convert features convertFeatures(docuri, uimaFS, rdfFS); } } private static void convertFeatures(String docuri, FeatureStructure uimaFS, Individual rdfFS) { OntModel m = rdfFS.getOntModel(); for (Feature uimaFeat : uimaFS.getType().getFeatures()) { Property rdfFeat = m.createProperty(rdfFeature(uimaFeat)); if (uimaFeat.getRange().isPrimitive()) { switch (uimaFeat.getRange().getName()) { case CAS.TYPE_NAME_BOOLEAN: rdfFS.addLiteral(rdfFeat, m.createTypedLiteral( uimaFS.getBooleanValue(uimaFeat), XSDDatatype.XSDboolean)); break; case CAS.TYPE_NAME_BYTE: rdfFS.addLiteral(rdfFeat, m.createTypedLiteral( uimaFS.getByteValue(uimaFeat), XSDDatatype.XSDbyte)); break; case CAS.TYPE_NAME_DOUBLE: rdfFS.addLiteral(rdfFeat, m.createTypedLiteral( uimaFS.getDoubleValue(uimaFeat), XSDDatatype.XSDdouble)); break; case CAS.TYPE_NAME_FLOAT: rdfFS.addLiteral(rdfFeat, m.createTypedLiteral( uimaFS.getFloatValue(uimaFeat), XSDDatatype.XSDfloat)); break; case CAS.TYPE_NAME_INTEGER: rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(uimaFS.getIntValue(uimaFeat), XSDDatatype.XSDint)); break; case CAS.TYPE_NAME_LONG: rdfFS.addLiteral(rdfFeat, m.createTypedLiteral( uimaFS.getLongValue(uimaFeat), XSDDatatype.XSDlong)); break; case CAS.TYPE_NAME_SHORT: rdfFS.addLiteral(rdfFeat, m.createTypedLiteral( uimaFS.getShortValue(uimaFeat), XSDDatatype.XSDshort)); break; case CAS.TYPE_NAME_STRING: { String s = uimaFS.getStringValue(uimaFeat); if (s != null) { rdfFS.addLiteral(rdfFeat, m.createTypedLiteral(s, XSDDatatype.XSDstring)); } break; } default: throw new IllegalArgumentException("Feature [" + uimaFeat.getName() + "] has unsupported primitive type [" + uimaFeat.getRange().getName() + "]"); } } else { FeatureStructure targetUimaFS = uimaFS.getFeatureValue(uimaFeat); if (targetUimaFS != null) { rdfFS.addProperty(rdfFeat, m.createResource(rdfUri(docuri, targetUimaFS))); } } } } private static String rdfUri(String docuri, FeatureStructure uimaFS) { return String.format("%s#%d", docuri, uimaFS.getCAS().getLowLevelCAS().ll_getFSRef(uimaFS)); } private static String rdfFeature(Feature aUimaFeature) { return rdfType(aUimaFeature.getDomain()) + "-" + aUimaFeature.getShortName(); } private static String rdfType(Type aUimaType) { return RdfCas.NS_UIMA + aUimaType.getName(); } }