/*
 * Licensed to the Technische Universität Darmstadt under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The Technische Universität Darmstadt
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.dkpro.core.io.rdf.internal;

import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.collections4.iterators.IteratorIterable;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.ontology.OntResource;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.vocabulary.RDF;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;

/**
 * Converts an RDF model using the {@link RdfCas} vocabulary into the contents of a UIMA
 * {@link JCas}.
 */
public class Rdf2Uima {
    /**
     * Populates the given JCas from the model that the given statement belongs to.
     * <p>
     * The conversion runs in four passes: views are created first, then every feature structure
     * is instantiated without feature values, then the feature values are filled in (at this
     * point every possible reference target exists), and finally the feature structures are
     * added to the indexes of the views they are declared to be indexed in.
     *
     * @param aContext
     *            a statement whose model is converted; only its model is used.
     * @param aJCas
     *            the JCas to populate.
     * @throws CASException
     *             if a view cannot be set up.
     * @throws IllegalStateException
     *             if a feature structure is declared to be indexed in a resource that is not a
     *             known view.
     */
    public static void convert(Statement aContext, JCas aJCas) throws CASException {
        Model m = aContext.getModel();

        // Set up names
        Resource tView = m.createResource(RdfCas.TYPE_VIEW);
        Resource tFeatureStructure = m.createResource(RdfCas.TYPE_FEATURE_STRUCTURE);
        Property pIndexedIn = m.createProperty(RdfCas.PROP_INDEXED_IN);

        Map<Resource, FeatureStructure> fsIndex = new HashMap<>();

        // Convert the views/SofAs. The SofA of each view is registered as an FS as well so that
        // features pointing to the view resource can be resolved later.
        Map<Resource, JCas> viewIndex = new HashMap<>();
        Iterator<Resource> viewIter = m.listSubjectsWithProperty(RDF.type, tView);
        for (Resource view : new IteratorIterable<Resource>(viewIter)) {
            JCas viewJCas = convertView(view, aJCas);
            viewIndex.put(view, viewJCas);
            fsIndex.put(view, viewJCas.getSofa());
        }

        // Convert the FSes but without setting their feature values yet - we cannot fill
        // the feature values just yet because some of them may point to FSes not yet created
        List<Resource> fses = m.listSubjectsWithProperty(RDF.type, tFeatureStructure).toList();
        for (Resource fs : fses) {
            FeatureStructure uimaFS = initFS(fs.as(OntResource.class), aJCas);
            fsIndex.put(fs, uimaFS);
        }

        // Now fill the FSes with their feature values
        for (Resource fs : fses) {
            convertFS(fs.as(OntResource.class), aJCas, fsIndex);
        }

        // Finally add the FSes to the indexes of the respective views
        for (Resource fs : fses) {
            Iterator<Statement> indexedInIter = fs.listProperties(pIndexedIn);
            for (Statement indexedIn : new IteratorIterable<Statement>(indexedInIter)) {
                Resource viewResource = indexedIn.getResource();
                JCas viewJCas = viewIndex.get(viewResource);
                if (viewJCas == null) {
                    throw new IllegalStateException("FS [" + fs + "] is indexed in ["
                            + viewResource + "] which is not a known view");
                }
                viewJCas.addFsToIndexes(fsIndex.get(fs));
            }
        }
    }

    /**
     * Obtains (creating it if necessary) the view described by the given RDF resource and sets
     * its SofA string.
     *
     * @param aView
     *            the RDF resource describing the view.
     * @param aJCas
     *            the JCas in which the view is looked up or created.
     * @return the view.
     * @throws CASException
     *             declared for callers; see the UIMA API for the conditions.
     * @throws IllegalArgumentException
     *             if the resource carries no SofA ID.
     */
    public static JCas convertView(Resource aView, JCas aJCas) throws CASException {
        Model m = aView.getModel();

        // Set up names
        Property pSofaID = m.createProperty(RdfCas.PROP_SOFA_ID);
        Property pSofaString = m.createProperty(RdfCas.PROP_SOFA_STRING);
        Property pSofaMimeType = m.createProperty(RdfCas.PROP_SOFA_MIME_TYPE);

        // Get the values - getProperty() yields null if the resource lacks the property
        Statement sofaIdStmt = aView.getProperty(pSofaID);
        if (sofaIdStmt == null) {
            throw new IllegalArgumentException("View [" + aView + "] has no SofA ID");
        }
        String viewName = sofaIdStmt.getString();
        Statement sofaStringStmt = aView.getProperty(pSofaString);
        Statement sofaMimeTypeStmt = aView.getProperty(pSofaMimeType);

        // Instantiate the view/SofA
        JCas view = JCasUtil.getView(aJCas, viewName, true);
        if (sofaStringStmt != null) {
            // The MIME type is optional; a view without one gets a null MIME type
            String sofaMimeType = sofaMimeTypeStmt != null ? sofaMimeTypeStmt.getString() : null;
            view.setSofaDataString(sofaStringStmt.getString(), sofaMimeType);
        }

        return view;
    }

    /**
     * Creates the UIMA feature structure for the given RDF resource without setting any of its
     * feature values.
     * <p>
     * The UIMA type name is taken from the single RDF type of the resource that is not in the
     * {@link RdfCas#NS_RDFCAS} namespace, with the leading {@link RdfCas#NS_UIMA} prefix removed.
     * For {@link DocumentMetaData}, the instance obtained via
     * {@link DocumentMetaData#get(JCas)} is returned instead of creating a new one.
     *
     * @param aFS
     *            the RDF resource describing the feature structure.
     * @param aJCas
     *            the JCas in which the feature structure is created.
     * @return the feature structure.
     * @throws IllegalStateException
     *             if the resource does not have exactly one candidate UIMA type.
     */
    public static FeatureStructure initFS(OntResource aFS, JCas aJCas) {
        CAS cas = aJCas.getCas();

        // Figure out the UIMA type - there can be only one type per FS. Anonymous types carry no
        // URI and therefore cannot name a UIMA type either.
        Set<Resource> types = aFS.listRDFTypes(true).toSet();
        types.removeIf(res -> res.getURI() == null || res.getURI().startsWith(RdfCas.NS_RDFCAS));
        if (types.size() != 1) {
            // Checked explicitly because assertions are disabled unless the JVM runs with -ea
            throw new IllegalStateException("Expected exactly one UIMA type for [" + aFS
                    + "] but found " + types.size() + ": " + types);
        }

        String typeUri = types.iterator().next().getURI();
        Type type = CasUtil.getType(cas, typeUri.substring(RdfCas.NS_UIMA.length()));

        if (type.getName().equals(DocumentMetaData.class.getName())) {
            // Special handling to avoid ending up with two document annotations in the CAS
            return DocumentMetaData.get(aJCas);
        }

        return cas.createFS(type);
    }

    /**
     * Fills the feature values of the UIMA feature structure that corresponds to the given RDF
     * resource.
     * <p>
     * Only statements whose predicate URI starts with {@code uima:} are treated as features; the
     * feature base name is the part of the predicate URI after the last {@code -}. Primitive
     * features are read from the literal value of the statement, all others are resolved through
     * the given index. The begin/end features of the document annotation are left untouched.
     *
     * @param aFS
     *            the RDF resource describing the feature structure.
     * @param aJCas
     *            the JCas that owns the feature structure.
     * @param aFsIndex
     *            maps RDF resources to the already created UIMA feature structures.
     * @return the filled feature structure.
     * @throws IllegalStateException
     *             if no UIMA feature structure is registered for the resource or for a resource
     *             referenced by one of its features.
     * @throws IllegalArgumentException
     *             if a feature is not declared on the type of the feature structure or has an
     *             unsupported primitive range.
     */
    public static FeatureStructure convertFS(OntResource aFS, JCas aJCas,
            Map<Resource, FeatureStructure> aFsIndex) {
        FeatureStructure fs = aFsIndex.get(aFS);
        if (fs == null) {
            throw new IllegalStateException("No UIMA FS found for [" + aFS.getURI() + "]");
        }

        Iterator<Statement> stmtIter = aFS.listProperties();
        for (Statement stmt : new IteratorIterable<Statement>(stmtIter)) {
            // Skip all non-features
            if (!stmt.getPredicate().getURI().startsWith("uima:")) {
                continue;
            }

            String featureName = StringUtils.substringAfterLast(stmt.getPredicate().getURI(),
                    "-");
            Feature uimaFeat = fs.getType().getFeatureByBaseName(featureName);

            // Cannot update start/end of document annotation because that FS is already
            // indexed, so we skip those
            if (fs == aJCas.getDocumentAnnotationFs()
                    && (CAS.FEATURE_BASE_NAME_BEGIN.equals(featureName)
                            || CAS.FEATURE_BASE_NAME_END.equals(featureName))) {
                continue;
            }

            if (uimaFeat == null) {
                throw new IllegalArgumentException("Type [" + fs.getType().getName()
                        + "] has no feature [" + featureName + "]");
            }

            if (uimaFeat.getRange().isPrimitive()) {
                setPrimitiveValue(fs, uimaFeat, stmt);
            }
            else {
                FeatureStructure targetUimaFS = aFsIndex.get(stmt.getObject().asResource());
                if (targetUimaFS == null) {
                    throw new IllegalStateException("No UIMA FS found for ["
                            + stmt.getObject().asResource().getURI() + "]");
                }
                fs.setFeatureValue(uimaFeat, targetUimaFS);
            }
        }

        return fs;
    }

    /**
     * Sets a primitive feature from the literal object of the given statement, choosing the
     * setter by the name of the feature's range type.
     */
    private static void setPrimitiveValue(FeatureStructure aFS, Feature aFeature,
            Statement aStmt) {
        switch (aFeature.getRange().getName()) {
        case CAS.TYPE_NAME_BOOLEAN:
            aFS.setBooleanValue(aFeature, aStmt.getObject().asLiteral().getBoolean());
            break;
        case CAS.TYPE_NAME_BYTE:
            aFS.setByteValue(aFeature, aStmt.getObject().asLiteral().getByte());
            break;
        case CAS.TYPE_NAME_DOUBLE:
            aFS.setDoubleValue(aFeature, aStmt.getObject().asLiteral().getDouble());
            break;
        case CAS.TYPE_NAME_FLOAT:
            aFS.setFloatValue(aFeature, aStmt.getObject().asLiteral().getFloat());
            break;
        case CAS.TYPE_NAME_INTEGER:
            aFS.setIntValue(aFeature, aStmt.getObject().asLiteral().getInt());
            break;
        case CAS.TYPE_NAME_LONG:
            aFS.setLongValue(aFeature, aStmt.getObject().asLiteral().getLong());
            break;
        case CAS.TYPE_NAME_SHORT:
            aFS.setShortValue(aFeature, aStmt.getObject().asLiteral().getShort());
            break;
        case CAS.TYPE_NAME_STRING:
            aFS.setStringValue(aFeature, aStmt.getObject().asLiteral().getString());
            break;
        default:
            throw new IllegalArgumentException("Feature [" + aFeature.getName()
                    + "] has unsupported primitive type [" + aFeature.getRange().getName()
                    + "]");
        }
    }
}