/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.stanbol.entityhub.indexing.source.jenatdb;

import static org.apache.stanbol.entityhub.indexing.source.jenatdb.Utils.initTDBDataset;

import java.io.File;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

import org.apache.commons.io.FilenameUtils;
import org.apache.marmotta.ldpath.api.backend.RDFBackend;
import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
import org.apache.stanbol.entityhub.indexing.core.IndexingComponent;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
import org.apache.stanbol.entityhub.servicesapi.model.Reference;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.hp.hpl.jena.datatypes.BaseDatatype;
import com.hp.hpl.jena.datatypes.DatatypeFormatException;
import com.hp.hpl.jena.datatypes.RDFDatatype;
import com.hp.hpl.jena.datatypes.xsd.XSDDateTime;
import com.hp.hpl.jena.datatypes.xsd.XSDDuration;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.NodeFactory;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.graph.impl.LiteralLabel;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.AnonId;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.sparql.core.DatasetGraph;
import com.hp.hpl.jena.sparql.core.Var;
import com.hp.hpl.jena.sparql.engine.binding.Binding;
import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;

/**
 * Implementation of an {@link IndexingComponent} for Entity data that provides
 * the possibility to both:<ol>
 * <li>randomly access entity data via the {@link EntityDataProvider} interface
 * <li>iterate over all entities in this store via the {@link EntityDataIterator}
 * interface.
 * </ol>
 *
 * @author Rupert Westenthaler
 *
 */
public class RdfIndexingSource extends AbstractTdbBackend implements EntityDataIterable, EntityDataProvider, RDFBackend<Node> {
    /**
     * The parameter used to configure the source folder(s) relative to the
     * {@link IndexingConfig#getSourceFolder()}. The ',' (comma) is used as
     * separator to pass multiple sources.
     */
    public static final String PARAM_SOURCE_FILE_OR_FOLDER = "source";
    /**
     * The directory where successfully imported files are copied to
     */
    public static final String PARAM_IMPORTED_FOLDER = "imported";
    /**
     * Allows enabling/disabling the indexing of BNodes (see
     * <a href="https://issues.apache.org/jira/browse/STANBOL-765">STANBOL-765</a>
     * for details).
     */
    private static final String PARAM_BNODE_STATE = "bnode";
    /**
     * If present, this parameter allows converting RDF BlankNodes to dereferenceable
     * URIs by using {bnode-prefix}{bnode-id} (see
     * <a href="https://issues.apache.org/jira/browse/STANBOL-765">STANBOL-765</a>
     * for details)
     */
    public static final String PARAM_BNODE_PREFIX = "bnode-prefix";
    /**
     * The parameter that can be used to deactivate the importing of sources.
     * If this parameter is set to <code>false</code> the values configured for
     * {@link #PARAM_IMPORT_SOURCE} are ignored. The default value is
     * <code>true</code>.
     */
    public static final String PARAM_IMPORT_SOURCE = "import";
    /**
     * Allows configuring a {@link RdfImportFilter} (fully qualified class name).
     * If present, it gets the full configuration of this component passed.
     * This means that the import filter can be configured by the same
     * configuration as this component.
     */
    public static final String PARAM_IMPORT_FILTER = "import-filter";
    /**
     * The default directory name used to search for RDF files to be imported
     */
    public static final String DEFAULT_SOURCE_FOLDER_NAME = "rdfdata";
    public static final String DEFAULT_IMPORTED_FOLDER_NAME = "imported";
    //protected to allow internal classes direct access (without hidden getter/
    //setter added by the compiler that decrease performance)
    protected final static Logger log = LoggerFactory.getLogger(RdfIndexingSource.class);
    /**
     * The RDF data
     */
    private DatasetGraphTDB indexingDataset;
    /**
     * The valueFactory used to create {@link Representation}s, {@link Reference}s
     * and {@link Text} instances.
     */
    private ValueFactory vf;
    private ResourceLoader loader;
    protected String bnodePrefix; //protected to allow direct access in inner classes
    /**
     * used for logging a single WARN level entry on the first ignored BlankNode
     */
    private boolean bnodeIgnored = false;
    private RdfImportFilter importFilter;

    /**
     * Default constructor relying on {@link #setConfiguration(Map)} being
     * called afterwards to provide the configuration!
     */
    public RdfIndexingSource(){
        this(null);
    }
    /**
     * Internally used to initialise a {@link ValueFactory}
     * @param valueFactory
     */
    private RdfIndexingSource(ValueFactory valueFactory){
        if(valueFactory == null){
            this.vf = InMemoryValueFactory.getInstance();
        } else {
            this.vf = valueFactory;
        }
    }
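    /*
     * Construction sketch (hedged; the model directory and source file below
     * are hypothetical): the File based constructor initialises the TDB store
     * and registers the source for import, while initialise() performs the
     * actual import:
     *
     *   RdfIndexingSource source = new RdfIndexingSource(
     *       new File("target/tdb"),        //TDB model directory (created if missing)
     *       new File("rdfdata/dump.nt"),   //RDF file (or folder) to import
     *       null,                          //use the default ValueFactory
     *       null);                         //no import filter
     *   if(source.needsInitialisation()){
     *       source.initialise();
     *   }
     */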
    /**
     * Constructs an instance based on the provided parameters
     * @param modelLocation the directory for the RDF model. MUST NOT be NULL;
     * however the passed {@link File} need not exist.
     * @param sourceFileOrDirectory the source file or directory containing the
     * file(s) to import. Pass <code>null</code> if no RDF files need to be
     * imported
     * @param valueFactory The {@link ValueFactory} used to create instances
     * or <code>null</code> to use the default implementation.
     * @param importFilter Optionally an importFilter used to filter some
     * triples read from the RDF source files.
     */
    public RdfIndexingSource(File modelLocation, File sourceFileOrDirectory,
                             ValueFactory valueFactory, RdfImportFilter importFilter){
        if(modelLocation == null){
            throw new IllegalArgumentException("The passed model location MUST NOT be NULL!");
        }
        //init the store
        this.indexingDataset = initTDBDataset(modelLocation);
        //use a ResourceLoader that fails on the first invalid RDF file (STANBOL-328)
        this.loader = new ResourceLoader(new RdfResourceImporter(indexingDataset, importFilter), true, true);
        loader.addResource(sourceFileOrDirectory);
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        //first init the RDF Model
        this.indexingDataset = Utils.getTDBDataset(config);
        //second we need to check if we need to import RDF files to the RDF model
        //check whether we want to use an import filter
        Object value = config.get(PARAM_IMPORT_FILTER);
        if(value == null){
            log.info("No RDF Import Filter configured");
            importFilter = null;
        } else {
            String[] filterNames = value.toString().split(",");
            List<RdfImportFilter> filters = new ArrayList<RdfImportFilter>();
            ClassLoader cl = indexingConfig.getClass().getClassLoader();
            for(String filterName : filterNames){
                filterName = filterName.trim();
                try {
                    Class<? extends RdfImportFilter> importFilterClass = cl.loadClass(
                        filterName).asSubclass(RdfImportFilter.class);
                    RdfImportFilter filter = importFilterClass.newInstance();
                    filter.setConfiguration(config);
                    filters.add(filter);
                    log.info("Use RDF ImportFilter {} (type: {})", filter, importFilterClass.getSimpleName());
                } catch (ClassNotFoundException e) {
                    throw new IllegalArgumentException("Configured RdfImportFilter '"
                        + filterName + "' not found", e);
                } catch (InstantiationException e) {
                    throw new IllegalArgumentException("Configured RdfImportFilter '"
                        + filterName + "' can not be instantiated", e);
                } catch (IllegalAccessException e) {
                    throw new IllegalArgumentException("Configured RdfImportFilter '"
                        + filterName + "' can not be created", e);
                }
            }
            if(filters.isEmpty()){
                this.importFilter = null;
            } else if(filters.size() == 1){
                this.importFilter = filters.get(0);
            } else {
                this.importFilter = new UnionImportFilter(filters.toArray(
                    new RdfImportFilter[filters.size()]));
            }
        }
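        /*
         * A hedged sketch of what a configured import filter might look like
         * (the class name and the filtered namespace are hypothetical; the
         * RdfImportFilter interface is the one instantiated above):
         *
         *   public class SchemaOrgOnlyFilter implements RdfImportFilter {
         *       public void setConfiguration(Map<String,Object> config) {}
         *       public boolean needsInitialisation() { return false; }
         *       public void initialise() {}
         *       public void close() {}
         *       public boolean accept(Node s, Node p, Node o) {
         *           //only import triples with schema.org properties
         *           return p.isURI() && p.getURI().startsWith("http://schema.org/");
         *       }
         *   }
         *
         * configured via: import-filter=org.example.SchemaOrgOnlyFilter
         */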
        boolean failOnError = indexingConfig.isFailOnError();
        //create the ResourceLoader
        this.loader = new ResourceLoader(new RdfResourceImporter(indexingDataset, importFilter), failOnError);

        value = config.get(PARAM_IMPORTED_FOLDER);
        String importedFolderName;
        if(value != null && !value.toString().isEmpty()){
            importedFolderName = value.toString();
        } else {
            importedFolderName = DEFAULT_IMPORTED_FOLDER_NAME;
        }
        File importedFolder = new File(indexingConfig.getSourceFolder(), importedFolderName);
        log.info("Imported RDF File Folder: {}", importedFolder);
        this.loader.setImportedDir(importedFolder);
        //check if importing is deactivated
        boolean importSource = true; //default is true
        value = config.get(PARAM_IMPORT_SOURCE);
        if(value != null){
            importSource = Boolean.parseBoolean(value.toString());
        }
        if(importSource){ // if we need to import ... check the source config
            log.info("Importing RDF data from:");
            value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
            if(value == null){ //if not set use the default
                value = DEFAULT_SOURCE_FOLDER_NAME;
            }
            for(String source : value.toString().split(",")){
                File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
                if(sourceFileOrDirectory.exists()){
                    //register the configured source with the ResourceLoader
                    this.loader.addResource(sourceFileOrDirectory);
                } else {
                    if(FilenameUtils.getExtension(source).isEmpty()){
                        //non existent directory -> create
                        //This is typically the case if this method is called to
                        //initialise the default configuration. So we will try
                        //to create the directory users need to copy the source
                        //RDF files to.
                        if(!sourceFileOrDirectory.mkdirs()){
                            log.warn("Unable to create directory {} configured to import RDF data from. "
                                + "You will need to create this directory manually before copying the "
                                + "RDF files into it.", sourceFileOrDirectory);
                            //this would not be necessary because the directory will
                            //be empty - however I like to be consistent and have
                            //all configured and existent files & dirs added to the
                            //resource loader
                            this.loader.addResource(sourceFileOrDirectory);
                        }
                    } else {
                        log.warn("Unable to find RDF source {} within the indexing source folder {}",
                            source, indexingConfig.getSourceFolder());
                    }
                }
            }
            if(log.isInfoEnabled()){
                for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
                    log.info(" > " + registeredSource);
                }
            }
        } else {
            log.info("Importing RDF data deactivated by parameter {}={}", PARAM_IMPORT_SOURCE, value);
        }
        //STANBOL-765: parse the bnode-prefix from the passed configuration.
        value = config.get(PARAM_BNODE_STATE);
        final Boolean bnodeState;
        if(value != null){
            bnodeState = value instanceof Boolean ? (Boolean) value : Boolean.parseBoolean(value.toString());
        } else if(config.containsKey(PARAM_BNODE_STATE)){ //support key without value
            bnodeState = true;
        } else {
            bnodeState = null; //undefined
        }
        if(bnodeState == null || bnodeState){ //null or enabled -> consider prefix
            value = config.get(PARAM_BNODE_PREFIX);
            if(value != null){
                try {
                    new URI(value.toString());
                } catch (URISyntaxException e) {
                    throw new IllegalArgumentException("The configured " + PARAM_BNODE_PREFIX + "='"
                        + value.toString() + "' MUST BE a valid URI!", e);
                }
                bnodePrefix = value.toString();
            } else if(bnodeState != null) { //use default prefix if bnodeState is true
                bnodePrefix = String.format("urn:bnode:%s:", indexingConfig.getName());
            } // else bnodeState == null and no custom prefix -> disabled by default
        }
        if(bnodePrefix != null){
            log.info("Indexing of BNodes enabled (prefix: {})", bnodePrefix);
        } else {
            log.info("Indexing of BNodes disabled");
        }
    }
    @Override
    public boolean needsInitialisation() {
        return (importFilter != null && importFilter.needsInitialisation())
            || !loader.getResources(ResourceState.REGISTERED).isEmpty();
    }
    @Override
    public void initialise(){
        if(importFilter != null && importFilter.needsInitialisation()){
            importFilter.initialise();
        }
        if(!loader.getResources(ResourceState.REGISTERED).isEmpty()){
            loader.loadResources();
        }
    }
    @Override
    public void close() {
        loader = null;
        indexingDataset.close();
        if(importFilter != null){
            importFilter.close();
        }
    }
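    /*
     * Configuration sketch (hedged): a minimal configuration map as consumed
     * by setConfiguration(..) above. The IndexingConfig instance, folder name
     * and prefix are hypothetical; the keys are the PARAM_* constants of this
     * class:
     *
     *   Map<String,Object> config = new HashMap<String,Object>();
     *   config.put(IndexingConfig.KEY_INDEXING_CONFIG, indexingConfig);
     *   config.put(PARAM_SOURCE_FILE_OR_FOLDER, "rdfdata");   //relative to the source folder
     *   config.put(PARAM_BNODE_PREFIX, "urn:bnode:myindex:"); //enable BNode support
     *
     *   RdfIndexingSource source = new RdfIndexingSource();
     *   source.setConfiguration(config);
     *   if(source.needsInitialisation()){
     *       source.initialise(); //imports the registered RDF files
     *   }
     */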
\n", entityVar,fieldVar,valueVar)); //for the where qb.append("} \n"); log.debug("EntityDataIterator Query: \n"+qb.toString()); Query q = QueryFactory.create(qb.toString(), Syntax.syntaxARQ); ResultSet rs = QueryExecutionFactory.create(q, indexingDataset.toDataset()).execSelect(); Var s = Var.alloc(entityVar); Var p = Var.alloc(fieldVar); Var o = Var.alloc(valueVar); while (rs.hasNext()){ Binding b = rs.nextBinding(); log.debug("{} {} {}",new Object[]{b.get(s),b.get(p),b.get(o)}); } } @Override public EntityDataIterator entityDataIterator() { String entityVar = "s"; String fieldVar = "p"; String valueVar = "o"; StringBuilder qb = new StringBuilder(); qb.append(String.format("SELECT ?%s ?%s ?%s \n", entityVar,fieldVar,valueVar)); //for the select qb.append("{ \n"); qb.append(String.format(" ?%s ?%s ?%s . \n", entityVar,fieldVar,valueVar)); //for the where qb.append("} \n"); log.debug("EntityDataIterator Query: \n"+qb.toString()); Query q = QueryFactory.create(qb.toString(), Syntax.syntaxARQ); return new RdfEntityIterator( QueryExecutionFactory.create(q, indexingDataset.toDataset()).execSelect(), entityVar,fieldVar,valueVar); } @Override public Representation getEntityData(String id) { final Node resource; //STANBOL-765: check if the parsed id represents an bnode if(bnodePrefix != null && id.startsWith(bnodePrefix)){ resource = NodeFactory.createAnon(AnonId.create(id.substring(bnodePrefix.length()))); } else { resource = NodeFactory.createURI(id); } Representation source = vf.createRepresentation(id); boolean found; ExtendedIterator<Triple> outgoing = null; try { // There may still be exceptions while reading triples outgoing = indexingDataset.getDefaultGraph().find(resource, null, null); found = outgoing.hasNext(); while(outgoing.hasNext()){ //iterate over the statements for that resource Triple statement = outgoing.next(); Node predicate = statement.getPredicate(); if(predicate == null || !predicate.isURI()){ log.warn("Ignore field {} for resource {} because it is null or not an URI!", predicate,resource); } else { String field = predicate.getURI(); Node value = statement.getObject(); processValue(value, source, field); } //end else predicate != null } //end iteration over resource triple } catch (Exception e) { log.warn("Unable to retrieve entity data for Entity '"+id+"'",e); found = false; try { if(outgoing != null){ outgoing.close(); } } catch (Exception e1) { /* ignore */} } if(found) { if(log.isTraceEnabled()){ log.info("RDFTerm: \n{}", ModelUtils.getRepresentationInfo(source)); } return source; } else { log.debug("No Statements found for id {} (Node: {})!",id,resource); return null; } } /** * Getter for the Jena TDB {@link DatasetGraph} used as source * @return the indexingDataset */ public final DatasetGraphTDB getIndexingDataset() { return indexingDataset; } /** * Processes a {@link Node} and adds the according value to the parsed * Representation. 
    /**
     * Processes a {@link Node} and adds the corresponding value to the passed
     * Representation.
     * @param value The node to convert to a value for the Representation
     * @param source the representation (MUST NOT be <code>null</code>)
     * @param field the field (MUST NOT be <code>null</code>)
     */
    private void processValue(Node value, Representation source, String field) {
        if(value == null){
            log.warn("Encountered NULL value for field {} and entity {}",
                field, source.getId());
        } else if(value.isURI()){ //add a reference
            source.addReference(field, value.getURI());
        } else if(value.isLiteral()){ //add a value or a text depending on the dataType
            LiteralLabel ll = value.getLiteral();
//            log.debug("LL: lexical {} | value {} | dataType {} | language {}",
//                new Object[]{ll.getLexicalForm(),ll.getValue(),ll.getDatatype(),ll.language()});
            //if the dataType == null, then we can expect a plain literal
            RDFDatatype dataType = ll.getDatatype();
            if(dataType != null){ //add a value
                Object literalValue;
                try {
                    literalValue = ll.getValue();
                    if(literalValue instanceof BaseDatatype.TypedValue){
                        //used for unknown data types
                        // -> in such cases just use the lexical form
                        String lexicalValue = ((BaseDatatype.TypedValue)literalValue).lexicalValue;
                        if(lexicalValue != null && !lexicalValue.isEmpty()){
                            source.add(field, lexicalValue);
                        }
                    } else if(literalValue instanceof XSDDateTime) {
                        source.add(field, ((XSDDateTime)literalValue).asCalendar().getTime()); //Entityhub uses the time
                    } else if(literalValue instanceof XSDDuration) {
                        String duration = literalValue.toString();
                        if(duration != null && !duration.isEmpty()) {
                            source.add(field, duration);
                        }
                    } else if(!ll.getLexicalForm().isEmpty()){
                        source.add(field, literalValue);
                    } //else ignore literals that are empty
                } catch (DatatypeFormatException e) {
                    log.warn(" Unable to convert {} to {} -> use lexicalForm",
                        ll.getLexicalForm(), ll.getDatatype());
                    String lexicalForm = ll.getLexicalForm();
                    if(lexicalForm != null && !lexicalForm.isEmpty()){
                        source.add(field, lexicalForm);
                    }
                }
            } else { //add a text
                String lexicalForm = ll.getLexicalForm();
                if(lexicalForm != null && !lexicalForm.isEmpty()){
                    String language = ll.language();
                    if(language != null && language.length() < 1){ // "" is used if there is no language
                        language = null;
                    }
                    source.addNaturalText(field, lexicalForm, language);
                } //else ignore empty literals
            }
        } else if(value.isBlank()) {
            if(bnodePrefix != null) { //STANBOL-765: convert BNodes to URIs
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(value.getBlankNodeId().getLabelString());
                source.addReference(field, sb.toString());
            } else {
                logIgnoredBnode(log, source, field, value);
            }
        } else {
            log.warn("Ignoring value {} for field {} and RDFTerm {} because it is of an unsupported type!",
                new Object[]{value, field, source.getId()});
        } //end different value node types
    }
    /**
     * Logs that a BlankNode was ignored (only the first time). Also debugs the
     * ignored triple.
     * @param log the logger to use
     * @param s subject
     * @param p predicate
     * @param o object
     */
    protected void logIgnoredBnode(Logger log, Object s, Object p, Object o) {
        if(!bnodeIgnored){
            bnodeIgnored = true;
            log.warn("The indexed RDF data does contain Blank Nodes. Those are "
                + "ignored unless the '{}' parameter is set to a valid URI. "
                + "If this parameter is set, BNodes are converted to URIs by "
                + "using {bnode-prefix}{bnodeId} (see STANBOL-765)", PARAM_BNODE_PREFIX);
        }
        log.debug("Ignoring blank node value(s) for triple {},{},{}!", new Object[]{s, p, o});
    }
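    /*
     * Mapping sketch (hedged, the values are illustrative examples):
     * processValue(..) converts Jena Nodes to Entityhub values roughly as
     * follows:
     *
     *   <http://example.org/other>            -> Reference
     *   "42"^^xsd:int                         -> Integer value
     *   "2012-01-01T00:00:00Z"^^xsd:dateTime  -> java.util.Date
     *   "P1Y"^^xsd:duration                   -> String (lexical form)
     *   "hello"@en                            -> Text with language "en"
     *   "hello"                               -> Text without language
     *   _:b0                                  -> Reference {bnode-prefix}b0 (if configured)
     */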
    /**
     * Implementation of the iterator over the entities stored in a
     * {@link RdfIndexingSource}. This Iterator is based on a query
     * {@link ResultSet}. It uses the low level SPARQL API because this allows
     * to use the same code to create values for Representations.
     * @author Rupert Westenthaler
     *
     */
    public final class RdfEntityIterator implements EntityDataIterator {
        /**
         * Variables used to access the entity, field and value of the
         * query solutions.
         */
        final Var entityVar;
        final Var fieldVar;
        final Var valueVar;
        /**
         * The result set containing all triples in the form of <code>
         * "entity -> field -> value"</code>
         */
        private final ResultSet resultSet;
        /**
         * The {@link Node} representing the current entity or <code>null</code>
         * if the iterator is newly created.<p>
         * {@link Node#isURI()} is guaranteed to return <code>true</code> and
         * {@link Node#getURI()} is guaranteed to return the id for the entity
         */
        private Node currentEntity = null;
        /**
         * The {@link Node} for the next entity in the iteration or <code>null</code>
         * in case there are no further entities or the iterator is newly created
         * (in that case {@link #currentEntity} will also be <code>null</code>)<p>
         * {@link Node#isURI()} is guaranteed to return <code>true</code> and
         * {@link Node#getURI()} is guaranteed to return the id for the entity
         */
        private Node nextEntity = null;
        /**
         * The Representation of the current element. Only available after a
         * call to {@link #getRepresentation()}
         */
        private Representation currentRepresentation = null;
        /**
         * Holds all <code>field,value</code> pairs of the current entity.
         * Elements at even positions represent <code>fields</code> and elements
         * at odd positions represent <code>values</code>.
         */
        private List<Node> data = new ArrayList<Node>();
        /**
         * The next (not consumed) solution of the query.
         */
        private Binding nextBinding = null;

        protected RdfEntityIterator(ResultSet resultSet, String entityVar, String fieldVar, String valueVar){
            if(resultSet == null){
                throw new IllegalArgumentException("The passed ResultSet MUST NOT be NULL!");
            }
            //check if the ResultSet provides the required variables to perform the query
            List<String> vars = resultSet.getResultVars();
            if(!vars.contains(entityVar)){
                throw new IllegalArgumentException("The passed ResultSet is missing the required "
                    + "Variable \"" + entityVar + "\" representing the Entity!");
            } else {
                this.entityVar = Var.alloc(entityVar);
            }
            if(!vars.contains(fieldVar)){
                throw new IllegalArgumentException("The passed ResultSet is missing the required "
                    + "Variable \"" + fieldVar + "\" representing the Field of an Entity!");
            } else {
                this.fieldVar = Var.alloc(fieldVar);
            }
            if(!vars.contains(valueVar)){
                throw new IllegalArgumentException("The passed ResultSet is missing the required "
                    + "Variable \"" + valueVar + "\" representing the Value of a Field of an Entity!");
            } else {
                this.valueVar = Var.alloc(valueVar);
            }
            this.resultSet = resultSet;
            //this will read until the first binding of the first entity is found
            initFirst();
        }
        private void initFirst(){
            if(currentEntity == null && nextEntity == null){ //only for the first call
                //consume bindings until the first valid entity starts
                while(nextEntity == null && resultSet.hasNext()){
                    Binding firstValid = resultSet.nextBinding();
                    Node entityNode = firstValid.get(entityVar);
                    if((entityNode.isURI() && !entityNode.toString().isEmpty())
                            || entityNode.isBlank() && bnodePrefix != null){
                        //store it temporarily in nextBinding
                        nextBinding = firstValid;
                        //store it as next (first) entity
                        nextEntity = entityNode;
                    } else {
                        logIgnoredBnode(log, entityNode, firstValid.get(fieldVar), firstValid.get(valueVar));
                    }
                }
            } else {
                throw new IllegalStateException("This method MUST only be used for initialisation!");
            }
        }
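        /*
         * Usage sketch (hedged): the iterator yields entity ids; the
         * Representation of the current entity can be requested after each
         * next() call:
         *
         *   EntityDataIterator it = source.entityDataIterator();
         *   while(it.hasNext()){
         *       String id = it.next();
         *       Representation rep = it.getRepresentation();
         *       //... index the representation ...
         *   }
         *   it.close();
         */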
        @Override
        public void close() {
            data.clear();
            data = null;
            currentEntity = null;
            currentRepresentation = null;
            //Looks like it is not possible to close a ResultSet
        }
        @Override
        public Representation getRepresentation() {
            //current entity will be null if
            // - next() was never called
            // - the end of the iteration was reached
            if(currentEntity == null){
                return null;
            } else if(currentRepresentation == null){
                currentRepresentation = createRepresentation();
            }
            return currentRepresentation;
        }
        @Override
        public boolean hasNext() {
            return resultSet.hasNext();
        }
        @Override
        public String next() {
            return getNext();
        }
        @Override
        public void remove() {
            throw new UnsupportedOperationException(
                "Removal of Entities is not supported by this Implementation!");
        }
        /**
         * Iterates over all {@link QuerySolution}s of the {@link #resultSet}
         * that have {@link #currentEntity} bound to {@link #entityVar}.
         * NOTES: <ul>
         * <li>This method also initialises the {@link #data} and sets the
         * {@link #nextBinding} to the first solution of the next entity.<br>
         * <li>That also means that it would iterate over additional
         * {@link #entityVar} values that are not URIResources (in case
         * {@link RDFNode#isURIResource()} returns <code>false</code>)
         * <li>This method is also used to initialise the first entity
         * </ul>
         * @return the URI of the current entity
         */
        private String getNext(){
            //check for more elements
            if(!resultSet.hasNext()){
                throw new NoSuchElementException("No more Entities available");
            }
            //clean up data of the previous entity
            this.data.clear(); //remove data of the previous entity
            this.currentRepresentation = null; //and the representation
            this.currentEntity = nextEntity; //set the nextEntity to the current
            //and process the first binding already consumed from the resultSet
            //by calling this method for the previous entity
            if(nextBinding != null){ //will be null for the first entity
                processSolution(nextBinding);
            }
            //now get all the other solutions for the current entity
            boolean next = false;
            while(!next && resultSet.hasNext()){
                Binding binding = resultSet.nextBinding();
                Node entityNode = binding.get(entityVar);
                //NOTES:
                // * for URIs we need to check for empty URIs!
                // * STANBOL-765: added support for BlankNodes
                if((entityNode.isURI() && !entityNode.toString().isEmpty())
                        || entityNode.isBlank() && bnodePrefix != null){
                    if(!entityNode.equals(currentEntity)){ //start of the next entity
                        this.nextEntity = entityNode; //store the node for the next entity
                        this.nextBinding = binding; //store the first binding of the next entity
                        //we are done for this entity -> exit the loop
                        next = true;
                    } else {
                        processSolution(binding);
                    }
                } else {
                    logIgnoredBnode(log, entityNode, binding.get(fieldVar), binding.get(valueVar));
                }
            }
            if(!next){ //exited the loop but still no new entity ... that means
                nextEntity = null; //there are no more entities
                nextBinding = null; //and there are also no more solutions
            }
            //STANBOL-765: if current is a BNode add the bnode-prefix
            return currentEntity.isBlank()
                ? new StringBuilder(bnodePrefix).append(currentEntity.getBlankNodeId().getLabelString()).toString()
                : currentEntity.getURI();
        }
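        /*
         * Note (assumption): the SELECT query used here has no ORDER BY, so
         * the grouping logic in getNext() relies on the TDB dataset returning
         * all solutions for one subject contiguously - which appears to hold
         * for plain triple pattern scans over a single TDB graph.
         */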
        /**
         * Processes a {@link Binding} by storing the {@link Node}s for the
         * variables {@link #fieldVar} and {@link #valueVar} to {@link #data}.
         * This method ensures that both values are not <code>null</code> and
         * that the {@link Node} representing the field is a URI (
         * returns <code>true</code> for {@link Node#isURI()}).
         * @param binding the binding to process
         */
        private void processSolution(Binding binding) {
            Node field = binding.get(fieldVar);
            if(field != null && field.isURI()){ //property MUST BE a URI
                Node value = binding.get(valueVar);
                if(value != null){
                    //add the pair
                    data.add(field);
                    data.add(value);
                }
            } else {
                //This may only happen if the query used to create the ResultSet
                //containing this solution does not link the field variable
                //to properties.
                log.error("Found Field {} for Entity {} that is not a URIResource", field, currentEntity);
            }
        }
        /**
         * Used to create the Representation the first time
         * {@link #getRepresentation()} is called for the current entity. The
         * information for the Representation is already stored in {@link #data}
         */
        private Representation createRepresentation() {
            final String uri;
            if(currentEntity.isBlank()){ //STANBOL-765: support BNodes
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(currentEntity.getBlankNodeId().getLabelString());
                uri = sb.toString();
            } else {
                uri = currentEntity.getURI();
            }
            Representation representation = vf.createRepresentation(uri);
            Iterator<Node> it = data.iterator();
            while(it.hasNext()){
                //data contains field,value pairs - because of that we call
                //next() twice per iteration
                String field = it.next().getURI(); //the field
                Node value = it.next(); //and the value
                processValue(value, representation, field);
            }
            return representation;
        }
    }
    /* ----------------------------------------------------------------------
     * RDF Backend implementation
     * ----------------------------------------------------------------------
     */
    @Override
    public Collection<Node> listObjects(Node subject, Node property) {
        Collection<Node> nodes = new ArrayList<Node>();
        if(bnodePrefix != null && subject.isURI() && subject.getURI().startsWith(bnodePrefix)){
            subject = NodeFactory.createAnon(new AnonId(subject.getURI().substring(bnodePrefix.length())));
        }
        ExtendedIterator<Triple> it = indexingDataset.getDefaultGraph().find(subject, property, null);
        while(it.hasNext()){
            //STANBOL-765: we also need to transform BNodes to URIs for the
            //RDFBackend implementation
            Node object = it.next().getObject();
            if(bnodePrefix != null && object.isBlank()){
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(object.getBlankNodeId().getLabelString());
                object = NodeFactory.createURI(sb.toString());
            }
            nodes.add(object);
        }
        it.close();
        return nodes;
    }
    @Override
    public Collection<Node> listSubjects(Node property, Node object) {
        Collection<Node> nodes = new ArrayList<Node>();
        if(bnodePrefix != null && object.isURI() && object.getURI().startsWith(bnodePrefix)){
            object = NodeFactory.createAnon(new AnonId(object.getURI().substring(bnodePrefix.length())));
        }
        ExtendedIterator<Triple> it = indexingDataset.getDefaultGraph().find(null, property, object);
        while(it.hasNext()){
            Node subject = it.next().getSubject();
            //STANBOL-765: we also need to transform BNodes to URIs for the
            //RDFBackend implementation
            if(bnodePrefix != null && subject.isBlank()){
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(subject.getBlankNodeId().getLabelString());
                subject = NodeFactory.createURI(sb.toString());
            }
            nodes.add(subject);
        }
        it.close();
        return nodes;
    }
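    /*
     * RDFBackend sketch (hedged; the prefix, property and variable names are
     * hypothetical): the BNode conversion is transparent for LDPath callers -
     * converted URIs passed in are mapped back to blank nodes, and blank
     * nodes in results are mapped to URIs:
     *
     *   Node subject = backend.createURI("urn:bnode:myindex:b0"); //yields a blank Node
     *   Node property = backend.createURI("http://example.org/prop");
     *   for(Node object : backend.listObjects(subject, property)){
     *       //blank node objects arrive as urn:bnode:myindex:* URIs
     *   }
     */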
     * <p>
     * This method ensures that if someone requests a URI {@link Node} for a
     * URI that represents a transformed BNode (i.e. the URI starts with
     * {@link #bnodePrefix}) the corresponding blank {@link Node} is created.
     * @param uri the URI string
     * @return the {@link Node} for the URI - a blank node in case the URI
     * represents a converted BNode
     */
    @Override
    public Node createURI(String uri) {
        if(bnodePrefix != null && uri.startsWith(bnodePrefix)){
            return NodeFactory.createAnon(AnonId.create(uri.substring(bnodePrefix.length())));
        } else {
            return super.createURI(uri);
        }
    }
    /**
     * Used in case multiple {@link RdfImportFilter}s are configured.
     * @author Rupert Westenthaler
     *
     */
    private class UnionImportFilter implements RdfImportFilter {

        RdfImportFilter[] filters;

        UnionImportFilter(RdfImportFilter[] filters){
            this.filters = filters;
        }

        @Override
        public void setConfiguration(Map<String,Object> config) {}

        @Override
        public boolean needsInitialisation() { return false; }

        @Override
        public void initialise() {}

        @Override
        public void close() {}

        @Override
        public boolean accept(Node s, Node p, Node o) {
            boolean state = true;
            for(int i = 0; state && i < filters.length; i++){
                state = filters[i].accept(s, p, o);
            }
            return state;
        }
    }
}