/* * Copyright 2014 reto. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package eu.fusepool.datalifecycle.core; import org.apache.clerezza.rdf.core.UriRef; import org.apache.clerezza.rdf.core.access.LockableMGraph; import org.apache.clerezza.rdf.core.access.NoSuchEntityException; import org.apache.clerezza.rdf.core.access.TcManager; import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl; import org.apache.clerezza.rdf.core.impl.TripleImpl; import org.apache.clerezza.rdf.ontologies.OWL; import org.apache.clerezza.rdf.ontologies.RDF; import org.apache.clerezza.rdf.ontologies.RDFS; import org.apache.felix.scr.annotations.Component; import org.apache.felix.scr.annotations.Reference; import org.apache.felix.scr.annotations.Service; import eu.fusepool.datalifecycle.ontologies.DLC; @Component @Service(DataSetFactory.class) public class DataSetFactory { @Reference private TcManager tcManager; @Reference private DlcGraphProvider dlcGraphProvider; // base graph uri public static final String GRAPH_URN_PREFIX = "urn:x-localinstance:/dlc/"; public DataSet getDataSet(UriRef dataSetUri) { return new DataSetImpl(dataSetUri); } public DataSet createDataSet(String datasetName) { if ((datasetName == null) || "".equals(datasetName)) { throw new IllegalArgumentException("DatasetName must be non null and non empty"); } final UriRef dataSetUri = new UriRef(GRAPH_URN_PREFIX + datasetName); final DataSetImpl dataSet = new DataSetImpl(dataSetUri); dataSet.initialize(datasetName); return dataSet; } /** * For each rdf triple collection uploaded 5 graphs are created. 1) a source * graph to store the rdf data 2) an enhancements graph to store the text * extracted for indexing and the entities extracted from the text by NLP * engines in the default enhancement chain 3) a graph to store the result * of the interlinking task 4) a graph to store the smushed graph 5) a graph * to store the published graph i.e. the smushed graph in a coherent state * with data in the content graph The name convention for these graphs is * GRAPH_URN_PREFIX + timestamp + SUFFIX where SUFFIX can be one of * SOURCE_GRAPH_URN_SUFFIX, ENHANCE_GRAPH_URN_SUFFIX, * INTERLINK_GRAPH_URN_SUFFIX, SMUSH_GRAPH_URN_SUFFIX, * PUBLISH_GRAPH_URN_SUFFIX */ class DataSetImpl implements DataSet { // graph suffix public static final String SOURCE_GRAPH_URN_SUFFIX = "/rdf.graph"; // digest graph suffix public static final String DIGEST_GRAPH_URN_SUFFIX = "/digest.graph"; // enhancements graph suffix public static final String ENHANCE_GRAPH_URN_SUFFIX = "/enhance.graph"; // log graph suffix public static final String LOG_GRAPH_URN_SUFFIX = "/log.graph"; // interlink graph suffix public static final String INTERLINK_GRAPH_URN_SUFFIX = "/interlink.graph"; // smushed graph suffix public static final String SMUSH_GRAPH_URN_SUFFIX = "/smush.graph"; // published graph suffix public static final String PUBLISH_GRAPH_URN_SUFFIX = "/publish.graph"; private UriRef dataSetUri; DataSetImpl(UriRef dataSetUri) { this.dataSetUri = dataSetUri; } /** * * @return the graph containing the enhanced data */ @Override public LockableMGraph getEnhancementsGraph() { try { return tcManager.getMGraph(getEnhancementsGraphRef()); } catch (NoSuchEntityException e) { return tcManager.createMGraph(getLogGraphRef()); } } @Override public UriRef getEnhancementsGraphRef() { return new UriRef(dataSetUri.getUnicodeString() + ENHANCE_GRAPH_URN_SUFFIX); } /** * * @return the graph containing the activity log of the dataset */ @Override public LockableMGraph getLogGraph() { try { return tcManager.getMGraph(getLogGraphRef()); } catch (NoSuchEntityException e) { return tcManager.createMGraph(getLogGraphRef()); } } @Override public UriRef getLogGraphRef() { return new UriRef(dataSetUri.getUnicodeString() + LOG_GRAPH_URN_SUFFIX); } /** * * @return the graph containing the digested content to be used for * enhancements and indexing */ @Override public LockableMGraph getDigestGraph() { try { return tcManager.getMGraph(getDigestGraphRef()); } catch (NoSuchEntityException e) { return tcManager.createMGraph(getDigestGraphRef()); } } @Override public UriRef getDigestGraphRef() { return new UriRef(dataSetUri.getUnicodeString() + DIGEST_GRAPH_URN_SUFFIX); } /** * * @return the graph containing the interlinks (owl:sameAs triples) */ @Override public LockableMGraph getInterlinksGraph() { return tcManager.getMGraph(getInterlinksGraphRef()); } @Override public UriRef getInterlinksGraphRef() { return new UriRef(dataSetUri.getUnicodeString() + INTERLINK_GRAPH_URN_SUFFIX); } public UriRef getSourceGraphRef() { return new UriRef(dataSetUri.getUnicodeString() + SOURCE_GRAPH_URN_SUFFIX); } @Override public LockableMGraph getSourceGraph() { return tcManager.getMGraph(getSourceGraphRef()); } @Override public UriRef getSmushGraphRef() { return new UriRef(dataSetUri.getUnicodeString() + SMUSH_GRAPH_URN_SUFFIX); } @Override public LockableMGraph getSmushGraph() { return tcManager.getMGraph(getSmushGraphRef()); } @Override public UriRef getPublishGraphRef() { return new UriRef(dataSetUri.getUnicodeString() + PUBLISH_GRAPH_URN_SUFFIX); } @Override public LockableMGraph getPublishGraph() { return tcManager.getMGraph(getPublishGraphRef()); } @Override public UriRef getUri() { return dataSetUri; } void initialize(final String datasetName) { dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSetUri, RDF.type, DLC.Pipe)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSetUri, RDFS.label, new PlainLiteralImpl(datasetName))); dlcGraphProvider.getDlcGraph().add(new TripleImpl(DlcGraphProvider.DATA_LIFECYCLE_GRAPH_REFERENCE, DLC.pipe, dataSetUri)); /* waht are tasks, and what are this tripples for?*/ // create tasks //rdf task UriRef rdfTaskRef = new UriRef(dataSetUri.getUnicodeString() + "/rdf"); dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSetUri, DLC.creates, rdfTaskRef)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(rdfTaskRef, RDF.type, DLC.RdfTask)); // digest task UriRef digestTaskRef = new UriRef(dataSetUri.getUnicodeString() + "/digest"); dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSetUri, DLC.creates, digestTaskRef)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(digestTaskRef, RDF.type, DLC.DigestTask)); // enhance task UriRef enhanceTaskRef = new UriRef(dataSetUri.getUnicodeString() + "/enhance"); dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSetUri, DLC.creates, enhanceTaskRef)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(enhanceTaskRef, RDF.type, DLC.EnhanceTask)); // interlink task UriRef interlinkTaskRef = new UriRef(dataSetUri.getUnicodeString() + "/interlink"); dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSetUri, DLC.creates, interlinkTaskRef)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(interlinkTaskRef, RDF.type, DLC.InterlinkTask)); // smush task UriRef smushTaskRef = new UriRef(dataSetUri.getUnicodeString() + "/smush"); dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSetUri, DLC.creates, smushTaskRef)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(smushTaskRef, RDF.type, DLC.SmushTask)); // publish task UriRef publishTaskRef = new UriRef(dataSetUri.getUnicodeString() + "/publish"); dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSetUri, DLC.creates, publishTaskRef)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(smushTaskRef, RDF.type, DLC.PublishTask)); // create the source graph for the dataset (result of transformation in RDF) tcManager.createMGraph(getSourceGraphRef()); //GraphNode dlcGraphNode = new GraphNode(DATA_LIFECYCLE_GRAPH_REFERENCE, getDlcGraph()); //dlcGraphNode.addProperty(DCTERMS.hasPart, graphRef); dlcGraphProvider.getDlcGraph().add(new TripleImpl(rdfTaskRef, DLC.deliverable, getSourceGraphRef())); dlcGraphProvider.getDlcGraph().add(new TripleImpl(getSourceGraphRef(), RDF.type, DLC.Dataset)); // create the graph to store text fields extract from properties in the source rdf tcManager.createMGraph(getDigestGraphRef()); dlcGraphProvider.getDlcGraph().add(new TripleImpl(enhanceTaskRef, DLC.deliverable, getDigestGraphRef())); dlcGraphProvider.getDlcGraph().add(new TripleImpl(getDigestGraphRef(), RDFS.label, new PlainLiteralImpl("Contains a sioc:content property with text " + "for indexing and references to entities found in the text by NLP enhancement engines"))); // create the graph to store enhancements found by NLP engines in the digest tcManager.createMGraph(getEnhancementsGraphRef()); dlcGraphProvider.getDlcGraph().add(new TripleImpl(enhanceTaskRef, DLC.deliverable, getEnhancementsGraphRef())); dlcGraphProvider.getDlcGraph().add(new TripleImpl(getEnhancementsGraphRef(), RDFS.label, new PlainLiteralImpl("Contains entities found " + "in digest by NLP enhancement engines"))); // create the graph to store the result of the interlinking task tcManager.createMGraph(getInterlinksGraphRef()); dlcGraphProvider.getDlcGraph().add(new TripleImpl(interlinkTaskRef, DLC.deliverable, getInterlinksGraphRef())); dlcGraphProvider.getDlcGraph().add(new TripleImpl(getInterlinksGraphRef(), RDF.type, DLC.Linkset)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(getInterlinksGraphRef(), DLC.subjectsTarget, getSourceGraphRef())); dlcGraphProvider.getDlcGraph().add(new TripleImpl(getInterlinksGraphRef(), DLC.linkPredicate, OWL.sameAs)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(getInterlinksGraphRef(), RDFS.label, new PlainLiteralImpl("Contains equivalence links"))); // create the graph to store the result of the smushing task tcManager.createMGraph(getSmushGraphRef()); dlcGraphProvider.getDlcGraph().add(new TripleImpl(smushTaskRef, DLC.deliverable, getSmushGraphRef())); // create the graph to store the result of the publishing task tcManager.createMGraph(getPublishGraphRef()); dlcGraphProvider.getDlcGraph().add(new TripleImpl(publishTaskRef, DLC.deliverable, getPublishGraphRef())); // set the initial dataset status as unpublished UriRef statusRef = new UriRef(dataSetUri.getUnicodeString() + "/Status"); dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSetUri, DLC.status, statusRef)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(statusRef, RDF.type, DLC.Unpublished)); dlcGraphProvider.getDlcGraph().add(new TripleImpl(statusRef, RDFS.label, new PlainLiteralImpl("Unpublished"))); } } }