/*
 * Copyright 2013 Fusepool Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package eu.fusepool.datalifecycle.core;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.security.AccessController;
import java.security.AllPermission;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.Lock;

import javax.ws.rs.DefaultValue;
import javax.ws.rs.FormParam;
import javax.ws.rs.GET;
import javax.ws.rs.HeaderParam;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.UriInfo;

import org.apache.clerezza.jaxrs.utils.RedirectUtil;
import org.apache.clerezza.jaxrs.utils.TrailingSlash;
import org.apache.clerezza.rdf.core.BNode;
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.TypedLiteral;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.access.LockableMGraph;
import org.apache.clerezza.rdf.core.access.LockableMGraphWrapper;
import org.apache.clerezza.rdf.core.access.TcManager;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.clerezza.rdf.core.serializedform.Parser;
import org.apache.clerezza.rdf.ontologies.DC;
import org.apache.clerezza.rdf.ontologies.RDF;
import org.apache.clerezza.rdf.ontologies.RDFS;
import org.apache.clerezza.rdf.ontologies.SIOC;
import org.apache.clerezza.rdf.utils.GraphNode;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.apache.felix.scr.annotations.ReferencePolicy;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
import org.apache.stanbol.commons.web.viewable.RdfViewable;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.ContentSource;
import org.apache.stanbol.enhancer.servicesapi.EnhancementException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager;
import org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
import org.apache.stanbol.entityhub.servicesapi.model.Entity;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.site.SiteManager;
import org.osgi.framework.BundleContext;
import org.osgi.framework.Constants;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.fusepool.datalifecycle.Interlinker;
import eu.fusepool.datalifecycle.RdfDigester;
import eu.fusepool.datalifecycle.Rdfizer;
import eu.fusepool.datalifecycle.ontologies.DLC;
import eu.fusepool.datalifecycle.utils.FileUtil;
import eu.fusepool.datalifecycle.utils.LinksRetriever;

/**
 * This is the controller class of the Fusepool data life cycle component. The
 * main functionalities provided are:
 * 1) XML2RDF transformation
 * 2) Indexing and information extraction
 * 3) Reconciliation/interlinking
 * 4) Smushing
 */
@Component(immediate = true, metatype = true, policy = ConfigurationPolicy.OPTIONAL)
@Properties(value = {
    @Property(name = "javax.ws.rs", boolValue = true),
    @Property(name = Constants.SERVICE_RANKING, intValue = SourcingAdmin.DEFAULT_SERVICE_RANKING)
})
@Service(Object.class)
@Path("sourcing")
public class SourcingAdmin {

    // Service property attributes
    public static final int DEFAULT_SERVICE_RANKING = 101;

    // Base URI property attributes. This property is used to canonicalize URIs of type urn:x-temp.
    // The value of the property is updated at service activation from the service configuration panel.
    public static final String BASE_URI_DESCRIPTION = "Base http URI to be used when publishing data ( e.g. http://mydomain.com )";
    public static final String BASE_URI_LABEL = "Base URI";
    public static final String DEFAULT_BASE_URI = "http://localhost:8080";
    @Property(label = BASE_URI_LABEL, value = DEFAULT_BASE_URI, description = BASE_URI_DESCRIPTION)
    public static final String BASE_URI = "baseUri";

    // base uri updated at service activation from the service property in the osgi console
    private UriRef baseUri;
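
    // Illustrative example (hypothetical URIs): with the base URI set to
    // http://mydomain.com, a temporary resource such as <urn:x-temp:/id/doc-1>
    // is expected to be published as <http://mydomain.com/id/doc-1>. The actual
    // rewriting is carried out by the smushing and publishing tasks, which
    // receive this base URI as a parameter.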

    // Confidence threshold for enhancements. This property is used to set the minimum value
    // of acceptance of computed enhancements
    public static final String CONFIDENCE_THRESHOLD_DESCRIPTION = "Minimum value for acceptance of computed enhancements";
    public static final String CONFIDENCE_THRESHOLD_LABEL = "Confidence threshold";
    public static final String DEFAULT_CONFIDENCE_VALUE = "0.5";
    @Property(label = CONFIDENCE_THRESHOLD_LABEL, value = DEFAULT_CONFIDENCE_VALUE, description = CONFIDENCE_THRESHOLD_DESCRIPTION)
    public static final String CONFIDENCE_THRESHOLD = "confidenceThreshold";

    // confidence threshold value updated at service activation from the service property in the osgi console
    private double confidenceThreshold = 0.5;

    /**
     * Using slf4j for normal logging
     */
    private static final Logger log = LoggerFactory.getLogger(SourcingAdmin.class);

    BundleContext bundleCtx = null;

    @Reference
    private Parser parser;

    @Reference
    private ContentItemFactory contentItemFactory;

    @Reference
    private EnhancementJobManager enhancementJobManager;

    @Reference
    private DataSetFactory dataSetFactory;

    @Reference
    private DlcGraphProvider dlcGraphProvider;

    /**
     * This service allows getting entities from configured sites
     */
    @Reference
    private SiteManager siteManager;

    /**
     * This service allows accessing and creating persistent triple collections
     */
    @Reference
    private TcManager tcManager;

    // Stores bindings to different implementations of RdfDigester
    @Reference(cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE,
            policy = ReferencePolicy.DYNAMIC,
            referenceInterface = eu.fusepool.datalifecycle.RdfDigester.class)
    private final Map<String, RdfDigester> digesters = new HashMap<String, RdfDigester>();

    // Stores bindings to different implementations of Rdfizer
    @Reference(cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE,
            policy = ReferencePolicy.DYNAMIC,
            referenceInterface = eu.fusepool.datalifecycle.Rdfizer.class)
    private final Map<String, Rdfizer> rdfizers = new HashMap<String, Rdfizer>();

    // Stores bindings to different instances of Interlinker
    @Reference(cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE,
            policy = ReferencePolicy.DYNAMIC,
            referenceInterface = eu.fusepool.datalifecycle.Interlinker.class)
    private final Map<String, Interlinker> interlinkers = new HashMap<String, Interlinker>();

    /**
     * This is the name of the graph in which we "log" the requests
     */
    //private UriRef REQUEST_LOG_GRAPH_NAME = new UriRef("http://example.org/resource-resolver-log.graph");

    /**
     * Register graph referencing graphs for life cycle monitoring;
     */
    public static final String CONTENT_GRAPH_NAME = "urn:x-localinstance:/content.graph";

    private UriRef CONTENT_GRAPH_REF = new UriRef(CONTENT_GRAPH_NAME);

    // data upload codes
    private final int UPLOAD_XML = 1;
    private final int UPLOAD_RDF = 2;

    // task codes
    private final int TEXT_EXTRACTION = 1;
    private final int COMPUTE_ENHANCEMENTS = 2;
    private final int RECONCILE_GRAPH_OPERATION = 3;
    private final int SMUSH_GRAPH_OPERATION = 4;
    private final int PUBLISH_DATA = 5;

    // message to show when base URI is invalid
    private final String INVALID_BASE_URI_ALERT = "A valid base URI has not been set. "
            + "It can be set in the framework configuration panel (eu.fusepool.datalifecycle.SourcingAdmin)";

    // Validity of base URI (enables interlinking, smushing and publishing tasks)
    private boolean isValidBaseUri = false;

    // all active and some other tasks
    final private Set<Task> tasks = Collections.synchronizedSet(new HashSet<Task>());

    @SuppressWarnings("unchecked")
    @Activate
    protected void activate(ComponentContext context) {
        log.info("The Sourcing Admin Service is being activated");
        // Get the value of the base uri from the service property set in the Felix console
        Dictionary<String, Object> dict = context.getProperties();
        Object baseUriObj = dict.get(BASE_URI);
        String baseUriString = baseUriObj.toString();
        if ((!"".equals(baseUriString)) && (baseUriString.startsWith("http://"))) {
            if (baseUriString.endsWith("/")) {
                baseUriString = baseUriString.substring(0, baseUriString.length() - 1);
            }
            isValidBaseUri = true;
            log.info("Base URI: {}", baseUriString);
        } else {
            isValidBaseUri = false;
        }
        baseUri = new UriRef(baseUriString);
        // Get the value of the confidence threshold from the service property set in the Felix console
        Object confidenceObj = dict.get(CONFIDENCE_THRESHOLD);
        if (confidenceObj != null) {
            confidenceThreshold = Double.valueOf(confidenceObj.toString());
        }
    }

    @Deactivate
    protected void deactivate(ComponentContext context) {
        log.info("The Sourcing Admin Service is being deactivated");
    }

    /**
     * Binds a digester used by this component, adding it to a hash map.
     *
     * @param digester
     */
    protected void bindDigesters(RdfDigester digester) {
        log.info("Binding digester " + digester.getName());
        if (!digesters.containsKey(digester.getName())) {
            digesters.put(digester.getName(), digester);
            log.info("Digester " + digester.getName() + " bound");
        } else {
            log.info("Digester " + digester.getName() + " already bound.");
        }
    }

    /**
     * Unbinds a digester used by this component, removing it from the hash map.
     *
     * @param digester
     */
    protected void unbindDigesters(RdfDigester digester) {
        if (digesters.containsKey(digester.getName())) {
            digesters.remove(digester.getName());
            log.info("Digester " + digester.getName() + " unbound.");
        }
    }

    /**
     * Binds an interlinker used by this component
     */
    protected void bindInterlinkers(Interlinker interlinker) {
        log.info("Binding interlinker " + interlinker.getName());
        if (!interlinkers.containsKey(interlinker.getName())) {
            interlinkers.put(interlinker.getName(), interlinker);
            log.info("Interlinker " + interlinker.getName() + " bound");
        } else {
            log.info("Interlinker " + interlinker.getName() + " already bound.");
        }
    }

    /**
     * Unbinds an interlinker
     */
    protected void unbindInterlinkers(Interlinker interlinker) {
        if (interlinkers.containsKey(interlinker.getName())) {
            interlinkers.remove(interlinker.getName());
            log.info("Interlinker " + interlinker.getName() + " unbound.");
        }
    }

    /**
     * Binds an rdfizer used by this component
     */
    protected void bindRdfizers(Rdfizer rdfizer) {
        log.info("Binding rdfizer " + rdfizer.getName());
        if (!rdfizers.containsKey(rdfizer.getName())) {
            rdfizers.put(rdfizer.getName(), rdfizer);
            log.info("Rdfizer " + rdfizer.getName() + " bound");
        } else {
            log.info("Rdfizer " + rdfizer.getName() + " already bound.");
        }
    }

    /**
     * Unbinds an rdfizer
     */
    protected void unbindRdfizers(Rdfizer rdfizer) {
        if (rdfizers.containsKey(rdfizer.getName())) {
            rdfizers.remove(rdfizer.getName());
            log.info("Rdfizer " + rdfizer.getName() + " unbound.");
        }
    }
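
    /*
     * Example configuration (hypothetical values) as set for this component in
     * the OSGi/Felix configuration panel:
     *
     *   baseUri             = http://data.example.org
     *   confidenceThreshold = 0.7
     *
     * A trailing slash in the base URI is stripped at activation; a value that
     * does not start with "http://" leaves isValidBaseUri false, which disables
     * the interlinking, smushing and publishing tasks.
     */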

    /**
     * This method returns an RdfViewable, this is an RDF serviceUri with
     * associated presentational information.
     */
    @GET
    public RdfViewable serviceEntry(@Context final UriInfo uriInfo,
            @QueryParam("url") final UriRef url,
            @HeaderParam("user-agent") String userAgent) {
        // this makes sure this service is not invoked with a trailing slash which would affect
        // relative resolution of links (e.g. css)
        TrailingSlash.enforcePresent(uriInfo);

        final String resourcePath = uriInfo.getAbsolutePath().toString();
        if (url != null) {
            String query = url.toString();
            log.info(query);
        }

        // The URI at which this service was accessed, this will be the
        // central serviceUri in the response
        final UriRef serviceUri = new UriRef(resourcePath);

        // the in memory graph to which the triples for the response are added
        final MGraph responseGraph = new IndexedMGraph();
        {
            final LockableMGraph dlcGraph = dlcGraphProvider.getDlcGraph();
            Lock rl = dlcGraph.getLock().readLock();
            rl.lock();
            try {
                responseGraph.addAll(dlcGraph);
            } finally {
                rl.unlock();
            }
        }

        // add available digesters
        Iterator<String> digestersNames = digesters.keySet().iterator();
        while (digestersNames.hasNext()) {
            String digesterName = digestersNames.next();
            responseGraph.add(new TripleImpl(DlcGraphProvider.DATA_LIFECYCLE_GRAPH_REFERENCE, DLC.enhanceService, new UriRef("urn:x-temp:/" + digesterName)));
            responseGraph.add(new TripleImpl(new UriRef("urn:x-temp:/" + digesterName), RDFS.label, new PlainLiteralImpl(digesterName)));
        }

        // add available rdfizers
        Iterator<String> rdfizersNames = rdfizers.keySet().iterator();
        while (rdfizersNames.hasNext()) {
            String rdfizerName = rdfizersNames.next();
            responseGraph.add(new TripleImpl(DlcGraphProvider.DATA_LIFECYCLE_GRAPH_REFERENCE, DLC.rdfizeService, new UriRef("urn:x-temp:/" + rdfizerName)));
            responseGraph.add(new TripleImpl(new UriRef("urn:x-temp:/" + rdfizerName), RDFS.label, new PlainLiteralImpl(rdfizerName)));
        }

        // add available interlinkers
        Iterator<String> interlinkersNames = interlinkers.keySet().iterator();
        while (interlinkersNames.hasNext()) {
            String interlinkerName = interlinkersNames.next();
            NonLiteral interlinkerNode = new BNode();
            responseGraph.add(new TripleImpl(DlcGraphProvider.DATA_LIFECYCLE_GRAPH_REFERENCE, DLC.interlinkService, interlinkerNode));
            responseGraph.add(new TripleImpl(interlinkerNode, RDFS.label, new PlainLiteralImpl(interlinkerName)));
        }

        // This GraphNode represents the service within our result graph
        final GraphNode node = new GraphNode(DlcGraphProvider.DATA_LIFECYCLE_GRAPH_REFERENCE, responseGraph);

        // Adds information about base uri configuration
        if (!isValidBaseUri) {
            responseGraph.add(new TripleImpl(DlcGraphProvider.DATA_LIFECYCLE_GRAPH_REFERENCE, RDFS.comment, new PlainLiteralImpl(INVALID_BASE_URI_ALERT)));
        }

        // The DLC service uri (set in component config panel) should be the same as the base uri
        // (otherwise there might be a base uri config error)
        String platformPort = (uriInfo.getBaseUri().getPort() > 0)
                ? ":" + String.valueOf(uriInfo.getBaseUri().getPort())
                : "";
        String platformBaseUri = uriInfo.getBaseUri().getScheme() + "://" + uriInfo.getBaseUri().getHost() + platformPort;
        if (!platformBaseUri.equals(baseUri.getUnicodeString())) {
            String message = "The DLC service URI " + platformBaseUri + " is different from the base URI "
                    + baseUri + " set in the component configuration.";
            responseGraph.add(new TripleImpl(DlcGraphProvider.DATA_LIFECYCLE_GRAPH_REFERENCE, RDFS.comment, new PlainLiteralImpl(message)));
        }

        for (Task task : tasks) {
            if (task.isActive()) {
                node.addProperty(DLC.activeTask, task.getUri());
                responseGraph.addAll(task.getNode().getGraph());
            }
        }

        // What we return is the GraphNode we created with a template path
        return new RdfViewable("SourcingAdmin", node, SourcingAdmin.class);
    }

    private LockableMGraph getContentGraph() {
        return tcManager.getMGraph(CONTENT_GRAPH_REF);
    }

    /**
     * Creates a new dataset with task and product graphs and adds its URI and
     * a label to the data life cycle graph. A graph will contain the RDF data
     * uploaded or sent by a transformation task that has to be processed (text
     * extraction, NLP processing, reconciliation, smushing). The following
     * graphs are created to store the results of the processing tasks:
     * enhance.graph, interlink.graph, smush.graph, publish.graph. These graphs
     * will be empty at the beginning.
     *
     * @param uriInfo
     * @param graphName
     * @return
     */
    @POST
    @Path("create_pipe")
    @Produces("text/plain")
    public Response createPipeRequest(@Context final UriInfo uriInfo,
            @FormParam("pipe_label") final String pipeLabel) {
        AccessController.checkPermission(new AllPermission());
        // use dataset label as name after validation
        String datasetName = getValidDatasetName(pipeLabel);
        dataSetFactory.createDataSet(datasetName);
        return RedirectUtil.createSeeOtherResponse("./", uriInfo);
    }

    /**
     * Checks whether a label can be used as a dataset name. To be a valid name a
     * label must be: 1) not null and at least one character long, 2) without
     * white spaces, 3) unique (no two datasets can have the same name).
     *
     * @return String
     */
    private String getValidDatasetName(String label) {
        String newDatasetName = null;
        // check validity
        if (label == null || "".equals(label)) {
            return null;
        }
        // replace white space if present
        newDatasetName = label.replace(' ', '-');
        // check uniqueness of name
        Lock rl = dlcGraphProvider.getDlcGraph().getLock().readLock();
        rl.lock();
        try {
            Iterator<Triple> idatasets = dlcGraphProvider.getDlcGraph().filter(null, RDF.type, DLC.Pipe);
            while (idatasets.hasNext()) {
                GraphNode datasetNode = new GraphNode((UriRef) idatasets.next().getSubject(), dlcGraphProvider.getDlcGraph());
                String datasetName = datasetNode.getLiterals(RDFS.label).next().getLexicalForm();
                if (newDatasetName.equals(datasetName)) {
                    return null;
                }
            }
        } finally {
            rl.unlock();
        }
        return newDatasetName;
    }
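
    /*
     * Example request for the create_pipe endpoint above (illustrative only;
     * host, port and credentials depend on the platform setup, and the caller
     * must hold AllPermission):
     *
     *   POST /sourcing/create_pipe
     *   Content-Type: application/x-www-form-urlencoded
     *
     *   pipe_label=patent-sample
     *
     * A label with spaces, e.g. "patent sample", is turned into "patent-sample"
     * by getValidDatasetName; on success the service answers with a 303 redirect
     * back to the sourcing page.
     */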

    /**
     * Uploads data into the source graph of a dataset. The operation codes are:
     * upload XML (operation code: 1), upload RDF (operation code: 2).
     */
    @POST
    @Path("dataUpload")
    @Produces("text/plain")
    public Response dataUpload(@Context final UriInfo uriInfo,
            @FormParam("pipe") final UriRef pipeRef,
            @FormParam("operation_code") final int operationCode,
            @FormParam("data_url") final URL dataUrl,
            @FormParam("rdfizer") final String rdfizer) throws IOException {
        AccessController.checkPermission(new AllPermission());
        // validate arguments and handle all the connection exceptions
        StringWriter stringWriter = new StringWriter();
        PrintWriter messageWriter = new PrintWriter(stringWriter);
        if (pipeExists(pipeRef)) {
            DataSet dataSet = dataSetFactory.getDataSet(pipeRef);
            switch (operationCode) {
                case UPLOAD_RDF:
                    uploadRdf(dataSet, dataUrl, messageWriter);
                    break;
                case UPLOAD_XML:
                    uploadXml(dataSet, dataUrl, rdfizer, messageWriter);
                    break;
            }
        } else {
            messageWriter.println("The dataset does not exist.");
        }
        //return stringWriter.toString();
        return RedirectUtil.createSeeOtherResponse("./", uriInfo);
    }

    /**
     * Uploads RDF files. Each file name must end with .rdf, .ttl, .nt or .n3. A
     * URL that does not end with one of the mentioned extensions, or that ends
     * with a slash, is assumed to refer to a folder in a local file system or on
     * a remote (HTTP) server.
     *
     * @param dataSet
     * @param dataUrl
     * @param messageWriter
     * @throws IOException
     */
    private void uploadRdf(DataSet dataSet, URL dataUrl, PrintWriter messageWriter) throws IOException {
        String[] fileNameExtensions = {".rdf", ".ttl", ".nt", ".n3"};
        // retrieves the list of files to be uploaded
        ArrayList<String> fileList = FileUtil.getFileList(dataUrl, fileNameExtensions);
        Iterator<String> ifile = fileList.iterator();
        while (ifile.hasNext()) {
            URL fileUrl = new URL(ifile.next());
            URLConnection connection = fileUrl.openConnection();
            String mediaType = guessContentTypeFromUri(fileUrl);
            InputStream stream = connection.getInputStream();
            if (stream != null) {
                parser.parse(dataSet.getSourceGraph(), stream, mediaType);
            }
        }
    }

    /**
     * Uploads XML files. Each file name must end with .xml or .nxml. A URL that
     * does not end with one of the mentioned extensions, or that ends with a
     * slash, is assumed to refer to a folder in a local file system or on a
     * remote (HTTP) server.
     *
     * @param dataSet
     * @param dataUrl
     * @param rdfizerName
     * @param messageWriter
     */
    private void uploadXml(DataSet dataSet, URL dataUrl, String rdfizerName, PrintWriter messageWriter) throws IOException {
        Rdfizer rdfizer = rdfizers.get(rdfizerName);
        String[] fileNameExtensions = {".xml", ".nxml"};
        // retrieves the list of files to be uploaded
        ArrayList<String> fileList = FileUtil.getFileList(dataUrl, fileNameExtensions);
        Iterator<String> ifile = fileList.iterator();
        while (ifile.hasNext()) {
            URL fileUrl = new URL(ifile.next());
            URLConnection connection = fileUrl.openConnection();
            InputStream stream = connection.getInputStream();
            if (stream != null) {
                dataSet.getSourceGraph().addAll(rdfizer.transform(stream));
            }
        }
    }
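
    /*
     * Example request for the dataUpload endpoint above (illustrative values;
     * operation_code 1 = upload XML, 2 = upload RDF; the rdfizer field is only
     * used for XML uploads):
     *
     *   POST /sourcing/dataUpload
     *   Content-Type: application/x-www-form-urlencoded
     *
     *   pipe=<dataset-uri>&operation_code=2&data_url=http://example.org/data/sample.ttl&rdfizer=none
     *
     * The dataset URI can be read from the sourcing page after the dataset has
     * been created; the data URL is hypothetical.
     */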

    /**
     * Performs one of the following tasks on the graphs of a dataset: text
     * extraction (task code: 1), enhancement computation (task code: 2),
     * reconciliation/interlinking (task code: 3), smushing (task code: 4),
     * publishing (task code: 5).
     */
    @POST
    @Path("performTask")
    @Produces("text/plain")
    public Response performTaskRequest(@Context final UriInfo uriInfo,
            @FormParam("pipe") final UriRef pipeRef,
            @FormParam("task_code") final int taskCode,
            @FormParam("rdfdigester") final String rdfdigester,
            @FormParam("interlinker") final String interlinker) throws IOException {
        AccessController.checkPermission(new AllPermission());
        // validate arguments and handle all the connection exceptions
        StringWriter stringWriter = new StringWriter();
        PrintWriter messageWriter = new PrintWriter(stringWriter);
        performTask(pipeRef, taskCode, rdfdigester, interlinker, messageWriter);
        //return stringWriter.toString();
        return RedirectUtil.createSeeOtherResponse("./", uriInfo);
    }

    @POST
    @Path("processBatch")
    public Response processBatch(@Context final UriInfo uriInfo,
            @FormParam("dataSet") final UriRef dataSetRef,
            @FormParam("url") final URL url,
            @FormParam("rdfizer") final String rdfizerName,
            @FormParam("digester") final String digester,
            @FormParam("interlinker") final String interlinker,
            @FormParam("maxFiles") @DefaultValue("10") final int maxFiles,
            @FormParam("skipPreviouslyAdded") final String skipPreviouslyAddedValue,
            @FormParam("recurse") final String recurseValue,
            @FormParam("smushAndPublish") final String smushAndPublishValue) throws Exception {
        final boolean skipPreviouslyAdded = "on".equals(skipPreviouslyAddedValue);
        final boolean recurse = "on".equals(recurseValue);
        final boolean smushAndPublish = "on".equals(smushAndPublishValue);
        if (dataSetRef == null) {
            throw new WebApplicationException("Param dataSet must be specified", Response.Status.BAD_REQUEST);
        }
        AccessController.checkPermission(new DlcPermission());
        final DataSet dataSet = dataSetFactory.getDataSet(dataSetRef);
        final Rdfizer rdfizer = rdfizerName.equals("none") ? null : rdfizers.get(rdfizerName);
        Task task = new Task(uriInfo) {

            @Override
            public void execute() {
                try {
                    final int[] count = {0};
                    LinksRetriever.processLinks(url, recurse, new LinksRetriever.LinkProcessor() {

                        public boolean process(URL dataUrl) {
                            if (skipPreviouslyAdded) {
                                Lock lock = dataSet.getLogGraph().getLock().readLock();
                                lock.lock();
                                try {
                                    if (dataSet.getLogGraph().filter(null, DLC.retrievedURI, new UriRef(dataUrl.toString())).hasNext()) {
                                        return true;
                                    }
                                } finally {
                                    lock.unlock();
                                }
                            }
                            if (isTerminationRequested()) {
                                return false;
                            }
                            if (++count[0] > maxFiles) {
                                return false;
                            }
                            try {
                                rdfUploadPublish(dataSet, dataUrl, rdfizer, digester, interlinker, smushAndPublish, log);
                            } catch (Exception e) {
                                log.println("Exception processing " + dataUrl);
                                e.printStackTrace(log);
                            }
                            return true;
                        }
                    });
                } catch (Exception ex) {
                    ex.printStackTrace(log);
                }
            }
        };
        tasks.add(task);
        task.start();
        return Response.seeOther(new URI(task.getUri().getUnicodeString())).build();
    }

    @POST
    @Path("reprocess")
    public Response reprocess(@Context final UriInfo uriInfo,
            @FormParam("dataSet") final UriRef dataSetRef,
            @FormParam("interlinker") final String interlinkerName) throws Exception {
        if (dataSetRef == null) {
            throw new WebApplicationException("Param dataSet must be specified", Response.Status.BAD_REQUEST);
        }
        AccessController.checkPermission(new DlcPermission());
        final DataSet dataSet = dataSetFactory.getDataSet(dataSetRef);
        final Interlinker interlinker = interlinkerName.equals("none") ? null : interlinkers.get(interlinkerName);
        Task task = new Task(uriInfo) {

            @Override
            public void execute() {
                try {
                    if (interlinker != null) {
                        log.println("Interlinking with: " + interlinker);
                        final TripleCollection dataSetInterlinks = interlinker.interlink(dataSet.getDigestGraph(), dataSet.getDigestGraph());
                        dataSet.getInterlinksGraph().addAll(dataSetInterlinks);
                        log.println("Added " + dataSetInterlinks.size() + " data-set interlinks to "
                                + dataSet.getInterlinksGraphRef().getUnicodeString());
                    } else {
                        log.println("No interlinker selected, proceeding.");
                    }
                    // Smush
                    SmushingJob.perform(dataSet, log, baseUri);
                    // Publish
                    publishData(dataSet, log);
                } catch (Exception ex) {
                    ex.printStackTrace(log);
                }
            }
        };
        tasks.add(task);
        task.start();
        return Response.seeOther(new URI(task.getUri().getUnicodeString())).build();
    }

    @GET
    @Path("task/{id}")
    public RdfViewable describeTask(@Context final UriInfo uriInfo) {
        final String resourcePath = uriInfo.getAbsolutePath().toString();
        final UriRef taskUri = new UriRef(resourcePath);
        for (Task task : tasks) {
            if (task.getUri().equals(taskUri)) {
                return new RdfViewable("task", task.getNode(), SourcingAdmin.class);
            }
        }
        throw new WebApplicationException(Response.Status.NOT_FOUND);
    }

    @POST
    @Path("task/{id}")
    public Response actOnTask(@Context final UriInfo uriInfo,
            @FormParam("action") String action) throws URISyntaxException {
        final String resourcePath = uriInfo.getAbsolutePath().toString();
        final UriRef taskUri = new UriRef(resourcePath);
        for (Task task : tasks) {
            if (task.getUri().equals(taskUri)) {
                if ("TERMINATE".equalsIgnoreCase(action)) {
                    task.requestTermination();
                    return Response.seeOther(new URI(task.getUri().getUnicodeString())).build();
                }
                throw new WebApplicationException(Response.Status.BAD_REQUEST);
            }
        }
        throw new WebApplicationException(Response.Status.NOT_FOUND);
    }
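
    /*
     * Example requests for the endpoints above (illustrative values only; the
     * dataset URI and the rdfizer, digester and interlinker names are
     * hypothetical and must match services actually bound to this component):
     *
     *   POST /sourcing/performTask
     *   pipe=<dataset-uri>&task_code=3&rdfdigester=patent&interlinker=silk
     *   (task_code: 1 = text extraction, 2 = enhancements, 3 = reconciliation,
     *    4 = smushing, 5 = publishing)
     *
     *   POST /sourcing/processBatch
     *   dataSet=<dataset-uri>&url=http://example.org/dumps/&rdfizer=none&digester=patent&interlinker=none&maxFiles=5&recurse=on&smushAndPublish=on
     *
     * processBatch answers with a 303 redirect to a task resource
     * (/sourcing/task/{id}); the task can be inspected with GET and stopped by
     * posting action=TERMINATE to the same URI.
     */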

    /**
     * Performs a task on a dataset: digest, interlink, smush, publish.
     *
     * @param pipeRef
     * @param taskCode
     * @param rdfdigester
     * @param interlinker
     * @param messageWriter
     * @throws IOException
     */
    private void performTask(UriRef pipeRef, int taskCode, String rdfdigester, String interlinker,
            PrintWriter messageWriter) throws IOException {
        AccessController.checkPermission(new AllPermission());
        if (pipeExists(pipeRef)) {
            DataSet dataSet = dataSetFactory.getDataSet(pipeRef);
            switch (taskCode) {
                case TEXT_EXTRACTION:
                    extractTextFromRdf(dataSet, rdfdigester, messageWriter);
                    break;
                case COMPUTE_ENHANCEMENTS:
                    computeEnhancements(dataSet, messageWriter);
                    break;
                case RECONCILE_GRAPH_OPERATION:
                    reconcile(dataSet, interlinker, messageWriter);
                    break;
                case SMUSH_GRAPH_OPERATION:
                    SmushingJob.perform(dataSet, messageWriter, baseUri);
                    break;
                case PUBLISH_DATA:
                    publishData(dataSet, messageWriter);
                    break;
            }
        } else {
            messageWriter.println("The pipe does not exist.");
        }
    }

    @POST
    @Path("runsequence")
    @Produces("text/plain")
    public String runSequence(@Context final UriInfo uriInfo,
            @FormParam("pipe") final UriRef pipeRef,
            @FormParam("sequence_code") final int sequenceCode,
            @FormParam("digester") final String digester,
            @FormParam("interlinker") final String interlinker) throws IOException {
        AccessController.checkPermission(new AllPermission());
        StringWriter stringWriter = new StringWriter();
        PrintWriter messageWriter = new PrintWriter(stringWriter);
        messageWriter.println("Pipe: " + pipeRef.getUnicodeString()
                + " Digester: " + digester
                + " Interlinker: " + interlinker);
        if (pipeExists(pipeRef)) {
            DataSet dataSet = dataSetFactory.getDataSet(pipeRef);
            performAllTasks(dataSet, digester, interlinker, messageWriter);
        } else {
            messageWriter.println("The dataset does not exist.");
        }
        return stringWriter.toString();
    }

    /**
     * Uploads and transforms Patent or PubMed XML data into RDF.
     *
     * @param dataUrl
     * @param rdfizer
     * @return
     */
    private MGraph transformXml(DataSet dataSet, URL dataUrl, Rdfizer rdfizer, PrintWriter messageWriter) throws IOException {
        AccessController.checkPermission(new AllPermission());
        // create a graph to store the data after the document transformation
        MGraph documentGraph = null;
        InputStream xmldata = null;
        if (isValidUrl(dataUrl)) {
            try {
                URLConnection connection = dataUrl.openConnection();
                connection.addRequestProperty("Accept", "application/xml; q=1");
                xmldata = connection.getInputStream();
            } catch (FileNotFoundException ex) {
                messageWriter.println("The file " + dataUrl.toString() + " has not been found.");
                throw ex;
            }
        } else {
            messageWriter.println("The URL " + dataUrl.toString() + " is not a valid one.\n");
        }
        int numberOfTriples = 0;
        if (xmldata != null) {
            documentGraph = rdfizer.transform(xmldata);
            numberOfTriples = documentGraph.size();
        }
        if (documentGraph != null && numberOfTriples > 0) {
            // add the triples of the document graph to the source graph of the selected dataset
            Lock wl = dataSet.getSourceGraph().getLock().writeLock();
            wl.lock();
            try {
                dataSet.getSourceGraph().addAll(documentGraph);
            } finally {
                wl.unlock();
            }
            messageWriter.println("Added " + numberOfTriples + " triples from " + dataUrl
                    + " to " + dataSet.getSourceGraphRef().getUnicodeString());
        }
        return documentGraph;
    }

    /**
     * Loads RDF data into an existing graph from a URL (schemes: "file://" or
     * "http://"). The arguments to be passed are: 1) the graph in which the RDF
     * data must be stored, 2) the URL of the dataset. After the upload the input
     * graph is sent to a digester to extract text for indexing, adding entities
     * found by NLP components (in the default chain) as subjects.
     *
     * @return the added triples
     */
    private MGraph addTriples(DataSet dataSet, URL dataUrl, PrintWriter messageWriter) throws IOException {
        AccessController.checkPermission(new AllPermission());
        // add the triples of the temporary graph into the graph selected by the user
        if (isValidUrl(dataUrl)) {
            MGraph updatedGraph = addTriplesCommand(dataSet.getSourceGraph(), dataUrl);
            messageWriter.println("Added " + updatedGraph.size() + " triples from " + dataUrl
                    + " to " + dataSet.getSourceGraphRef().getUnicodeString());
            return updatedGraph;
        } else {
            messageWriter.println("The URL of the data is not a valid one.");
            throw new RuntimeException("Invalid URL: " + dataUrl);
        }
    }

    /**
     * Adds triples to a graph
     */
    private MGraph addTriplesCommand(LockableMGraph targetGraph, URL dataUrl) throws IOException {
        AccessController.checkPermission(new AllPermission());
        URLConnection connection = dataUrl.openConnection();
        connection.addRequestProperty("Accept", "application/rdf+xml; q=.9, text/turtle;q=1");
        // create a temporary graph to store the data
        SimpleMGraph tempGraph = new SimpleMGraph();
        String mediaType = connection.getHeaderField("Content-type");
        if ((mediaType == null) || mediaType.equals("application/octet-stream")) {
            mediaType = guessContentTypeFromUri(dataUrl);
        }
        InputStream data = connection.getInputStream();
        if (data != null) {
            parser.parse(tempGraph, data, mediaType);
            targetGraph.addAll(tempGraph);
        }
        return tempGraph;
    }

    /**
     * Reconciles a source graph against itself and against the content graph.
     * The result of the reconciliation is an equivalence set stored in the
     * interlink graph of the pipe.
     *
     * @param sourceGraphRef the URI of the referenced graph, i.e. the graph for
     * which the reconciliation should be performed.
     * @return String
     */
    private void reconcile(DataSet dataSet, String selectedInterlinker, PrintWriter messageWriter) {
        if (dataSet.getSourceGraph().size() > 0) {
            // size of interlink graph before reconciliations
            int interlinkGraphInitSize = dataSet.getInterlinksGraph().size();
            // reconcile the source graph against itself
            reconcileCommand(dataSet, dataSet.getSourceGraphRef(), dataSet.getSourceGraphRef(), selectedInterlinker);
            // size of interlink graph after reconciliation of source graph against itself
            int interlinkSourceGraphSize = dataSet.getInterlinksGraph().size();
            // new interlinks within source graph
            int numSourceInterlinks = interlinkSourceGraphSize - interlinkGraphInitSize;
            if (numSourceInterlinks > 0) {
                messageWriter.println("A reconciliation task has been done on "
                        + dataSet.getSourceGraphRef().getUnicodeString() + "\n"
                        + numSourceInterlinks + " owl:sameAs statements have been created.");
            } else {
                messageWriter.println("A reconciliation task has been done on "
                        + dataSet.getSourceGraphRef().getUnicodeString()
                        + ". No equivalent entities have been found.");
            }
            // reconcile the source graph against the content graph
            if (getContentGraph().size() > 0) {
                reconcileCommand(dataSet, dataSet.getSourceGraphRef(), CONTENT_GRAPH_REF, selectedInterlinker);
                // size of interlink graph after reconciliation of source graph against content graph
                int interlinkContentGraphSize = dataSet.getInterlinksGraph().size();
                // new interlinks with content graph
                int numContentInterlinks = interlinkContentGraphSize - interlinkSourceGraphSize;
                if (numContentInterlinks > 0) {
                    messageWriter.println("A reconciliation task has been done between "
                            + dataSet.getSourceGraphRef().getUnicodeString() + " and " + CONTENT_GRAPH_NAME + "\n"
                            + numContentInterlinks + " owl:sameAs statements have been created.");
                } else {
                    messageWriter.println("A reconciliation task has been done between "
                            + dataSet.getSourceGraphRef().getUnicodeString() + " and " + CONTENT_GRAPH_NAME
                            + ". No equivalent entities have been found.");
                }
            }
        } else {
            messageWriter.println("The source graph does not exist or is empty.");
        }
    }

    /**
     * Reconciles a source graph with a target graph. The result of the
     * reconciliation is an equivalence set stored in the interlink graph of the
     * pipe. The graph used as source is the source rdf graph.
     */
    private void reconcileCommand(DataSet dataSet, UriRef sourceGraphRef, UriRef targetGraphRef, String selectedInterlinker) {
        if (graphExists(sourceGraphRef)) {
            // Get the source graph from the triple store
            LockableMGraph sourceGraph = dataSet.getSourceGraph();
            // reconcile the source graph with the target graph
            Interlinker interlinker = interlinkers.get(selectedInterlinker);
            TripleCollection owlSameAs = interlinker.interlink(sourceGraph, targetGraphRef);
            if (owlSameAs.size() > 0) {
                LockableMGraph sameAsGraph = dataSet.getInterlinksGraph();
                sameAsGraph.addAll(owlSameAs);
                // add a reference of the equivalence set to the source graph
                dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSet.getInterlinksGraphRef(), DLC.subjectsTarget, sourceGraphRef));
                // add a reference of the equivalence set to the target graph
                dlcGraphProvider.getDlcGraph().add(new TripleImpl(dataSet.getInterlinksGraphRef(), DLC.objectsTarget, targetGraphRef));
            }
        }
    }
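
    /*
     * Illustrative example (hypothetical URIs) of the outcome of reconcileCommand:
     * the interlinker returns owl:sameAs statements that are added to the
     * dataset's interlink graph, e.g.
     *
     *   <urn:x-temp:/res/doc-1> owl:sameAs <http://example.org/resource/doc-1> .
     *
     * and the DLC meta graph records which graphs were compared, via
     * dlc:subjectsTarget and dlc:objectsTarget statements on the interlink
     * graph resource.
     */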

    /**
     * Extracts text from dcterms:title and dcterms:abstract fields in the source
     * graph and adds a sioc:content property with that text in the enhance
     * graph. The text is used by the ECS for indexing. The keywords will be
     * related to the resource so that it can be retrieved.
     *
     * @return
     */
    private void extractTextFromRdf(DataSet dataSet, String selectedDigester, PrintWriter messageWriter) {
        RdfDigester digester = digesters.get(selectedDigester);
        MGraph tempGraph = new IndexedMGraph();
        LockableMGraph sourceGraph = dataSet.getSourceGraph();
        Lock rl = sourceGraph.getLock().readLock();
        rl.lock();
        try {
            tempGraph.addAll(sourceGraph);
        } finally {
            rl.unlock();
        }
        digester.extractText(tempGraph);
        tempGraph.removeAll(sourceGraph);
        dataSet.getDigestGraph().addAll(tempGraph);
        messageWriter.println("Extracted text from " + dataSet.getDigestGraphRef().getUnicodeString()
                + " by " + selectedDigester + " digester");
    }

    /**
     * Sends the digested content to the default chain to compute enhancements,
     * then stores them in the dataset's enhancements graph.
     *
     * @param dataSet
     * @param messageWriter
     */
    private void computeEnhancements(DataSet dataSet, PrintWriter messageWriter) {
        LockableMGraph digestGraph = dataSet.getDigestGraph();
        computeEnhancements(digestGraph, dataSet.getEnhancementsGraph(), messageWriter);
    }

    private void computeEnhancements(LockableMGraph sourceGraph, MGraph targetGraph, PrintWriter messageWriter) {
        Lock digestLock = sourceGraph.getLock().readLock();
        digestLock.lock();
        try {
            Iterator<Triple> isiocStmt = sourceGraph.filter(null, SIOC.content, null);
            while (isiocStmt.hasNext()) {
                Triple stmt = isiocStmt.next();
                UriRef itemRef = (UriRef) stmt.getSubject();
                String content = ((PlainLiteralImpl) stmt.getObject()).getLexicalForm();
                if (content != null && !"".equals(content)) {
                    try {
                        enhance(targetGraph, content, itemRef);
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    } catch (EnhancementException e) {
                        e.printStackTrace();
                    }
                }
            }
        } finally {
            digestLock.unlock();
        }
    }

    /**
     * Adds dc:subject properties to an item pointing to entities which are
     * assumed to be related to the content item. This method uses the
     * enhancementJobManager to extract related entities using NLP engines
     * available in the default chain. The node URI is also the URI of the
     * content item, so that the enhancements will refer to that node. Each
     * enhancement found with a confidence value above a threshold is then added
     * as a dc:subject to the node.
     */
    private void enhance(MGraph targetGraph, String content, UriRef itemRef) throws IOException, EnhancementException {
        final ContentSource contentSource = new ByteArraySource(
                content.getBytes(), "text/plain");
        final ContentItem contentItem = contentItemFactory.createContentItem(
                itemRef, contentSource);
        enhancementJobManager.enhanceContent(contentItem);
        // this contains the enhancement results
        final MGraph contentMetadata = contentItem.getMetadata();
        addSubjects(targetGraph, itemRef, contentMetadata);
    }

    /**
     * Adds dc:subject properties to an item pointing to entities extracted by NLP
     * engines in the default chain. Given a node and a TripleCollection
     * containing fise:Enhancements about that node, dc:subject properties are
     * added to the item pointing to entities referenced by those enhancements if
     * the enhancement confidence value is above a threshold.
     *
     * @param node
     * @param metadata
     */
    private void addSubjects(MGraph targetGraph, UriRef itemRef, TripleCollection metadata) {
        final GraphNode enhancementType = new GraphNode(TechnicalClasses.ENHANCER_ENHANCEMENT, metadata);
        final Set<UriRef> entities = new HashSet<UriRef>();
        // get all the enhancements
        final Iterator<GraphNode> enhancements = enhancementType.getSubjectNodes(RDF.type);
        while (enhancements.hasNext()) {
            final GraphNode enhancement = enhancements.next();
            final Iterator<Literal> confidenceLiterals = enhancement.getLiterals(org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE);
            // look up the confidence value of each enhancement
            double enhancementConfidence = confidenceLiterals.hasNext()
                    ? LiteralFactory.getInstance().createObject(Double.class, (TypedLiteral) confidenceLiterals.next())
                    : 1;
            if (enhancementConfidence >= confidenceThreshold) {
                // get entities referenced in the enhancement
                final Iterator<Resource> referencedEntities = enhancement.getObjects(org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE);
                while (referencedEntities.hasNext()) {
                    final UriRef entity = (UriRef) referencedEntities.next();
                    // Add dc:subject to the item for each referenced entity
                    targetGraph.add(new TripleImpl(itemRef, DC.subject, entity));
                    entities.add(entity);
                }
            }
        }
        for (UriRef uriRef : entities) {
            // We don't get the entity description directly from metadata
            // as the context there would include
            addResourceDescription(uriRef, targetGraph);
        }
    }

    /**
     * Adds a description of the entities extracted from the text by NLP engines
     * in the default chain
     */
    private void addResourceDescription(UriRef iri, MGraph mGraph) {
        final Entity entity = siteManager.getEntity(iri.getUnicodeString());
        if (entity != null) {
            final RdfValueFactory valueFactory = new RdfValueFactory(mGraph);
            final Representation representation = entity.getRepresentation();
            if (representation != null) {
                valueFactory.toRdfRepresentation(representation);
            }
        }
    }
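
    /*
     * Illustrative example (hypothetical URIs) of the triples produced for an
     * item once an enhancement passes the confidence threshold:
     *
     *   <urn:x-temp:/res/doc-1> dc:subject <http://dbpedia.org/resource/Geneva> .
     *
     * followed, via addResourceDescription, by whatever description of the
     * referenced entity the configured Entityhub sites return.
     */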

    /**
     * Moves data from smush.graph to content.graph. The triples (facts) in the
     * two graphs must be coherent, i.e. the same. Before publishing, the current
     * smushed data must be compared with the last published data. New triples
     * in the smushed graph not in the published graph must be added, while
     * triples in the published graph absent from the smushed graph must be
     * removed. The algorithm is as follows:
     * 1) make all URIs in smush.graph HTTP dereferenceable (URI canonicalization)
     * 2) find triples in smush.graph not in publish.graph (new triples)
     * 3) find triples in publish.graph not in smush.graph (old triples)
     * 4) add new triples to content.graph
     * 5) remove old triples from content.graph
     * 6) delete all triples in publish.graph
     * 7) copy triples from smush.graph to publish.graph
     */
    private void publishData(DataSet dataSet, PrintWriter messageWriter) {
        // add these triples to the content.graph
        MGraph triplesToAdd = new SimpleMGraph();
        // remove these triples from the content.graph
        MGraph triplesToRemove = new SimpleMGraph();

        // triples to add to the content.graph
        Lock ls = dataSet.getSmushGraph().getLock().readLock();
        ls.lock();
        try {
            Iterator<Triple> ismush = dataSet.getSmushGraph().iterator();
            while (ismush.hasNext()) {
                Triple smushTriple = ismush.next();
                if (!dataSet.getPublishGraph().contains(smushTriple)) {
                    triplesToAdd.add(smushTriple);
                }
            }
        } finally {
            ls.unlock();
        }

        // triples to remove from the content.graph
        Lock lp = dataSet.getPublishGraph().getLock().readLock();
        lp.lock();
        try {
            Iterator<Triple> ipublish = dataSet.getPublishGraph().iterator();
            while (ipublish.hasNext()) {
                Triple publishTriple = ipublish.next();
                if (!dataSet.getSmushGraph().contains(publishTriple)) {
                    triplesToRemove.add(publishTriple);
                }
            }
        } finally {
            lp.unlock();
        }

        if (triplesToRemove.size() > 0) {
            getContentGraph().removeAll(triplesToRemove);
            log.info("Removed " + triplesToRemove.size() + " triples from " + CONTENT_GRAPH_REF.getUnicodeString());
        } else {
            log.info("No triples to remove from " + CONTENT_GRAPH_REF.getUnicodeString());
        }
        if (triplesToAdd.size() > 0) {
            getContentGraph().addAll(triplesToAdd);
            log.info("Added " + triplesToAdd.size() + " triples to " + CONTENT_GRAPH_REF.getUnicodeString());
        } else {
            log.info("No triples to add to " + CONTENT_GRAPH_REF.getUnicodeString());
        }

        dataSet.getPublishGraph().clear();
        Lock rl = dataSet.getSmushGraph().getLock().readLock();
        rl.lock();
        try {
            dataSet.getPublishGraph().addAll(dataSet.getSmushGraph());
        } finally {
            rl.unlock();
        }

        // update the dataset status to published in the dlc meta graph
        updateDatasetStatus(dataSet);
        messageWriter.println("Copied " + triplesToAdd.size() + " triples from " + dataSet.getUri() + " to content-graph");
    }

    /**
     * Updates the dataset status to published in the dlc meta graph
     *
     * @param datasetName
     */
    private void updateDatasetStatus(DataSet dataSet) {
        UriRef statusRef = new UriRef(dataSet.getUri().getUnicodeString() + "/Status");
        dlcGraphProvider.getDlcGraph().remove(new TripleImpl(statusRef, RDF.type, DLC.Unpublished));
        dlcGraphProvider.getDlcGraph().remove(new TripleImpl(statusRef, RDFS.label, new PlainLiteralImpl("Unpublished")));
        dlcGraphProvider.getDlcGraph().add(new TripleImpl(statusRef, RDF.type, DLC.Published));
        dlcGraphProvider.getDlcGraph().add(new TripleImpl(statusRef, RDFS.label, new PlainLiteralImpl("Published")));
    }

    /**
     * Performs the following tasks in sequence: Enhance, Interlink, Smush,
     * Publish.
     *
     * @param pipeRef
     * @param digester
     * @param interlinker
     * @param mediaType
     * @return
     */
    private void performAllTasks(DataSet dataSet, String digesterName, String interlinkerName, PrintWriter messageWriter) throws IOException {
        // Digest RDF data
        extractTextFromRdf(dataSet, digesterName, messageWriter);
        // compute enhancements
        computeEnhancements(dataSet, messageWriter);
        // Interlink (against itself and content.graph)
        reconcile(dataSet, interlinkerName, messageWriter);
        // Smush
        SmushingJob.perform(dataSet, messageWriter, baseUri);
        // Publish
        publishData(dataSet, messageWriter);
    }

    /**
     * Performs the following tasks in sequence: RDF data upload, Enhance,
     * Interlink, Smush, Publish.
     *
     * @param pipeRef
     * @param dataUrl
     * @param digester
     * @param interlinker
     * @param mediaType
     * @return
     */
    private void rdfUploadPublish(DataSet dataSet, URL dataUrl, Rdfizer rdfizer, String digesterName,
            String interlinkerName, boolean smushAndPublish, PrintWriter messageWriter) throws IOException {

        // Transform to RDF
        TripleCollection addedTriples = rdfizer == null
                ? addTriples(dataSet, dataUrl, messageWriter)
                : transformXml(dataSet, dataUrl, rdfizer, messageWriter);

        // Digest. Add sioc:content and dc:subject predicates
        LockableMGraph digestedTriples = new LockableMGraphWrapper(new IndexedMGraph());
        digestedTriples.addAll(addedTriples);
        RdfDigester digester = digesters.get(digesterName);
        digester.extractText(digestedTriples);
        dataSet.getDigestGraph().addAll(digestedTriples);
        messageWriter.println("Added " + digestedTriples.size() + " digested triples to "
                + dataSet.getDigestGraphRef().getUnicodeString());

        MGraph enhancedTriples = new IndexedMGraph();
        computeEnhancements(digestedTriples, enhancedTriples, messageWriter);
        dataSet.getEnhancementsGraph().addAll(enhancedTriples);
        messageWriter.println("Added " + enhancedTriples.size() + " enhanced triples to "
                + dataSet.getEnhancementsGraphRef().getUnicodeString());

        // Interlink (self)
        if (!interlinkerName.equals("none")) {
            Interlinker interlinker = interlinkers.get(interlinkerName);
            final TripleCollection dataSetInterlinks = interlinker.interlink(digestedTriples, dataSet.getDigestGraphRef());
            dataSet.getInterlinksGraph().addAll(dataSetInterlinks);
            messageWriter.println("Added " + dataSetInterlinks.size() + " data-set interlinks to "
                    + dataSet.getInterlinksGraphRef().getUnicodeString());
            // Interlink (content.graph)
            final TripleCollection contentGraphInterlinks = interlinker.interlink(digestedTriples, CONTENT_GRAPH_REF);
            dataSet.getInterlinksGraph().addAll(contentGraphInterlinks);
            messageWriter.println("Added " + contentGraphInterlinks.size() + " content-graph interlinks to "
                    + dataSet.getInterlinksGraphRef().getUnicodeString());
        }

        if (smushAndPublish) {
            // Smush
            SmushingJob.perform(dataSet, messageWriter, baseUri);
            // Publish
            publishData(dataSet, messageWriter);
        }

        GraphNode logEntry = new GraphNode(new BNode(), dataSet.getLogGraph());
        logEntry.addProperty(RDF.type, DLC.LogEntry);
        logEntry.addProperty(DLC.retrievedURI, new UriRef(dataUrl.toString()));
    }

    /**
     * Validates a URL. A valid URL must start with "file:/" or "http://".
     */
    private boolean isValidUrl(URL url) {
        boolean isValidUrl = false;
        if (url != null) {
            if (url.toString().startsWith("http://") || url.toString().startsWith("file:/")) {
                isValidUrl = true;
            }
        }
        return isValidUrl;
    }

    /**
     * Guesses the content type from the file extension
     *
     * @param url
     * @return
     */
    private String guessContentTypeFromUri(URL url) {
        String contentType = null;
        if (url.getFile().endsWith("ttl")) {
            contentType = "text/turtle";
        } else if (url.getFile().endsWith("nt")) {
            contentType = "text/rdf+nt";
        } else if (url.getFile().endsWith("n3")) {
            contentType = "text/rdf+n3";
        } else if (url.getFile().endsWith("rdf")) {
            contentType = "application/rdf+xml";
        } else if (url.getFile().endsWith("xml")) {
            contentType = "application/xml";
        }
        return contentType;
    }

    /**
     * Checks whether a graph exists and returns a boolean value:
     * true if the graph exists, false if it does not.
     *
     * @param graph_ref
     * @return
     */
    private boolean graphExists(UriRef graph_ref) {
        Set<UriRef> graphs = tcManager.listMGraphs();
        Iterator<UriRef> igraphs = graphs.iterator();
        while (igraphs.hasNext()) {
            UriRef graphRef = igraphs.next();
            if (graph_ref.toString().equals(graphRef.toString())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether a pipe exists, by looking for a resource of type dlc:Pipe
     * with that URI in the data life cycle meta graph.
     */
    private boolean pipeExists(UriRef pipeRef) {
        if (pipeRef == null) {
            return false;
        }
        LockableMGraph dlcGraph = dlcGraphProvider.getDlcGraph();
        Lock rl = dlcGraph.getLock().readLock();
        rl.lock();
        try {
            return dlcGraph.filter(pipeRef, RDF.type, DLC.Pipe).hasNext();
        } finally {
            rl.unlock();
        }
    }

}