/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.rdf.sail.webapp;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.Arrays;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicLong;
import java.util.zip.GZIPInputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.log4j.Logger;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RDFParserFactory;
import org.openrdf.rio.RDFParserRegistry;
import org.openrdf.rio.helpers.RDFHandlerBase;
import org.openrdf.sail.SailException;
import com.bigdata.journal.ITx;
import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection;
import com.bigdata.rdf.sail.BigdataSailRepositoryConnection;
import com.bigdata.rdf.sail.webapp.client.MiniMime;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUpdate;
/**
 * Handler for INSERT operations (HTTP POST).
 * <p>
 * Supports two forms: POST with an RDF payload in the request body, and POST
 * with one or more <code>uri</code> query parameters naming remote resources
 * whose RDF content will be dereferenced and loaded.
 *
 * @author martyncutcher
 */
public class InsertServlet extends BigdataRDFServlet {

    /**
     * Serialization version (servlets are {@link java.io.Serializable}).
     */
    private static final long serialVersionUID = 1L;

    static private final transient Logger log = Logger.getLogger(InsertServlet.class);

    public InsertServlet() {
    }

    /**
     * <p>
     * Perform an HTTP-POST, which corresponds to the basic CRUD operation
     * "create" according to the generic interaction semantics of HTTP REST. The
     * operation will be executed against the target namespace per the URI.
     * </p>
     *
     * <pre>
     * POST [/namespace/NAMESPACE]
     * ...
     * Content-Type:
     * ...
     *
     * BODY
     * </pre>
     * <p>
     * Where <code>BODY</code> is the new RDF content using the representation
     * indicated by the <code>Content-Type</code>.
     * </p>
     * <p>
     * -OR-
     * </p>
     *
     * <pre>
     * POST [/namespace/NAMESPACE] ?uri=URL
     * </pre>
     * <p>
     * Where <code>URI</code> identifies a resource whose RDF content will be
     * inserted into the database. The <code>uri</code> query parameter may
     * occur multiple times. All identified resources will be loaded within a
     * single native transaction. Bigdata provides snapshot isolation so you can
     * continue to execute queries against the last commit point while this
     * operation is executed.
     * </p>
     */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp)
            throws IOException {

        if (!isWritable(getServletContext(), req, resp)) {
            // Service must be writable (response already committed).
            return;
        }

        if (req.getParameter(BigdataRDFContext.URI) != null) {
            // INSERT by dereferencing the resource(s) named by ?uri=...
            doPostWithURIs(req, resp);
        } else {
            // INSERT the statements carried in the request body.
            doPostWithBody(req, resp);
        }

    }

    /**
     * Extract the optional default context(s) from the request.
     * <p>
     * If a context URI is malformed, a 500 response is committed and
     * <code>null</code> is returned: the caller MUST return immediately in
     * that case.
     *
     * @param req
     *            The request.
     * @param resp
     *            The response (used only to report a bad context URI).
     *
     * @return The context(s) (an empty array when none were specified), or
     *         <code>null</code> iff an error response has been committed.
     */
    private Resource[] parseDefaultContexts(final HttpServletRequest req,
            final HttpServletResponse resp) throws IOException {

        final String[] s = req.getParameterValues(BigdataRDFContext.CONTEXT_URI);

        if (s == null || s.length == 0) {
            // No default context(s) were specified.
            return new Resource[0];
        }

        try {
            return toURIs(s);
        } catch (IllegalArgumentException ex) {
            buildAndCommitResponse(resp, HTTP_INTERNALERROR, MIME_TEXT_PLAIN,
                    ex.getLocalizedMessage());
            return null; // response committed; caller must return.
        }

    }

    /**
     * POST with request body containing statements to be inserted.
     *
     * @param req
     *            The request.
     *
     * @throws IOException
     */
    private void doPostWithBody(final HttpServletRequest req,
            final HttpServletResponse resp) throws IOException {

        final String baseURI = req.getRequestURL().toString();

        final String contentType = req.getContentType();

        if (contentType == null) {
            /*
             * Note: Must return here. The code used to fall through after
             * committing the 400 response, which caused an NPE in MiniMime
             * below when the Content-Type header was absent.
             */
            buildAndCommitResponse(resp, HTTP_BADREQUEST, MIME_TEXT_PLAIN,
                    "Content-Type not specified.");
            return;
        }

        if (log.isInfoEnabled())
            log.info("Request body: " + contentType);

        /**
         * Strip off any MIME parameters before conneg.
         *
         * <a href="https://sourceforge.net/apps/trac/bigdata/ticket/620">
         * UpdateServlet fails to parse MIMEType when doing conneg. </a>
         */
        final String mimeTypeStr = new MiniMime(contentType).getMimeType();

        final RDFFormat format = RDFFormat.forMIMEType(mimeTypeStr);

        if (format == null) {
            buildAndCommitResponse(resp, HTTP_BADREQUEST, MIME_TEXT_PLAIN,
                    "Content-Type not recognized as RDF: " + contentType);
            return;
        }

        if (log.isInfoEnabled())
            log.info("RDFFormat=" + format);

        final RDFParserFactory rdfParserFactory = RDFParserRegistry
                .getInstance().get(format);

        if (rdfParserFactory == null) {
            buildAndCommitResponse(resp, HTTP_INTERNALERROR, MIME_TEXT_PLAIN,
                    "Parser factory not found: Content-Type=" + contentType
                            + ", format=" + format);
            return;
        }

        // Allow the caller to specify the default context(s).
        final Resource[] defaultContext = parseDefaultContexts(req, resp);

        if (defaultContext == null) {
            // Error response was committed by parseDefaultContexts().
            return;
        }

        try {

            submitApiTask(
                    new InsertWithBodyTask(req, resp, getNamespace(req),
                            ITx.UNISOLATED, baseURI, defaultContext,
                            rdfParserFactory)).get();

        } catch (Throwable t) {

            BigdataRDFServlet.launderThrowable(t, resp,
                    "INSERT-WITH-BODY: baseURI=" + baseURI + ", Content-Type="
                            + contentType + ", " + BigdataRDFContext.CONTEXT_URI + "="
                            + Arrays.toString(defaultContext));

        }

    }

    /**
     * Mutation task: parses the request body and inserts the statements into
     * the target KB within a single unisolated connection.
     *
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     *
     *         TODO #1056 (Add ability to set RIO options to REST API and
     *         workbench)
     */
    private static class InsertWithBodyTask extends AbstractRestApiTask<Void> {

        /** The base URI used to resolve relative URIs during parsing. */
        private final String baseURI;
        /** Context(s) for statements without an explicit named graph. */
        private final Resource[] defaultContext;
        /** Factory for the parser matching the request Content-Type. */
        private final RDFParserFactory rdfParserFactory;

        /**
         *
         * @param namespace
         *            The namespace of the target KB instance.
         * @param timestamp
         *            The timestamp used to obtain a mutable connection.
         * @param baseURI
         *            The base URI for the operation.
         * @param defaultContext
         *            The context(s) for triples without an explicit named graph
         *            when the KB instance is operating in a quads mode.
         * @param rdfParserFactory
         *            The factory for the {@link RDFParser}. This should have
         *            been chosen based on the caller's knowledge of the
         *            appropriate content type.
         */
        public InsertWithBodyTask(final HttpServletRequest req,
                final HttpServletResponse resp,
                final String namespace, final long timestamp,
                final String baseURI, final Resource[] defaultContext,
                final RDFParserFactory rdfParserFactory) {
            super(req, resp, namespace, timestamp);
            this.baseURI = baseURI;
            this.defaultContext = defaultContext;
            this.rdfParserFactory = rdfParserFactory;
        }

        @Override
        public boolean isReadOnly() {
            return false; // mutation task.
        }

        @Override
        public Void call() throws Exception {

            final long begin = System.currentTimeMillis();

            // #statements added (shared with the RDFHandler).
            final AtomicLong nmodified = new AtomicLong(0L);

            BigdataSailRepositoryConnection conn = null;
            boolean success = false;
            try {

                conn = getConnection();

                /**
                 * There is a request body, so let's try and parse it.
                 *
                 * FIXME This does not handle .gz or .zip files. We handle this
                 * in the
                 *
                 * @see <a href="http://trac.blazegraph.com/ticket/991" >REST API:
                 *      INSERT does not handle .gz</a>
                 */
                final RDFParser rdfParser = rdfParserFactory.getParser();

                rdfParser.setValueFactory(conn.getTripleStore()
                        .getValueFactory());

                rdfParser.setVerifyData(true);

                rdfParser.setStopAtFirstError(true);

                rdfParser
                        .setDatatypeHandling(RDFParser.DatatypeHandling.IGNORE);

                rdfParser.setRDFHandler(new AddStatementHandler(conn
                        .getSailConnection(), nmodified, defaultContext));

                /*
                 * Run the parser, which will cause statements to be inserted.
                 */
                rdfParser.parse(req.getInputStream(), baseURI);

                // Commit the mutation.
                conn.commit();

                success = true;

                final long elapsed = System.currentTimeMillis() - begin;

                reportModifiedCount(nmodified.get(), elapsed);

                return (Void) null;

            } finally {

                if (conn != null) {

                    if (!success)
                        conn.rollback();

                    conn.close();

                }

            }

        }

    }

    /**
     * POST with URIs of resources to be inserted (loads the referenced
     * resources).
     *
     * @param req
     *            The request.
     *
     * @throws IOException
     */
    private void doPostWithURIs(final HttpServletRequest req,
            final HttpServletResponse resp) throws IOException {

        final String namespace = getNamespace(req);

        final String[] uris = req.getParameterValues(BigdataRDFContext.URI);

        if (uris == null || uris.length == 0) {

            buildAndCommitResponse(resp, HttpServletResponse.SC_BAD_REQUEST,
                    MIME_TEXT_PLAIN,
                    "Parameter must be specified one or more times: '"
                            + BigdataRDFContext.URI + "'");

            return;

        }

        if (log.isInfoEnabled())
            log.info("URIs: " + Arrays.toString(uris));

        /*
         * Before we do anything, make sure we have valid URLs. Note: a
         * MalformedURLException propagates as an IOException.
         */
        final Vector<URL> urls = new Vector<URL>(uris.length);

        for (String uri : uris) {

            urls.add(new URL(uri));

        }

        // Allow the caller to specify the default context(s).
        final Resource[] defaultContext = parseDefaultContexts(req, resp);

        if (defaultContext == null) {
            // Error response was committed by parseDefaultContexts().
            return;
        }

        try {

            submitApiTask(
                    new InsertWithURLsTask(req, resp, namespace,
                            ITx.UNISOLATED, defaultContext, urls)).get();

        } catch (Throwable t) {

            launderThrowable(
                    t,
                    resp,
                    BigdataRDFContext.URI + "=" + urls + ", "
                            + BigdataRDFContext.CONTEXT_URI + "="
                            + Arrays.toString(defaultContext));

        }

    }

    /**
     * Mutation task: dereferences each URL, parses its RDF content, and
     * inserts the statements into the target KB within a single unisolated
     * connection.
     */
    private static class InsertWithURLsTask extends AbstractRestApiTask<Void> {

        /** The URLs to dereference and load. */
        private final Vector<URL> urls;
        /** Context(s) for statements without an explicit named graph. */
        private final Resource[] defaultContext;

        /**
         *
         * @param namespace
         *            The namespace of the target KB instance.
         * @param timestamp
         *            The timestamp used to obtain a mutable connection.
         * @param defaultContext
         *            The context(s) for triples without an explicit named graph
         *            when the KB instance is operating in a quads mode.
         * @param urls
         *            The {@link URL}s whose contents will be parsed and loaded
         *            into the target KB.
         */
        public InsertWithURLsTask(final HttpServletRequest req,
                final HttpServletResponse resp, final String namespace,
                final long timestamp, final Resource[] defaultContext,
                final Vector<URL> urls) {
            super(req, resp, namespace, timestamp);
            this.urls = urls;
            this.defaultContext = defaultContext;
        }

        @Override
        public boolean isReadOnly() {
            return false; // mutation task.
        }

        @Override
        public Void call() throws Exception {

            final long begin = System.currentTimeMillis();

            BigdataSailRepositoryConnection conn = null;
            boolean success = false;
            try {

                conn = getConnection();

                final AtomicLong nmodified = new AtomicLong(0L);

                for (URL url : urls) {

                    /*
                     * Use the default context(s) if given and otherwise the
                     * URL from which the data are being read.
                     */
                    final Resource[] defactoContext = defaultContext.length == 0 ? new Resource[] { new URIImpl(
                            url.toExternalForm()) } : defaultContext;

                    URLConnection hconn = null;
                    try {

                        hconn = url.openConnection();
                        if (hconn instanceof HttpURLConnection) {
                            ((HttpURLConnection) hconn).setRequestMethod("GET");
                        }
                        hconn.setDoInput(true);
                        hconn.setDoOutput(false);
                        hconn.setReadTimeout(0);// no timeout? http param?

                        /**
                         * There is a request body, so let's try and parse it.
                         *
                         * @see <a href=
                         *      "https://sourceforge.net/apps/trac/bigdata/ticket/620"
                         *      > UpdateServlet fails to parse MIMEType when
                         *      doing conneg. </a>
                         */
                        final String contentType = hconn.getContentType();

                        RDFFormat format = RDFFormat.forMIMEType(new MiniMime(
                                contentType).getMimeType());

                        final String fileName = url.getPath();

                        if (format == null) {
                            /*
                             * Try to get the RDFFormat from the URL's file
                             * path.
                             */
                            //BLZG-1929
                            format = AST2BOpUpdate.rdfFormatForFile (fileName);
                        }

                        if (format == null) {
                            throw new HttpOperationException(HTTP_BADREQUEST,
                                    MIME_TEXT_PLAIN,
                                    "Content-Type not recognized as RDF: "
                                            + contentType);
                        }

                        final RDFParserFactory rdfParserFactory = RDFParserRegistry
                                .getInstance().get(format);

                        if (rdfParserFactory == null) {
                            throw new HttpOperationException(HTTP_INTERNALERROR,
                                    MIME_TEXT_PLAIN,
                                    "Parser not found: Content-Type="
                                            + contentType);
                        }

                        final RDFParser rdfParser = rdfParserFactory
                                .getParser();

                        rdfParser.setValueFactory(conn.getTripleStore()
                                .getValueFactory());

                        rdfParser.setVerifyData(true);

                        rdfParser.setStopAtFirstError(true);

                        rdfParser
                                .setDatatypeHandling(RDFParser.DatatypeHandling.IGNORE);

                        rdfParser
                                .setRDFHandler(new AddStatementHandler(conn
                                        .getSailConnection(), nmodified,
                                        defactoContext));

                        /*
                         * Run the parser, which will cause statements to be
                         * inserted.
                         *
                         * Note: the stream is explicitly closed since
                         * disconnect() below only applies to HTTP connections
                         * (e.g., not to file: URLs). The gzip wrapper now
                         * decorates the SAME stream rather than requesting a
                         * second one from the connection (see #991).
                         */
                        InputStream is = hconn.getInputStream();
                        try {
                            if (fileName.endsWith(".gz")) {
                                // Transparently decompress gzip'd resources.
                                is = new GZIPInputStream(is);
                            }
                            rdfParser.parse(is,
                                    url.toExternalForm()/* baseURL */);
                        } finally {
                            is.close();
                        }

                    } finally {

                        if (hconn instanceof HttpURLConnection) {
                            /*
                             * Disconnect, but only after we have loaded all the
                             * URLs. Disconnect is optional for java.net. It is
                             * a hint that you will not be accessing more
                             * resources on the connected host. By disconnecting
                             * only after all resources have been loaded we are
                             * basically assuming that people are more likely to
                             * load from a single host.
                             */
                            ((HttpURLConnection) hconn).disconnect();
                        }

                    }

                } // next URI.

                // Commit the mutation.
                conn.commit();

                success = true;

                final long elapsed = System.currentTimeMillis() - begin;

                reportModifiedCount(nmodified.get(), elapsed);

                return null;

            } finally {

                if (conn != null) {

                    if (!success)
                        conn.rollback();

                    conn.close();

                }

            }

        }

    }

    /**
     * Helper class adds statements to the sail as they are visited by a parser.
     */
    static class AddStatementHandler extends RDFHandlerBase {

        /** The connection statements are written onto. */
        private final BigdataSailConnection conn;
        /** Running count of statements added (shared with the caller). */
        private final AtomicLong nmodified;
        /** Context(s) applied when a statement has no explicit context. */
        private final Resource[] defaultContext;

        /**
         *
         * @param conn
         * @param nmodified
         * @param defaultContext
         *            Only used if the statements themselves do not have a
         *            context (and only honored in quads mode).
         */
        public AddStatementHandler(final BigdataSailConnection conn,
                final AtomicLong nmodified, final Resource... defaultContext) {
            this.conn = conn;
            this.nmodified = nmodified;
            final boolean quads = conn.getTripleStore().isQuads();
            if (quads && defaultContext != null) {
                // The context may only be specified for quads.
                this.defaultContext = defaultContext;
            } else {
                this.defaultContext = new Resource[0];
            }
        }

        @Override
        public void handleStatement(final Statement stmt)
                throws RDFHandlerException {

            // Explicit context on the statement wins over the default(s).
            final Resource[] c = stmt.getContext() == null
                    ? defaultContext
                    : new Resource[] { stmt.getContext() };

            try {

                conn.addStatement(//
                        stmt.getSubject(), //
                        stmt.getPredicate(), //
                        stmt.getObject(), //
                        c
                        );

            } catch (SailException e) {

                throw new RDFHandlerException(e);

            }

            if (c.length >= 2) {
                // added to more than one context
                nmodified.addAndGet(c.length);
            } else {
                nmodified.incrementAndGet();
            }

        }

    }

}