package eu.fbk.knowledgestore.internal.rdf;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Field;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import javax.annotation.Nullable;
import com.google.common.base.Function;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Namespace;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.NamespaceImpl;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.QueryResultHandlerException;
import org.openrdf.query.TupleQueryResultHandlerBase;
import org.openrdf.query.TupleQueryResultHandlerException;
import org.openrdf.query.resultio.BasicQueryWriterSettings;
import org.openrdf.query.resultio.BooleanQueryResultFormat;
import org.openrdf.query.resultio.BooleanQueryResultParser;
import org.openrdf.query.resultio.BooleanQueryResultWriter;
import org.openrdf.query.resultio.QueryResultIO;
import org.openrdf.query.resultio.TupleQueryResultFormat;
import org.openrdf.query.resultio.TupleQueryResultParser;
import org.openrdf.query.resultio.TupleQueryResultWriter;
import org.openrdf.rio.ParserConfig;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.Rio;
import org.openrdf.rio.RioSetting;
import org.openrdf.rio.WriterConfig;
import org.openrdf.rio.helpers.BasicParserSettings;
import org.openrdf.rio.helpers.BasicWriterSettings;
import org.openrdf.rio.helpers.JSONLDMode;
import org.openrdf.rio.helpers.JSONLDSettings;
import org.openrdf.rio.helpers.NTriplesParserSettings;
import org.openrdf.rio.helpers.RDFHandlerBase;
import org.openrdf.rio.helpers.RDFJSONParserSettings;
import org.openrdf.rio.helpers.RDFParserBase;
import org.openrdf.rio.helpers.TriXParserSettings;
import org.openrdf.rio.helpers.XMLParserSettings;
import org.openrdf.rio.helpers.XMLWriterSettings;
import org.slf4j.Logger;
import info.aduna.iteration.CloseableIteration;
import info.aduna.iteration.Iteration;
import eu.fbk.knowledgestore.data.Data;
import eu.fbk.knowledgestore.data.Handler;
import eu.fbk.knowledgestore.data.ParseException;
import eu.fbk.knowledgestore.data.Stream;
import eu.fbk.knowledgestore.internal.Compression;
import eu.fbk.knowledgestore.internal.Logging;
import eu.fbk.knowledgestore.internal.Util;
import eu.fbk.rdfpro.jsonld.JSONLD;
import eu.fbk.rdfpro.tql.TQL;
// TODO: reorganize code in this class
public final class RDFUtil {
public static final String PROPERTY_VARIABLES = "variables";
private static boolean jsonldDisabled = false;
/**
 * Emits an HTML rendering of the supplied RDF value to the given sink. Literals become a
 * {@code <span>} whose {@code title} attribute reports the language tag or datatype; BNodes
 * are emitted as {@code _:id}; URIs become links, abbreviated via the supplied prefix map or
 * the default namespace map when possible. All dynamic text is HTML-escaped, so values
 * containing markup characters ({@code <}, {@code &}, {@code "}) no longer break the produced
 * markup (previously they were emitted verbatim).
 *
 * @param value the RDF value to render
 * @param prefixes optional namespace-to-prefix map used to abbreviate URIs
 * @param sink the sink where the HTML fragment is appended
 * @throws IOException on failure writing to the sink
 */
public static void toHtml(final Value value, @Nullable final Map<String, String> prefixes,
        final Appendable sink) throws IOException {
    if (value instanceof Literal) {
        final Literal literal = (Literal) value;
        sink.append("<span");
        if (literal.getLanguage() != null) {
            sink.append(" title=\"@").append(escapeHtml(literal.getLanguage())).append("\"");
        } else if (literal.getDatatype() != null) {
            sink.append(" title=\"<").append(escapeHtml(literal.getDatatype().stringValue()))
                    .append(">\"");
        }
        sink.append(">").append(escapeHtml(value.stringValue())).append("</span>");
    } else if (value instanceof BNode) {
        sink.append("_:").append(escapeHtml(((BNode) value).getID()));
    } else if (value instanceof URI) {
        final URI uri = (URI) value;
        sink.append("<a href=\"").append(escapeHtml(uri.stringValue())).append("\">");
        String prefix = null;
        if (prefixes != null) {
            prefix = prefixes.get(uri.getNamespace());
        }
        if (prefix == null) {
            prefix = Data.namespaceToPrefix(uri.getNamespace(), Data.getNamespaceMap());
        }
        if (prefix != null) {
            sink.append(escapeHtml(prefix)).append(':').append(escapeHtml(uri.getLocalName()));
        } else {
            // no prefix available: show an abbreviated <..> form (last path segment only),
            // or the full URI if it contains no '/'
            final int index = uri.stringValue().lastIndexOf('/');
            if (index >= 0) {
                sink.append("&lt;..").append(escapeHtml(uri.stringValue().substring(index)))
                        .append("&gt;");
            } else {
                sink.append("&lt;").append(escapeHtml(uri.stringValue())).append("&gt;");
            }
        }
        sink.append("</a>");
    }
}

/** Escapes the characters of the given string that are special in HTML text and attributes. */
private static String escapeHtml(final String string) {
    final StringBuilder builder = new StringBuilder(string.length() + 16);
    for (int i = 0; i < string.length(); ++i) {
        final char ch = string.charAt(i);
        if (ch == '&') {
            builder.append("&amp;");
        } else if (ch == '<') {
            builder.append("&lt;");
        } else if (ch == '>') {
            builder.append("&gt;");
        } else if (ch == '"') {
            builder.append("&quot;");
        } else {
            builder.append(ch);
        }
    }
    return builder.toString();
}
/**
 * Converts an iteration of SPARQL binding sets into a stream of statements. Each binding set
 * is expected to bind the variables {@code subject}, {@code predicate}, {@code object} and,
 * optionally, {@code context}; binding sets whose values cannot form a valid statement (or
 * quad) are mapped to null.
 *
 * @param iteration the iteration of binding sets to convert
 * @return a stream of statements (possibly containing nulls for invalid bindings)
 */
public static Stream<Statement> toStatementStream(
        final Iteration<? extends BindingSet, ?> iteration) {
    Preconditions.checkNotNull(iteration);
    final Function<BindingSet, Statement> mapper = new Function<BindingSet, Statement>() {
        @Override
        @Nullable
        public Statement apply(final BindingSet bindings) {
            final Value s = bindings.getValue("subject");
            final Value p = bindings.getValue("predicate");
            final Value o = bindings.getValue("object");
            final Value c = bindings.getValue("context");
            // require a Resource subject, a URI predicate and a non-null object
            if (!(s instanceof Resource) || !(p instanceof URI) || o == null) {
                return null;
            }
            if (c == null) {
                return Data.getValueFactory().createStatement((Resource) s, (URI) p, o);
            }
            // a bound context must itself be a Resource to produce a quad
            return c instanceof Resource ? Data.getValueFactory().createStatement((Resource) s,
                    (URI) p, o, (Resource) c) : null;
        }
    };
    return Stream.create(iteration).transform(mapper, 0);
}
/**
 * Converts a closeable iteration of binding sets into a stream of compact binding sets over
 * the supplied variables; the variable list is also attached to the returned stream under
 * property {@link #PROPERTY_VARIABLES}.
 *
 * @param iteration the iteration of binding sets to convert
 * @param variables the output variables, in order
 * @return a stream of compact binding sets
 */
public static Stream<BindingSet> toBindingsStream(
        final CloseableIteration<BindingSet, QueryEvaluationException> iteration,
        final Iterable<? extends String> variables) {
    Preconditions.checkNotNull(iteration);
    final List<String> varList = ImmutableList.copyOf(variables);
    final CompactBindingSet.Builder builder = CompactBindingSet.builder(varList);
    final Function<BindingSet, BindingSet> compactor = new Function<BindingSet, BindingSet>() {
        @Override
        @Nullable
        public BindingSet apply(final BindingSet bindings) {
            // copy each requested variable into the (reused) builder, then snapshot it
            for (final String variable : varList) {
                builder.set(variable, bindings.getValue(variable));
            }
            return builder.build();
        }
    };
    return Stream.create(iteration).transform(compactor, 0).setProperty(PROPERTY_VARIABLES,
            varList);
}
/**
 * Scans the prolog of a SPARQL query (comments, PREFIX and BASE declarations) and returns the
 * index of the first character after it, i.e., where the query form is expected to start.
 *
 * @param string the SPARQL query string
 * @return the index of the first character following the prolog
 * @throws ParseException if the end of the string is reached while still inside the prolog
 */
public static int detectSparqlProlog(final String string) {
    for (int i = 0; i < string.length(); ++i) {
        final char c = string.charAt(i);
        if (c == '#') {
            i = skipTo(string, i, '\n'); // skip comment up to end of line
        } else if (c == 'p' || c == 'P' || c == 'b' || c == 'B') {
            i = skipTo(string, i, '>'); // skip PREFIX/BASE declaration up to closing '>'
        } else if (!Character.isWhitespace(c)) {
            return i; // first non-prolog character: the query form starts here
        }
    }
    throw new ParseException(string, "Cannot detect SPARQL prolog");
}

/** Returns the index of the first {@code ch} at or after {@code index}, or the string length. */
private static int skipTo(final String string, final int index, final char ch) {
    final int pos = string.indexOf(ch, index);
    return pos >= 0 ? pos : string.length();
}
/**
 * Returns the form of the supplied SPARQL query, i.e., one of the lowercase strings
 * {@code select}, {@code construct}, {@code describe}, {@code ask}.
 * <p>
 * Two defects of the previous implementation are fixed: the form token is now also accepted
 * when it ends the string (e.g. a query consisting of just {@code "ASK"}), and the lowercase
 * conversion is locale-independent ({@code toLowerCase()} without a locale mangles
 * {@code DESCRIBE} under a Turkish default locale, where {@code 'I'} maps to a dotless i).
 *
 * @param string the SPARQL query string
 * @return the lowercase query form
 * @throws ParseException if the query form cannot be detected or is invalid
 */
public static String detectSparqlForm(final String string) {
    final int start = detectSparqlProlog(string);
    int end = start;
    // scan up to the first whitespace character, or the end of the string
    while (end < string.length() && !Character.isWhitespace(string.charAt(end))) {
        ++end;
    }
    if (end == start) {
        throw new ParseException(string, "Cannot detect query form");
    }
    final String form = string.substring(start, end).toLowerCase(java.util.Locale.ROOT);
    if (!form.equals("select") && !form.equals("construct") && !form.equals("describe")
            && !form.equals("ask")) {
        throw new ParseException(string, "Invalid query form: " + form);
    }
    return form;
}
/**
 * Writes the binding sets of the supplied stream to an output stream, using the given SPARQL
 * tuple result format, and returns the number of solutions written.
 * <p>
 * The variable list for the result header is read from the stream property
 * {@link #PROPERTY_VARIABLES} (attached e.g. by {@link #toBindingsStream}). Document, header
 * and result-start events are emitted lazily upon the first handled element; a null element
 * marks the end of the stream and triggers {@code endQueryResult()}. The stream is always
 * closed on return.
 *
 * @param format the tuple query result format to use
 * @param out the stream where the serialized result is written
 * @param stream the stream of binding sets to serialize (closed by this method)
 * @return the number of solutions written
 */
public static long writeSparqlTuples(final TupleQueryResultFormat format,
final OutputStream out, final Stream<? extends BindingSet> stream) {
final TupleQueryResultWriter writer = RDFUtil.newSparqlTupleWriter(format, out);
try {
final AtomicLong result = new AtomicLong();
stream.toHandler(new Handler<BindingSet>() {
private boolean started = false; // whether document/header have been emitted
private long count = 0L; // number of solutions written so far
@Override
public void handle(final BindingSet bindings) throws QueryResultHandlerException {
if (!this.started) {
// lazily emit document/header, using the variables attached to the stream
@SuppressWarnings("unchecked")
final List<String> variables = (List<String>) stream.getProperty(
PROPERTY_VARIABLES, Object.class);
writer.startDocument();
writer.startHeader();
writer.startQueryResult(variables);
this.started = true;
}
if (bindings != null) {
writer.handleSolution(bindings);
++this.count;
} else if (this.started) {
// a null element signals end of stream: close the result, publish the count
writer.endQueryResult();
result.set(this.count);
}
}
});
return result.get();
} catch (final Exception ex) {
throw Throwables.propagate(ex);
} finally {
Util.closeQuietly(stream);
}
}
/**
 * Returns a lazy stream of binding sets parsed from the supplied input stream, according to
 * the given SPARQL tuple result format.
 * <p>
 * Parsing occurs only when the returned stream is consumed. The caller's MDC logging context
 * is captured now and installed for the duration of the parse. The declared result variables
 * are attached to the stream as property {@link #PROPERTY_VARIABLES}, and a final null
 * element is emitted after the last solution to signal the end of the results.
 *
 * @param format the tuple query result format to parse
 * @param in the input stream to read from
 * @return a lazy stream of compact binding sets, terminated by a null element
 */
public static Stream<BindingSet> readSparqlTuples(final TupleQueryResultFormat format,
final InputStream in) {
// Create a parser for the specified format
final TupleQueryResultParser parser = newSparqlTupleParser(format);
// Return a source over parsed bindings
final Map<String, String> mdc = Logging.getMDC();
return new Stream<BindingSet>() {
@Override
protected void doToHandler(final Handler<? super BindingSet> handler) throws Throwable {
final Map<String, String> oldMdc = Logging.getMDC();
try {
Logging.setMDC(mdc);
parser.setQueryResultHandler(new TupleQueryResultHandlerBase() {
// builder reused for all solutions; created once the variables are known
private CompactBindingSet.Builder builder;
@Override
public void startQueryResult(final List<String> vars)
throws TupleQueryResultHandlerException {
// expose the declared variables as a stream property and set up the builder
final List<String> variables = ImmutableList.copyOf(vars);
setProperty(PROPERTY_VARIABLES, variables);
this.builder = CompactBindingSet.builder(variables);
}
@Override
public void handleSolution(final BindingSet bindings)
throws TupleQueryResultHandlerException {
if (bindings != null) {
emit(bindings);
}
}
@Override
public void endQueryResult() throws TupleQueryResultHandlerException {
// a trailing null marks the end of the results for downstream consumers
emit(null);
}
private void emit(final BindingSet bindings)
throws TupleQueryResultHandlerException {
try {
// compact the bindings (null passes through unchanged) and forward them
BindingSet compactBindings = bindings;
if (bindings != null) {
this.builder.setAll(bindings);
compactBindings = this.builder.build();
}
handler.handle(compactBindings);
} catch (final Throwable ex) {
Throwables.propagateIfPossible(ex,
TupleQueryResultHandlerException.class);
throw new TupleQueryResultHandlerException(ex);
}
}
});
parser.parseQueryResult(in);
} finally {
Logging.setMDC(oldMdc);
}
}
};
}
/**
 * Writes a boolean SPARQL result to the supplied output stream, using the specified format.
 *
 * @param format the boolean query result format to use
 * @param out the stream where the result is written
 * @param value the boolean value to emit
 */
public static void writeSparqlBoolean(final BooleanQueryResultFormat format,
        final OutputStream out, final boolean value) {
    final BooleanQueryResultWriter writer = newSparqlBooleanWriter(format, out);
    try {
        writer.startDocument();
        writer.startHeader();
        writer.handleBoolean(value);
    } catch (final Exception ex) {
        // rethrow unchecked; 'throw' added for consistency with the other methods of this
        // class (Throwables.propagate always throws, but the compiler cannot see that)
        throw Throwables.propagate(ex);
    }
}
/**
 * Parses a boolean SPARQL result from the supplied input stream, using the given format.
 *
 * @param format the boolean query result format to parse
 * @param in the input stream to read from
 * @return the parsed boolean value
 */
public static boolean readSparqlBoolean(final BooleanQueryResultFormat format,
        final InputStream in) {
    final BooleanQueryResultParser parser = newSparqlBooleanReader(format);
    final AtomicBoolean holder = new AtomicBoolean();
    parser.setQueryResultHandler(new TupleQueryResultHandlerBase() {
        @Override
        public void handleBoolean(final boolean result) throws QueryResultHandlerException {
            holder.set(result); // capture the single boolean emitted by the parser
        }
    });
    try {
        parser.parseQueryResult(in);
    } catch (final Exception ex) {
        throw Throwables.propagate(ex);
    }
    return holder.get();
}
/**
 * Writes the statements of the supplied stream to an output stream using the given RDF
 * format, and returns the number of statements written.
 * <p>
 * Optional namespace bindings and writer settings may be supplied. If the stream carries a
 * "types" property holding a Set, it is forwarded as the JSON-LD root types setting of the
 * rdfpro-jsonld library; any Throwable while accessing that setting (e.g. the library being
 * absent from the classpath) permanently disables this customization for the JVM. Start/end
 * events are emitted lazily, with a null element marking the end of the stream; the stream
 * is always closed on return.
 *
 * @param out the stream where serialized RDF is written
 * @param format the RDF format to use
 * @param namespaces optional namespace-to-prefix bindings to emit
 * @param settings optional writer settings overriding the defaults
 * @param stream the stream of statements to serialize (closed by this method)
 * @return the number of statements written
 */
public static long writeRDF(final OutputStream out, final RDFFormat format,
@Nullable final Map<String, String> namespaces,
@Nullable final Map<? extends RioSetting<?>, ? extends Object> settings,
final Stream<? extends Statement> stream) {
// merge caller settings into a mutable map so extra settings can be injected below
final Map<RioSetting<?>, Object> actualSettings = Maps.newHashMap();
if (settings != null) {
actualSettings.putAll(settings);
}
final Object types = stream.getProperty("types", Object.class);
if (types instanceof Set && !jsonldDisabled) {
try {
// referencing JSONLD.ROOT_TYPES may fail if rdfpro-jsonld is not on the classpath
actualSettings.put(JSONLD.ROOT_TYPES, types);
} catch (final Throwable ex) {
jsonldDisabled = true; // rdfpro-jsonld not available
}
}
try {
final RDFHandler handler = writeRDF(out, format, namespaces, actualSettings);
final AtomicLong result = new AtomicLong();
stream.toHandler(new Handler<Statement>() {
private boolean started = false; // whether startRDF() has been emitted
private long count = 0L; // number of statements written so far
@Override
public void handle(final Statement statement) throws RDFHandlerException {
if (!this.started) {
// lazily start the RDF document on the first handled element
handler.startRDF();
this.started = true;
}
if (statement != null) {
handler.handleStatement(statement);
++this.count;
} else if (this.started) {
// a null element signals end of stream: close the document, publish count
handler.endRDF();
result.set(this.count);
}
}
});
return result.get();
} catch (final Exception ex) {
throw Throwables.propagate(ex);
} finally {
Util.closeQuietly(stream);
}
}
/**
 * Creates an RDFHandler that serializes received statements to the supplied output stream in
 * the given RDF format, applying default writer settings (pretty printing, lang/plain literal
 * normalization), optional caller-supplied settings and, if namespaces are supplied, wrapping
 * the writer so that namespace declarations are emitted.
 *
 * @param out the stream where serialized RDF is written
 * @param format the RDF format to use
 * @param namespaces optional namespace-to-prefix bindings to emit
 * @param settings optional writer settings overriding the defaults
 * @return the configured handler (the bare writer, or a namespace-emitting wrapper)
 * @throws IOException declared for API compatibility
 * @throws RDFHandlerException declared for API compatibility
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static RDFHandler writeRDF(final OutputStream out, final RDFFormat format,
        @Nullable final Map<String, String> namespaces,
        @Nullable final Map<? extends RioSetting<?>, ? extends Object> settings)
        throws IOException, RDFHandlerException {
    final RDFWriter writer = Rio.createWriter(format, out);
    final WriterConfig config = writer.getWriterConfig();
    // defaults applied to every format
    config.set(BasicWriterSettings.PRETTY_PRINT, true);
    config.set(BasicWriterSettings.RDF_LANGSTRING_TO_LANG_LITERAL, true);
    config.set(BasicWriterSettings.XSD_STRING_TO_PLAIN_LITERAL, true);
    // emit a complete, standalone document for RDF/XML
    if (format.equals(RDFFormat.RDFXML)) {
        config.set(XMLWriterSettings.INCLUDE_XML_PI, true);
        config.set(XMLWriterSettings.INCLUDE_ROOT_RDF_TAG, true);
    }
    // caller-supplied settings override the defaults above
    if (settings != null) {
        for (final Map.Entry entry : settings.entrySet()) {
            config.set((RioSetting) entry.getKey(), entry.getValue());
        }
    }
    if (namespaces == null) {
        return writer;
    }
    return newNamespaceHandler(writer, namespaces, null);
}
/**
 * Returns a lazy stream of statements parsed from the supplied input stream in the given
 * RDF format.
 * <p>
 * Parsing occurs only when the returned stream is consumed; the caller's MDC logging context
 * is captured now and installed for the duration of the parse. A final null element is
 * emitted after the last statement (on {@code endRDF()}) to signal the end of the data.
 *
 * @param in the input stream to read from
 * @param format the RDF format to parse
 * @param namespaces optional initial namespace bindings (see the handler-based overload)
 * @param base optional base URI for resolving relative URIs
 * @param preserveBNodes whether to preserve BNode identifiers found in the input
 * @return a lazy stream of statements, terminated by a null element
 */
public static Stream<Statement> readRDF(final InputStream in, final RDFFormat format,
@Nullable final Map<String, String> namespaces, @Nullable final String base,
final boolean preserveBNodes) {
final Map<String, String> mdc = Logging.getMDC();
return new Stream<Statement>() {
@Override
protected void doToHandler(final Handler<? super Statement> handler) throws Throwable {
final Map<String, String> oldMdc = Logging.getMDC();
try {
Logging.setMDC(mdc);
// adapt the Stream handler to the Rio RDFHandler callback interface
final RDFHandler rdfHandler = new RDFHandlerBase() {
@Override
public void handleStatement(final Statement statement)
throws RDFHandlerException {
emit(statement);
}
@Override
public void endRDF() throws RDFHandlerException {
// a trailing null marks the end of the data for downstream consumers
emit(null);
}
private void emit(final Statement statement) throws RDFHandlerException {
try {
handler.handle(statement);
} catch (final Throwable ex) {
Throwables.propagateIfPossible(ex, RDFHandlerException.class);
throw new RuntimeException(ex);
}
}
};
readRDF(in, format, namespaces, base, preserveBNodes, rdfHandler);
} finally {
Logging.setMDC(oldMdc);
}
}
};
}
/**
 * Parses RDF statements from the supplied input stream in the given format, forwarding them
 * to the supplied handler. Strict parser settings are applied (fail on unknown datatypes /
 * languages, normalize values, etc.), with format-specific tuning for N-Triples, JSON-LD,
 * RDF/JSON, TriX and RDF/XML. Optional initial namespace bindings are injected into the
 * parser via reflection, as Sesame offers no public API for this.
 *
 * @param in the input stream to read from
 * @param format the RDF format to parse; despite the historical {@code @Nullable} annotation
 *            it is required by this overload, and a null value now fails fast with a clear
 *            message instead of an NPE inside Rio
 * @param namespaces optional initial namespace bindings (best effort)
 * @param base optional base URI for resolving relative URIs (empty string if null)
 * @param preserveBNodes whether to preserve BNode identifiers found in the input
 * @param handler the handler receiving parsed statements
 * @throws IOException on read failure
 * @throws RDFParseException on malformed input
 * @throws RDFHandlerException on handler failure
 */
public static void readRDF(final InputStream in, @Nullable final RDFFormat format,
        @Nullable final Map<String, String> namespaces, @Nullable final String base,
        final boolean preserveBNodes, final RDFHandler handler) throws IOException,
        RDFParseException, RDFHandlerException {
    Preconditions.checkNotNull(format, "No RDF format specified");
    final RDFParser parser = Rio.createParser(format);
    parser.setValueFactory(Data.getValueFactory());
    final ParserConfig config = parser.getParserConfig();
    config.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
    config.set(BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES, true);
    config.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false);
    config.set(BasicParserSettings.VERIFY_LANGUAGE_TAGS, true);
    config.set(BasicParserSettings.VERIFY_RELATIVE_URIS, true);
    config.set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, true);
    config.set(BasicParserSettings.NORMALIZE_LANGUAGE_TAGS, true);
    config.set(BasicParserSettings.PRESERVE_BNODE_IDS, preserveBNodes);
    if (format.equals(RDFFormat.NTRIPLES)) {
        config.set(NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES, true);
    } else if (format.equals(RDFFormat.JSONLD)) {
        // following parameters are currently ignored by used library
        config.set(JSONLDSettings.COMPACT_ARRAYS, true);
        config.set(JSONLDSettings.OPTIMIZE, true);
        config.set(JSONLDSettings.USE_NATIVE_TYPES, false);
        config.set(JSONLDSettings.USE_RDF_TYPE, false);
        config.set(JSONLDSettings.JSONLD_MODE, JSONLDMode.COMPACT);
    } else if (format.equals(RDFFormat.RDFJSON)) {
        config.set(RDFJSONParserSettings.FAIL_ON_MULTIPLE_OBJECT_DATATYPES, true);
        config.set(RDFJSONParserSettings.FAIL_ON_MULTIPLE_OBJECT_LANGUAGES, true);
        config.set(RDFJSONParserSettings.FAIL_ON_MULTIPLE_OBJECT_TYPES, true);
        config.set(RDFJSONParserSettings.FAIL_ON_MULTIPLE_OBJECT_VALUES, true);
        config.set(RDFJSONParserSettings.FAIL_ON_UNKNOWN_PROPERTY, true);
        config.set(RDFJSONParserSettings.SUPPORT_GRAPHS_EXTENSION, true);
    } else if (format.equals(RDFFormat.TRIX)) {
        config.set(TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT, true);
        config.set(TriXParserSettings.FAIL_ON_TRIX_MISSING_DATATYPE, false);
    } else if (format.equals(RDFFormat.RDFXML)) {
        config.set(XMLParserSettings.FAIL_ON_DUPLICATE_RDF_ID, true);
        config.set(XMLParserSettings.FAIL_ON_INVALID_NCNAME, true);
        config.set(XMLParserSettings.FAIL_ON_INVALID_QNAME, true);
        config.set(XMLParserSettings.FAIL_ON_MISMATCHED_TAGS, true);
        config.set(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES, false);
        config.set(XMLParserSettings.FAIL_ON_SAX_NON_FATAL_ERRORS, false);
    }
    if (namespaces != null && parser instanceof RDFParserBase) {
        try {
            // inject initial namespace bindings via reflection (no public API available)
            final Field field = RDFParserBase.class.getDeclaredField("namespaceTable");
            field.setAccessible(true);
            field.set(parser, Data.newNamespaceMap(Data.newNamespaceMap(), namespaces));
        } catch (final Throwable ex) {
            // best effort: proceed without pre-bound namespaces if reflection fails
            // (previously this also dumped a stack trace to stderr, contradicting the intent)
        }
    }
    parser.setRDFHandler(handler);
    parser.parse(in, Strings.nullToEmpty(base));
}
/**
 * Parses multiple RDF files, dispatching the statements of each file to the corresponding
 * handler of the supplied map, optionally using multiple threads.
 * <p>
 * Files are processed from the largest to the smallest to improve load balancing. At most
 * {@code parallelism} worker tasks are submitted to the shared executor, capped by the number
 * of files; with an effective parallelism of 1 the calling thread is used directly. The first
 * exception stops the processing of further files and is rethrown after all workers finish.
 *
 * @param sources map from input file to the handler receiving its statements
 * @param format the RDF format, or null to detect it from each file name
 * @param namespaces optional initial namespace bindings
 * @param base optional base URI for resolving relative URIs
 * @param preserveBNodes whether to preserve BNode identifiers found in the input
 * @param compression the compression scheme, or null to detect it from each file name
 * @param parallelism the maximum number of files parsed concurrently
 * @throws IOException on read failure
 * @throws RDFParseException on malformed input
 * @throws RDFHandlerException on handler failure
 */
public static void readRDF(final Map<File, ? extends RDFHandler> sources,
        @Nullable final RDFFormat format, @Nullable final Map<String, String> namespaces,
        @Nullable final String base, final boolean preserveBNodes,
        @Nullable final Compression compression, final int parallelism) throws IOException,
        RDFParseException, RDFHandlerException {

    // Sort files based on size, placing larger files first to better parallelism
    final Map<File, RDFHandler> actualSources = Maps.newHashMap(sources);
    final List<File> sortedFiles = Lists.newArrayList(sources.keySet());
    Collections.sort(sortedFiles, new Comparator<File>() {
        @Override
        public int compare(final File first, final File second) {
            if (first == null) {
                return second == null ? 0 : -1;
            } else if (second == null) {
                return 1;
            } else {
                // FIX: use Long.compare instead of casting the long difference to int,
                // which could truncate/overflow and yield an inconsistent ordering for
                // files whose sizes differ by 2 GiB or more
                return Long.compare(second.length(), first.length());
            }
        }
    });

    // Compute parallelism degree as minimum of supplied value and available files
    final int actualParallelism = Math.max(1, Math.min(parallelism, sortedFiles.size()));

    // If parallelism is not needed, just loop through the files using this thread
    if (actualParallelism == 1) {
        for (final File file : sortedFiles) {
            final RDFHandler handler = actualSources.get(file);
            readRDFHelper(file, format, namespaces, base, preserveBNodes, compression, handler);
        }
        return;
    }

    // Allocate a latch to wait for threads to finish, and a variable to store exceptions
    final AtomicReference<Throwable> exceptionHolder = new AtomicReference<Throwable>(null);
    final CountDownLatch latch = new CountDownLatch(actualParallelism);

    // Parse the files using multiple threads until the list is empty or an error occurs
    for (int i = 0; i < actualParallelism; ++i) {
        Data.getExecutor().execute(new Runnable() {
            @Override
            public void run() {
                try {
                    while (exceptionHolder.get() == null) {
                        final File file;
                        final RDFHandler handler;
                        // atomically claim the next (largest remaining) file, if any
                        synchronized (sortedFiles) {
                            if (sortedFiles.isEmpty() || exceptionHolder.get() != null) {
                                break;
                            }
                            file = sortedFiles.remove(0);
                            handler = actualSources.get(file);
                        }
                        readRDFHelper(file, format, namespaces, base, preserveBNodes,
                                compression, handler);
                    }
                } catch (final Throwable ex) {
                    exceptionHolder.set(ex);
                } finally {
                    latch.countDown();
                }
            }
        });
    }

    // Wait for all workers to complete, restoring the interrupt flag if interrupted
    try {
        latch.await();
    } catch (final InterruptedException ex) {
        Thread.currentThread().interrupt();
    }

    // Propagate an exception occurred during parsing
    final Throwable ex = exceptionHolder.get();
    if (ex != null) {
        Throwables.propagateIfPossible(ex, IOException.class);
        Throwables.propagateIfPossible(ex, RDFHandlerException.class);
        Throwables.propagateIfPossible(ex, RDFParseException.class);
        throw new RuntimeException(ex);
    }
}
/**
 * Parses a single RDF file (or STDIN, if {@code file} is null), detecting format and
 * compression from the file name when not supplied, and forwards statements to the given
 * handler. Any failure is re-thrown with a message reporting the file, format and
 * compression, preserving the original exception type where possible.
 *
 * @param file the file to parse, or null for STDIN
 * @param format the RDF format, or null to detect it from the file name
 * @param namespaces optional initial namespace bindings
 * @param base optional base URI for resolving relative URIs
 * @param preserveBNodes whether to preserve BNode identifiers found in the input
 * @param compression the compression scheme, or null to detect it from the file name
 * @param handler the handler receiving parsed statements
 * @throws IOException on read failure
 * @throws RDFParseException on malformed input
 * @throws RDFHandlerException on handler failure
 */
private static void readRDFHelper(@Nullable final File file, @Nullable final RDFFormat format,
@Nullable final Map<String, String> namespaces, @Nullable final String base,
final boolean preserveBNodes, @Nullable final Compression compression,
final RDFHandler handler) throws IOException, RDFParseException, RDFHandlerException {
// Detect file format
RDFFormat actualFormat = format;
if (actualFormat == null) {
if (file == null) {
throw new IllegalArgumentException("Cannot detect RDF format of STDIN");
}
actualFormat = RDFFormat.forFileName(file.getName());
}
// Detect file compression
Compression actualCompression = compression;
if (actualCompression == null) {
actualCompression = file == null ? Compression.NONE : Compression.forFileName(
file.getName(), Compression.NONE);
}
// Perform parsing, wrapping possible exceptions so to report the file name
InputStream stream = null;
try {
stream = file == null ? System.in : actualCompression.read(Data.getExecutor(), file);
readRDF(stream, actualFormat, namespaces, base, preserveBNodes, handler);
} catch (final Throwable ex) {
// wrap with a message identifying the failed file, keeping the exception type
final String message = "Parsing of " + (file == null ? "STDIN" : file)
+ " using format " + actualFormat + " and compression " + actualCompression
+ " failed: " + ex.getMessage();
if (ex instanceof IOException) {
throw new IOException(message, ex);
} else if (ex instanceof RDFParseException) {
throw new RDFParseException(message, ex);
} else if (ex instanceof RDFHandlerException) {
throw new RDFHandlerException(message, ex);
}
throw new RuntimeException(message, ex);
} finally {
// never close STDIN, as it may be reused by the caller
if (stream != System.in) {
Util.closeQuietly(stream);
}
}
}
/**
 * Creates a tuple query result writer for the given format, configured with the default
 * settings used throughout this class (pretty printing, plain/lang literal conversion; for
 * SPARQL/XML also the Sesame qname extension is disabled).
 *
 * @param format the tuple query result format
 * @param stream the stream where the writer will emit its output
 * @return the configured writer
 */
public static TupleQueryResultWriter newSparqlTupleWriter(final TupleQueryResultFormat format,
        final OutputStream stream) {
    final TupleQueryResultWriter writer = QueryResultIO.createWriter(format, stream);
    final WriterConfig config = writer.getWriterConfig();
    final boolean json = format.equals(TupleQueryResultFormat.JSON);
    final boolean xml = format.equals(TupleQueryResultFormat.SPARQL);
    if (json || xml) {
        // settings shared by the JSON and SPARQL/XML formats
        config.set(BasicWriterSettings.PRETTY_PRINT, true);
        config.set(BasicWriterSettings.XSD_STRING_TO_PLAIN_LITERAL, true);
        config.set(BasicWriterSettings.RDF_LANGSTRING_TO_LANG_LITERAL, true);
        if (xml) {
            config.set(BasicQueryWriterSettings.ADD_SESAME_QNAME, false);
        }
    }
    return writer;
}
/**
 * Creates a tuple query result parser for the given format, backed by the compact value
 * factory so that parsed values are memory-efficient.
 *
 * @param format the tuple query result format
 * @return the configured parser
 */
public static TupleQueryResultParser newSparqlTupleParser(final TupleQueryResultFormat format) {
    final TupleQueryResultParser result = QueryResultIO.createParser(format);
    result.setValueFactory(CompactValueFactory.getInstance());
    return result;
}
/**
 * Creates a boolean query result writer for the given format, configured with the default
 * settings used throughout this class.
 * <p>
 * Bug fix: the SPARQL/XML branch previously compared the format against
 * {@code TupleQueryResultFormat.SPARQL}, which can never match a
 * {@code BooleanQueryResultFormat}, so SPARQL/XML boolean writers were returned unconfigured;
 * it now correctly compares against {@code BooleanQueryResultFormat.SPARQL}.
 *
 * @param format the boolean query result format
 * @param stream the stream where the writer will emit its output
 * @return the configured writer
 */
public static BooleanQueryResultWriter newSparqlBooleanWriter(
        final BooleanQueryResultFormat format, final OutputStream stream) {
    final BooleanQueryResultWriter writer = QueryResultIO.createWriter(format, stream);
    final WriterConfig config = writer.getWriterConfig();
    if (format.equals(BooleanQueryResultFormat.JSON)) {
        config.set(BasicWriterSettings.PRETTY_PRINT, true);
        config.set(BasicWriterSettings.XSD_STRING_TO_PLAIN_LITERAL, true);
        config.set(BasicWriterSettings.RDF_LANGSTRING_TO_LANG_LITERAL, true);
    } else if (format.equals(BooleanQueryResultFormat.SPARQL)) {
        config.set(BasicWriterSettings.PRETTY_PRINT, true);
        config.set(BasicWriterSettings.XSD_STRING_TO_PLAIN_LITERAL, true);
        config.set(BasicWriterSettings.RDF_LANGSTRING_TO_LANG_LITERAL, true);
        config.set(BasicQueryWriterSettings.ADD_SESAME_QNAME, false);
    }
    return writer;
}
/**
 * Creates a boolean query result parser for the given format, backed by the default value
 * factory of the KS.
 *
 * @param format the boolean query result format
 * @return the configured parser
 */
public static BooleanQueryResultParser newSparqlBooleanReader(
        final BooleanQueryResultFormat format) {
    final BooleanQueryResultParser result = QueryResultIO.createParser(format);
    result.setValueFactory(Data.getValueFactory());
    return result;
}
/**
 * Wraps the supplied handler so that multiple or nested startRDF/endRDF call pairs are merged
 * into a single logical document (only the outermost pair is forwarded).
 *
 * @param handler the handler to wrap
 * @return the merging wrapper
 */
public static RDFHandler newMergingHandler(final RDFHandler handler) {
return new MergingHandler(handler);
}
/**
 * Wraps the supplied handler so that received events are queued and forwarded to it by a
 * background task, decoupling the producer from the (possibly slow) consumer.
 *
 * @param handler the handler to wrap
 * @param queueSize the capacity of the internal queue, or null for the default
 * @return the decoupling wrapper
 */
public static RDFHandler newDecouplingHandler(final RDFHandler handler,
@Nullable final Integer queueSize) {
return new DecouplingHandler(handler, queueSize);
}
/**
 * Wraps the supplied handler so that namespace declarations for the namespaces actually used
 * by (an initial buffer of) the statements are emitted before the statements themselves.
 *
 * @param handler the handler to wrap
 * @param namespaces the namespace-to-prefix map consulted for missing prefixes
 * @param bufferSize the number of statements to buffer, or null for the default
 * @return the namespace-emitting wrapper
 */
public static RDFHandler newNamespaceHandler(final RDFHandler handler,
final Map<String, String> namespaces, @Nullable final Integer bufferSize) {
return new NamespaceHandler(handler, namespaces, bufferSize);
}
/**
 * Wraps the supplied handler so that start, periodic progress and end messages are logged to
 * the given logger; the handler is returned unwrapped if all messages are null.
 *
 * @param handler the handler to wrap
 * @param logger the logger where messages are emitted
 * @param startMessage message logged on startRDF, or null to disable
 * @param progressMessage format string logged periodically (count, throughput, average
 *            throughput), or null to disable
 * @param endMessage format string logged on endRDF (count, average throughput), or null to
 *            disable
 * @return the logging wrapper, or the handler itself if no message was supplied
 */
public static RDFHandler newLoggingHandler(final RDFHandler handler, final Logger logger,
@Nullable final String startMessage, @Nullable final String progressMessage,
@Nullable final String endMessage) {
Preconditions.checkNotNull(handler);
Preconditions.checkNotNull(logger);
if (startMessage == null && progressMessage == null && endMessage == null) {
return handler;
} else {
return new LoggingHandler(handler, logger, startMessage, progressMessage, endMessage);
}
}
/**
 * RDFHandler decorator that merges nested or repeated startRDF/endRDF pairs into a single
 * logical document: startRDF is forwarded only when the nesting depth rises from 0, endRDF
 * only when it falls back to 0. All methods are synchronized, so the wrapper can be shared by
 * multiple producer threads; comments, namespaces and statements are always forwarded.
 */
private static final class MergingHandler implements RDFHandler {
// the wrapped handler receiving the merged event sequence
private final RDFHandler handler;
// current startRDF/endRDF nesting depth (0 = outside a document)
private int depth;
MergingHandler(final RDFHandler handler) {
this.handler = Preconditions.checkNotNull(handler);
this.depth = 0;
}
@Override
public synchronized void startRDF() throws RDFHandlerException {
// forward only the outermost start event
if (this.depth == 0) {
this.handler.startRDF();
}
++this.depth;
}
@Override
public synchronized void handleComment(final String comment) throws RDFHandlerException {
this.handler.handleComment(comment);
}
@Override
public synchronized void handleNamespace(final String prefix, final String uri)
throws RDFHandlerException {
this.handler.handleNamespace(prefix, uri);
}
@Override
public synchronized void handleStatement(final Statement statement)
throws RDFHandlerException {
this.handler.handleStatement(statement);
}
@Override
public synchronized void endRDF() throws RDFHandlerException {
// forward only the outermost end event
--this.depth;
if (this.depth == 0) {
this.handler.endRDF();
}
}
}
/**
 * RDFHandler decorator that decouples event production from consumption: received comments,
 * namespaces and statements are placed on a bounded queue and forwarded to the wrapped
 * handler by a background task obtained from the shared executor. Failures in the background
 * task are captured and re-thrown to the producer on the next call (or on endRDF). Nested
 * startRDF/endRDF pairs are accepted; only the outermost pair starts/stops the background
 * task.
 */
private static final class DecouplingHandler implements RDFHandler {
private static final int DEFAULT_QUEUE_SIZE = 1024;
// sentinel enqueued to signal the end of the event sequence to the background task
private static final Object EOF = new Object();
private final RDFHandler handler;
private final int queueSize;
// queue of pending events: Statement, Namespace or String (comment); recreated per document
private BlockingQueue<Object> queue;
// holds the first failure of the background task, if any
private AtomicReference<Throwable> exception;
// future of the background draining task, awaited in endRDF
private Future<?> future;
// startRDF/endRDF nesting depth
private int depth;
DecouplingHandler(final RDFHandler handler, @Nullable final Integer queueSize) {
this.handler = Preconditions.checkNotNull(handler);
this.queueSize = MoreObjects.firstNonNull(queueSize, DEFAULT_QUEUE_SIZE);
this.queue = null;
this.exception = null;
this.future = null;
this.depth = 0;
}
@Override
public synchronized void startRDF() throws RDFHandlerException {
// Accept nested startRDF/endRDF calls
if (this.depth++ > 0) {
return;
}
// Initialize queue and exception holder
this.queue = new ArrayBlockingQueue<Object>(this.queueSize);
this.exception = new AtomicReference<Throwable>(null);
// Run a background task to move comments, namespaces and statements off the queue and
// forward it to the wrapped handler
this.future = Data.getExecutor().submit(new Runnable() {
@Override
public void run() {
Object object;
try {
DecouplingHandler.this.handler.startRDF();
// drain the queue until the EOF sentinel, dispatching by element type
while ((object = DecouplingHandler.this.queue.take()) != EOF) {
if (object instanceof Statement) {
DecouplingHandler.this.handler.handleStatement((Statement) object);
} else if (object instanceof Namespace) {
final Namespace ns = (Namespace) object;
DecouplingHandler.this.handler.handleNamespace(ns.getPrefix(),
ns.getName());
} else if (object instanceof String) {
DecouplingHandler.this.handler.handleComment((String) object);
}
}
DecouplingHandler.this.handler.endRDF();
} catch (final Throwable ex) {
// store the failure for the producer thread to pick up and rethrow
DecouplingHandler.this.exception.set(ex);
}
}
});
}
@Override
public void handleComment(final String comment) throws RDFHandlerException {
// Enqueue comment and propagate exceptions from background task, if any
put(comment);
propagateOnFailure();
}
@Override
public void handleNamespace(final String prefix, final String uri)
throws RDFHandlerException {
// Enqueue namespace and propagate exceptions from background task, if any
put(new NamespaceImpl(prefix, uri));
propagateOnFailure();
}
@Override
public void handleStatement(final Statement statement) throws RDFHandlerException {
// Enqueue statement and propagate exceptions from background task, if any
put(statement);
propagateOnFailure();
}
@Override
public synchronized void endRDF() throws RDFHandlerException {
// Accept nested startRDF/endRDF calls
if (--this.depth > 0) {
return;
}
// Signal end of RDF
put(EOF);
// Wait for the background task to complete
try {
this.future.get();
} catch (final Throwable ex) {
Throwables.propagateIfPossible(ex, RDFHandlerException.class);
Throwables.propagate(ex);
}
// Propagate exception from background task, if any
propagateOnFailure();
}
// Blocking enqueue; an interrupt while waiting is reported as an RDFHandlerException
private void put(final Object object) throws RDFHandlerException {
try {
this.queue.put(object);
} catch (final InterruptedException ex) {
throw new RDFHandlerException(ex);
}
}
// Rethrows a failure captured by the background task, preferring RDFHandlerException
private void propagateOnFailure() throws RDFHandlerException {
final Throwable ex = this.exception.get();
if (ex != null) {
Throwables.propagateIfPossible(ex, RDFHandlerException.class);
Throwables.propagate(ex);
}
}
}
/**
 * RDFHandler decorator that emits namespace declarations before statements. Up to
 * {@code bufferSize} initial statements are buffered while the namespaces they use are
 * collected; on flush (buffer full, first comment, or endRDF) the collected namespaces are
 * emitted first - using prefixes received via handleNamespace or looked up in the configured
 * map - followed by the buffered statements. After the flush, statements pass through
 * directly and further namespace declarations are dropped.
 */
private static final class NamespaceHandler implements RDFHandler {
private static final int DEFAULT_BUFFER_SIZE = 1024;
// the wrapped handler receiving namespaces and statements
private final RDFHandler handler;
// fallback namespace-to-prefix map consulted at flush time
private final Map<String, String> namespaces;
// maximum number of statements buffered before a forced flush
private final int bufferSize;
// statements withheld during the buffering phase (null after flush)
private List<Statement> buffer;
// whether we are still in the initial buffering phase
private boolean buffering;
// namespace -> prefix bindings collected so far (value null = prefix not yet known)
private Map<String, String> bindings;
NamespaceHandler(final RDFHandler handler, final Map<String, String> namespaces,
@Nullable final Integer bufferSize) {
this.handler = Preconditions.checkNotNull(handler);
this.namespaces = Preconditions.checkNotNull(namespaces);
this.bufferSize = MoreObjects.firstNonNull(bufferSize, DEFAULT_BUFFER_SIZE);
this.buffer = null;
this.buffering = false;
this.bindings = null;
}
@Override
public void startRDF() throws RDFHandlerException {
// reset state and enter the buffering phase
this.bindings = Maps.newHashMap();
this.buffer = Lists.newArrayListWithCapacity(this.bufferSize);
this.buffering = true;
this.handler.startRDF();
}
@Override
public void handleComment(final String comment) throws RDFHandlerException {
// a comment forces the flush, so it appears after the namespace declarations
flush();
this.handler.handleComment(comment);
}
@Override
public void handleNamespace(final String prefix, final String uri)
throws RDFHandlerException {
// record the declared prefix; declarations received after the flush are dropped
if (this.buffering) {
this.bindings.put(uri, prefix);
}
}
@Override
public void handleStatement(final Statement statement) throws RDFHandlerException {
if (this.buffering) {
// collect the namespaces used by the statement components, then buffer it
extractNamespace(statement.getSubject());
extractNamespace(statement.getPredicate());
extractNamespace(statement.getObject());
extractNamespace(statement.getContext());
this.buffer.add(statement);
if (this.buffer.size() == this.bufferSize) {
flush();
}
} else {
this.handler.handleStatement(statement);
}
}
@Override
public void endRDF() throws RDFHandlerException {
flush();
this.handler.endRDF();
}
private void extractNamespace(final Value value) {
if (value instanceof URI) {
final String ns = ((URI) value).getNamespace();
// register the namespace keeping any prefix already bound to it; a null value
// means the prefix will be looked up in the configured map at flush time
this.bindings.put(ns, this.bindings.get(ns));
} else if (value instanceof Literal) {
// the datatype URI of a literal also contributes a namespace
extractNamespace(((Literal) value).getDatatype());
}
}
private void flush() throws RDFHandlerException {
if (!this.buffering) {
return;
}
// emit namespace declarations in sorted order, resolving missing prefixes via the
// configured map; namespaces without any prefix are silently skipped
for (final String namespace : Ordering.natural().sortedCopy(this.bindings.keySet())) {
String prefix = this.bindings.get(namespace);
if (prefix == null) {
prefix = Data.namespaceToPrefix(namespace, this.namespaces);
}
if (prefix != null) {
this.handler.handleNamespace(prefix, namespace);
}
}
// release the buffered statements, then leave the buffering phase for good
for (final Statement statement : this.buffer) {
this.handler.handleStatement(statement);
}
this.bindings = null;
this.buffer = null;
this.buffering = false;
}
}
/**
 * RDFHandler decorator that logs optional start, progress and end messages. The progress
 * message is a format string receiving the statement count, the recent throughput and the
 * average throughput (statements/second); it is evaluated at most once per second and only
 * every 1000 statements. The end message receives the count and the average throughput.
 */
private static final class LoggingHandler implements RDFHandler {
private final RDFHandler handler;
@Nullable
private final Logger logger;
@Nullable
private final String startMessage;
@Nullable
private final String progressMessage;
@Nullable
private final String endMessage;
// timestamp of startRDF, used to compute the average throughput
private long totalTs;
// total number of statements handled so far
private long totalCounter = 0;
// timestamp of the last progress log, used to compute the recent throughput
private long lastTs;
// statement count at the last progress log
private long lastCounter = 0;
LoggingHandler(final RDFHandler handler, final Logger logger,
@Nullable final String startMessage, @Nullable final String progressMessage,
@Nullable final String endMessage) {
this.handler = Preconditions.checkNotNull(handler);
this.logger = logger;
this.startMessage = startMessage;
this.progressMessage = progressMessage;
this.endMessage = endMessage;
}
@Override
public void startRDF() throws RDFHandlerException {
this.handler.startRDF();
// reset the time references used by progress/end reporting
this.totalTs = System.currentTimeMillis();
this.lastTs = this.totalTs;
if (this.startMessage != null) {
this.logger.info(this.startMessage);
}
}
@Override
public void handleComment(final String comment) throws RDFHandlerException {
this.handler.handleComment(comment);
}
@Override
public void handleNamespace(final String prefix, final String uri)
throws RDFHandlerException {
this.handler.handleNamespace(prefix, uri);
}
@Override
public void handleStatement(final Statement statement) throws RDFHandlerException {
this.handler.handleStatement(statement);
++this.totalCounter;
// check the clock only every 1000 statements, and log at most once per second
if (this.progressMessage != null && this.totalCounter % 1000 == 0) {
final long ts = System.currentTimeMillis();
if (ts - this.lastTs >= 1000) {
final long throughput = (this.totalCounter - this.lastCounter) * 1000
/ (ts - this.lastTs);
final long avgThroughput = this.totalCounter * 1000 / (ts - this.totalTs);
this.lastTs = ts;
this.lastCounter = this.totalCounter;
this.logger.info(String.format(this.progressMessage, this.totalCounter,
throughput, avgThroughput));
}
}
}
@Override
public void endRDF() throws RDFHandlerException {
if (this.endMessage != null) {
final long ts = System.currentTimeMillis();
// +1 ms guards against division by zero for very fast runs
final long avgThroughput = this.totalCounter * 1000 / (ts - this.totalTs + 1);
this.logger.info(String.format(this.endMessage, this.totalCounter, avgThroughput));
}
this.handler.endRDF();
}
}
/** Prevents instantiation: this class only exposes static utility methods. */
private RDFUtil() {
}

static {
    // NOTE: this was previously an *instance* initializer ({ ... } without 'static'), which
    // never executed because this utility class is never instantiated (private constructor).
    // It must be a static initializer so that the TQL format registration and the XML entity
    // expansion limit are actually applied when the class is loaded.
    TQL.register();
    System.setProperty("entityExpansionLimit", "" + Integer.MAX_VALUE);
}
}