/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* Portions of this code are: * * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007. * * Licensed under the Aduna BSD-style license. */ /* * Created on Aug 24, 2011 */ package com.bigdata.rdf.sail.sparql; import java.math.BigDecimal; import java.math.BigInteger; import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.UUID; import java.util.Map.Entry; import org.apache.log4j.Logger; import org.openrdf.model.URI; import org.openrdf.model.Value; import org.openrdf.model.impl.URIImpl; import org.openrdf.model.vocabulary.RDF; import org.openrdf.model.vocabulary.XMLSchema; import org.openrdf.query.MalformedQueryException; import com.bigdata.bop.IValueExpression; import com.bigdata.rdf.internal.DTE; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.LexiconConfiguration; import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.internal.constraints.SPARQLConstraint; import com.bigdata.rdf.internal.impl.TermId; import com.bigdata.rdf.internal.impl.literal.AbstractLiteralIV; import com.bigdata.rdf.internal.impl.literal.FullyInlineTypedLiteralIV; import com.bigdata.rdf.internal.impl.literal.UUIDLiteralIV; import com.bigdata.rdf.internal.impl.literal.XSDBooleanIV; import com.bigdata.rdf.internal.impl.literal.XSDDecimalIV; import com.bigdata.rdf.internal.impl.literal.XSDIntegerIV; import com.bigdata.rdf.internal.impl.literal.XSDNumericIV; import com.bigdata.rdf.internal.impl.literal.XSDUnsignedByteIV; import com.bigdata.rdf.internal.impl.literal.XSDUnsignedIntIV; import com.bigdata.rdf.internal.impl.literal.XSDUnsignedLongIV; import com.bigdata.rdf.internal.impl.literal.XSDUnsignedShortIV; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.model.BigdataValueFactoryImpl; import com.bigdata.rdf.sail.BigdataValueReplacer; import com.bigdata.rdf.sail.sparql.ast.ASTBlankNode; import com.bigdata.rdf.sail.sparql.ast.ASTDatasetClause; import com.bigdata.rdf.sail.sparql.ast.ASTFalse; import com.bigdata.rdf.sail.sparql.ast.ASTIRI; import com.bigdata.rdf.sail.sparql.ast.ASTNumericLiteral; import com.bigdata.rdf.sail.sparql.ast.ASTOperationContainer; import com.bigdata.rdf.sail.sparql.ast.ASTQName; import com.bigdata.rdf.sail.sparql.ast.ASTRDFLiteral; import com.bigdata.rdf.sail.sparql.ast.ASTRDFValue; import com.bigdata.rdf.sail.sparql.ast.ASTString; import com.bigdata.rdf.sail.sparql.ast.ASTTrue; import com.bigdata.rdf.sail.sparql.ast.VisitorException; import com.bigdata.rdf.store.BD; /** * Visits the AST model and builds a map from each RDF {@link Value} to * {@link BigdataValue} objects that have mock IVs assigned to them. * <p> * Note: The {@link PrefixDeclProcessor} will rewrite {@link ASTQName} nodes as * {@link ASTIRI} nodes. It MUST run before this processor. * <p> * Note: Any {@link ASTRDFLiteral} or {@link ASTIRI} nodes are annotated by this * processor using {@link ASTRDFValue#setRDFValue(Value)}. This includes IRIrefs * in the {@link ASTDatasetClause}, which are matched as either {@link ASTIRI} * or {@link ASTQName}. * <p> * Note: This is a part of deferred IV batch resolution, which is intended to * replace the functionality of the {@link BigdataValueReplacer}. * <p> * Note: {@link IValueExpression} nodes used in {@link SPARQLConstraint}s are * allowed to use values not actually in the database. MP * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @openrdf * * @see https://jira.blazegraph.com/browse/BLZG-1176 (decouple SPARQL parser * from DB) * @see https://jira.blazegraph.com/browse/BLZG-1519 (Refactor test suite to * remove tight coupling with IVs while checking up parsed queries) */ public class ASTDeferredIVResolutionInitializer extends ASTVisitorBase { private final static Logger log = Logger .getLogger(ASTDeferredIVResolutionInitializer.class); private final static boolean INFO = log.isInfoEnabled(); private final static List<URI> RDF_VOCAB = Arrays.asList(RDF.FIRST, RDF.REST, RDF.NIL, BD.VIRTUAL_GRAPH); private final Map<Value, BigdataValue> vocab; private final BigdataValueFactory valueFactory; private final LinkedHashMap<ASTRDFValue, BigdataValue> nodes; /** * Return a map from openrdf {@link Value} objects to the corresponding * {@link BigdataValue} objects for all {@link Value}s that appear in the * parse tree. */ public Map<Value, BigdataValue> getValues() { return vocab; } public ASTDeferredIVResolutionInitializer() { // Unnamed BigdataValueFactory is used to provide instances // of BigdataValue, which are required by existing test suite. // See also task https://jira.blazegraph.com/browse/BLZG-1519 // this.valueFactory = BigdataValueFactoryImpl.getInstance("parser"+UUID.randomUUID().toString().replaceAll("-", "")); this.valueFactory = new BigdataValueFactoryImpl(); this.nodes = new LinkedHashMap<>(); this.vocab = new LinkedHashMap<>(); } /** * Visit the parse tree, locating and collecting references to all * {@link ASTRDFValue} nodes (including blank nodes iff we are in a told * bnodes mode). The {@link ASTRDFValue}s are collected in a {@link Map} * which associates each one with a {@link BigdataValue} object which is set * using {@link ASTRDFValue#setRDFValue(org.openrdf.model.Value)}. The * {@link BigdataValue}s will be resolved later (in ASTDeferredIVResolution) * in a batch against the database, obtaining their {@link IVs}. * Until then {@link BigdataValue}s in the parse tree have unresolved * {@link IV}s (TermID(0)). * * @param qc * * @throws MalformedQueryException */ @SuppressWarnings({ "rawtypes", "unchecked" }) public void process(final ASTOperationContainer qc) throws MalformedQueryException { try { /* * Collect all ASTRDFValue nodes into a map, paired with * BigdataValue objects. */ qc.jjtAccept(new RDFValueResolver(), null); } catch (final VisitorException e) { // Turn the exception into a Query exception. throw new MalformedQueryException(e); } { /* * RDF Values actually appearing in the parse tree. */ final Iterator<Entry<ASTRDFValue, BigdataValue>> itr = nodes.entrySet().iterator(); while (itr.hasNext()) { final Entry<ASTRDFValue, BigdataValue> entry = itr.next(); final ASTRDFValue value = entry.getKey(); IV iv = null; BigdataValue bigdataValue = null; if (value.getRDFValue()!=null && ((BigdataValue)value.getRDFValue()).getIV() != null) { bigdataValue = (BigdataValue) value.getRDFValue(); iv = bigdataValue.getIV(); } else if (value instanceof ASTIRI) { iv = new TermId<BigdataValue>(VTE.URI,0); bigdataValue = valueFactory.createURI(((ASTIRI)value).getValue()); if (!bigdataValue.isRealIV()) { bigdataValue.clearInternalValue(); bigdataValue.setIV(iv); } iv.setValue(bigdataValue); } else if (value instanceof ASTRDFLiteral) { final ASTRDFLiteral rdfNode = (ASTRDFLiteral) value; final String lang = rdfNode.getLang(); final ASTIRI dataTypeIri = rdfNode.getDatatype(); URIImpl dataTypeUri = null; DTE dte = null; if (dataTypeIri!=null && dataTypeIri.getValue()!=null) { dataTypeUri = new URIImpl(dataTypeIri.getValue()); dte = DTE.valueOf(dataTypeUri); } if (dte!=null) { bigdataValue = getBigdataValue(rdfNode.getLabel().getValue(), dte); if (!bigdataValue.stringValue().equals(rdfNode.getLabel().getValue())) { // Data loss could occur if inline IV will be used, as string representation of original value differ from decoded value bigdataValue = valueFactory.createLiteral(rdfNode.getLabel().getValue(), dataTypeUri); iv = TermId.mockIV(VTE.valueOf(bigdataValue)); bigdataValue.setIV(iv); iv.setValue(bigdataValue); } } else { iv = new TermId<BigdataValue>(VTE.LITERAL,0); if (lang!=null) { bigdataValue = valueFactory.createLiteral(rdfNode.getLabel().getValue(), lang); } else { bigdataValue = valueFactory.createLiteral(rdfNode.getLabel().getValue(), dataTypeUri); } iv.setValue(bigdataValue); bigdataValue.setIV(iv); } } else if (value instanceof ASTNumericLiteral) { final ASTNumericLiteral rdfNode = (ASTNumericLiteral) value; final URI dataTypeUri = rdfNode.getDatatype(); final DTE dte = DTE.valueOf(dataTypeUri); bigdataValue = getBigdataValue(rdfNode.getValue(), dte); if (!bigdataValue.stringValue().equals(rdfNode.getValue())) { // Data loss could occur if inline IV will be used, as string representation of original value differ from decoded value // iv = bigdataValue.getIV(); bigdataValue = valueFactory.createLiteral(rdfNode.getValue(), dataTypeUri); // bigdataValue.setIV(iv); } } else if (value instanceof ASTTrue) { bigdataValue = valueFactory.createLiteral(true); if (bigdataValue.isRealIV()) { iv = bigdataValue.getIV(); } else { iv = TermId.mockIV(VTE.valueOf(bigdataValue)); iv.setValue(bigdataValue); bigdataValue.setIV(iv); } } else if (value instanceof ASTFalse) { bigdataValue = valueFactory.createLiteral(false); if (bigdataValue.isRealIV()) { iv = bigdataValue.getIV(); } else { iv = TermId.mockIV(VTE.valueOf(bigdataValue)); iv.setValue(bigdataValue); bigdataValue.setIV(iv); } } else { iv = new FullyInlineTypedLiteralIV<BigdataLiteral>(value.toString(), true); bigdataValue = iv.getValue(); } if (bigdataValue!=null) { value.setRDFValue(bigdataValue); // filling in a dummy IV for BigdataExprBuilder // @see https://jira.blazegraph.com/browse/BLZG-1717 (IV not resolved) fillInDummyIV(bigdataValue); vocab.put(bigdataValue, bigdataValue); } } } /* * FIXME Why is this [vocab] still here? And why the IV assignment logic * if we are not doing any batch resolution? */ // RDF Collection syntactic sugar vocabulary items. for (Value value: RDF_VOCAB) { BigdataValue bigdataValue = valueFactory.asValue(value); fillInDummyIV(bigdataValue); vocab.put(value, bigdataValue); } } /* * Note: Batch resolution the BigdataValue objects against the database * DOES NOT happen here. It will be done in ASTDeferredIVResolution. * Mock IVs used until then. */ @SuppressWarnings({ "rawtypes", "unchecked" }) private void fillInDummyIV(BigdataValue value) { final IV iv = value.getIV(); if (iv == null) { /* * Since the term identifier is NULL this value is not known * to the kb. */ if (INFO) log.info("Not in knowledge base: " + value); /* * Create a dummy iv and cache the unknown value on it so * that it can be used during query evaluation. */ final IV dummyIV = TermId.mockIV(VTE.valueOf(value)); value.setIV(dummyIV); dummyIV.setValue(value); } else { iv.setValue(value); } } /** * Reconstructs BigdataValue out of IV, creating literals if needed * <p> * {@link IVUtility#decode(String, String)} is used by * {@link ASTDeferredIVResolutionInitializer} to convert parsed AST * objects (ASTRDFLiteral and ASTNumericalLiteral) to IVs wrapped up as * BigdataValues, which are required on later stages of processing. * <p> * There's no LexiconRelation available at this point, so all values * converted in inlined mode. {@link ASTDeferredIVResolution} converts these * inlined IVs to term IV by getLexiconRelation().addTerms in case if triple * store configured to not use inlined values. * * @param iv * the IV * * @param dte * data type of IV */ @SuppressWarnings({ "rawtypes", "unchecked" }) private BigdataValue getBigdataValue(final String value, final DTE dte) { // Check if lexical form is empty, and provide bigdata value // with FullyInlineTypedLiteralIV holding corresponding data type // @see https://jira.blazegraph.com/browse/BLZG-1716 (SPARQL Update parser fails on invalid numeric literals) if (value.isEmpty()) { BigdataLiteral bigdataValue = valueFactory.createLiteral(value, dte.getDatatypeURI()); IV iv = new FullyInlineTypedLiteralIV<BigdataLiteral>("", null, dte.getDatatypeURI(), true); bigdataValue.setIV(iv); iv.setValue(bigdataValue); return bigdataValue; } final IV iv = decode(value, dte.name()); BigdataValue bigdataValue; if (!iv.hasValue() && iv instanceof AbstractLiteralIV) { switch(dte) { case XSDByte: bigdataValue = valueFactory.createLiteral(((AbstractLiteralIV)iv).byteValue()); break; case XSDShort: bigdataValue = valueFactory.createLiteral(((AbstractLiteralIV)iv).shortValue()); break; case XSDInt: bigdataValue = valueFactory.createLiteral(((AbstractLiteralIV)iv).intValue()); break; case XSDLong: bigdataValue = valueFactory.createLiteral(((AbstractLiteralIV)iv).longValue()); break; case XSDFloat: bigdataValue = valueFactory.createLiteral(((AbstractLiteralIV)iv).floatValue()); break; case XSDDouble: bigdataValue = valueFactory.createLiteral(((AbstractLiteralIV)iv).doubleValue()); break; case XSDBoolean: bigdataValue = valueFactory.createLiteral(((AbstractLiteralIV)iv).booleanValue()); break; case XSDString: bigdataValue = valueFactory.createLiteral(((AbstractLiteralIV)iv).stringValue(), dte.getDatatypeURI()); break; case XSDInteger: bigdataValue = valueFactory.createLiteral(((AbstractLiteralIV)iv).stringValue(), XMLSchema.INTEGER); break; case XSDDecimal: bigdataValue = valueFactory.createLiteral(iv.stringValue(), DTE.XSDDecimal.getDatatypeURI()); break; case XSDUnsignedShort: bigdataValue = valueFactory.createLiteral(iv.stringValue(), DTE.XSDUnsignedShort.getDatatypeURI()); break; case XSDUnsignedInt: bigdataValue = valueFactory.createLiteral(iv.stringValue(), DTE.XSDUnsignedInt.getDatatypeURI()); break; case XSDUnsignedByte: bigdataValue = valueFactory.createLiteral(iv.stringValue(), DTE.XSDUnsignedByte.getDatatypeURI()); break; case XSDUnsignedLong: bigdataValue = valueFactory.createLiteral(iv.stringValue(), DTE.XSDUnsignedLong.getDatatypeURI()); break; default: throw new RuntimeException("unknown DTE " + dte); } bigdataValue.setIV(iv); iv.setValue(bigdataValue); } else { bigdataValue = iv.getValue(); } return bigdataValue; } /** * FIXME Should this be using the {@link LexiconConfiguration} to create * appropriate inline {@link IV}s when and where appropriate? */ private class RDFValueResolver extends ASTVisitorBase { @Override public Object visit(final ASTQName node, final Object data) throws VisitorException { throw new VisitorException( "QNames must be resolved before resolving RDF Values"); } /** * Note: Blank nodes within a QUERY are treated as anonymous variables, * even when we are in a told bnodes mode. */ @Override public Object visit(final ASTBlankNode node, final Object data) throws VisitorException { throw new VisitorException( "Blank nodes must be replaced with variables before resolving RDF Values"); } @Override public Void visit(final ASTIRI node, final Object data) throws VisitorException { try { nodes.put(node, valueFactory.createURI(node.getValue())); return null; } catch (final IllegalArgumentException e) { // invalid URI throw new VisitorException(e.getMessage()); } } @Override public Void visit(final ASTRDFLiteral node, final Object data) throws VisitorException { // Note: This is handled by this ASTVisitor (see below in this // class). final String label = (String) node.getLabel().jjtAccept(this, null); final String lang = node.getLang(); final ASTIRI datatypeNode = node.getDatatype(); final BigdataLiteral literal; if (datatypeNode != null) { final URI datatype; try { datatype = valueFactory.createURI(datatypeNode.getValue()); } catch (final IllegalArgumentException e) { // invalid URI throw new VisitorException(e); } literal = valueFactory.createLiteral(label, datatype); } else if (lang != null) { literal = valueFactory.createLiteral(label, lang); } else { literal = valueFactory.createLiteral(label); } nodes.put(node, literal); return null; } @Override public Void visit(final ASTNumericLiteral node, final Object data) throws VisitorException { nodes.put( node, valueFactory.createLiteral(node.getValue(), node.getDatatype())); return null; } @Override public Void visit(final ASTTrue node, final Object data) throws VisitorException { nodes.put(node, valueFactory.createLiteral(true)); return null; } @Override public Void visit(final ASTFalse node, final Object data) throws VisitorException { nodes.put(node, valueFactory.createLiteral(false)); return null; } /** * Note: This supports the visitor method for a Literal. */ @Override public String visit(final ASTString node, final Object data) throws VisitorException { return node.getValue(); } } /** * Decode an IV from its string representation and type, provided in as * ASTRDFLiteral node in AST model. * <p> * Note: This is a very special case method. Normally logic should go * through the ILexiconRelation to resolve inline IVs. This always uses * inline IVs, and thus defeats the ILexiconConfiguration for the namespace. * * @param val * the string representation * @param type * value type * @return the IV * * @see https://jira.blazegraph.com/browse/BLZG-1176 (SPARQL QUERY/UPDATE should not use db connection) * * This method was moved from IVUtility class, as it is not used anywhere except * AST Deferred resolution */ @SuppressWarnings("rawtypes") public static IV decode(final String val, final String type) { final DTE dte = Enum.valueOf(DTE.class, type); switch (dte) { case XSDBoolean: { return XSDBooleanIV.valueOf((Boolean.valueOf(val))); } case XSDByte: { final byte x = Byte.valueOf(val); return new XSDNumericIV<BigdataLiteral>(x); } case XSDShort: { final short x = Short.valueOf(val); return new XSDNumericIV<BigdataLiteral>(x); } case XSDInt: { final int x = Integer.valueOf(val); return new XSDNumericIV<BigdataLiteral>(x); } case XSDLong: { final long x = Long.valueOf(val); return new XSDNumericIV<BigdataLiteral>(x); } case XSDFloat: { final float x = Float.valueOf(val); return new XSDNumericIV<BigdataLiteral>(x); } case XSDDouble: { final double x = Double.valueOf(val); return new XSDNumericIV<BigdataLiteral>(x); } case UUID: { final UUID x = UUID.fromString(val); return new UUIDLiteralIV<BigdataLiteral>(x); } case XSDInteger: { final BigInteger x = new BigInteger(val); return new XSDIntegerIV<BigdataLiteral>(x); } case XSDDecimal: { final BigDecimal x = new BigDecimal(val); return new XSDDecimalIV<BigdataLiteral>(x); } case XSDString: { return new FullyInlineTypedLiteralIV(val, null, XMLSchema.STRING, true); } case XSDUnsignedByte: { return new XSDUnsignedByteIV<>((byte) (Byte.valueOf(val) + Byte.MIN_VALUE)); } case XSDUnsignedShort: { return new XSDUnsignedShortIV<>((short) (Short.valueOf(val) + Short.MIN_VALUE)); } case XSDUnsignedInt: { return new XSDUnsignedIntIV((int) (Integer.valueOf(val) + Integer.MIN_VALUE)); } case XSDUnsignedLong: { return new XSDUnsignedLongIV<>(Long.valueOf(val) + Long.MIN_VALUE); } default: throw new UnsupportedOperationException("dte=" + dte); } } }