/*
 * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2006.
 *
 * Licensed under the Aduna BSD-style license.
 */
package com.bigdata.rdf.sail.sparql;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.openrdf.model.vocabulary.DC;
import org.openrdf.model.vocabulary.FN;
import org.openrdf.model.vocabulary.FOAF;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.SESAME;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.query.MalformedQueryException;

import com.bigdata.rdf.graph.impl.bd.GASService;
import com.bigdata.rdf.internal.XSD;
import com.bigdata.rdf.sail.sparql.ast.ASTDeleteData;
import com.bigdata.rdf.sail.sparql.ast.ASTIRI;
import com.bigdata.rdf.sail.sparql.ast.ASTInsertData;
import com.bigdata.rdf.sail.sparql.ast.ASTOperationContainer;
import com.bigdata.rdf.sail.sparql.ast.ASTPrefixDecl;
import com.bigdata.rdf.sail.sparql.ast.ASTQName;
import com.bigdata.rdf.sail.sparql.ast.ASTServiceGraphPattern;
import com.bigdata.rdf.sail.sparql.ast.ASTUnparsedQuadDataBlock;
import com.bigdata.rdf.sail.sparql.ast.SyntaxTreeBuilderTreeConstants;
import com.bigdata.rdf.sail.sparql.ast.VisitorException;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.store.BD;
import com.bigdata.rdf.store.BDS;
import com.bigdata.rdf.vocab.decls.FOAFVocabularyDecl;

/**
 * Processes the prefix declarations in a SPARQL query model.
 * <p>
 * Besides the prefixes declared in the query itself, a set of default
 * declarations ({@link #defaultDecls}) is applied. That set is populated once,
 * when this class is initialized, from a fixed list of well-known namespaces
 * plus any declarations found in the file named by
 * {@link Options#ADDITIONAL_DECLS_FILE}.
 *
 * @author Arjohn Kampman
 * @openrdf
 */
public class PrefixDeclProcessor {

    private static final Logger log = Logger.getLogger(PrefixDeclProcessor.class);

    /**
     * Default prefix (key) to namespace (value) declarations that
     * {@link #process(ASTOperationContainer)} adds to every query, in
     * insertion order. The map is mutable and publicly visible.
     */
    public static final Map<String,String> defaultDecls = new LinkedHashMap<String, String>();

    /**
     * Configuration options for the {@link PrefixDeclProcessor}.
     */
    public static interface Options {

        /**
         * This optional property defines the path to a file containing
         * additional prefix declarations, one per line.
         * <p>
         * Example file contents are shown below.
         *
         * <pre>
         * PREFIX wdref: &lt;http://www.wikidata.org/reference/&gt;
         * PREFIX wikibase: &lt;http://wikiba.se/ontology#&gt;
         * </pre>
         *
         * This should be passed as a Java system property as:
         *
         * <pre>
         * -Dcom.bigdata.rdf.sail.sparql.PrefixDeclProcessor.additionalDeclsFile=/path/to/file
         * </pre>
         *
         * @see <a href="https://jira.blazegraph.com/browse/BLZG-1773">BLZG-1773</a>
         */
        public static final String ADDITIONAL_DECLS_FILE = PrefixDeclProcessor.class.getName()
                + ".additionalDeclsFile";
    }

    static {
        defaultDecls.put("rdf", RDF.NAMESPACE);
        defaultDecls.put("rdfs", RDFS.NAMESPACE);
        defaultDecls.put("sesame", SESAME.NAMESPACE);
        defaultDecls.put("owl", OWL.NAMESPACE);
        defaultDecls.put("xsd", XMLSchema.NAMESPACE);
        defaultDecls.put("fn", FN.NAMESPACE);
        defaultDecls.put("foaf", FOAF.NAMESPACE);
        defaultDecls.put("dc", DC.NAMESPACE);
        defaultDecls.put("hint", QueryHints.NAMESPACE);
        defaultDecls.put("bd", BD.NAMESPACE);
        defaultDecls.put("bds", BDS.NAMESPACE);

        // Add any additional decls passed via property.
        // SEE BLZG-1773
        processAdditionalDecls();
    }

    /**
     * Processes prefix declarations in queries. This method collects all
     * prefixes that are declared in the supplied query, verifies that prefixes
     * are not redefined and replaces any {@link ASTQName} nodes in the query
     * with equivalent {@link ASTIRI} nodes.
     * <p>
     * For INSERT DATA / DELETE DATA operations that carry an
     * {@link ASTUnparsedQuadDataBlock}, the QNames are not resolved here;
     * instead the collected prefixes are prepended, in SPARQL syntax, to the
     * unparsed data block.
     *
     * @param qc
     *        The query that needs to be processed.
     * @return A map containing the prefixes that are declared in the query
     *         (key) and the namespace they map to (value), including any
     *         default declarations that were added.
     * @throws MalformedQueryException
     *         If the query contains redefined prefixes or qnames that use
     *         undefined prefixes.
     */
    public static Map<String, String> process(ASTOperationContainer qc)
        throws MalformedQueryException
    {
        List<ASTPrefixDecl> prefixDeclList = qc.getPrefixDeclList();

        // Build a prefix --> IRI map
        Map<String, String> prefixMap = new LinkedHashMap<String, String>();
        for (ASTPrefixDecl prefixDecl : prefixDeclList) {
            String prefix = prefixDecl.getPrefix();
            String iri = prefixDecl.getIRI().getValue();

            if (prefixMap.containsKey(prefix)) {
                throw new MalformedQueryException("Multiple prefix declarations for prefix '" + prefix + "'");
            }

            prefixMap.put(prefix, iri);
        }

        // Insert the default prefixes (if not explicitly defined in the query).
        for (Map.Entry<String, String> e : defaultDecls.entrySet()) {
            insertDefaultPrefix(prefixMap, e.getKey(), e.getValue());
        }

        ASTUnparsedQuadDataBlock dataBlock = null;
        if (qc.getOperation() instanceof ASTInsertData) {
            ASTInsertData insertData = (ASTInsertData)qc.getOperation();
            dataBlock = insertData.jjtGetChild(ASTUnparsedQuadDataBlock.class);
        }
        else if (qc.getOperation() instanceof ASTDeleteData) {
            ASTDeleteData deleteData = (ASTDeleteData)qc.getOperation();
            dataBlock = deleteData.jjtGetChild(ASTUnparsedQuadDataBlock.class);
        }

        if (dataBlock != null) {
            String prefixes = createPrefixesInSPARQLFormat(prefixMap);
            // TODO optimize string concat?
            dataBlock.setDataBlock(prefixes + dataBlock.getDataBlock());
        }
        else {
            QNameProcessor visitor = new QNameProcessor(prefixMap);
            try {
                qc.jjtAccept(visitor, null);
            }
            catch (VisitorException e) {
                throw new MalformedQueryException(e);
            }
        }

        return prefixMap;
    }

    /**
     * Adds a default declaration unless the query already declares the same
     * prefix, or already maps some prefix to the same namespace.
     */
    private static void insertDefaultPrefix(Map<String, String> prefixMap, String prefix, String namespace) {
        if (!prefixMap.containsKey(prefix) && !prefixMap.containsValue(namespace)) {
            prefixMap.put(prefix, namespace);
        }
    }

    /**
     * Renders the prefix map as a sequence of SPARQL <code>PREFIX</code>
     * declarations. A <code>null</code> key is rendered as the empty prefix.
     * <p>
     * NOTE(review): no whitespace or line break is emitted between
     * consecutive declarations or after the last one. This looks deliberate
     * (it would keep line numbers of the data block stable) - confirm before
     * changing.
     */
    private static String createPrefixesInSPARQLFormat(Map<String, String> prefixMap) {
        StringBuilder sb = new StringBuilder();
        for (Entry<String, String> entry : prefixMap.entrySet()) {
            sb.append("PREFIX");
            final String prefix = entry.getKey();
            if (prefix != null) {
                sb.append(' ').append(prefix);
            }
            sb.append(':');
            sb.append(" <").append(entry.getValue()).append('>');
        }
        return sb.toString();
    }

    /**
     * Visitor that replaces every {@link ASTQName} with the equivalent
     * {@link ASTIRI} and hands the prefix map to SERVICE graph patterns.
     */
    private static class QNameProcessor extends ASTVisitorBase {

        /**
         * Matches a %-encoded octet together with the character preceding it
         * (or the start of input). The preceding character is captured so
         * that an escaped percent sign, as in <code>foo\%bar</code>, is not
         * mistaken for a hex encoding.
         */
        private static final Pattern HEX_PATTERN = Pattern.compile(
                "([^\\\\]|^)(%[A-F\\d][A-F\\d])", Pattern.CASE_INSENSITIVE);

        /** Matches a backslash-escaped special character in a local name. */
        private static final Pattern ESCAPED_CHAR_PATTERN = Pattern.compile(
                "\\\\[_~\\.\\-!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=\\:\\/\\?#\\@\\%]");

        private final Map<String, String> prefixMap;

        public QNameProcessor(Map<String, String> prefixMap) {
            this.prefixMap = prefixMap;
        }

        /**
         * Resolves the QName against the prefix map and replaces the node in
         * its parent with an IRI node.
         *
         * @throws VisitorException
         *         If the QName uses a prefix that is not in the prefix map.
         */
        @Override
        public Object visit(ASTQName qnameNode, Object data)
            throws VisitorException
        {
            String qname = qnameNode.getValue();

            int colonIdx = qname.indexOf(':');
            assert colonIdx >= 0 : "colonIdx should be >= 0: " + colonIdx;

            String prefix = qname.substring(0, colonIdx);
            String localName = qname.substring(colonIdx + 1);

            String namespace = prefixMap.get(prefix);
            if (namespace == null) {
                throw new VisitorException("QName '" + qname + "' uses an undefined prefix");
            }

            localName = processEscapesAndHex(localName);

            // Replace the qname node with a new IRI node in the parent node
            ASTIRI iriNode = new ASTIRI(SyntaxTreeBuilderTreeConstants.JJTIRI);
            iriNode.setValue(namespace + localName);
            qnameNode.jjtReplaceWith(iriNode);

            return null;
        }

        /**
         * Decodes %-encoded octets and then removes the backslash from
         * backslash-escaped special characters in a local name.
         * <p>
         * NOTE(review): because the hex pattern consumes the character in
         * front of each encoding, the second of two directly adjacent
         * encodings (e.g. <code>%41%42</code>) is left undecoded. Also, the
         * SPARQL 1.1 grammar notes appear to say that %-encoded sequences in
         * local names are not decoded at all - confirm the intended behavior
         * before changing either aspect.
         *
         * @param localName
         *        The local part of a QName, as written in the query.
         * @return The local name with hex encodings and escapes resolved.
         */
        private static String processEscapesAndHex(String localName) {

            // first process hex-encoded chars.
            StringBuffer unencoded = new StringBuffer();
            Matcher m = HEX_PATTERN.matcher(localName);
            while (m.find()) {
                // we match the previous char because we need to be sure we are
                // not processing an escaped % char rather than an actual hex
                // encoding, for example: 'foo\%bar'.
                String previousChar = m.group(1);
                String encoded = m.group(2);

                int codePoint = Integer.parseInt(encoded.substring(1), 16);
                String decoded = String.valueOf(Character.toChars(codePoint));

                // The replacement is literal text: quote it so that a '$' or
                // '\' (e.g. decoded from %24 / %5C, or a preceding '$') is not
                // interpreted as a group reference by appendReplacement.
                m.appendReplacement(unencoded, Matcher.quoteReplacement(previousChar + decoded));
            }
            m.appendTail(unencoded);

            // then process escaped special chars.
            StringBuffer unescaped = new StringBuffer();
            m = ESCAPED_CHAR_PATTERN.matcher(unencoded.toString());
            while (m.find()) {
                String escaped = m.group();
                // Quoted for the same reason as above: '\$' must yield a
                // literal '$' rather than an illegal group reference.
                m.appendReplacement(unescaped, Matcher.quoteReplacement(escaped.substring(1)));
            }
            m.appendTail(unescaped);

            return unescaped.toString();
        }

        /**
         * Attaches the prefix declarations to the SERVICE graph pattern before
         * visiting its children.
         */
        @Override
        public Object visit(ASTServiceGraphPattern node, Object data)
            throws VisitorException
        {
            node.setPrefixDeclarations(prefixMap);
            return super.visit(node, data);
        }

        /**
         * Provide silent declaration for some well known namespaces. If the
         * prefix is recognized, it is added to the prefix map and its
         * namespace is returned.
         *
         * @param prefix
         *        The prefix to look up.
         * @return The namespace for the prefix, or <code>null</code> if the
         *         prefix is not one of the well known ones.
         */
        @SuppressWarnings("unused")
        private String checkForWellKnownNamespacePrefix(final String prefix) {
            final String namespace;
            if (prefix.equals("bd")) {
                prefixMap.put("bd", namespace = BD.NAMESPACE);
            } else if (prefix.equals("bds")) {
                prefixMap.put("bds", namespace = BDS.NAMESPACE);
            } else if (prefix.equals("hint")) {
                prefixMap.put("hint", namespace = QueryHints.NAMESPACE);
            } else if (prefix.equals("rdf")) {
                prefixMap.put("rdf", namespace = RDF.NAMESPACE);
            } else if (prefix.equals("rdfs")) {
                prefixMap.put("rdfs", namespace = RDFS.NAMESPACE);
            } else if (prefix.equals("xsd")) {
                prefixMap.put("xsd", namespace = XSD.NAMESPACE);
            } else if (prefix.equals("foaf")) {
                prefixMap.put("foaf", namespace = FOAFVocabularyDecl.NAMESPACE);
            } else if (prefix.equals("fn")) { // XPath Functions.
                prefixMap.put("fn", namespace = FN.NAMESPACE);
            } else if (prefix.equals("owl")) {
                prefixMap.put("owl", namespace = OWL.NAMESPACE);
            } else if (prefix.equals("sesame")) {
                prefixMap.put("sesame", namespace = SESAME.NAMESPACE);
            } else if (prefix.equals("gas")) {
                prefixMap.put("gas", namespace = GASService.Options.NAMESPACE);
            } else {
                // Unknown
                namespace = null;
            }
            return namespace;
        }
    }

    /**
     * Static helper method to process the
     * {@link Options#ADDITIONAL_DECLS_FILE} property if present and add the
     * declarations it contains to {@link #defaultDecls}. A missing or
     * unreadable file, I/O errors and malformed lines are logged as warnings
     * and otherwise ignored. Blank lines are skipped.
     *
     * @see <a href="https://jira.blazegraph.com/browse/BLZG-1773">BLZG-1773</a>
     *
     * @author beebs
     */
    public static void processAdditionalDecls() {

        final String declsFile = System.getProperty(Options.ADDITIONAL_DECLS_FILE);

        if (declsFile == null || declsFile.equals("")) {
            return;
        }

        final File f = new File(declsFile);

        if (!f.exists()) {
            log.warn(declsFile + " passed by -D" + Options.ADDITIONAL_DECLS_FILE
                    + " does not exist. Ignoring. Additional Decls will not be set.");
            return;
        }

        if (!f.canRead()) {
            log.warn(declsFile + " passed by -D" + Options.ADDITIONAL_DECLS_FILE
                    + " is not readable. Ignoring. Additional Decls will not be set.");
            return;
        }

        BufferedReader r = null;
        try {
            r = new BufferedReader(new FileReader(f));
            // readLine() strips "\n", "\r" and "\r\n" terminators, so files
            // with Windows line endings are handled as well.
            String line;
            while ((line = r.readLine()) != null) {
                addDeclFromLine(declsFile, line);
            }
        } catch (IOException e) {
            log.warn("Error while processing " + declsFile + " passed by -D"
                    + Options.ADDITIONAL_DECLS_FILE
                    + ". Ignoring. Remaining Additional Decls will not be set.", e);
        } finally {
            // Always release the file handle, even if reading failed.
            if (r != null) {
                try {
                    r.close();
                } catch (IOException e) {
                    log.warn("Could not close " + declsFile, e);
                }
            }
        }
    }

    /**
     * Parses one line of the additional declarations file and, if it is well
     * formed, registers it in {@link #defaultDecls}. Malformed lines are
     * logged and skipped; blank lines are skipped silently.
     *
     * @param declsFile
     *        The name of the file being processed (used in log messages only).
     * @param line
     *        The line, without its line terminator.
     */
    private static void addDeclFromLine(final String declsFile, final String line) {

        final String s = line.trim();

        if (s.length() == 0) {
            // A blank line must not terminate processing of the file.
            return;
        }

        //   0      1      2
        // PREFIX wdref: <http://www.wikidata.org/reference/>
        final String[] decls = s.split("\\s+");

        if (decls.length != 3) {
            log.warn(declsFile + " line: " + s + " is not valid. Ignoring.");
            log.warn("Expecting lines formatted as: \"PREFIX wdref: <http://www.wikidata.org/reference/>\"");
            return;
        }

        if (!decls[1].endsWith(":")) {
            log.warn(declsFile + " line: " + s + " is not valid. Ignoring.");
            log.warn(decls[1] + " does not end with :");
            return;
        }

        // Strip the trailing ':'
        final String prefix = decls[1].substring(0, decls[1].length() - 1);

        // Both delimiters are required; a token missing either one is invalid.
        if (!decls[2].startsWith("<") || !decls[2].endsWith(">")) {
            log.warn(declsFile + " line: " + s + " is not valid. Ignoring.");
            log.warn(decls[2] + " does not start and end with < and >, respectively.");
            return;
        }

        // Strip the enclosing '<' and '>'
        final String uri = decls[2].substring(1, decls[2].length() - 1);

        log.warn("Configured prefix: PREFIX " + prefix + ": " + uri);

        defaultDecls.put(prefix, uri);
    }
}