/*******************************************************************************
* Copyright (c) 2004, 2007 IBM Corporation and Cambridge Semantics Incorporated.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* File: $Source: /cvsroot/slrp/glitter/com.ibm.adtech.glitter/src/com/ibm/adtech/glitter/syntax/concrete/SPARQLParserBase.java,v $
* Created by: Lee Feigenbaum (<a href="mailto:feigenbl@us.ibm.com">feigenbl@us.ibm.com</a>)
* Created on: 10/23/06
* Revision: $Id: SPARQLParserBase.java 164 2007-07-31 14:11:09Z mroy $
*
* Contributors: IBM Corporation - initial API and implementation
* Cambridge Semantics Incorporated - Fork to Anzo
*******************************************************************************/
package org.openanzo.glitter.syntax.concrete;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import org.openanzo.exceptions.ExceptionConstants;
import org.openanzo.glitter.exception.GlitterRuntimeException;
import org.openanzo.glitter.exception.StringLiteralException;
import org.openanzo.glitter.query.QueryController;
import org.openanzo.glitter.query.SubqueryController;
import org.openanzo.glitter.syntax.abstrakt.TriplePatternNode;
import org.openanzo.rdf.BlankNode;
import org.openanzo.rdf.BlankNodeManager;
import org.openanzo.rdf.MemURI;
import org.openanzo.rdf.MemVariable;
import org.openanzo.rdf.TriplePatternComponent;
import org.openanzo.rdf.Variable;
import org.openanzo.rdf.vocabulary.RDF;
/**
* The base class for SPARQL parsers generated from a JavaCC grammar.
*
* @author lee <lee@cambridgesemantics.com>
*
*/
public abstract class SPARQLParserBase {
    /** Message used when the four-digit Unicode escape is truncated or contains a non-hex character. */
    private static final String SHORT_UNICODE_ERROR = "invalid Unicode escape sequence: \\u must be followed by four hex characters";
    /** Message used when the eight-digit Unicode escape is truncated or contains a non-hex character. */
    private static final String LONG_UNICODE_ERROR = "invalid Unicode escape sequence: \\U must be followed by eight hex characters";

    /** Source of blank nodes; also receives the label scope enter/exit notifications. */
    private final BlankNodeManager blankNodeManager;
    /** Controllers for the query being parsed; the top of the stack is the innermost (sub)query. */
    private final Stack<QueryController> queryControllers;
    /** Cache guaranteeing one {@link Variable} instance per variable name. */
    private final Map<String, Variable> variables;

    /**
     * Default constructor. Creates the blank node manager, an empty variable cache, and a controller stack that
     * initially holds a single {@link QueryController}.
     */
    public SPARQLParserBase() {
        this.blankNodeManager = new BlankNodeManager(true);
        this.queryControllers = new Stack<QueryController>();
        this.queryControllers.push(new QueryController());
        this.variables = new HashMap<String, Variable>();
    }

    /**
     * Begins a new blank node label scope by delegating to {@link BlankNodeManager#enterLabelScope()}. Should be
     * paired with {@link #exitGroupScope()}.
     */
    public void enterGroupScope() {
        this.blankNodeManager.enterLabelScope();
    }

    /**
     * Ends a blank node label scope by delegating to {@link BlankNodeManager#exitLabelScope()}. Should be paired
     * with {@link #enterGroupScope()}.
     */
    public void exitGroupScope() {
        this.blankNodeManager.exitLabelScope();
    }

    /**
     *
     * @return The {@link BlankNodeManager} used by this parser.
     */
    public BlankNodeManager getBlankNodeManager() {
        return this.blankNodeManager;
    }

    /**
     *
     * @return The {@link QueryController} for the query currently being parsed (the top of the controller stack)
     */
    public QueryController getQueryController() {
        return this.queryControllers.peek();
    }

    /**
     * Begins a new subquery, meaning that subsequent calls to getQueryController will return a new controller for
     * the subquery.
     *
     * @return The controller for the new subquery
     */
    public SubqueryController enterSubquery() {
        SubqueryController subquery = new SubqueryController(getQueryController());
        this.queryControllers.push(subquery);
        return subquery;
    }

    /**
     * Ends the current subquery, meaning that subsequent calls to getQueryController will return the parent
     * controller of the completed subquery. Should be paired with {@link #enterSubquery()}.
     */
    public void exitSubquery() {
        this.queryControllers.pop();
    }

    /**
     * This accessor ensures that all variables with the same name will share the same {@link Variable} reference.
     *
     * @param name
     *            name of variable to get
     * @return {@link Variable} for the given name
     */
    public Variable getVariable(String name) {
        Variable v = this.variables.get(name);
        if (v == null) {
            v = MemVariable.createVariable(name);
            this.variables.put(name, v);
        }
        return v;
    }

    /**
     * Converts the lexical form of a string literal into its value.
     * <p>
     * (1) Strips the quotes (one, or three for long literals) from the beginning and end of the string. (2)
     * Performs the following substitutions: (a) \\uHHHH -> UTF-16 code unit with hex value HHHH (b) \UHHHHHHHH ->
     * Unicode code point with hex value HHHHHHHH, emitted as a surrogate pair when it lies above U+FFFF (c) \t ->
     * U+0009 (tab) (d) \n -> U+000A (line feed) (e) \r -> U+000D (carriage return) (f) \b -> U+0008 (backspace)
     * (g) \f -> U+000C (form feed) (h) \" -> U+0022 (quotation mark) (i) \' -> U+0027 (apostrophe) (j) \\ ->
     * U+005C (backslash)
     *
     * @param s
     *            The lexical representation of the string from within a query, including its enclosing quotes.
     * @return The value of the lexical representation.
     * @throws StringLiteralException
     *             if the literal is shorter than its enclosing quotes, ends in a lone backslash, contains an
     *             unrecognized escape, or contains a malformed or out-of-range Unicode escape
     */
    public String evaluateStringLiteral(String s) throws StringLiteralException {
        // (1) strip the enclosing quotes
        int quoteLength = (s.startsWith("'''") || s.startsWith("\"\"\"")) ? 3 : 1;
        if (s.length() < 2 * quoteLength) {
            throw new StringLiteralException("string literal is missing its enclosing quotes");
        }
        String body = s.substring(quoteLength, s.length() - quoteLength);

        // (2) copy the body, replacing each escape sequence as it is found
        int sourceLength = body.length();
        StringBuilder buf = new StringBuilder(sourceLength);
        int startIndex = 0;
        while (startIndex < sourceLength) {
            int nextEscape = body.indexOf('\\', startIndex);
            if (nextEscape == -1) {
                // no more escapes: the remainder is copied verbatim
                buf.append(body, startIndex, sourceLength);
                break;
            }
            // a backslash must be followed by at least one more character
            if (nextEscape + 1 >= sourceLength) {
                throw new StringLiteralException("invalid escape character at end of string literal");
            }
            // copy the plain text that precedes the escape
            buf.append(body, startIndex, nextEscape);
            startIndex = nextEscape + 2; // first character after the two-character escape introducer
            switch (body.charAt(nextEscape + 1)) {
            case 'u':
                // four hex digits always fit in a single UTF-16 code unit
                buf.append((char) parseHexEscape(body, startIndex, 4, SHORT_UNICODE_ERROR));
                startIndex += 4;
                break;
            case 'U':
                int codePoint = parseHexEscape(body, startIndex, 8, LONG_UNICODE_ERROR);
                if (!Character.isValidCodePoint(codePoint)) {
                    throw new StringLiteralException("invalid Unicode escape sequence: code point is outside the Unicode range");
                }
                // appendCodePoint emits a surrogate pair for supplementary code points instead of truncating
                buf.appendCodePoint(codePoint);
                startIndex += 8;
                break;
            case 't':
                buf.append('\t');
                break;
            case 'n':
                buf.append('\n');
                break;
            case 'r':
                buf.append('\r');
                break;
            case 'b':
                buf.append('\b');
                break;
            case 'f':
                buf.append('\f');
                break;
            case '"':
                buf.append('"');
                break;
            case '\'':
                buf.append('\'');
                break;
            case '\\':
                buf.append('\\');
                break;
            default:
                throw new StringLiteralException("Unrecognized escape sequence");
            }
        }
        return buf.toString();
    }

    /**
     * Reads exactly {@code digits} ASCII hex digits from {@code text} starting at {@code start}.
     *
     * @param text
     *            string containing the digits
     * @param start
     *            index of the first digit
     * @param digits
     *            number of digits to read (at most 8, so the value fits in 32 bits)
     * @param errorMessage
     *            message for the exception raised when digits are missing or not hexadecimal
     * @return the parsed value; with 8 digits the result is negative when the top bit is set
     * @throws StringLiteralException
     *             if fewer than {@code digits} characters remain or any of them is not an ASCII hex digit
     */
    private static int parseHexEscape(String text, int start, int digits, String errorMessage) throws StringLiteralException {
        if (start + digits > text.length()) {
            throw new StringLiteralException(errorMessage);
        }
        int value = 0;
        for (int i = start; i < start + digits; i++) {
            char c = text.charAt(i);
            boolean asciiHex = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
            if (!asciiHex) {
                // rejects signs and non-ASCII digits that Integer.parseInt would otherwise accept
                throw new StringLiteralException(errorMessage);
            }
            value = (value << 4) | Character.digit(c, 16);
        }
        return value;
    }

    // helpers

    /**
     * Parses a string of hexadecimal digits via {@link Integer#parseInt(String, int)}.
     * NOTE(review): no longer called from within this class; kept because it is package-visible and may have
     * other callers.
     *
     * @param s
     *            hexadecimal digits
     * @return the parsed value
     * @throws NumberFormatException
     *             if {@code s} is not a parsable hexadecimal integer
     */
    static int string2hex(String s) {
        return Integer.parseInt(s, 16);
    }

    /**
     * Builds an RDF collection from the given nodes, discarding the generated triple patterns.
     *
     * @param nodes
     *            members of the collection, in order
     * @return the blank node heading the collection, or {@code RDF.nil} when {@code nodes} is empty
     * @throws ParseException
     *             declared for the benefit of callers; not raised by this implementation
     */
    protected TriplePatternComponent nodes2collection(ArrayList<TriplePatternComponent> nodes) throws ParseException {
        ArrayList<TriplePatternNode> discarded = new ArrayList<TriplePatternNode>();
        return nodes2collection(nodes, discarded);
    }

    /**
     * Builds an RDF collection from the given nodes. For every member a fresh blank node is obtained from
     * {@link #getBlankNodeManager()} and linked to the member with {@code rdf:first}; consecutive blank nodes are
     * chained with {@code rdf:rest}, and the final one points to {@code rdf:nil}.
     *
     * @param nodes
     *            members of the collection, in order
     * @param triples
     *            list to which the generated triple patterns are appended
     * @return the blank node heading the collection, or {@code RDF.nil} when {@code nodes} is empty (in which
     *         case nothing is added to {@code triples})
     * @throws ParseException
     *             declared for the benefit of callers; not raised by this implementation
     */
    protected TriplePatternComponent nodes2collection(ArrayList<TriplePatternComponent> nodes, ArrayList<TriplePatternNode> triples) throws ParseException {
        if (nodes.isEmpty()) {
            return RDF.nil;
        }
        BlankNodeManager bnm = getBlankNodeManager();
        BlankNode root = null;
        BlankNode last = null;
        for (TriplePatternComponent currentValue : nodes) {
            BlankNode current = bnm.getBlankNode();
            if (root == null) {
                root = current;
            }
            // add an rdf:first link to the current value
            triples.add(new TriplePatternNode(current, RDF.first, currentValue));
            // chain the previous list cell to this one
            if (last != null) {
                triples.add(new TriplePatternNode(last, RDF.rest, current));
            }
            last = current;
        }
        // add the nil cap at the end of the list
        triples.add(new TriplePatternNode(last, RDF.rest, RDF.nil));
        return root;
    }

    /**
     * Returns the arithmetic negation of a number, preserving its runtime type. Fixed-width integer types wrap on
     * overflow (negating the minimum value yields the minimum value).
     *
     * @param n
     *            number to negate
     * @return a number of the same class as {@code n} holding the negated value
     * @throws GlitterRuntimeException
     *             if {@code n} is not one of the supported {@link Number} types
     */
    protected static Number negate(Number n) {
        if (n instanceof BigDecimal) {
            return ((BigDecimal) n).negate();
        } else if (n instanceof BigInteger) {
            return ((BigInteger) n).negate();
        } else if (n instanceof Byte) {
            return Byte.valueOf((byte) (n.byteValue() * -1));
        } else if (n instanceof Double) {
            return Double.valueOf(n.doubleValue() * -1);
        } else if (n instanceof Float) {
            return Float.valueOf(n.floatValue() * -1);
        } else if (n instanceof Integer) {
            return Integer.valueOf(n.intValue() * -1);
        } else if (n instanceof Long) {
            return Long.valueOf(n.longValue() * -1);
        } else if (n instanceof Short) {
            return Short.valueOf((short) (n.shortValue() * -1));
        }
        throw new GlitterRuntimeException(ExceptionConstants.GLITTER.NO_NEGATE, n.toString());
    }

    /**
     * Converts a token to a {@link java.net.URI} after dropping the first and last characters of its image
     * (presumably the enclosing angle brackets; confirm against the grammar).
     *
     * @param t
     *            token whose image holds the delimited URI
     * @return the URI between the delimiters
     * @throws ParseException
     *             declared for the benefit of callers; not raised by this implementation
     */
    protected static java.net.URI token2uri(Token t) throws ParseException {
        String image = t.image;
        return java.net.URI.create(image.substring(1, image.length() - 1));
    }

    /**
     * Creates an Anzo URI from its string form via {@link MemURI#create(String)}.
     *
     * @param s
     *            string form of the URI
     * @return the corresponding {@link org.openanzo.rdf.URI}
     * @throws ParseException
     *             declared for the benefit of callers; not raised by this implementation
     */
    protected static org.openanzo.rdf.URI createUri(String s) throws ParseException {
        return MemURI.create(s);
    }
}