package org.rascalmpl.library.experiments.Compiler.RVM.Interpreter;
import java.io.IOException;
import java.io.Reader;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.rascalmpl.interpreter.IEvaluatorContext; // TODO: remove import: YES
import org.rascalmpl.interpreter.asserts.Ambiguous;
import org.rascalmpl.interpreter.types.NonTerminalType; // remove import: NO
import org.rascalmpl.interpreter.types.ReifiedType; // remove import: NO
import org.rascalmpl.library.lang.rascal.syntax.RascalParser;
import org.rascalmpl.library.util.PathConfig;
import org.rascalmpl.parser.gtd.IGTD;
import org.rascalmpl.parser.gtd.exception.ParseError;
import org.rascalmpl.parser.gtd.exception.UndeclaredNonTerminalException;
import org.rascalmpl.parser.gtd.io.InputConverter;
import org.rascalmpl.parser.gtd.recovery.IRecoverer;
import org.rascalmpl.parser.gtd.result.action.IActionExecutor;
import org.rascalmpl.parser.gtd.result.out.DefaultNodeFlattener;
import org.rascalmpl.parser.uptr.UPTRNodeFactory;
import org.rascalmpl.uri.URIResolverRegistry;
import org.rascalmpl.uri.URIUtil;
import org.rascalmpl.value.IConstructor;
import org.rascalmpl.value.IInteger;
import org.rascalmpl.value.IList;
import org.rascalmpl.value.IListWriter;
import org.rascalmpl.value.IMap;
import org.rascalmpl.value.IMapWriter;
import org.rascalmpl.value.INode;
import org.rascalmpl.value.ISet;
import org.rascalmpl.value.ISetWriter;
import org.rascalmpl.value.ISourceLocation;
import org.rascalmpl.value.IString;
import org.rascalmpl.value.IValue;
import org.rascalmpl.value.IValueFactory;
import org.rascalmpl.value.type.Type;
import org.rascalmpl.values.uptr.ITree;
import org.rascalmpl.values.uptr.ProductionAdapter;
import org.rascalmpl.values.uptr.RascalValueFactory;
import org.rascalmpl.values.uptr.SymbolAdapter;
import org.rascalmpl.values.uptr.TreeAdapter;
import org.rascalmpl.values.uptr.visitors.IdentityTreeVisitor;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
public class ParsingTools {
// Value factory used to construct the maps, strings, lists, sets and source locations built by this class.
private final IValueFactory vf;
// TODO: this cache can move to RascalExecutionContext once we are fully bootstrapped and independent of the interpreter.
// Reason: parseFragment called from the interpreter creates a new RascalExecutionContext (REX) and thereby destroys caching.
// Cache of generated parser classes; getParser looks entries up using the syntax definition as key.
private Cache<IValue, Class<IGTD<IConstructor, ITree, ISourceLocation>>> parserCache;
// Maximum number of entries the constructor configures for parserCache when caching is enabled.
private final int parserCacheSize = 30;
// When false, the constructor configures parserCache with a maximum size of 0 (sic: "paser" is a typo for "parser").
private final boolean paserCacheEnabled = true;
public ParsingTools(IValueFactory vf){
super();
this.vf = vf;
parserCache = Caffeine.newBuilder()
// .weakKeys()
.weakValues()
// .recordStats()
.maximumSize(paserCacheEnabled ? parserCacheSize : 0)
.build();
}
/**
 * Obtains a parser by unwrapping the module name and delegating to
 * {@link #getParser(String, IValue, ISourceLocation, IMap, RascalExecutionContext)}.
 *
 * @param moduleName name of the module, as a Rascal string
 * @param start      start symbol
 * @param loc        location handed on to the parser lookup
 * @param syntax     syntax definition handed on to the parser lookup
 * @param rex        RascalExecutionContext
 * @return a parser instance
 * @throws IOException when the parser lookup fails with an I/O error
 */
private IGTD<IConstructor, ITree, ISourceLocation> getObjectParser(IString moduleName, IValue start, ISourceLocation loc, IMap syntax, RascalExecutionContext rex) throws IOException{
    final String plainModuleName = moduleName.getValue();
    return getParser(plainModuleName, start, loc, syntax, rex);
}
/**
 * Tells whether this instance is running as bootstrapper.
 *
 * @return always {@code false} in this implementation
 */
private boolean isBootstrapper() {
    final boolean bootstrapping = false;
    return bootstrapping;
}
/**
 * Parses text given as a string. No origin is supplied for the text, so the
 * parse is performed with {@code URIUtil.invalidLocation()} as its location
 * and with an empty error recovery map.
 * @param moduleName		Name of module in which grammar is defined
 * @param start				Start symbol
 * @param input				Text to be parsed, as string
 * @param allowAmbiguity	Passed on unchanged to the actual parse
 * @param currentFrame		Frame that calls parse function
 * @param rex				RascalExecutionContext
 * @return 					ParseTree or Exception
 */
public IValue parse(IString moduleName, IValue start, IString input, boolean allowAmbiguity, Frame currentFrame, RascalExecutionContext rex) {
    final IMap noRecovery = vf.mapWriter().done();
    final ISourceLocation unknownOrigin = URIUtil.invalidLocation();
    final char[] text = input.getValue().toCharArray();
    return parse(moduleName, start, noRecovery, unknownOrigin, text, allowAmbiguity, currentFrame, rex);
}
/**
 * Parses text given as a string that originates from a known location.
 * The parse is performed with an empty error recovery map.
 * @param moduleName		Name of module in which grammar is defined
 * @param start				Start symbol
 * @param input				Text to be parsed, as string
 * @param location			Location of that text
 * @param allowAmbiguity	Passed on unchanged to the actual parse
 * @param currentFrame		Frame that calls parse function
 * @param rex				RascalExecutionContext
 * @return 					ParseTree or Exception
 */
public IValue parse(IString moduleName, IValue start, IString input, ISourceLocation location, boolean allowAmbiguity, Frame currentFrame, RascalExecutionContext rex) {
    final IMap noRecovery = vf.mapWriter().done();
    final char[] text = input.getValue().toCharArray();
    return parse(moduleName, start, noRecovery, location, text, allowAmbiguity, currentFrame, rex);
}
/**
 * Parses the text found at a location. The content of the location is read
 * first; a failure to read it is reported as a Rascal io exception carrying
 * the message of the underlying IOException.
 * @param moduleName		Name of module in which grammar is defined
 * @param start				Start symbol
 * @param location			Location of the text to be parsed
 * @param allowAmbiguity	Passed on unchanged to the actual parse
 * @param currentFrame		Frame that calls parse function
 * @param rex				RascalExecutionContext
 * @return 					ParseTree or Exception
 */
public IValue parse(IString moduleName, IValue start, ISourceLocation location, boolean allowAmbiguity, Frame currentFrame, RascalExecutionContext rex) {
    final char[] input;
    try {
        input = getResourceContent(location);
    }
    catch (IOException ioex) {
        throw RascalRuntimeException.io(vf.string(ioex.getMessage()), currentFrame);
    }
    return parse(moduleName, start, vf.mapWriter().done(), location, input, allowAmbiguity, currentFrame, rex);
}
/**
 * The actual parse work horse. Validates the start symbol, extracts the syntax
 * definition from the reified start value and runs the parser. Failures are
 * translated as follows: a ParseError becomes a Rascal parseError exception, an
 * Ambiguous becomes a Rascal ambiguity exception, and an
 * UndeclaredNonTerminalException or any other Exception becomes a CompilerError.
 * @param moduleName		Name of module in which grammar is defined
 * @param start				Start symbol (a reified non-terminal type)
 * @param robust			Error recovery map
 * @param location			Location where input text comes from
 * @param input				Input text as char array
 * @param allowAmbiguity	Passed on unchanged to parseObject
 * @param currentFrame		Stacktrace of calling context
 * @param rex				RascalExecutionContext
 * @return					The parse tree
 */
public IValue parse(IString moduleName, IValue start, IMap robust, ISourceLocation location, char[] input, boolean allowAmbiguity, Frame currentFrame, RascalExecutionContext rex) {
    IConstructor startSymbol = checkPreconditions(start, start.getType(), currentFrame);
    IMap grammar = (IMap) ((IConstructor) start).get(1);

    try {
        return parseObject(moduleName, startSymbol, robust, location, input, grammar, allowAmbiguity, rex);
    }
    catch (ParseError parseError) {
        ISourceLocation source = vf.sourceLocation(parseError.getLocation());
        int offset = parseError.getOffset();
        int length = parseError.getLength();
        // The line numbers of the ParseError are shifted by one for the reported location.
        int beginLine = parseError.getBeginLine() + 1;
        int endLine = parseError.getEndLine() + 1;
        int beginColumn = parseError.getBeginColumn();
        int endColumn = parseError.getEndColumn();
        throw RascalRuntimeException.parseError(
            vf.sourceLocation(source, offset, length, beginLine, endLine, beginColumn, endColumn),
            currentFrame);
    }
    catch (Ambiguous ambiguous) {
        ITree ambiguousTree = ambiguous.getTree();
        throw RascalRuntimeException.ambiguity(
            ambiguous.getLocation(),
            vf.string(SymbolAdapter.toString(TreeAdapter.getType(ambiguousTree), false)),
            vf.string(TreeAdapter.yield(ambiguousTree)),
            currentFrame);
    }
    catch (UndeclaredNonTerminalException undeclared){
        throw new CompilerError("Undeclared non-terminal: " + undeclared.getName() + ", " + undeclared.getClassName(), currentFrame);
    }
    catch (Exception unexpected) {
        throw new CompilerError("Unexpected exception:" + unexpected, currentFrame);
    }
}
/**
 * Turns a parse tree back into text.
 *
 * @param tree the parse tree
 * @return the yield of the tree as a Rascal string
 */
public IString unparse(IConstructor tree) {
    final String text = TreeAdapter.yield(tree);
    return vf.string(text);
}
/**
 * Checks that the start symbol is valid: its type must be a reified type whose
 * first type parameter is a non-terminal type. An invalidArgument exception is
 * thrown otherwise.
 * @param start			Start symbol, as IValue
 * @param reified		Type of the start value, expected to be a reified non-terminal type
 * @param currentFrame	Frame reported in the exception when the check fails
 * @return 				Start symbol represented as Symbol
 */
private static IConstructor checkPreconditions(IValue start, Type reified, Frame currentFrame) {
    if (reified instanceof ReifiedType) {
        Type nt = reified.getTypeParameters().getFieldType(0);
        if (nt instanceof NonTerminalType) {
            return ((NonTerminalType) nt).getSymbol();
        }
        throw RascalRuntimeException.invalidArgument(start, currentFrame, "A non-terminal type is required instead of  " + nt);
    }
    throw RascalRuntimeException.invalidArgument(start, currentFrame, "A reified type is required instead of " + reified);
}
/**
 * The actual parse object that is connected to a generated parser.
 * <p>
 * A parser is first obtained via getObjectParser. The name of the parse method is
 * derived from the start symbol ("start__" prefix for start sorts, followed by the
 * name of a sort, lexical or layout symbol). Each class loader of the execution
 * context is then asked for a class with that same name; the first one that can be
 * loaded and instantiated replaces the parser obtained earlier.
 * @param moduleName		Name of module in which grammar is defined
 * @param startSort			Start symbol
 * @param robust			Error recovery map
 * @param location			Location where input text comes from
 * @param input				Actual input text as char array
 * @param syntax			Syntax as map[Symbol,Production]
 * @param allowAmbiguity	Passed on to the UPTRNodeFactory that builds the tree
 * @param rex				RascalExecutionContext
 * @return 					ParseTree or Exception
 * @throws IOException		when obtaining the parser fails with an I/O error
 */
@SuppressWarnings("unchecked")
public ITree parseObject(IString moduleName, IConstructor startSort, IMap robust, ISourceLocation location, char[] input, IMap syntax, boolean allowAmbiguity, RascalExecutionContext rex) throws IOException{
    IGTD<IConstructor, ITree, ISourceLocation> parser = getObjectParser(moduleName, startSort, location, syntax, rex);

    String name = "";
    if (SymbolAdapter.isStartSort(startSort)) {
        name = "start__";
        startSort = SymbolAdapter.getStart(startSort);
    }
    if (SymbolAdapter.isSort(startSort) || SymbolAdapter.isLex(startSort) || SymbolAdapter.isLayouts(startSort)) {
        name += SymbolAdapter.getName(startSort);
    }

    // NOTE(review): the recovery tables are filled but not consumed below, because a null
    // recoverer is passed to the parser. Kept so that the robust map is still traversed as before.
    int[][] lookaheads = new int[robust.size()][];
    IConstructor[] robustProds = new IConstructor[robust.size()];
    initializeRecovery(robust, lookaheads, robustProds);

    IActionExecutor<ITree> exec = new RascalFunctionActionExecutor(rex);

    String className = name;
    for (ClassLoader cl : rex.getClassLoaders()) {
        try {
            Class<?> clazz = cl.loadClass(className);
            parser = (IGTD<IConstructor, ITree, ISourceLocation>) clazz.newInstance();
            break;
        } catch (ClassNotFoundException | LinkageError e) {
            // This loader cannot provide the class; try the next one.
            continue;
        } catch (InstantiationException e) {
            throw new CompilerError("could not instantiate " + className + " to valid IGTD parser: " + e);
        } catch (IllegalAccessException e) {
            throw new CompilerError("not allowed to instantiate " + className + " to valid IGTD parser: " + e);
        }
    }

    return (ITree) parser.parse(name, location.getURI(), input, exec, new DefaultNodeFlattener<IConstructor, ITree, ISourceLocation>(), new UPTRNodeFactory(allowAmbiguity), (IRecoverer<IConstructor>) null);
}
/**
 * This converts a map from productions to character classes to
 * two pair-wise arrays, with char-classes unfolded as lists of ints.
 * <p>
 * For the i-th production of the map, {@code robustProds[i]} receives the production
 * and {@code lookaheads[i]} receives every character of every range (from "begin" up
 * to and including "end") of the associated character class, in order.
 *
 * @param robust      map from productions to lists of character ranges
 * @param lookaheads  output array, one int array per production; must have room for robust.size() entries
 * @param robustProds output array of productions; must have room for robust.size() entries
 */
private void initializeRecovery(IMap robust, int[][] lookaheads, IConstructor[] robustProds) {
    int i = 0;

    for (IValue prod : robust) {
        robustProds[i] = (IConstructor) prod;

        List<Integer> chars = new LinkedList<>();
        IList ranges = (IList) robust.get(prod);

        for (IValue range : ranges) {
            IConstructor charRange = (IConstructor) range;
            int from = ((IInteger) charRange.get("begin")).intValue();
            int to = ((IInteger) charRange.get("end")).intValue();

            for (int j = from; j <= to; j++) {
                chars.add(j);
            }
        }

        // Copy by iterating instead of by index: indexed access into a linked list is
        // linear per call, which made the original copy loop quadratic.
        int[] unfolded = new int[chars.size()];
        int k = 0;
        for (int c : chars) {
            unfolded[k++] = c;
        }
        lookaheads[i] = unfolded;

        i++;
    }
}
// Lazily created parser generator, shared by all calls on this instance.
private ParserGenerator parserGenerator;

/**
 * Returns the parser generator, creating it on first use.
 *
 * @param rex RascalExecutionContext used to create the generator when it does not exist yet
 * @return the parser generator of this instance
 * @throws IOException declared for the creation of the parser generator
 */
public ParserGenerator getParserGenerator(RascalExecutionContext rex) throws IOException {
    if (parserGenerator != null) {
        return parserGenerator;
    }
    if (isBootstrapper()) {
        throw new CompilerError("Cyclic bootstrapping is occurring, probably because a module in the bootstrap dependencies is using the concrete syntax feature.");
    }
    parserGenerator = new ParserGenerator(rex);
    return parserGenerator;
}
/**
 * Reads the complete content of a location as characters. The reader is
 * closed again before this method returns.
 *
 * @param location the location to read
 * @return the characters read from the location
 * @throws IOException when the location cannot be opened or read
 */
private char[] getResourceContent(ISourceLocation location) throws IOException{
    final URIResolverRegistry registry = URIResolverRegistry.getInstance();
    try (Reader in = registry.getCharacterReader(location)) {
        final char[] content = InputConverter.toChar(in);
        return content;
    }
}
/**
 * Returns a fresh parser instance for a module.
 * <p>
 * When the execution context marks the module as one that uses the bootstrap parser,
 * a new RascalParser is returned. Otherwise the parser class is looked up in the
 * parser cache with the syntax definition as key (and generated by the parser
 * generator when absent), and a new instance of that class is returned.
 *
 * @param name   name of the module in which the grammar is defined
 * @param start  start symbol (not used by this method)
 * @param loc    location passed to the parser generator
 * @param syntax syntax definition; also the cache key
 * @param rex    RascalExecutionContext
 * @return a new parser instance
 * @throws IOException   when the parser generator cannot be obtained
 * @throws CompilerError when no parser class is available or it cannot be instantiated
 */
public IGTD<IConstructor, ITree, ISourceLocation> getParser(String name, IValue start, ISourceLocation loc, IMap syntax, RascalExecutionContext rex) throws IOException {
    if (getBootstrap(name, rex)) {
        return new RascalParser();
    }

    ParserGenerator pg = getParserGenerator(rex);
    Class<IGTD<IConstructor, ITree, ISourceLocation>> parser = parserCache.get(syntax, k -> pg.getNewParser(rex.getMonitor(), loc, name, syntax, rex));

    // The cache lookup yields null when the generator produced no class; report that
    // explicitly instead of failing with a NullPointerException below.
    if (parser == null) {
        throw new CompilerError("No parser could be generated for module " + name);
    }

    try {
        return parser.newInstance();
    } catch (InstantiationException | IllegalAccessException | ExceptionInInitializerError e) {
        // e.toString() already contains the exception message, so it is not repeated.
        throw new CompilerError("Could not instantiate parser for module " + name + ": " + e);
    }
}
/**
 * Asks the execution context whether the bootstrap parser is to be used for a module.
 *
 * @param moduleName name of the module
 * @param rex        RascalExecutionContext that answers the question
 * @return the answer of {@code rex.bootstrapParser(moduleName)}
 */
private boolean getBootstrap(String moduleName, RascalExecutionContext rex) {
    final boolean useBootstrapParser = rex.bootstrapParser(moduleName);
    return useBootstrapParser;
}
/**
 * Rascal library function (interpreter version).
 * <p>
 * Builds a fresh RascalExecutionContext from the streams of the evaluator context and
 * the given module tags, copies the Rascal Java class path property of the evaluator
 * configuration into it, and then parses the fragment via parseFragment1.
 *
 * @param name       name of the module in which the grammar is defined
 * @param moduleTags tags of that module; stored in the new context under the module name
 * @param start      start symbol
 * @param tree       tree of the concrete syntax fragment
 * @param loc        location where the input text comes from
 * @param grammar    syntax definition
 * @param ctx        evaluator context of the interpreter
 * @return the tree returned by parseFragment1
 * @throws IOException when the execution context cannot be created or parsing fails with an I/O error
 */
public ITree parseFragment(IString name, IMap moduleTags, IValue start, IConstructor tree, ISourceLocation loc, IMap grammar, IEvaluatorContext ctx) throws IOException{
    IMapWriter w = vf.mapWriter();
    w.insert(vf.tuple(name, moduleTags));

    RascalExecutionContext rex;
    try {
        rex = RascalExecutionContextBuilder.normalContext(new PathConfig(), ctx.getStdOut(), ctx.getStdErr())
            .withModuleTags(w.done())
            .build();
    }
    catch (URISyntaxException e) {
        // Without an execution context nothing can be parsed: fail here, keeping the cause,
        // instead of continuing with a null context.
        throw new IOException("Could not create an execution context to parse a fragment of module " + name.getValue(), e);
    }

    rex.getConfiguration().setRascalJavaClassPathProperty(ctx.getConfiguration().getRascalJavaClassPathProperty());
    return parseFragment1(name, start, tree, loc, grammar, rex);
}
/**
 * Rascal library function (compiler version).
 * <p>
 * Builds a new RascalExecutionContext from the path configuration and the streams of
 * the given one, registers the module tags under the module name, and parses the
 * fragment via parseFragment1 using that new context.
 * TODO moduleTags is only needed in interpreted version
 *
 * @param name       name of the module in which the grammar is defined
 * @param moduleTags tags of that module
 * @param start      start symbol
 * @param tree       tree of the concrete syntax fragment
 * @param loc        location where the input text comes from
 * @param grammar    syntax definition
 * @param rex        RascalExecutionContext of the caller
 * @return the tree returned by parseFragment1
 * @throws IOException when parsing fails with an I/O error
 */
public ITree parseFragment(IString name, IMap moduleTags, IValue start, IConstructor tree, ISourceLocation loc, IMap grammar, RascalExecutionContext rex) throws IOException{
    IMapWriter tagsByModule = vf.mapWriter();
    tagsByModule.insert(vf.tuple(name, moduleTags));

    RascalExecutionContext fragmentContext =
        RascalExecutionContextBuilder.normalContext(rex.getPathConfig(), rex.getStdOut(), rex.getStdErr())
            .withModuleTags(tagsByModule.done())
            .build();

    return parseFragment1(name, start, tree, loc, grammar, fragmentContext);
}
/**
 * This function will reconstruct a parse tree of a single nested concrete syntax fragment
 * that has been parsed and its original flat literal string is replaced by a fully structured parse tree.
 * <p>
 * A tree whose production is labelled "$parsed" is returned unchanged. Otherwise the
 * holes in the literal are replaced by placeholders, the resulting text is parsed,
 * and the placeholders in the result are replaced by the original holes again.
 * A ParseError is reported as a Rascal parseError exception positioned relative to
 * the location of the fragment.
 *
 * @param name				Name of module in which grammar is defined
 * @param start				Start symbol
 * @param tree				Parse tree to be reconstructed
 * @param uri				Location where input text comes from
 * @param grammar			Syntax as map[Symbol,Production]
 * @param rex				RascalExecutionContext
 * @return 					ParseTree or Exception
 * @throws IOException
 */
ITree parseFragment1(IString name, IValue start, IConstructor tree, ISourceLocation uri, IMap grammar, RascalExecutionContext rex) throws IOException {
    IConstructor prod = (IConstructor) tree.get("prod");
    IConstructor def = (IConstructor) prod.get("def");

    // A fragment that was parsed before carries the label "$parsed": nothing left to do.
    if (def.getName().equals("label") && ((IString) def.get("name")).getValue().equals("$parsed")) {
        return (ITree) tree;
    }

    ITree concrete = (ITree) tree;
    ITree symTree = TreeAdapter.getArg(concrete, "symbol");
    ITree lit = TreeAdapter.getArg(concrete, "parts");
    Map<String, ITree> antiquotes = new HashMap<String,ITree>();

    String moduleName = name.getValue();
    IGTD<IConstructor, ITree, ISourceLocation> parser;
    if (getBootstrap(moduleName, rex)) {
        parser = new RascalParser();
    }
    else {
        parser = getParser(moduleName, start, TreeAdapter.getLocation(concrete), grammar, rex);
    }

    try {
        String parserMethodName = getParserGenerator(rex).getParserMethodName(symTree, rex);
        DefaultNodeFlattener<IConstructor, ITree, ISourceLocation> converter = new DefaultNodeFlattener<IConstructor, ITree, ISourceLocation>();
        UPTRNodeFactory nodeFactory = new UPTRNodeFactory(false);

        char[] input = replaceAntiQuotesByHoles(lit, antiquotes, rex);
        ITree parsed = (ITree) parser.parse(parserMethodName, uri.getURI(), input, converter, nodeFactory);
        return replaceHolesByAntiQuotes(parsed, antiquotes);
    }
    catch (ParseError e) {
        ISourceLocation loc = TreeAdapter.getLocation(concrete);
        int offset = loc.getOffset() + e.getOffset();
        int beginLine = loc.getBeginLine() + e.getBeginLine() - 1;
        int endLine = loc.getEndLine() + e.getEndLine() - 1;
        int beginColumn = loc.getBeginColumn() + e.getBeginColumn();
        int endColumn = loc.getBeginColumn() + e.getEndColumn();
        ISourceLocation src = vf.sourceLocation(loc, offset, loc.getLength(), beginLine, endLine, beginColumn, endColumn);
        throw RascalRuntimeException.parseError(src, null);
    }
}
/**
 * Builds the text of a concrete syntax literal in which every hole is replaced by
 * a placeholder. Text parts contribute their yield, the escape parts ("newline",
 * "lt", "gt", "bq", "bs") contribute the character they stand for, holes contribute
 * the placeholder created by createHole, and parts of any other kind are skipped.
 *
 * @param lit        tree whose arguments are the parts of the literal
 * @param antiquotes map that receives, per placeholder, the hole it replaces
 * @param rex        RascalExecutionContext
 * @return the text with placeholders, as char array
 * @throws IOException when creating a hole fails with an I/O error
 */
private char[] replaceAntiQuotesByHoles(ITree lit, Map<String, ITree> antiquotes, RascalExecutionContext rex) throws IOException {
    StringBuilder text = new StringBuilder();

    for (IValue elem : TreeAdapter.getArgs(lit)) {
        ITree part = (ITree) elem;

        switch (TreeAdapter.getConstructorName(part)) {
            case "text":
                text.append(TreeAdapter.yield(part));
                break;
            case "newline":
                text.append('\n');
                break;
            case "lt":
                text.append('<');
                break;
            case "gt":
                text.append('>');
                break;
            case "bq":
                text.append('`');
                break;
            case "bs":
                text.append('\\');
                break;
            case "hole":
                text.append(createHole(part, antiquotes, rex));
                break;
            default:
                // Parts of any other kind do not contribute to the text.
                break;
        }
    }

    return text.toString().toCharArray();
}
/**
 * Creates the placeholder text for a hole and records which hole it stands for.
 * The number of holes recorded so far is passed to the parser generator.
 *
 * @param part       tree of the hole
 * @param antiquotes map from placeholder to hole; extended with the new placeholder
 * @param rex        RascalExecutionContext
 * @return the placeholder text
 * @throws IOException when the parser generator cannot be obtained
 */
public String createHole(ITree part, Map<String, ITree> antiquotes, RascalExecutionContext rex) throws IOException {
    final ParserGenerator generator = getParserGenerator(rex);
    final int holeIndex = antiquotes.size();
    final String placeholder = generator.createHole(part, holeIndex, rex);
    antiquotes.put(placeholder, part);
    return placeholder;
}
/**
 * Replaces every "$MetaHole" node of a parsed fragment by the hole recorded for its
 * yield, annotated with the "holeType" found in the attributes of the node's
 * production. All other application nodes are rebuilt from their visited arguments,
 * and ambiguity nodes from their visited alternatives.
 *
 * @param fragment   parsed fragment that still contains placeholders
 * @param antiquotes map from placeholder text to the hole it replaced
 * @return the fragment with the original holes put back
 */
private ITree replaceHolesByAntiQuotes(ITree fragment, final Map<String, ITree> antiquotes) {
    IdentityTreeVisitor<CompilerError> holeReplacer = new IdentityTreeVisitor<CompilerError>() {

        @Override
        public ITree visitTreeAppl(ITree tree) {
            if ("$MetaHole".equals(TreeAdapter.getConstructorName(tree))) {
                IConstructor holeType = retrieveHoleType(tree);
                return (ITree) antiquotes.get(TreeAdapter.yield(tree)).asAnnotatable().setAnnotation("holeType", holeType);
            }

            // Not a hole: rebuild the node from its visited children.
            IListWriter rebuilt = vf.listWriter();
            for (IValue child : TreeAdapter.getArgs(tree)) {
                rebuilt.append(child.accept(this));
            }
            return TreeAdapter.setArgs(tree, rebuilt.done());
        }

        // Finds the hole type in the tag attributes of the production of a hole node.
        private IConstructor retrieveHoleType(ITree tree) {
            IConstructor production = TreeAdapter.getProduction(tree);

            for (IValue attr : ProductionAdapter.getAttributes(production)) {
                IConstructor attribute = (IConstructor) attr;
                if (attribute.getConstructorType() != RascalValueFactory.Attr_Tag) {
                    continue;
                }

                IValue tagValue = attribute.get(0);
                if (tagValue.getType().isNode() && ((INode) tagValue).getName().equals("holeType")) {
                    return (IConstructor) ((INode) tagValue).get(0);
                }
            }

            throw new CompilerError("expected to find a holeType, but did not: " + tree);
        }

        @Override
        public ITree visitTreeAmb(ITree arg) {
            ISetWriter alternatives = vf.setWriter();
            for (IValue alternative : TreeAdapter.getAlternatives(arg)) {
                alternatives.insert(alternative.accept(this));
            }
            return (ITree) arg.set("alternatives", alternatives.done());
        }
    };

    return (ITree) fragment.accept(holeReplacer);
}
}