/**
* (C) Copyright IBM Corp. 2010, 2015
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.ibm.bi.dml.parser.python;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.DefaultErrorStrategy;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.ibm.bi.dml.api.DMLScript;
import com.ibm.bi.dml.parser.AParserWrapper;
import com.ibm.bi.dml.parser.DMLProgram;
import com.ibm.bi.dml.parser.ForStatement;
import com.ibm.bi.dml.parser.ForStatementBlock;
import com.ibm.bi.dml.parser.FunctionStatementBlock;
import com.ibm.bi.dml.parser.IfStatement;
import com.ibm.bi.dml.parser.IfStatementBlock;
import com.ibm.bi.dml.parser.ImportStatement;
import com.ibm.bi.dml.parser.LanguageException;
import com.ibm.bi.dml.parser.ParForStatement;
import com.ibm.bi.dml.parser.ParForStatementBlock;
import com.ibm.bi.dml.parser.ParseException;
import com.ibm.bi.dml.parser.Statement;
import com.ibm.bi.dml.parser.StatementBlock;
import com.ibm.bi.dml.parser.WhileStatement;
import com.ibm.bi.dml.parser.WhileStatementBlock;
import com.ibm.bi.dml.parser.antlr4.DMLParserWrapper;
import com.ibm.bi.dml.parser.python.PydmlParser.FunctionStatementContext;
import com.ibm.bi.dml.parser.python.PydmlParser.PmlprogramContext;
import com.ibm.bi.dml.parser.python.PydmlParser.StatementContext;
import com.ibm.bi.dml.parser.python.PydmlSyntacticErrorListener.CustomDmlErrorListener;
/**
 * ANTLR4-based front end that converts a PyDML script into a {@link DMLProgram}.
 * The logic of this wrapper is similar to DMLParserWrapper.
 *
 * Note: ExpressionInfo and StatementInfo are simply wrapper objects and are reused in both DML and PyDML parsers.
 */
public class PyDMLParserWrapper extends AParserWrapper
{
    private static final Log LOG = LogFactory.getLog(DMLScript.class.getName());

    /** Name handed to the error listener when the script has no backing file. */
    private static final String MAIN_SCRIPT_NAME = "MAIN_SCRIPT";

    /** For now no optimization (SLL first, LL as fallback), since it is not able to parse integer value. */
    private static final boolean TRY_OPTIMIZED_PARSING = false;

    /**
     * Custom wrapper to convert a statement into a statement block. Called by doParse and in
     * PydmlSyntacticValidator for for, parfor, while, ...
     *
     * @param current a statement
     * @return a new block of the type matching the statement (parfor, for, if, while; a plain
     *         StatementBlock for everything else, including ImportStatement) holding that statement
     */
    public static StatementBlock getStatementBlock(com.ibm.bi.dml.parser.Statement current) {
        final StatementBlock blk;
        // NOTE(review): ParForStatement is tested before ForStatement on purpose; presumably it
        // is a subclass of ForStatement, so swapping the order would change the result -- confirm.
        if(current instanceof ParForStatement) {
            blk = new ParForStatementBlock();
        }
        else if(current instanceof ForStatement) {
            blk = new ForStatementBlock();
        }
        else if(current instanceof IfStatement) {
            blk = new IfStatementBlock();
        }
        else if(current instanceof WhileStatement) {
            blk = new WhileStatementBlock();
        }
        else {
            // This includes ImportStatement
            blk = new StatementBlock();
        }
        blk.addStatement(current);
        return blk;
    }

    /**
     * Parses the passed script with command line parameters. The script text is mandatory here;
     * resolving a script from its file name only is handled by
     * {@link #doParse(String, String, HashMap)}.
     *
     * @param fileName either full path or null --> only used for better error handling
     * @param dmlScript required, must not be null or blank
     * @param argVals command line arguments, forwarded to the syntactic validator
     * @return the parsed program, never null
     * @throws ParseException if dmlScript is missing or if the program could not be constructed
     */
    @Override
    public DMLProgram parse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException {
        if(dmlScript == null || dmlScript.trim().isEmpty()) {
            throw new ParseException("Incorrect usage of parse. Please pass dmlScript not just filename");
        }

        // Set the pipeline required for ANTLR parsing
        PyDMLParserWrapper parser = new PyDMLParserWrapper();
        DMLProgram prog = parser.doParse(fileName, dmlScript, argVals);

        // doParse signals reported (already logged) errors with null
        if(prog == null) {
            throw new ParseException("One or more errors found during parsing. (could not construct AST for file: " + fileName + "). Cannot proceed ahead.");
        }
        return prog;
    }

    /**
     * Runs lexer, parser and syntactic validation and builds the program.
     * This function is supposed to be called directly only from PydmlSyntacticValidator when it encounters 'import'.
     *
     * @param fileName path of the script; used to read the script when dmlScript is null and for error reporting
     * @param dmlScript script text, or null to read it from fileName
     * @param argVals command line arguments, forwarded to the syntactic validator
     * @return the program, or null if at least one error was reported
     * @throws ParseException if the script cannot be read, parsed or translated
     */
    public DMLProgram doParse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException {
        ANTLRInputStream in = openScript(fileName, dmlScript);

        PmlprogramContext ast = null;
        CustomDmlErrorListener errorListener = new CustomDmlErrorListener();
        try {
            PydmlLexer lexer = new PydmlLexer(in);
            CommonTokenStream tokens = new CommonTokenStream(lexer);
            PydmlParser antlr4Parser = new PydmlParser(tokens);

            // Register the file name once, whichever strategy is used, so that the
            // popFileName() after validation always has a matching push.
            errorListener.pushCurrentFileName(fileName != null ? fileName : MAIN_SCRIPT_NAME);
            antlr4Parser.removeErrorListeners();

            if(TRY_OPTIMIZED_PARSING) {
                // Try faster and simpler SLL
                antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
                antlr4Parser.setErrorHandler(new BailErrorStrategy());
                try {
                    // If successful, no need to try out full LL(*) ... SLL was enough
                    ast = antlr4Parser.pmlprogram();
                }
                catch(ParseCancellationException ex) {
                    // Error occurred, so now try full LL(*) for better error messages
                    tokens.reset();
                    antlr4Parser.reset();
                    // Set our custom error listener
                    antlr4Parser.addErrorListener(errorListener);
                    antlr4Parser.setErrorHandler(new DefaultErrorStrategy());
                    antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.LL);
                    ast = antlr4Parser.pmlprogram();
                }
            }
            else {
                // Set our custom error listener
                antlr4Parser.addErrorListener(errorListener);
                // Now do the parsing
                ast = antlr4Parser.pmlprogram();
            }
        }
        catch(Exception e) {
            throw newParseException("ERROR: Cannot parse the program:" + fileName, e);
        }

        try {
            // Now convert the parse tree into DMLProgram
            // Do syntactic validation while converting
            ParseTree tree = ast;
            ParseTreeWalker walker = new ParseTreeWalker();
            PydmlSyntacticValidatorHelper helper = new PydmlSyntacticValidatorHelper(errorListener);
            PydmlSyntacticValidator validator = new PydmlSyntacticValidator(helper, fileName, argVals);
            walker.walk(validator, tree);
            errorListener.popFileName();
            if(errorListener.isAtleastOneError()) {
                return null;
            }
            return createDMLProgram(ast);
        }
        catch(Exception e) {
            throw newParseException("ERROR: Cannot translate the parse tree into DMLProgram:" + e.getMessage(), e);
        }
    }

    /**
     * Wraps the script text into an ANTLR character stream, reading it from fileName first when
     * no text was passed.
     *
     * @param fileName script location, only read when dmlScript is null
     * @param dmlScript script text or null
     * @return character stream over the script
     * @throws ParseException if the script cannot be found or read
     */
    private static ANTLRInputStream openScript(String fileName, String dmlScript) throws ParseException {
        try {
            if(dmlScript == null) {
                dmlScript = DMLParserWrapper.readDMLScript(fileName);
            }
            // Hand the characters over directly: going through getBytes() and an InputStream
            // would encode and decode with the platform default charset and could mangle
            // characters that charset cannot represent.
            return new ANTLRInputStream(new java.io.StringReader(dmlScript));
        } catch (FileNotFoundException e) {
            throw newParseException("ERROR: Cannot find file:" + fileName, e);
        } catch (IOException e) {
            throw newParseException("ERROR: Cannot open file:" + fileName, e);
        } catch (LanguageException e) {
            throw newParseException("ERROR: " + e.getMessage(), e);
        }
    }

    /**
     * Creates a ParseException with the given message that keeps the triggering exception as its
     * cause, so the original stack trace is not lost.
     */
    private static ParseException newParseException(String message, Throwable cause) {
        ParseException pe = new ParseException(message);
        try {
            pe.initCause(cause);
        }
        catch(IllegalStateException ignored) {
            // The cause was already fixed by the constructor; the message still describes the failure.
        }
        return pe;
    }

    /**
     * Translates the validated parse tree into a DMLProgram: first all functions (registered in
     * the default namespace), then all statements, each wrapped into its own statement block.
     *
     * @param ast validated parse tree of the program
     * @return the program, or null if a function or statement could not be processed (the reason is logged)
     */
    private DMLProgram createDMLProgram(PmlprogramContext ast) {
        DMLProgram dmlPgm = new DMLProgram();

        // First add all the functions
        for(FunctionStatementContext fn : ast.functionBlocks) {
            FunctionStatementBlock functionStmtBlk = new FunctionStatementBlock();
            functionStmtBlk.addStatement(fn.info.stmt);
            try {
                // TODO: currently the logic of nested namespace is not clear.
                String namespace = DMLProgram.DEFAULT_NAMESPACE;
                dmlPgm.addFunctionStatementBlock(namespace, fn.info.functionName, functionStmtBlk);
            } catch (LanguageException e) {
                LOG.error("line: " + fn.start.getLine() + ":" + fn.start.getCharPositionInLine() + " cannot process the function " + fn.info.functionName);
                return null;
            }
        }

        // Then add all the statements
        for(StatementContext stmtCtx : ast.blocks) {
            Statement current = stmtCtx.info.stmt;
            if(current == null) {
                LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the statement");
                return null;
            }

            // Ignore Newline logic
            if(current.isEmptyNewLineStatement()) {
                continue;
            }

            if(current instanceof ImportStatement) {
                // Handle import statements separately
                if(stmtCtx.info.namespaces == null) {
                    LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the import statement");
                    return null;
                }
                // Add the DMLProgram entries into current program
                for(Map.Entry<String, DMLProgram> entry : stmtCtx.info.namespaces.entrySet()) {
                    dmlPgm.getNamespaces().put(entry.getKey(), entry.getValue());
                }
            }

            // Now wrap statement into individual statement block
            // merge statement will take care of merging these blocks
            dmlPgm.addStatementBlock(getStatementBlock(current));
        }

        dmlPgm.mergeStatementBlocks();
        return dmlPgm;
    }
}