/**
* Copyright (c) 2005-2013 by Appcelerator, Inc. All Rights Reserved.
* Licensed under the terms of the Eclipse Public License (EPL).
* Please see the license.txt included with this distribution for details.
* Any modifications to this file must keep this entire header intact.
*/
package org.python.pydev.parser.fastparser;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.python.pydev.core.ObjectsInternPool;
import org.python.pydev.core.ObjectsInternPool.ObjectsPoolMap;
import org.python.pydev.core.docutils.ParsingUtils;
import org.python.pydev.core.docutils.PySelection;
import org.python.pydev.core.docutils.SyntaxErrorException;
import org.python.pydev.core.log.Log;
import org.python.pydev.parser.jython.SimpleNode;
import org.python.pydev.parser.jython.ast.Assign;
import org.python.pydev.parser.jython.ast.Attribute;
import org.python.pydev.parser.jython.ast.ClassDef;
import org.python.pydev.parser.jython.ast.FunctionDef;
import org.python.pydev.parser.jython.ast.Module;
import org.python.pydev.parser.jython.ast.Name;
import org.python.pydev.parser.jython.ast.NameTok;
import org.python.pydev.parser.jython.ast.exprType;
import org.python.pydev.parser.jython.ast.stmtType;
import org.python.pydev.shared_core.callbacks.ICallback;
import org.python.pydev.shared_core.string.FastStringBuffer;
import org.python.pydev.shared_core.string.StringUtils;
import org.python.pydev.shared_core.structure.FastStack;
import org.python.pydev.shared_core.structure.LowMemoryArrayList;
import org.python.pydev.shared_core.structure.Tuple;
/**
* @note: Unfinished
*
* This class should be able to gather the definitions found in a module in a very fast way.
*
* The target is having a performance around 5x faster than doing a regular parse, focusing on getting
* the name tokens for:
*
* classes, functions, class attributes, instance attributes -- basically the tokens that provide a
* definition that can be 'globally' accessed.
*
* This should work the following way:
*
* We should have a single stack where all the statements we find are added. When we find a column
* which indicates a new statement, we close any statement whose column is greater than or equal to
* the column of the new statement and, in the process, add those statements to the parent statement
* as needed (or, in some cases, discard them -- e.g.: a method inside a method is discarded, but an
* attribute inside a method is not).
*
* This means that we usually do not put the final element there, but a wrapper which has a body
* where we can add elements (i.e.: array list), which is converted to a body when its own scope ends.
*
* @author Fabio
*/
public final class FastDefinitionsParser {
/**
 * An entry of the scope stack: the statement being built plus the child statements
 * collected for it until its scope is closed.
 */
private static class NodeEntry {

    /** The statement (class, function or assign) this entry wraps. */
    public final stmtType node;

    /** Children gathered while the scope is open; turned into the node body in {@link #onEndScope()}. */
    public final List<SimpleNode> body = new LowMemoryArrayList<>();

    /** Column of the statement with each leading tab counted as 8 columns. */
    public final int logicalColumn;

    /**
     * @param stmt the statement wrapped by this entry.
     * @param leadingTabs how many of the columns in stmt.beginColumn were found as tabs.
     */
    public NodeEntry(stmtType stmt, int leadingTabs) {
        this.node = stmt;
        final int nonTabColumns = stmt.beginColumn - leadingTabs;
        this.logicalColumn = nonTabColumns + (leadingTabs * 8);
    }

    /**
     * Assigns the collected children as the body of the wrapped node (only if there is
     * at least one child). Only class and function definitions may receive a body; for
     * anything else the problem is either thrown or logged, depending on
     * throwErrorOnWarnings.
     */
    public void onEndScope() {
        if (body.size() == 0) {
            return;
        }
        final stmtType[] statements = body.toArray(new stmtType[body.size()]);
        if (this.node instanceof ClassDef) {
            ((ClassDef) this.node).body = statements;
            return;
        }
        if (this.node instanceof FunctionDef) {
            ((FunctionDef) this.node).body = statements;
            return;
        }
        final String msg = "Assign statement is not expected to have body!";
        if (throwErrorOnWarnings) {
            throw new RuntimeException(msg);
        }
        Log.log(msg);
    }
}
/**
* The chars we should iterate through (assigned in the constructor and never reassigned).
*/
final private char[] cs;
/**
* May be null (just used for reporting errors).
*/
final private File file;
/**
* The length of the buffer we're iterating (number of chars of cs that are considered).
*/
final private int length;
/**
* Current iteration index (position in cs).
*/
private int currIndex = 0;
/**
* The current column (reset to 1 each time a new line is handled).
*/
private int col;
/**
* How many leading tabs we've found in the indentation of the current line.
*/
private int leadingTabsInLine;
/**
* The current row (incremented for every line break that is handled or skipped over).
*/
private int row = 0;
/**
* The column where the 1st char was found (used as the begin column of assigns found in the line).
*/
private int firstCharCol = 1;
/**
* Holds things added to the 'global' module
*/
private final ArrayList<stmtType> body = new ArrayList<stmtType>(16);
/**
* Holds a stack of the scopes (statements) currently open. A new entry is pushed for each statement found and,
* when its scope is ended, it is popped, has its body filled and is added to its parent (or to the module body).
*/
private final FastStack<NodeEntry> stack = new FastStack<NodeEntry>(10);
/**
* Buffer with the contents of a line.
*/
private final FastStringBuffer lineBuffer = new FastStringBuffer();
/**
* Name of the module being parsed (in this class it is only used when reporting an unexpected structure).
*/
private final String moduleName;
/**
* When true, unexpected structures raise a RuntimeException instead of just being logged.
*/
public static boolean throwErrorOnWarnings = false;
/**
* Should we debug? (when true, progress messages are printed to System.out).
*/
private final static boolean DEBUG = false;
/**
 * Constructor which considers the whole array of chars.
 *
 * @param cs array of chars that should be considered.
 * @param moduleName the name of the module (used when reporting problems).
 * @param f the file being parsed (may be null; used when reporting problems).
 */
private FastDefinitionsParser(char[] cs, String moduleName, File f) {
    this(cs, cs.length, moduleName, f);
}

/**
 * Constructor
 *
 * @param cs array of chars that should be considered.
 * @param len the number of chars to be used (usually cs.length).
 * @param moduleName the name of the module (used when reporting problems).
 * @param f the file being parsed (may be null; used when reporting problems).
 */
private FastDefinitionsParser(char[] cs, int len, String moduleName, File f) {
    this.file = f;
    this.moduleName = moduleName;
    this.cs = cs;
    this.length = len;
}
/**
* This is the method that actually extracts things from the passed buffer.
*
* It walks the buffer char by char: literals, comments and bracketed expressions are skipped,
* each line break is delegated to handleNewLine (which detects class/function definitions) and
* each plain '=' is analyzed as a possible attribute assignment. Every statement found is pushed
* to the stack and, at the end, all scopes still open are closed.
*
* @throws SyntaxErrorException
*/
private void extractBody() throws SyntaxErrorException {
//NOTE(review): the meaning of the 'false' flag is defined by ParsingUtils.create -- confirm there.
ParsingUtils parsingUtils = ParsingUtils.create(cs, false, length);
//the 1st line is not preceded by a line break, so, it has to be handled explicitly.
if (currIndex < length) {
handleNewLine(parsingUtils);
}
//in the 1st attempt to handle the 1st line, if it had nothing we could actually go backward 1 char
if (currIndex < 0) {
currIndex = 0;
}
for (; currIndex < length; currIndex++, col++) {
char c = cs[currIndex];
switch (c) {
case '\'':
case '"':
if (DEBUG) {
System.out.println("literal");
}
//go to the end of the literal
int initialIndex = currIndex;
currIndex = parsingUtils.getLiteralEnd(currIndex, c);
//keep the row count correct
updateCountRow(initialIndex, currIndex);
break;
case '#':
if (DEBUG) {
System.out.println("comment");
}
//go to the end of the comment
while (currIndex < length) {
c = cs[currIndex];
if (c == '\r' || c == '\n') {
//step back so that the increment of the for loop lands on the line break itself.
currIndex--;
break;
}
currIndex++;
}
break;
case '{':
case '[':
case '(':
//starting some call, dict, list, tuple... those don't count on getting some actual definition
initialIndex = currIndex;
currIndex = parsingUtils.eatPar(currIndex, null, c);
//keep the row count correct
updateCountRow(initialIndex, currIndex);
break;
case '\r':
//treat \r\n as a single line break.
if (currIndex < length - 1 && cs[currIndex + 1] == '\n') {
currIndex++;
}
/*FALLTHROUGH**/
case '\n':
//go to the 1st char of the new line and let handleNewLine deal with it.
currIndex++;
handleNewLine(parsingUtils);
//handleNewLine changes currIndex, so, refresh the char that'll be appended to the line buffer below.
if (currIndex < length) {
c = cs[currIndex];
}
break;
case '=':
//note: an '=' which is the 1st or the last char of the buffer is also ignored by this check.
if ((currIndex < length - 1 && cs[currIndex + 1] != '=' && currIndex > 0
&& cs[currIndex - 1] != '=')) {
//should not be ==
//other cases such as !=, +=, -= are already treated because they don't constitute valid
//chars for an identifier.
if (DEBUG) {
System.out.println("Found possible attribute:" + lineBuffer + " col:" + firstCharCol);
}
//if we've an '=', let's get the whole line contents to analyze...
//Note: should have stopped just before the new line (so, as we'll do currIndex++ in the
//next loop, that's ok).
initialIndex = currIndex;
currIndex = parsingUtils.getFullFlattenedLine(currIndex, lineBuffer);
//keep the row count correct
updateCountRow(initialIndex, currIndex);
String equalsLine = lineBuffer.toString().trim();
//lines recognized by PySelection.startsWithIndentToken are not treated as assignments.
if (!PySelection.startsWithIndentToken(equalsLine)) {
lineBuffer.clear();
final List<String> splitted = StringUtils.split(equalsLine, '=');
final int splittedLen = splitted.size();
ArrayList<exprType> targets = new ArrayList<exprType>(2);
//each part but the last is a candidate target (if there's a single part, it's checked too).
for (int j = 0; j < splittedLen - 1 || (splittedLen == 1 && j == 0); j++) { //we don't want to get the last one.
String lineContents = splitted.get(j).trim();
if (lineContents.length() == 0) {
continue;
}
boolean add = true;
int lineContentsLen = lineContents.length();
for (int i = 0; i < lineContentsLen; i++) {
char lineC = lineContents.charAt(i);
//can only be made of valid java chars (no spaces or similar things)
if (lineC != '.' && !Character.isJavaIdentifierPart(lineC)) {
add = false;
break;
}
}
if (add) {
//only add if it was something valid
if (lineContents.indexOf('.') != -1) {
//dotted targets are only kept in the exact form self.<name>
List<String> dotSplit = StringUtils.dotSplit(lineContents);
if (dotSplit.size() == 2 && dotSplit.get(0).equals("self")) {
Attribute attribute = new Attribute(new Name("self", Name.Load, false),
new NameTok(dotSplit.get(1), NameTok.Attrib), Attribute.Load);
targets.add(attribute);
}
} else {
Name name = new Name(lineContents, Name.Store, false);
targets.add(name);
}
}
}
if (targets.size() > 0) {
//the assign is positioned at the 1st char of the line and enters the stack as any other statement.
Assign assign = new Assign(targets.toArray(new exprType[targets.size()]), null, null);
assign.beginColumn = this.firstCharCol;
assign.beginLine = this.row;
stack.push(new NodeEntry(assign, leadingTabsInLine));
}
}
}
//No default
}
//accumulate the line contents (used as the start of the line when an '=' is found).
lineBuffer.append(c);
}
//end of the buffer: close every scope still open (any logical column >= 0).
endScopesInStack(0);
}
/**
 * Updates the row count with the line breaks found in a range of the buffer that was
 * skipped (literal, bracketed expression, flattened line...).
 *
 * A "\r\n" pair fully contained in the range counts as a single line break.
 *
 * @param initialIndex the first index of the range (inclusive).
 * @param currIndex the last index of the range (inclusive; limited to the buffer length).
 */
public void updateCountRow(int initialIndex, int currIndex) {
    final int lastIndex = Math.min(length - 1, currIndex);
    int pos = initialIndex;
    while (pos <= lastIndex) {
        final char ch = cs[pos];
        if (ch == '\n') {
            row++;
        } else if (ch == '\r') {
            row++;
            //skip the \n after the \r (only if it's still inside the range)
            if (pos < lastIndex && cs[pos + 1] == '\n') {
                pos++;
            }
        }
        pos++;
    }
}
/**
 * Called when a new line is found. Tries to make the match of function and class definitions.
 *
 * On entry currIndex must point to the first char of the line. The method:
 * - resets the per-line state (column, leading tabs, line buffer) and increments the row;
 * - skips the indentation and closes the scopes which end at the indentation found (lines whose
 *   first non-blank char is whitespace/line break or '#' do not close scopes);
 * - pushes a class/function scope if the line starts with 'class', 'def' or 'async def';
 * - if a parenthesized part follows and the definition has something after the ':' in the
 *   same line (i.e.: def m2(self): self.a = 10), positions currIndex at that inline statement.
 *
 * In all other cases currIndex is left one position before the next char to be analyzed, as the
 * caller's loop increments it before reading the next char.
 *
 * @param parsingUtils helper used to find the end of the parenthesized part.
 * @throws SyntaxErrorException
 */
private void handleNewLine(ParsingUtils parsingUtils) throws SyntaxErrorException {
    if (currIndex >= length - 1) {
        return;
    }
    col = 1;
    leadingTabsInLine = 0;
    row++;
    if (DEBUG) {
        System.out.println("Handling new line:" + row);
    }
    lineBuffer.clear();

    //skip the indentation (counting how many of the columns are tabs)
    char c = cs[currIndex];
    while (currIndex < length - 1 && Character.isWhitespace(c) && c != '\r' && c != '\n') {
        currIndex++;
        col++;
        if (c == '\t') {
            leadingTabsInLine++;
        }
        c = cs[currIndex];
    }

    //blank lines and comment lines do not end scopes (tabs count as 8 columns)
    if (!Character.isWhitespace(c) && c != '#') {
        endScopesInStack((col - leadingTabsInLine) + (leadingTabsInLine * 8));
    }

    int funcDefIndex = -1;
    if (c == 'c' && matchClass()) {
        int startClassCol = col;
        //go to the char after 'class' + 1 whitespace
        currIndex += 6;
        col += 6;
        if (this.length <= currIndex) {
            return;
        }
        startClass(getNextIdentifier(c), row, startClassCol, leadingTabsInLine);

    } else if ((c == 'd' && (funcDefIndex = matchFunction()) != -1) ||
            (c == 'a' && (funcDefIndex = matchAsyncFunction()) != -1)) {
        if (DEBUG) {
            System.out.println("Found method");
        }
        int startMethodCol = col;
        //go to the char after the whitespace which follows 'def'. The column advances by the
        //same amount of chars as the index (funcDefIndex is a buffer index, not a column, so it
        //must not be assigned to col directly).
        int afterDefIndex = funcDefIndex + 1;
        col += afterDefIndex - currIndex;
        currIndex = afterDefIndex;
        if (this.length <= currIndex) {
            return;
        }
        startMethod(getNextIdentifier(c), row, startMethodCol, leadingTabsInLine);
    }
    firstCharCol = col;

    if (currIndex >= length) {
        return;
    }

    //starting some call, dict, list, tuple... those don't count on getting some actual definition
    int initialIndex = currIndex;
    int tempIndex = skipWhitespaces(currIndex);
    if (tempIndex >= length) {
        return;
    }
    if (cs[tempIndex] != '(') {
        //nothing special: let the caller's loop analyze the char at the current position
        currIndex--;
        return;
    }

    //go past the parenthesized part. Every read below is guarded, as skipWhitespaces may
    //return the buffer length when the buffer ends in spaces/tabs.
    tempIndex = parsingUtils.eatPar(tempIndex, null, '(');
    tempIndex = skipWhitespaces(tempIndex);
    if (tempIndex < length && cs[tempIndex] == ')') {
        tempIndex++;
    }
    tempIndex = skipWhitespaces(tempIndex);

    //is there a statement in the same line after the ':'?
    boolean hasInlineStatement = false;
    if (tempIndex < length && cs[tempIndex] == ':') {
        tempIndex = skipWhitespaces(tempIndex + 1);
        if (tempIndex < length) {
            char afterColon = cs[tempIndex];
            hasInlineStatement = afterColon != '\r' && afterColon != '\n';
        }
    }
    if (!hasInlineStatement) {
        currIndex--;
        return;
    }

    currIndex = tempIndex;
    //keep the row count correct
    updateCountRow(initialIndex, currIndex);

    //now, update the first char col to be the char after the ':' in "def m2(self):", in a line as
    //def m2(self): self.a = 10 (all in a single line)
    //NOTE(review): when no line break precedes the statement (1st line of the buffer) the search
    //stops at index 0 and the result is the 0-based index, while in other lines it is the
    //distance to the previous line break -- confirm whether that difference is intended.
    int lineBreakIndex = tempIndex;
    while (lineBreakIndex > 0 && lineBreakIndex < length) {
        char ch = cs[lineBreakIndex];
        if (ch == '\r' || ch == '\n') {
            break;
        }
        lineBreakIndex--;
    }
    firstCharCol = tempIndex - lineBreakIndex;
}
/**
 * Note that it'll only skip whitespaces (spaces and tabs -- not newlines).
 *
 * @param tempIndex the index where the search should start.
 * @return the index of the first char at or after tempIndex which is not a space nor a tab
 * (or tempIndex itself if it's already at or past the end of the buffer).
 */
private int skipWhitespaces(int tempIndex) {
    int pos = tempIndex;
    while (pos < length && (cs[pos] == ' ' || cs[pos] == '\t')) {
        pos++;
    }
    return pos;
}
/**
 * Get the next identifier available.
 *
 * Starting at currIndex, skips any whitespace and then consumes chars while they're valid
 * identifier parts. On return, currIndex points to the first char after the identifier (or to
 * the end of the buffer).
 *
 * Chars are only read while currIndex is inside the buffer, so a buffer that ends in
 * whitespace right after 'class '/'def ' gives an empty identifier instead of reading past
 * the end of the buffer.
 *
 * @param c the current char (not used: the char is always read from the buffer).
 * @return the identifier found (interned; empty if there's no identifier at that position).
 */
private String getNextIdentifier(char c) {
    while (currIndex < length && Character.isWhitespace(this.cs[currIndex])) {
        currIndex++;
    }
    final int identifierStart = currIndex;
    while (currIndex < length && Character.isJavaIdentifierPart(this.cs[currIndex])) {
        currIndex++;
    }
    return ObjectsInternPool.internLocal(interned,
            new String(this.cs, identifierStart, currIndex - identifierStart));
}
/**
* Pool passed to ObjectsInternPool.internLocal for the identifiers found by this parser instance.
*/
private final ObjectsPoolMap interned = new ObjectsPoolMap();
/**
 * Start a new method scope with the given row and column (a function definition with the
 * given name is created and pushed to the stack of open scopes).
 *
 * @param name the name of the method
 * @param startMethodRow the row where the scope should start
 * @param startMethodCol the column where the scope should start
 * @param leadingTabs how many of the columns in startMethodCol were found as tabs
 */
private void startMethod(String name, int startMethodRow, int startMethodCol, int leadingTabs) {
    //NOTE(review): the name token is created with NameTok.ClassName although this is a
    //function -- confirm whether a function-specific context was intended.
    final FunctionDef def = new FunctionDef(new NameTok(name, NameTok.ClassName), null, null, null, null,
            false);
    def.beginColumn = startMethodCol;
    def.beginLine = startMethodRow;
    stack.push(new NodeEntry(def, leadingTabs));
}
/**
 * Start a new class scope with the given row and column (a class definition with the
 * given name is created and pushed to the stack of open scopes).
 *
 * @param name the name of the class
 * @param startClassRow the row where the scope should start
 * @param startClassCol the column where the scope should start
 * @param leadingTabs how many of the columns in startClassCol were found as tabs
 */
private void startClass(String name, int startClassRow, int startClassCol, int leadingTabs) {
    final ClassDef def = new ClassDef(new NameTok(name, NameTok.ClassName), null, null, null, null, null,
            null);
    def.beginColumn = startClassCol;
    def.beginLine = startClassRow;
    stack.push(new NodeEntry(def, leadingTabs));
}
/**
 * Closes the scopes in the stack whose logical column is greater than or equal to the passed
 * column (stopping at the first entry with a smaller logical column).
 *
 * Each closed entry has its body assigned and is then:
 * - added to the module body if no scope remains open;
 * - added to the parent if the parent is a class;
 * - if the parent is a function: added only if it's an assign whose first target is an
 *   attribute and the function itself is directly inside a class (anything else is discarded);
 * - reported (thrown or logged, depending on throwErrorOnWarnings) for any other parent.
 *
 * @param currLogicalCol the logical column (tabs counting as 8 columns) of the new statement.
 */
private void endScopesInStack(int currLogicalCol) {
    while (stack.size() > 0 && stack.peek().logicalColumn >= currLogicalCol) {
        final NodeEntry closed = stack.pop();
        closed.onEndScope();

        if (stack.size() == 0) {
            //no scope left: it's a module-level statement
            body.add(closed.node);
            continue;
        }

        final NodeEntry parent = stack.peek();
        if (parent.node instanceof ClassDef) {
            parent.body.add(closed.node);
            continue;
        }

        if (!(parent.node instanceof FunctionDef)) {
            final String msg = "Did not expect to find item below node: " + parent.node + " (module: "
                    + this.moduleName + " file: " + this.file + " row: " + row
                    + ").";
            if (throwErrorOnWarnings) {
                throw new RuntimeException(msg);
            }
            Log.log(msg);
            continue;
        }

        // Inside a function def, only deal with attributes (if func inside class)
        final boolean isAttributeOfMethodInClass = closed.node instanceof Assign
                && stack.size() > 1
                && ((Assign) closed.node).targets[0] instanceof Attribute
                && stack.peek(1).node instanceof ClassDef;
        if (isAttributeOfMethodInClass) {
            parent.body.add(closed.node);
        }
    }
}
/**
 * Checks whether the keyword 'class' followed by a whitespace char starts at the current index.
 *
 * @return true if we have a match for 'class' in the current index (the 'c' must be already matched at this point)
 */
private boolean matchClass() {
    final int whitespaceIndex = currIndex + 5;
    if (whitespaceIndex >= this.length) {
        return false;
    }
    final String remaining = "lass";
    for (int k = 0; k < remaining.length(); k++) {
        if (this.cs[currIndex + 1 + k] != remaining.charAt(k)) {
            return false;
        }
    }
    return Character.isWhitespace(this.cs[whitespaceIndex]);
}
/**
 * Checks whether the keyword 'def' followed by a whitespace char starts at the current index
 * (the 'd' must be already matched at this point).
 *
 * @return the index of the whitespace char right after 'def' if there's a match or -1 otherwise.
 */
private int matchFunction() {
    final int whitespaceIndex = currIndex + 3;
    if (whitespaceIndex >= this.length) {
        return -1;
    }
    final boolean isDef = this.cs[currIndex + 1] == 'e'
            && this.cs[currIndex + 2] == 'f'
            && Character.isWhitespace(this.cs[whitespaceIndex]);
    return isDef ? whitespaceIndex : -1;
}
/**
 * Checks whether 'async', one or more whitespace chars, 'def' and a whitespace char start at
 * the current index (the 'a' must be already matched at this point).
 *
 * @return the index of the whitespace char right after 'def' if there's a match or -1 otherwise.
 */
private int matchAsyncFunction() {
    if (currIndex + 5 >= this.length) {
        return -1;
    }
    final boolean isAsync = this.cs[currIndex + 1] == 's'
            && this.cs[currIndex + 2] == 'y'
            && this.cs[currIndex + 3] == 'n'
            && this.cs[currIndex + 4] == 'c'
            && Character.isWhitespace(this.cs[currIndex + 5]);
    if (!isAsync) {
        return -1;
    }

    //skip any additional whitespace between 'async' and 'def'
    int defStart = currIndex + 6;
    while (defStart < this.length && Character.isWhitespace(this.cs[defStart])) {
        defStart++;
    }

    final int whitespaceIndex = defStart + 3;
    if (whitespaceIndex >= this.length) {
        return -1;
    }
    if (this.cs[defStart] == 'd'
            && this.cs[defStart + 1] == 'e'
            && this.cs[defStart + 2] == 'f'
            && Character.isWhitespace(this.cs[whitespaceIndex])) {
        return whitespaceIndex;
    }
    return -1;
}
/**
* Callbacks called just before returning a parsed object. Used for tests.
*
* Each callback receives a tuple with the module name and the Module node which was created.
*/
public static List<ICallback<Object, Tuple<String, SimpleNode>>> parseCallbacks = new ArrayList<ICallback<Object, Tuple<String, SimpleNode>>>();
/**
 * Convenience method for parse(s.toCharArray(), moduleName, f)
 *
 * @param s the string to be parsed
 * @param moduleName the name of the module being parsed
 * @param f the file being parsed (may be null)
 * @return a Module node with the structure found
 */
public static SimpleNode parse(String s, String moduleName, File f) {
    final char[] contents = s.toCharArray();
    return parse(contents, moduleName, f);
}
/**
 * This method will parse the whole char array passed and will build a structure with the
 * contents of the file.
 *
 * @param cs the char array to be parsed
 * @param moduleName the name of the module being parsed
 * @param f the file being parsed (may be null)
 * @return a Module node with the structure found
 */
public static SimpleNode parse(char[] cs, String moduleName, File f) {
    final int len = cs.length;
    return parse(cs, moduleName, len, f);
}
/**
 * Parses the first len chars of the char array passed and builds a Module with the
 * definitions found. The registered parseCallbacks (if any) are called with the module name
 * and the Module created just before it's returned.
 *
 * @param cs the char array to be parsed
 * @param moduleName the name of the module being parsed
 * @param len the number of chars of cs to be used
 * @param f the file being parsed (may be null)
 * @return a Module node with the structure found
 * @throws RuntimeException wrapping a SyntaxErrorException or a StackOverflowError raised
 * while parsing (in the latter case the start of the contents is also logged).
 */
public static SimpleNode parse(char[] cs, String moduleName, int len, File f) {
    final FastDefinitionsParser parser = new FastDefinitionsParser(cs, len, moduleName, f);
    try {
        parser.extractBody();
    } catch (SyntaxErrorException e) {
        throw new RuntimeException(e);
    } catch (StackOverflowError e) {
        final RuntimeException wrapped = new RuntimeException(e);
        //report at most 1000 chars...
        final String preview = new String(cs, 0, Math.min(len, 1000));
        Log.log("Error parsing: " + moduleName + " - " + f + "\nContents:\n" + preview, wrapped);
        throw wrapped;
    }

    final List<stmtType> found = parser.body;
    final stmtType[] statements = found.toArray(new stmtType[found.size()]);
    final Module module = new Module(statements);

    if (!parseCallbacks.isEmpty()) {
        final Tuple<String, SimpleNode> arg = new Tuple<String, SimpleNode>(moduleName, module);
        for (ICallback<Object, Tuple<String, SimpleNode>> callback : parseCallbacks) {
            callback.call(arg);
        }
    }
    return module;
}
/**
 * Convenience method to parse a string when there's no module name nor file available.
 *
 * @param s the string to be parsed
 * @return a Module node with the structure found
 */
public static SimpleNode parse(String s) {
    final char[] contents = s.toCharArray();
    return parse(contents, null, null);
}
}