/** * Copyright (c) 2005-2011 by Appcelerator, Inc. All Rights Reserved. * Licensed under the terms of the Eclipse Public License (EPL). * Please see the license.txt included with this distribution for details. * Any modifications to this file must keep this entire header intact. */ package org.python.pydev.parser.fastparser; import java.util.ArrayList; import java.util.List; import org.python.pydev.core.ObjectsPool; import org.python.pydev.core.ObjectsPool.ObjectsPoolMap; import org.python.pydev.core.docutils.ParsingUtils; import org.python.pydev.core.docutils.StringUtils; import org.python.pydev.core.docutils.SyntaxErrorException; import org.python.pydev.core.log.Log; import org.python.pydev.core.structure.FastStack; import org.python.pydev.parser.jython.SimpleNode; import org.python.pydev.parser.jython.ast.Assign; import org.python.pydev.parser.jython.ast.Attribute; import org.python.pydev.parser.jython.ast.ClassDef; import org.python.pydev.parser.jython.ast.FunctionDef; import org.python.pydev.parser.jython.ast.Module; import org.python.pydev.parser.jython.ast.Name; import org.python.pydev.parser.jython.ast.NameTok; import org.python.pydev.parser.jython.ast.exprType; import org.python.pydev.parser.jython.ast.stmtType; import com.aptana.shared_core.callbacks.ICallback; import com.aptana.shared_core.string.FastStringBuffer; import com.aptana.shared_core.structure.Tuple; /** * @note: Unfinished * * This class should be able to gather the definitions found in a module in a very fast way. * * The target is having a performance around 5x faster than doing a regular parse, focusing on getting * the name tokens for: * * classes, functions, class attributes, instance attributes -- basically the tokens that provide a * definition that can be 'globally' accessed. * * @author Fabio */ public final class FastDefinitionsParser { /** * Set and kept in the constructor */ /** * The chars we should iterate through. */ final private char[] cs; /** * The length of the buffer we're iterating. */ final private int length; /** * Current iteration index */ private int currIndex = 0; /** * The current column */ private int col; /** * The current row */ private int row = 0; /** * The column where the 1st char was found */ private int firstCharCol = 1; /** * Holds things added to the 'global' module */ private final ArrayList<stmtType> body = new ArrayList<stmtType>(16); /** * Holds a stack of classes so that we create a new one in each new scope to be filled and when the scope is ended, * it should have its body filled with the stackBody contents related to each */ private final FastStack<SimpleNode> stack = new FastStack<SimpleNode>(20); /** * For each item in the stack, there's a stackBody that has the contents to be added later to that class. */ private final FastStack<List<stmtType>> stackBody = new FastStack<List<stmtType>>(20); /** * Buffer with the contents of a line. */ private final FastStringBuffer lineBuffer = new FastStringBuffer(); /** * Should we debug? */ private final static boolean DEBUG = false; private FastDefinitionsParser(char[] cs) { this(cs, cs.length); } /** * Constructor * * @param cs array of chars that should be considered. * @param len the number of chars to be used (usually cs.length). */ private FastDefinitionsParser(char[] cs, int len) { this.cs = cs; this.length = len; } /** * This is the method that actually extracts things from the passed buffer. * @throws SyntaxErrorException */ private void extractBody() throws SyntaxErrorException { ParsingUtils parsingUtils = ParsingUtils.create(cs, false, length); if (currIndex < length) { handleNewLine(parsingUtils); } //in the 1st attempt to handle the 1st line, if it had nothing we could actually go backward 1 char if (currIndex < 0) { currIndex = 0; } for (; currIndex < length; currIndex++, col++) { char c = cs[currIndex]; switch (c) { case '\'': case '"': if (DEBUG) { System.out.println("literal"); } //go to the end of the literal int initialIndex = currIndex; currIndex = parsingUtils.getLiteralEnd(currIndex, c); //keep the row count correct updateCountRow(initialIndex, currIndex); break; case '#': if (DEBUG) { System.out.println("comment"); } //go to the end of the comment while (currIndex < length) { c = cs[currIndex]; if (c == '\r' || c == '\n') { currIndex--; break; } currIndex++; } break; case '{': case '[': case '(': //starting some call, dict, list, tuple... those don't count on getting some actual definition initialIndex = currIndex; currIndex = parsingUtils.eatPar(currIndex, null, c); //keep the row count correct updateCountRow(initialIndex, currIndex); break; case '\r': if (currIndex < length - 1 && cs[currIndex + 1] == '\n') { currIndex++; } /*FALLTHROUGH**/ case '\n': currIndex++; handleNewLine(parsingUtils); if (currIndex < length) { c = cs[currIndex]; } break; case '=': if (currIndex < length - 1 && cs[currIndex + 1] != '=') { //should not be == //other cases such as !=, +=, -= are already treated because they don't constitute valid //chars for an identifier. if (DEBUG) { System.out.println("Found possible attribute:" + lineBuffer + " col:" + firstCharCol); } //if we've an '=', let's get the whole line contents to analyze... //Note: should have stopped just before the new line (so, as we'll do currIndex++ in the //next loop, that's ok). initialIndex = currIndex; currIndex = parsingUtils.getFullFlattenedLine(currIndex, lineBuffer); //keep the row count correct updateCountRow(initialIndex, currIndex); String equalsLine = lineBuffer.toString().trim(); lineBuffer.clear(); final List<String> splitted = StringUtils.split(equalsLine, '='); final int splittedLen = splitted.size(); ArrayList<exprType> targets = new ArrayList<exprType>(2); for (int j = 0; j < splittedLen - 1 || (splittedLen == 1 && j == 0); j++) { //we don't want to get the last one. String lineContents = splitted.get(j).trim(); if (lineContents.length() == 0) { continue; } boolean add = true; for (int i = 0; i < lineContents.length(); i++) { char lineC = lineContents.charAt(i); //can only be made of valid java chars (no spaces or similar things) if (lineC != '.' && !Character.isJavaIdentifierPart(lineC)) { add = false; break; } } if (add) { //only add if it was something valid if (lineContents.indexOf('.') != -1) { List<String> dotSplit = StringUtils.dotSplit(lineContents); if (dotSplit.size() == 2 && dotSplit.get(0).equals("self")) { Attribute attribute = new Attribute(new Name("self", Name.Load, false), new NameTok(dotSplit.get(1), NameTok.Attrib), Attribute.Load); targets.add(attribute); } } else { Name name = new Name(lineContents, Name.Store, false); targets.add(name); } } } if (targets.size() > 0) { Assign assign = new Assign(targets.toArray(new exprType[targets.size()]), null); assign.beginColumn = this.firstCharCol; assign.beginLine = this.row; addToPertinentScope(assign); } } //No default } lineBuffer.append(c); } endScopesInStack(); } public void updateCountRow(int initialIndex, int currIndex) { char c; int len = length; for (int k = initialIndex; k < len && k <= currIndex; k++) { c = cs[k]; switch (c) { case '\n': row += 1; break; case '\r': row += 1; if (k < len - 1 && k <= currIndex - 1) { if (cs[k + 1] == '\n') { k++; //skip the \n after the \r } } break; } } } /** * Called when a new line is found. Tries to make the match of function and class definitions. * @throws SyntaxErrorException */ private void handleNewLine(ParsingUtils parsingUtils) throws SyntaxErrorException { if (currIndex >= length - 1) { return; } col = 1; row++; if (DEBUG) { System.out.println("Handling new line:" + row); } lineBuffer.clear(); char c = cs[currIndex]; while (currIndex < length - 1 && Character.isWhitespace(c) && c != '\r' && c != '\n') { currIndex++; col++; c = cs[currIndex]; } if (c == 'c' && matchClass()) { int startClassCol = col; currIndex += 6; col += 6; startClass(getNextIdentifier(c), row, startClassCol); } else if (c == 'd' && matchFunction()) { int startMethodCol = col; currIndex += 4; col += 4; startMethod(getNextIdentifier(c), row, startMethodCol); } firstCharCol = col; if (currIndex < length) { //starting some call, dict, list, tuple... those don't count on getting some actual definition int initialIndex = currIndex; int tempIndex = skipWhitespaces(currIndex); if (tempIndex >= length) { return; } c = cs[tempIndex]; boolean updateIndex = false; switch (c) { case '(': tempIndex = parsingUtils.eatPar(tempIndex, null, c); if (tempIndex < length) { tempIndex = skipWhitespaces(tempIndex); c = cs[tempIndex]; if (c == ')') { tempIndex++; } } if (tempIndex < length) { tempIndex = skipWhitespaces(tempIndex); c = cs[tempIndex]; if (c == ':') { tempIndex++; if (tempIndex < length) { c = cs[tempIndex]; if (c != '\r' && c != '\n') { updateIndex = true; } } } } if (updateIndex) { tempIndex = skipWhitespaces(tempIndex); currIndex = tempIndex; //keep the row count correct updateCountRow(initialIndex, currIndex); //now, update the first char col to be the char after the ':' in "def m2(self):", in a line as //def m2(self): self.a = 10 (all in a single line) int i = tempIndex; while (i > 0 && i < length) { c = cs[i]; if (c == '\r' || c == '\n') { break; } i--; } firstCharCol = tempIndex - i; } else { currIndex--; } break; default: currIndex--; break; } } } /** * Note that it'll only skip whitespaces (not newlines) */ private int skipWhitespaces(int tempIndex) { char c; while (tempIndex < length) { c = cs[tempIndex]; if (c == ' ' || c == '\t') { tempIndex++; } else { break; } } return tempIndex; } /** * Get the next identifier available. * @param c the current char * @return the identifier found */ private String getNextIdentifier(char c) { c = this.cs[currIndex]; while (currIndex < length && Character.isWhitespace(c)) { currIndex++; c = this.cs[currIndex]; } int currClassNameCol = currIndex; while (Character.isJavaIdentifierPart(c)) { currIndex++; if (currIndex >= length) { break; } c = this.cs[currIndex]; } return ObjectsPool.internLocal(interned, new String(this.cs, currClassNameCol, currIndex - currClassNameCol)); } private final ObjectsPoolMap interned = new ObjectsPoolMap(); /** * Start a new method scope with the given row and column. * @param startMethodRow the row where the scope should start * @param startMethodCol the column where the scope should start */ private void startMethod(String name, int startMethodRow, int startMethodCol) { if (startMethodCol == 1) { endScopesInStack(); } NameTok nameTok = new NameTok(name, NameTok.ClassName); FunctionDef functionDef = new FunctionDef(nameTok, null, null, null, null); functionDef.beginLine = startMethodRow; functionDef.beginColumn = startMethodCol; addToPertinentScope(functionDef); if (stack.size() == 0) { stack.push(functionDef); } } /** * Start a new class scope with the given row and column. * @param startClassRow the row where the scope should start * @param startClassCol the column where the scope should start */ private void startClass(String name, int startClassRow, int startClassCol) { if (startClassCol == 1) { endScopesInStack(); } NameTok nameTok = new NameTok(name, NameTok.ClassName); ClassDef classDef = new ClassDef(nameTok, null, null, null, null, null, null); classDef.beginLine = startClassRow; classDef.beginColumn = startClassCol; stack.push(classDef); stackBody.push(new ArrayList<stmtType>(10)); } private void endScopesInStack() { while (stack.size() > 0) { endScope(); } } /** * Finish the current scope in the stack. * * May close many scopes in a single call depending on where the class should be added to. */ private void endScope() { SimpleNode pop = stack.pop(); if (!(pop instanceof ClassDef)) { return; } ClassDef def = (ClassDef) pop; List<stmtType> body = stackBody.pop(); def.body = body.toArray(new stmtType[body.size()]); addToPertinentScope(def); } /** * This is the definition to be added to a given scope. * * It'll find a correct scope based on the column it has to be added to. * * @param newStmt the definition to be added */ private void addToPertinentScope(stmtType newStmt) { //see where it should be added (global or class scope) while (stack.size() > 0) { SimpleNode parent = stack.peek(); if (parent.beginColumn < newStmt.beginColumn) { if (parent instanceof FunctionDef) { return; } List<stmtType> peek = stackBody.peek(); if (newStmt instanceof FunctionDef) { int size = peek.size(); if (size > 0) { stmtType existing = peek.get(size - 1); if (existing.beginColumn < newStmt.beginColumn) { //we don't want to add a method inside a method at this point. //all the items added should have the same column. return; } } } else if (newStmt instanceof Assign) { Assign assign = (Assign) newStmt; exprType target = assign.targets[0]; //an assign could be in a method or in a class depending on where we're right now... int size = peek.size(); if (size > 0) { stmtType existing = peek.get(size - 1); if (existing.beginColumn < assign.beginColumn) { //add the assign to the correct place if (existing instanceof FunctionDef) { FunctionDef functionDef = (FunctionDef) existing; if (target instanceof Attribute) { addAssignToFunctionDef(assign, functionDef); } return; } } } //if it still hasn't returned and it's a name, add it to the global scope. if (target instanceof Name) { } } peek.add(newStmt); return; } else { endScope(); } } //if it still hasn't returned, add it to the global this.body.add(newStmt); } /** * Adds an assign statement to the given function definition. * * @param assign the assign to be added * @param functionDef the function definition where it should be added */ private void addAssignToFunctionDef(Assign assign, FunctionDef functionDef) { //if it's an attribute at this point, it'll always start with self! if (functionDef.body == null) { if (functionDef.specialsAfter == null) { functionDef.specialsAfter = new ArrayList<Object>(3); } functionDef.body = new stmtType[10]; functionDef.body[0] = assign; functionDef.specialsAfter.add(1); //real len } else { //already exists... let's add it... as it's an array, we may have to reallocate it Integer currLen = (Integer) functionDef.specialsAfter.get(0); currLen += 1; functionDef.specialsAfter.set(0, currLen); if (functionDef.body.length < currLen) { stmtType[] newBody = new stmtType[functionDef.body.length * 2]; System.arraycopy(functionDef.body, 0, newBody, 0, functionDef.body.length); functionDef.body = newBody; } functionDef.body[currLen - 1] = assign; } } /** * @return true if we have a match for 'class' in the current index (the 'c' must be already matched at this point) */ private boolean matchClass() { if (currIndex + 5 > this.length) { return false; } return (this.cs[currIndex + 1] == 'l' && this.cs[currIndex + 2] == 'a' && this.cs[currIndex + 3] == 's' && this.cs[currIndex + 4] == 's' && Character.isWhitespace(this.cs[currIndex + 5])); } /** * @return true if we have a match for 'def' in the current index (the 'd' must be already matched at this point) */ private boolean matchFunction() { if (currIndex + 3 > this.length) { return false; } return (this.cs[currIndex + 1] == 'e' && this.cs[currIndex + 2] == 'f' && Character .isWhitespace(this.cs[currIndex + 3])); } /** * Callbacks called just before returning a parsed object. Used for tests */ public static List<ICallback<Object, Tuple<String, SimpleNode>>> parseCallbacks = new ArrayList<ICallback<Object, Tuple<String, SimpleNode>>>(); /** * Convenience method for parse(s.toCharArray()) * @param s the string to be parsed * @return a Module node with the structure found */ public static SimpleNode parse(String s, String moduleName) { return parse(s.toCharArray(), moduleName); } /** * This method will parse the char array passed and will build a structure with the contents of the file. * @param cs the char array to be parsed * @return a Module node with the structure found */ public static SimpleNode parse(char[] cs, String moduleName) { return parse(cs, moduleName, cs.length); } public static SimpleNode parse(char[] cs, String moduleName, int len) { FastDefinitionsParser parser = new FastDefinitionsParser(cs, len); try { parser.extractBody(); } catch (SyntaxErrorException e) { throw new RuntimeException(e); } catch (StackOverflowError e) { RuntimeException runtimeException = new RuntimeException(e); Log.log("Error parsing: " + moduleName + "\nContents:\n" + new String(cs, 0, len > 1000 ? 1000 : len), runtimeException); //report at most 1000 chars... throw runtimeException; } List<stmtType> body = parser.body; Module ret = new Module(body.toArray(new stmtType[body.size()])); if (parseCallbacks.size() > 0) { Tuple<String, SimpleNode> arg = new Tuple<String, SimpleNode>(moduleName, ret); for (ICallback<Object, Tuple<String, SimpleNode>> c : parseCallbacks) { c.call(arg); } } return ret; } public static SimpleNode parse(String s) { return parse(s.toCharArray(), null); } }