/** * Copyright (c) 2005-2013 by Appcelerator, Inc. All Rights Reserved. * Licensed under the terms of the Eclipse Public License (EPL). * Please see the license.txt included with this distribution for details. * Any modifications to this file must keep this entire header intact. */ package org.python.pydev.parser.fastparser; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.eclipse.jface.text.IDocument; import org.python.pydev.core.IPythonPartitions; import org.python.pydev.core.docutils.ParsingUtils; import org.python.pydev.core.docutils.PySelection; import org.python.pydev.parser.jython.ast.ClassDef; import org.python.pydev.parser.jython.ast.FunctionDef; import org.python.pydev.parser.jython.ast.Name; import org.python.pydev.parser.jython.ast.NameTok; import org.python.pydev.parser.jython.ast.argumentsType; import org.python.pydev.parser.jython.ast.decoratorsType; import org.python.pydev.parser.jython.ast.exprType; import org.python.pydev.parser.jython.ast.stmtType; import org.python.pydev.parser.visitors.NodeUtils; import org.python.pydev.shared_core.string.DocIterator; import org.python.pydev.shared_core.structure.FastStack; /** * This class is able to obtain the classes and function definitions as a tree structure (only filled with * classes and methods). * * @author Fabio */ public final class FastParser { private static final exprType[] EMTPY_EXPR_TYPE = new exprType[0]; private static final decoratorsType[] EMTPY_DECORATORS_TYPE = new decoratorsType[0]; private static final stmtType[] EMTPY_STMT_TYPE = new stmtType[0]; //spaces* 'def' space+ identifier private static final Pattern FUNCTION_PATTERN = Pattern.compile("(\\s+|^)(def\\s+)(\\w*)"); private static final Pattern FUNCTION_PATTERN_CYTHON = Pattern.compile("(\\s+|^)(cdef\\s+)(.*)"); private static final Pattern FUNCTION_PATTERN_CYTHON2 = Pattern.compile("(\\s+|^)(ctypedef\\s+)(.*)"); //spaces* 'class' space+ identifier private static final Pattern CLASS_PATTERN = Pattern.compile("(\\s+|^)(class\\s+)(\\w*)"); //constant with the prefix position in the pattern (class or def) @SuppressWarnings("unused") private static final int PREFIX_GROUP = 2; //constant with the name position in the pattern private static final int NAME_GROUP = 3; /** * doc the document to be parsed */ private IDocument doc; /** * currentLine the line where the parsing should begin (inclusive -- starts at 0) */ private int currentLine; /** * forward determines whether we should be iterating forward or backward */ private boolean forward; /** * stopOnFirstMatch if true, will return right after getting the 1st match */ private boolean stopOnFirstMatch; /** * If true, we'll stop when we're able to find the match to the globally accessible way for the current line. */ private boolean findGloballyAccessiblePath; private int firstCharCol = -1; private boolean cythonParse = false; /** * Use the parse* methods to access what you need to create the parse. */ private FastParser(IDocument doc, int currentLine, boolean forward, boolean stopOnFirstMatch) { this.doc = doc; this.currentLine = currentLine; this.forward = forward; this.stopOnFirstMatch = stopOnFirstMatch; } /** * @param doc the document to be parsed * @return a list of statements with the classes and functions for this document */ public static List<stmtType> parseClassesAndFunctions(IDocument doc) { return new FastParser(doc, 0, true, false).parse(); } /** * @param doc the document to be parsed * @return a list of statements with the classes and functions for this document */ public static List<stmtType> parseCython(IDocument doc) { FastParser fastParser = new FastParser(doc, 0, true, false); fastParser.cythonParse = true; return fastParser.parse(); } /** * Note: Used from jython scripts. * * @param doc the document to be parsed * @param currentLine the line where the parsing should begin (inclusive -- starts at 0) * @return the path to the current statement (where the current is the last element and the top-level is the 1st). * If it's empty that means that we're already in the top-level. */ public static List<stmtType> parseToKnowGloballyAccessiblePath(IDocument doc, int currentLine) { FastParser parser = new FastParser(doc, currentLine, false, false); parser.findGloballyAccessiblePath = true; return parser.parse(); } /** * @param doc the document to be parsed * @param currentLine the line where the parsing should begin (inclusive -- starts at 0) * @param forward determines whether we should be iterating forward or backward * @param stopOnFirstMatch if true, will return right after getting the 1st match * @return a list of statements with the classes and functions for this document */ private static List<stmtType> parseClassesAndFunctions(IDocument doc, int currentLine, boolean forward, boolean stopOnFirstMatch) { return new FastParser(doc, currentLine, forward, stopOnFirstMatch).parse(); } private List<stmtType> parse() { List<stmtType> body = new ArrayList<stmtType>(); FastStack<stmtType> stack = new FastStack<>(5); Map<Integer, List<stmtType>> objectIdToBody = new HashMap<>(); PySelection ps = new PySelection(doc); DocIterator it = new DocIterator(forward, ps, currentLine, false); Matcher functionMatcher = FUNCTION_PATTERN.matcher(""); List<Matcher> cythonMatchers = null; if (this.cythonParse) { cythonMatchers = new ArrayList<Matcher>(); cythonMatchers.add(FUNCTION_PATTERN_CYTHON.matcher("")); cythonMatchers.add(FUNCTION_PATTERN_CYTHON2.matcher("")); } Matcher classMatcher = CLASS_PATTERN.matcher(""); while (it.hasNext()) { Matcher functionFound = null; String line = it.next(); //we don't care about empty lines if (line.trim().length() == 0) { continue; } if (findGloballyAccessiblePath) { int currentFirstCharCol = PySelection.getFirstCharPosition(line); if (firstCharCol == -1) { firstCharCol = currentFirstCharCol; } else { //We must validate if this is a line we can accept based on the initial indentation //E.g.: // //def m1(): // def m2(): // pass // pass <- If we're here, m2() should not be considered when getting the path // to the global scope. if (firstCharCol <= currentFirstCharCol) { continue; // don't check this line as it's not valid in the current context. } } } functionMatcher.reset(line); if (functionMatcher.find()) { functionFound = functionMatcher; } else if (cythonMatchers != null) { for (Matcher matcher : cythonMatchers) { matcher.reset(line); if (matcher.find()) { functionFound = matcher; break; } } } if (functionFound != null) { int lastReturnedLine = it.getLastReturnedLine(); NameTok nameTok = createNameTok(functionFound, lastReturnedLine, NameTok.FunctionName, ps); if (nameTok != null) { FunctionDef functionDef = createFunctionDef(lastReturnedLine, nameTok, PySelection.getFirstCharPosition(line)); if (!addStatement(body, stack, objectIdToBody, functionDef)) { return body; } if (stopOnFirstMatch) { return body; } } continue; } classMatcher.reset(line); if (classMatcher.find()) { int lastReturnedLine = it.getLastReturnedLine(); NameTok nameTok = createNameTok(classMatcher, lastReturnedLine, NameTok.ClassName, ps); if (nameTok != null) { ClassDef classDef = createClassDef(lastReturnedLine, nameTok, PySelection.getFirstCharPosition(line)); if (!addStatement(body, stack, objectIdToBody, classDef)) { return body; } if (stopOnFirstMatch) { return body; } } continue; } } if (cythonParse) { for (stmtType t : body) { buildBody(t, objectIdToBody); } } return body; } private void buildBody(stmtType t, Map<Integer, List<stmtType>> objectIdToBody) { int id = System.identityHashCode(t); List<stmtType> list = objectIdToBody.get(id); if (list != null) { NodeUtils.setBody(t, list.toArray(new stmtType[0])); for (stmtType stmtType : list) { buildBody(stmtType, objectIdToBody); } } } /** * @param objectIdToBody * @return whether we should continue iterating. */ private boolean addStatement(List<stmtType> body, FastStack<stmtType> stack, Map<Integer, List<stmtType>> objectIdToBody, stmtType stmt) { if (cythonParse) { if (stack.empty()) { stack.push(stmt); body.add(stmt); // Globals added to body } else { stmtType prev = stack.peek(); while (prev.beginColumn >= stmt.beginColumn) { stack.pop(); if (stack.empty()) { stack.push(stmt); body.add(stmt); // Globals added to body return true; } prev = stack.peek(); } //If it got here we are inside some context... stack.push(stmt); int id = System.identityHashCode(prev); List<stmtType> prevBody = objectIdToBody.get(id); if (prevBody == null) { prevBody = new ArrayList<>(); objectIdToBody.put(id, prevBody); } //Inside some other: add to its context (and not to global). prevBody.add(stmt); } return true; } else if (!findGloballyAccessiblePath) { body.add(stmt); return true; } else { if (body.size() > 0) { if (stmt.beginColumn == body.get(0).beginColumn) { //don't add one that's in the same column of the last found (we need only the path to the parent, not siblings) return true; } } body.add(0, stmt); if (stmt.beginColumn == 1) { //gotten to root return false; } return true; } } private FunctionDef createFunctionDef(int lastReturnedLine, NameTok nameTok, int matchedCol) { argumentsType args; if (cythonParse) { Name name = new Name("self", Name.Store, false); exprType[] selfExprType = new exprType[] { name }; name.beginLine = lastReturnedLine + 1; name.beginColumn = matchedCol + 1 + 4 + 1 + nameTok.id.length(); // 4 for 'def ' and 1 for '(' args = new argumentsType(selfExprType, null, null, EMTPY_EXPR_TYPE, null, null, null, null, null, null); } else { args = new argumentsType(EMTPY_EXPR_TYPE, null, null, EMTPY_EXPR_TYPE, null, null, null, null, null, null); } FunctionDef functionDef = new FunctionDef(nameTok, args, EMTPY_STMT_TYPE, EMTPY_DECORATORS_TYPE, null, false); functionDef.beginLine = lastReturnedLine + 1; functionDef.beginColumn = matchedCol + 1; return functionDef; } private ClassDef createClassDef(int lastReturnedLine, NameTok nameTok, int matchedCol) { ClassDef classDef = new ClassDef(nameTok, EMTPY_EXPR_TYPE, EMTPY_STMT_TYPE, null, null, null, null); classDef.beginLine = lastReturnedLine + 1; classDef.beginColumn = matchedCol + 1; return classDef; } /** * @param doc the document where the search should take place * @param currentLine the line where the parsing should begin (inclusive) * @param forward determines if the search should be forward or backward in the document considering the * current position. * @return the first class or function definition found on the given document */ public static stmtType firstClassOrFunction(IDocument doc, int currentLine, boolean forward, boolean isCython) { boolean stopOnFirstMatch = true; FastParser fastParser = new FastParser(doc, currentLine, forward, stopOnFirstMatch); fastParser.cythonParse = isCython; List<stmtType> found = fastParser.parse(); if (found.size() > 0) { return found.get(0); } return null; } /** * @param matcher this is the class that just matched the class or function * @param lastReturnedLine the line it has done the match * @param type the type of the name token (@see NameTok constants) * @param ps the pyselection that has the document * @return null if the location is not a valid location for a function or class or a NameTok to * be used with the ClassDef / FunctionDef */ private NameTok createNameTok(Matcher matcher, int lastReturnedLine, int type, PySelection ps) { int col = matcher.start(NAME_GROUP); int absoluteCursorOffset = ps.getAbsoluteCursorOffset(lastReturnedLine, col); if (!IPythonPartitions.PY_DEFAULT.equals(ParsingUtils.getContentType(ps.getDoc(), absoluteCursorOffset))) { return null; } NameTok nameTok = new NameTok(matcher.group(NAME_GROUP), type); nameTok.beginLine = lastReturnedLine + 1; nameTok.beginColumn = col + 1; return nameTok; } }