package org.yinwang.pysonar;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.yinwang.pysonar.ast.*;
import org.yinwang.pysonar.types.FunType;
import org.yinwang.pysonar.types.ModuleType;
import org.yinwang.pysonar.types.Type;
import org.yinwang.pysonar.types.UnionType;
import java.io.File;
import java.util.*;
import java.util.Map.Entry;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Indexes a set of Python files and builds a code graph. <p>
 * This class is not thread-safe.
 */
public class Indexer {

    /** The most recently constructed indexer; assigned by the constructor. */
    public static Indexer idx;

    /** Scope consulted by {@link #getCachedModule} to find already-loaded modules by file path. */
    @NotNull
    public Scope moduleTable = new Scope(null, Scope.ScopeType.GLOBAL);

    /** A global scope. Not referenced by any method of this class. */
    @NotNull
    public Scope globaltable = new Scope(null, Scope.ScopeType.GLOBAL);

    /** Bindings grouped by qualified name; filled by {@link #registerBinding}. */
    @NotNull
    public Map<String, List<Binding>> allBindings = new HashMap<>();

    /** Maps each recorded reference to the bindings it refers to; filled by {@link #putRef}. */
    @NotNull
    private Map<Ref, List<Binding>> references = new HashMap<>();

    /** Diagnostics per file, filled through {@link #addFileErr}. */
    @NotNull
    public Map<String, List<Diagnostic>> semanticErrors = new HashMap<>();

    /** Diagnostics per file, accessed through {@link #getParseErrs}. */
    @NotNull
    public Map<String, List<Diagnostic>> parseErrors = new HashMap<>();

    /** Directory put first on the load path; set to the parent of the file being loaded. */
    @Nullable
    public String cwd = null;

    /** Counter reported by {@link #finish}; it is not modified inside this class. */
    public int nCalled = 0;

    /** Flag that is not read inside this class. */
    public boolean multilineFunType = false;

    /** Module search path entries (after {@code Util.unifyPath}). */
    @NotNull
    public List<String> path = new ArrayList<>();

    /** Function types registered via {@link #addUncalled} and drained by {@link #applyUncalled}. */
    @NotNull
    private Set<FunType> uncalled = new HashSet<>();

    /** Membership set behind {@link #inStack}, {@link #pushStack} and {@link #popStack}. */
    @NotNull
    private Set<Object> callStack = new HashSet<>();

    /** Files whose load is in progress; used by {@link #loadFile} to detect circular imports. */
    @NotNull
    private Set<Object> importStack = new HashSet<>();

    /** Last value handed out by {@link #newThread}. */
    private int threadCounter = 0;

    /**
     * Increments the internal counter and returns the new value.
     * @return a number that is larger by one on each call, starting at 1
     */
    public int newThread() {
        threadCounter++;
        return threadCounter;
    }

    /** Lazily obtained AST cache; see {@link #getAstCache}. */
    private AstCache astCache;

    /** Path of the AST cache directory, computed by {@link #createCacheDir}. */
    public String cacheDir;

    /** Files for which no AST could be obtained in {@code parseAndResolve}. */
    @NotNull
    public Set<String> failedModules = new HashSet<>();

    /** Only its size is read here (in the analysis summary); it is not filled by this class. */
    @NotNull
    public Set<String> failedToParse = new HashSet<>();

    /** Named counters; the constructor stores {@code "startTime"} here. */
    @NotNull
    public Stats stats = new Stats();

    /**
     * Manages the built-in modules -- that is, modules from the standard Python
     * library that are implemented in C and consequently have no Python source.
     */
    public Builtins builtins;

    /** Number of files successfully resolved by {@code parseAndResolve}. */
    public int nLoadedFiles = 0;

    private Logger logger;

    /** Progress display; stays {@code null} until {@link #loadFileRecursive} is first called. */
    private FancyProgress loadingProgress = null;

    /**
     * Creates an indexer: records the start time, publishes this instance as
     * {@link #idx}, initializes the builtins, seeds the search path from the
     * {@code PYTHONPATH} environment variable, creates the cache directory and
     * obtains the AST cache.
     */
    public Indexer() {
        stats.putInt("startTime", System.currentTimeMillis());
        logger = Logger.getLogger(Indexer.class.getCanonicalName());
        idx = this;
        builtins = new Builtins();
        builtins.init();
        addPythonPath();
        createCacheDir();
        getAstCache();
    }

    /**
     * Sets the current working directory used as the head of the load path.
     * A {@code null} argument is ignored and leaves the current value untouched.
     */
    public void setCWD(String cd) {
        if (cd != null) {
            cwd = Util.unifyPath(cd);
        }
    }

    /** Appends every element of {@code p} to the search path via {@link #addPath}. */
    public void addPaths(@NotNull List<String> p) {
        for (String s : p) {
            addPath(s);
        }
    }

    /** Appends {@code p}, passed through {@code Util.unifyPath}, to the search path. */
    public void addPath(String p) {
        path.add(Util.unifyPath(p));
    }

    /** Replaces the search path with the (unified) elements of {@code path}. */
    public void setPath(@NotNull List<String> path) {
        this.path = new ArrayList<>(path.size());
        addPaths(path);
    }

    /**
     * Adds each entry of the {@code PYTHONPATH} environment variable, if set,
     * to the search path.
     */
    private void addPythonPath() {
        String path = System.getenv("PYTHONPATH");
        if (path != null) {
            // Use the platform separator (":" on Unix, ";" on Windows) so that
            // Windows entries such as "C:\lib" are not cut at the drive colon.
            // Both separators are plain literals when used as a regex.
            String[] segments = path.split(File.pathSeparator);
            for (String p : segments) {
                addPath(p);
            }
        }
    }

    /**
     * Returns the module search path. Put cwd on top.
     * @return a new list: {@link #cwd} first (when set), followed by {@link #path}
     */
    @NotNull
    public List<String> getLoadPath() {
        List<String> loadPath = new ArrayList<>();
        if (cwd != null) {
            loadPath.add(cwd);
        }
        loadPath.addAll(path);
        return loadPath;
    }

    /** Returns whether {@code f} is currently in the call stack set. */
    public boolean inStack(Object f) {
        return callStack.contains(f);
    }

    /** Adds {@code f} to the call stack set. */
    public void pushStack(Object f) {
        callStack.add(f);
    }

    /** Removes {@code f} from the call stack set. */
    public void popStack(Object f) {
        callStack.remove(f);
    }

    /** Returns whether {@code f} is currently in the import stack set. */
    public boolean inImportStack(Object f) {
        return importStack.contains(f);
    }

    /** Adds {@code f} to the import stack set. */
    public void pushImportStack(Object f) {
        importStack.add(f);
    }

    /** Removes {@code f} from the import stack set. */
    public void popImportStack(Object f) {
        importStack.remove(f);
    }

    /** Returns the live map from qualified name to bindings (not a copy). */
    @NotNull
    public Map<String, List<Binding>> getAllBindings() {
        return allBindings;
    }

    /**
     * Looks {@code file} up in {@link #moduleTable}.
     * @return the module type found there, the first module type inside a
     *         union type, or {@code null} when there is none
     */
    @Nullable
    ModuleType getCachedModule(String file) {
        Type t = moduleTable.lookupType(file);
        if (t == null) {
            return null;
        } else if (t.isUnionType()) {
            for (Type tt : t.asUnionType().getTypes()) {
                if (tt.isModuleType()) {
                    return (ModuleType)tt;
                }
            }
            return null;
        } else if (t.isModuleType()){
            return (ModuleType)t;
        } else {
            return null;
        }
    }

    /**
     * Returns (loading/resolving if necessary) the module for a given source path.
     * @param file absolute file path
     * @return the module, or {@code null} if the file is a known failure or
     *         could not be loaded
     */
    @Nullable
    public ModuleType getModuleForFile(String file) {
        if (failedModules.contains(file)) {
            return null;
        }
        ModuleType m = getCachedModule(file);
        if (m != null) {
            return m;
        }
        return loadFile(file);
    }

    /**
     * Returns the list, possibly empty but never {@code null}, of
     * errors and warnings generated in the file.
     */
    public List<Diagnostic> getDiagnosticsForFile(String file) {
        List<Diagnostic> errs = semanticErrors.get(file);
        if (errs != null) {
            return errs;
        }
        return new ArrayList<>();
    }

    /**
     * Add a reference to binding {@code b} at AST node {@code node}.
     * Does nothing when either argument is {@code null} or when {@code node}
     * is a {@link org.yinwang.pysonar.ast.Url}.
     * @param node a node referring to a name binding. Typically a
     * {@link org.yinwang.pysonar.ast.Name} or {@link org.yinwang.pysonar.ast.Str}.
     */
    public void putRef(@Nullable Node node, @Nullable Binding b) {
        if (node == null || node instanceof Url || b == null) {
            return;
        }
        Ref ref = new Ref(node);
        List<Binding> bindings = references.get(ref);
        if (bindings == null) {
            // The indexer is heavily memory-constrained, so we need small overhead.
            // Empirically using a capacity-1 ArrayList for the binding set
            // uses about 1/2 the memory of a LinkedList, and 1/4 the memory
            // of a default HashSet.
            bindings = new ArrayList<>(1);
            references.put(ref, bindings);
        }
        if (!bindings.contains(b)) {
            bindings.add(b);
        }
        b.addRef(ref);
    }

    /** Returns the live map from reference to bindings (not a copy). */
    @NotNull
    public Map<Ref, List<Binding>> getReferences() {
        return references;
    }

    /**
     * Records an error diagnostic spanning {@code loc}. Nothing is recorded
     * when the node has no file.
     */
    public void putProblem(@NotNull Node loc, String msg) {
        String file = loc.getFile();
        if (file != null) {
            addFileErr(file, loc.start, loc.end, msg);
        }
    }

    /**
     * Records an error diagnostic for situations without a Node. Nothing is
     * recorded when {@code file} is {@code null}.
     */
    public void putProblem(@Nullable String file, int begin, int end, String msg) {
        if (file != null) {
            addFileErr(file, begin, end, msg);
        }
    }

    /** Appends an {@code ERROR} diagnostic for {@code file} to {@link #semanticErrors}. */
    void addFileErr(String file, int begin, int end, String msg) {
        Diagnostic d = new Diagnostic(file, Diagnostic.Category.ERROR, begin, end, msg);
        getFileErrs(file, semanticErrors).add(d);
    }

    /** Returns the (created on demand) list of {@link #parseErrors} for {@code file}. */
    List<Diagnostic> getParseErrs(String file) {
        return getFileErrs(file, parseErrors);
    }

    /**
     * Returns the list stored under {@code file} in {@code map}, inserting a
     * new empty list first when there is none.
     */
    List<Diagnostic> getFileErrs(String file, @NotNull Map<String, List<Diagnostic>> map) {
        List<Diagnostic> msgs = map.get(file);
        if (msgs == null) {
            msgs = new ArrayList<>();
            map.put(file, msgs);
        }
        return msgs;
    }

    /**
     * Returns the cached module for {@code path} if there is one; otherwise
     * parses and resolves the given {@code contents} under that path.
     */
    @Nullable
    public ModuleType loadString(String path, String contents) {
        ModuleType module = getCachedModule(path);
        if (module != null) {
            finer("\nusing cached module " + path + " [succeeded]");
            return module;
        }
        return parseAndResolve(path, contents);
    }

    /**
     * Loads and resolves the file at {@code path}, unless it is already cached.
     * While the file is being processed, the CWD is its parent directory and
     * the path is kept on the import stack.
     * @return the module, or {@code null} when the file is unreadable, is
     *         already being loaded (circular import), or fails to resolve
     */
    @Nullable
    public ModuleType loadFile(String path) {
        // Util.msg("loading: " + path);
        File f = new File(Util.unifyPath(path));
        if (!f.canRead()) {
            finer("\nfile not found or cannot be read: " + path);
            return null;
        }
        ModuleType module = getCachedModule(path);
        if (module != null) {
            finer("\nusing cached module " + path + " [succeeded]");
            return module;
        }
        // detect circular import
        if (Indexer.idx.inImportStack(path)) {
            return null;
        }
        // set new CWD and save the old one on stack
        String oldcwd = cwd;
        setCWD(f.getParent());
        Indexer.idx.pushImportStack(path);
        try {
            return parseAndResolve(path);
        } finally {
            // The file is no longer "in progress", whether loading succeeded,
            // failed or threw; leaving it on the stack would make every later
            // attempt look like a circular import.
            Indexer.idx.popImportStack(path);
            // restore old CWD (setCWD ignores null, so a previously unset CWD
            // keeps the value assigned above)
            setCWD(oldcwd);
        }
    }

    /** Returns whether {@code dir} equals one of the load path entries. */
    private boolean isInLoadPath(File dir) {
        for (String s : getLoadPath()) {
            if (new File(s).equals(dir)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Logs and ticks the progress display (when one exists), then parses and
     * resolves {@code file} from disk.
     */
    @Nullable
    private ModuleType parseAndResolve(String file) {
        finer("Indexing: " + file);
        // The progress display only exists once loadFileRecursive has run;
        // direct callers of loadFile/getModuleForFile must not crash here.
        if (loadingProgress != null) {
            loadingProgress.tick();
        }
        return parseAndResolve(file, null);
    }

    /**
     * Parse a file or string and return its module parse tree.
     * @param file the filename
     * @param contents optional file contents. If {@code null}, loads the
     * file contents from disk.
     * @return the resolved module; {@code null} when no AST could be obtained
     *         (the file is then added to {@link #failedModules}) or when
     *         memory ran out
     */
    @Nullable
    private ModuleType parseAndResolve(String file, @Nullable String contents) {
        // Avoid infinite recursion if any caller forgets this check. (Has happened.)
        // getCachedModule copes with union and non-module entries, where a
        // blind cast of the lookup result would throw ClassCastException.
        ModuleType cached = getCachedModule(file);
        if (cached != null) {
            return cached;
        }
        try {
            Module ast;
            if (contents != null) {
                ast = getAstForFile(file, contents);
            } else {
                ast = getAstForFile(file);
            }
            if (ast == null) {
                failedModules.add(file);
                return null;
            } else {
                finer("resolving: " + file);
                ModuleType mod = (ModuleType)ast.resolve(moduleTable, 0);
                finer("[success]");
                nLoadedFiles++;
                return mod;
            }
        } catch (OutOfMemoryError e) {
            // Best effort: drop the cached ASTs and give up on this file.
            if (astCache != null) {
                astCache.clear();
            }
            System.gc();
            return null;
        }
    }

    /**
     * Computes {@link #cacheDir} below the system temp directory and creates
     * it if missing; calls {@code Util.die} when it cannot be created.
     */
    private void createCacheDir() {
        cacheDir = Util.makePathString(Util.getSystemTempDir(), "pysonar2", "ast_cache");
        File f = new File(cacheDir);
        Util.msg("AST cache is at: " + cacheDir);
        if (!f.exists()) {
            if (!f.mkdirs()) {
                Util.die("Failed to create tmp directory: " + cacheDir +
                        ".Please check permissions");
            }
        }
    }

    /** Returns the AST cache, fetching it from {@code AstCache.get()} on first use. */
    private AstCache getAstCache() {
        if (astCache == null) {
            astCache = AstCache.get();
        }
        return astCache;
    }

    /**
     * Returns the syntax tree for {@code file}. <p>
     */
    @Nullable
    public Module getAstForFile(String file) {
        return getAstCache().getAST(file);
    }

    /**
     * Returns the syntax tree for {@code file}, using the supplied
     * {@code contents}. <p>
     */
    @Nullable
    public Module getAstForFile(String file, String contents) {
        return getAstCache().getAST(file, contents);
    }

    /** Returns what {@link #builtins} holds for {@code qname}. */
    @Nullable
    public ModuleType getBuiltinModule(@NotNull String qname) {
        return builtins.get(qname);
    }

    /**
     * Joins the ids of {@code names} with dots.
     * @return the dotted name; the empty string for an empty list
     */
    @NotNull
    public String makeQname(@NotNull List<Name> names) {
        StringBuilder qname = new StringBuilder();
        for (int i = 0; i < names.size(); i++) {
            if (i > 0) {
                qname.append('.');
            }
            qname.append(names.get(i).id);
        }
        return qname.toString();
    }

    /**
     * Find the path that contains modname. Used to find the starting point of locating a qname.
     * @param headName first module name segment
     * @return the first load path entry that contains either
     *         {@code headName/__init__.py} or {@code headName.py};
     *         {@code null} if no entry does
     */
    @Nullable
    public String locateModule(String headName) {
        List<String> loadPath = getLoadPath();
        for (String p : loadPath) {
            File startDir = new File(p, headName);
            File initFile = new File(Util.joinPath(startDir, "__init__.py").getPath());
            if (initFile.exists()) {
                return p;
            }
            File startFile = new File(startDir + ".py");
            if (startFile.exists()) {
                return p;
            }
        }
        return null;
    }

    /**
     * Loads the module named by the dotted name {@code name}.
     * A builtin module with that qualified name wins; otherwise each segment
     * is looked up on disk starting from {@link #locateModule}, and every
     * loaded segment is bound in its parent module's table (or in
     * {@code scope} for the first one).
     * @param name segments of the dotted module name
     * @param scope scope that receives the binding for the outermost module
     * @param tag passed through to the scope when the binding is created
     * @return the last module loaded, or {@code null} when the name is empty,
     *         cannot be located, or one of its files fails to load
     */
    @Nullable
    public ModuleType loadModule(@NotNull List<Name> name, @NotNull Scope scope, int tag) {
        if (name.isEmpty()) {
            return null;
        }
        String qname = makeQname(name);
        ModuleType mt = getBuiltinModule(qname);
        if (mt != null) {
            scope.update(name.get(0).id,
                    new Url(Builtins.LIBRARY_URL + mt.getTable().getPath() + ".html"),
                    mt, Binding.Kind.SCOPE);
            return mt;
        }
        // If there are more than one segment
        // load the packages first
        ModuleType prev = null;
        String startPath = locateModule(name.get(0).id);
        if (startPath == null) {
            return null;
        }
        File path = new File(startPath);
        for (int i = 0; i < name.size(); i++) {
            Name segment = name.get(i);
            path = new File(path, segment.id);
            File initFile = new File(Util.joinPath(path, "__init__.py").getPath());
            File moduleFile;
            if (initFile.exists()) {
                // the segment is a package
                moduleFile = initFile;
            } else if (i == name.size() - 1) {
                // the last segment may be a plain module file
                File startFile = new File(path + ".py");
                if (!startFile.exists()) {
                    return null;
                }
                moduleFile = startFile;
            } else {
                // intermediate directory without __init__.py: nothing to load
                // or bind for this segment, keep descending
                continue;
            }
            ModuleType mod = loadFile(moduleFile.getPath());
            if (mod == null) {
                return null;
            }
            bindLoadedModule(prev, scope, segment, mod, tag);
            prev = mod;
        }
        return prev;
    }

    /**
     * Binds {@code mod} under the id of {@code segment} -- in the table of
     * {@code parent} when there is one, otherwise in {@code scope} -- and
     * records {@code segment} as a reference to the new binding.
     */
    private void bindLoadedModule(@Nullable ModuleType parent, @NotNull Scope scope,
                                  @NotNull Name segment, @NotNull ModuleType mod, int tag) {
        Binding b;
        if (parent != null) {
            b = parent.getTable().put(segment.id, segment, mod, Binding.Kind.VARIABLE, tag);
        } else {
            b = scope.put(segment.id, segment, mod, Binding.Kind.VARIABLE, tag);
        }
        Indexer.idx.putRef(segment, b);
    }

    /**
     * Load all Python source files recursively if the given fullname is a
     * directory; otherwise just load a file. Looks at file extension to
     * determine whether to load a given file.
     */
    public void loadFileRecursive(String fullname) {
        // The file count is only needed to size the progress display, which
        // is created once; do not re-walk the tree on every recursive call.
        if (loadingProgress == null) {
            loadingProgress = new FancyProgress(countFileRecursive(fullname), 50);
        }
        File fileOrDir = new File(fullname);
        if (fileOrDir.isDirectory()) {
            // listFiles() returns null when the directory cannot be read
            File[] children = fileOrDir.listFiles();
            if (children != null) {
                for (File child : children) {
                    loadFileRecursive(child.getPath());
                }
            }
        } else if (fileOrDir.getPath().endsWith(".py")) {
            loadFile(fileOrDir.getPath());
        }
    }

    /**
     * Counts the {@code .py} files at or below {@code fullname}.
     * @return 1 or 0 for a plain file depending on its extension; for a
     *         directory the total over its children (0 if it cannot be listed)
     */
    public int countFileRecursive(String fullname) {
        File fileOrDir = new File(fullname);
        int sum = 0;
        if (fileOrDir.isDirectory()) {
            // listFiles() returns null when the directory cannot be read
            File[] children = fileOrDir.listFiles();
            if (children != null) {
                for (File child : children) {
                    sum += countFileRecursive(child.getPath());
                }
            }
        } else if (fileOrDir.getPath().endsWith(".py")) {
            sum += 1;
        }
        return sum;
    }

    /**
     * Completes the analysis: applies the functions that were never called,
     * reports unreferenced non-class/function/module bindings as
     * "Unused variable" problems, post-processes every recorded reference
     * with {@link #convertCallToNew}, and prints the analysis summary.
     */
    public void finish() {
        // progress.end();
        Util.msg("\nFinished loading files. " + nCalled + " functions were called.");
        Util.msg("Analyzing uncalled functions");
        applyUncalled();
        // mark unused variables
        for (List<Binding> bindings : allBindings.values()) {
            for (Binding b : bindings) {
                if (!b.getType().isClassType() &&
                        !b.getType().isFuncType() &&
                        !b.getType().isModuleType()
                        && b.getRefs().isEmpty()) {
                    for (Def def : b.getDefs()) {
                        Indexer.idx.putProblem(def.getNode(), "Unused variable: " + def.getName());
                    }
                }
            }
        }
        for (Entry<Ref, List<Binding>> ent : references.entrySet()) {
            convertCallToNew(ent.getKey(), ent.getValue());
        }
        Util.msg(getAnalysisSummary());
    }

    /** Closes the AST cache, if one was obtained. */
    public void close() {
        if (astCache != null) {
            astCache.close();
        }
    }

    /**
     * Calls {@code ref.markAsNew()} when {@code ref.isRef()} is false and the
     * type of the first binding (or the first useful member of its union
     * type) is neither unknown nor a function type.
     */
    private void convertCallToNew(@NotNull Ref ref, @NotNull List<Binding> bindings) {
        if (ref.isRef()) {
            return;
        }
        if (bindings.isEmpty()) {
            return;
        }
        Binding nb = bindings.get(0);
        Type t = nb.getType();
        if (t.isUnionType()) {
            t = t.asUnionType().firstUseful();
            if (t == null) {
                return;
            }
        }
        if (!t.isUnknownType() && !t.isFuncType()) {
            ref.markAsNew();
        }
    }

    /** Remembers {@code cl} as uncalled unless its function is already flagged as called. */
    public void addUncalled(@NotNull FunType cl) {
        if (!cl.func.called) {
            uncalled.add(cl);
        }
    }

    /** Removes {@code f} from the set of uncalled functions. */
    public void removeUncalled(FunType f) {
        uncalled.remove(f);
    }

    /**
     * Applies every function in the uncalled set, with a fresh
     * {@link #newThread} number each, until the set is empty.
     */
    public void applyUncalled() {
        FancyProgress progress = new FancyProgress(uncalled.size(), 50);
        // NOTE(review): this loop only ends if applying a function removes it
        // from 'uncalled' (presumably through removeUncalled) -- confirm in Call.apply.
        while (!uncalled.isEmpty()) {
            // iterate over a snapshot, the set may change while functions are applied
            List<FunType> uncalledDup = new ArrayList<>(uncalled);
            for (FunType cl : uncalledDup) {
                progress.tick();
                Call.apply(cl, null, null, null, null, null, newThread());
            }
        }
    }

    /**
     * Builds the multi-line, human-readable summary printed by {@link #finish}:
     * elapsed time, file/module counts, definition and reference counts and
     * the name resolve rate, followed by GC statistics.
     */
    @NotNull
    public String getAnalysisSummary() {
        StringBuilder sb = new StringBuilder();
        sb.append("\n" + Util.banner("analysis summary"));
        String duration = Util.timeString(System.currentTimeMillis() - stats.getInt("startTime"));
        sb.append("\n- total time: " + duration);
        sb.append("\n- modules loaded: " + nLoadedFiles);
        sb.append("\n- unresolved modules: " + failedModules.size());
        // this is the number of files that have diagnostics, not of diagnostics
        sb.append("\n- semantic problems: " + semanticErrors.size());
        sb.append("\n- failed to parse: " + failedToParse.size());
        // calculate number of defs, refs, xrefs
        int nDef = 0, nXRef = 0;
        for (List<Binding> bindings : getAllBindings().values()) {
            for (Binding b : bindings) {
                nDef += b.getDefs().size();
                nXRef += b.getRefs().size();
            }
        }
        sb.append("\n- number of definitions: " + nDef);
        sb.append("\n- number of cross references: " + nXRef);
        sb.append("\n- number of references: " + getReferences().size());
        long resolved = stats.getInt("resolved");
        long unresolved = stats.getInt("unresolved");
        sb.append("\n- resolved names: " + resolved);
        sb.append("\n- unresolved names: " + unresolved);
        sb.append("\n- name resolve rate: " + Util.percent(resolved, resolved + unresolved));
        sb.append("\n" + Util.printGCStats());
        return sb.toString();
    }

    /** Returns the AST cache's module docstring info for {@code file}. */
    public AstCache.DocstringInfo getModuleDocstringInfoForFile(String file) {
        return getAstCache().getModuleDocstringInfo(file);
    }

    /** Returns the keys of {@link #moduleTable} that end in {@code .py}, as a new list. */
    @NotNull
    public List<String> getLoadedFiles() {
        List<String> files = new ArrayList<>();
        for (String file : moduleTable.keySet()) {
            if (file.endsWith(".py")) {
                files.add(file);
            }
        }
        return files;
    }

    /**
     * Returns the registered binding that {@code equals} {@code b}, searching
     * only among bindings with the same qualified name; {@code null} if none.
     */
    @Nullable
    private Binding findBinding(@NotNull Binding b) {
        List<Binding> existing = allBindings.get(b.getQname());
        if (existing != null) {
            for (Binding eb : existing) {
                if (eb.equals(b)) {
                    return eb;
                }
            }
        }
        return null;
    }

    /** Appends {@code b} to the list kept in {@link #allBindings} under its qualified name. */
    public void registerBinding(@NotNull Binding b) {
        String qname = b.getQname();
        List<Binding> existing = allBindings.get(qname);
        if (existing != null) {
            existing.add(b);
        } else {
            List<Binding> lb = new ArrayList<>();
            lb.add(b);
            allBindings.put(qname, lb);
        }
    }

    /** Logs {@code msg} at {@code level} if the logger accepts that level. */
    public void log(Level level, String msg) {
        if (logger.isLoggable(level)) {
            logger.log(level, msg);
        }
    }

    /** Logs {@code msg} at {@link Level#SEVERE}. */
    public void severe(String msg) {
        log(Level.SEVERE, msg);
    }

    /** Logs {@code msg} at {@link Level#WARNING}. */
    public void warn(String msg) {
        log(Level.WARNING, msg);
    }

    /** Logs {@code msg} at {@link Level#INFO}. */
    public void info(String msg) {
        log(Level.INFO, msg);
    }

    /** Logs {@code msg} at {@link Level#FINE}. */
    public void fine(String msg) {
        log(Level.FINE, msg);
    }

    /** Logs {@code msg} at {@link Level#FINER}. */
    public void finer(String msg) {
        log(Level.FINER, msg);
    }

    /** Returns a short description with the reference, problem-file and loaded-file counts. */
    @NotNull
    @Override
    public String toString() {
        return "<Indexer:locs=" + references.size() + ":probs="
                + semanticErrors.size() + ":files=" + nLoadedFiles + ">";
    }
}