package edu.stanford.nlp.misc;

import edu.stanford.nlp.util.logging.Redwood;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.stanford.nlp.util.Generics;

/**
 * Parses the output of DependencyExtractor into a graph of {@link Identifier}s,
 * and constructs transitive dependency closures of any set of classes.
 *
 * @author Jamie Nicolson (nicolson@cs.stanford.edu)
 */
public class DependencyAnalyzer {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(DependencyAnalyzer.class);

  /** Make true to record the dependencies as they are calculated. */
  private static final boolean VERBOSE = false;

  /**
   * Represents a package, class, method, or field in the dependency tree.
   */
  static class Identifier implements Comparable<Identifier> {

    /** The fully-qualified name; it is the sole basis of equality, hashing and ordering. */
    public String name;

    /**
     * The set of Identifiers that are directly dependent on this one.
     */
    public Set<Identifier> ingoingDependencies = Generics.newHashSet();

    /**
     * The set of Identifiers upon which this Identifier is directly
     * dependent.
     */
    public Set<Identifier> outgoingDependencies = Generics.newHashSet();

    /**
     * True if this Identifier represents a class. It might be nicer
     * to use an enumerated type for all the types of Identifiers, but
     * for now all we care about is whether it is a class.
     */
    boolean isClass = false;

    public Identifier(String name) {
      this.name = name;
    }

    /**
     * Two identifiers are equal() if and only if their fully-qualified
     * names are the same.
     */
    @Override
    public boolean equals(Object obj) {
      // instanceof is already false for null, so no separate null check is needed
      return (obj instanceof Identifier) && ((Identifier) obj).name.equals(name);
    }

    @Override
    public int hashCode() {
      return name.hashCode();
    }

    /** Orders identifiers by name, consistently with {@link #equals(Object)}. */
    @Override
    public int compareTo(Identifier o) {
      return name.compareTo(o.name);
    }

    @Override
    public String toString() {
      return name;
    }

  } // end static class Identifier


  /** Every Identifier seen so far, keyed by the name it was registered under. */
  private final Map<String,Identifier> identifiers = Generics.newHashMap();

  /**
   * Adds the starting classes to depQueue and closure.
   * Allows * as a wildcard for class names.
   * <p>
   * Each class is added at most once, even if several patterns match it.
   * A warning is logged for every pattern that matches no class at all.
   *
   * @param depQueue Queue that receives each matching class
   * @param closure Set that receives each matching class
   * @param startingClasses Class-name patterns; {@code .} and {@code $} are taken
   *     literally and {@code *} matches any run of characters
   */
  void addStartingClasses(LinkedList<Identifier> depQueue,
                          Set<Identifier> closure,
                          List<String> startingClasses) {
    // build patterns out of the given class names
    // escape . and $, turn * into .* for a regular expression
    int numPatterns = startingClasses.size();
    Pattern[] startingPatterns = new Pattern[numPatterns];
    boolean[] matched = new boolean[numPatterns];
    for (int i = 0; i < numPatterns; ++i) {
      String regex = startingClasses.get(i)
          .replace(".", "\\.")
          .replace("$", "\\$")
          .replace("*", ".*");
      startingPatterns[i] = Pattern.compile(regex);
    }

    // must iterate over every identifier, since we don't know which
    // ones will match any given expression
    for (Identifier id : identifiers.values()) {
      if (!id.isClass) {
        continue;
      }
      boolean added = false;
      // Test every pattern rather than stopping at the first hit: a pattern
      // that only matches classes already claimed by an earlier pattern has
      // still matched something and must not be reported below.
      for (int i = 0; i < numPatterns; ++i) {
        if (!startingPatterns[i].matcher(id.name).matches()) {
          continue;
        }
        matched[i] = true;
        if (!added) {
          depQueue.addLast(id);
          closure.add(id);
          added = true;
          if (VERBOSE) {
            log.info("Starting class: " + id.name);
          }
        }
      }
    }

    for (int i = 0; i < numPatterns; ++i) {
      if (!matched[i]) {
        log.info("Warning: pattern " + startingClasses.get(i) + " matched nothing");
      }
    }
  }

  /**
   * Constructs the transitive closure of outgoing dependencies starting
   * from the given classes. That is, the returned collection is all the
   * classes that might be needed in order to use the given classes.
   * If none of the given classes are found, an empty collection is returned.
   *
   * @param startingClassNames A List of Strings, each the
   *     fully-qualified name of a class ({@code *} is accepted as a wildcard).
   *     These are the starting elements of the transitive closure.
   * @return A collection of Identifiers, each representing a class,
   *     that are the transitive closure of the starting classes.
   */
  public Collection<Identifier> transitiveClosure(List<String> startingClassNames) {
    Set<Identifier> closure = Generics.newHashSet();

    // The depQueue is the queue of items in the closure whose dependencies
    // have yet to be scanned.
    LinkedList<Identifier> depQueue = new LinkedList<>();

    // add all the starting classes to the closure and the depQueue
    addStartingClasses(depQueue, closure, startingClassNames);

    // Now work through the dependency queue, adding dependencies until
    // there are none left.
    while (!depQueue.isEmpty()) {
      Identifier id = depQueue.removeFirst();
      for (Identifier outgoingDependency : id.outgoingDependencies) {
        if (outgoingDependency.isClass && !closure.contains(outgoingDependency)) {
          if (VERBOSE) {
            log.info("Added " + outgoingDependency + " due to " + id);
          }
          depQueue.addLast(outgoingDependency);
          closure.add(outgoingDependency);
        }
      }
    }
    return closure;
  }

  //
  // These regular expressions are used to parse the raw output
  // of DependencyExtractor.
  //
  // NOTE(review): classLine and memberLine begin with literal whitespace that
  // presumably has to match the indentation DependencyExtractor emits for
  // classes and members -- confirm against real extractor output.
  //
  public static final Pattern pkgLine = Pattern.compile("(\\S*)(?:\\s+\\*)?\\s*");
  public static final Pattern classLine = Pattern.compile(" ([^<]\\S*)(?:\\s+\\*)?\\s*");
  public static final Pattern memberLine = Pattern.compile(" ([a-zA-Z_\\$]{1}.*)");
  public static final Pattern inDepLine = Pattern.compile("\\s*<-- (.*)");
  public static final Pattern outDepLine = Pattern.compile("\\s*--> (.*)");
  public static final Pattern bothDepLine = Pattern.compile("\\s*<-> (.*)");

  /**
   * Reads a dependency listing generated by DependencyExtractor, computes the
   * transitive closure of the given starting classes, and prints (sorted, one
   * per line, as {@code path/to/Name.class}) exactly those classes in the
   * closure that are in an {@code edu.stanford.nlp}-prepended package.
   *
   * @param args {@code args[0]} is the name of a file that contains the output
   *     of a run of DependencyExtractor; every further argument is a starting
   *     class name, in which {@code *} may be used as a wildcard
   * @throws IllegalArgumentException if no arguments are given
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      throw new IllegalArgumentException(
          "Usage: java edu.stanford.nlp.misc.DependencyAnalyzer dependencyFile [startingClass ...]");
    }
    DependencyAnalyzer da = new DependencyAnalyzer(args[0]);

    List<String> startingClasses = new ArrayList<>(Arrays.asList(args).subList(1, args.length));

    Collection<Identifier> closure = da.transitiveClosure(startingClasses);

    List<Identifier> sortedClosure = new ArrayList<>(closure);
    Collections.sort(sortedClosure);

    Set<String> alreadyOutput = Generics.newHashSet();
    for (Identifier identifier : sortedClosure) {
      String name = identifier.name;
      if (!name.startsWith("edu.stanford.nlp")) {
        continue;
      }
      name = name.replace('.', '/') + ".class";
      // no need to output [] in the class names
      name = name.replace("[]", "");
      // filter by uniqueness in case there were array classes found;
      // Set.add returns false when the name has been printed before
      if (alreadyOutput.add(name)) {
        System.out.println(name);
      }
    }
  }

  /**
   * Joins a package name and a class name into a fully-qualified name.
   *
   * @param pkgname The package name; the empty string denotes the default package
   * @param classname The simple class name
   * @return {@code classname} alone if {@code pkgname} is empty, otherwise
   *     {@code pkgname + "." + classname}
   */
  public static String prependPackage(String pkgname, String classname) {
    if (pkgname.equals("")) {
      return classname;
    }
    return pkgname + "." + classname;
  }

  /**
   * Constructs a DependencyAnalyzer from the output of DependencyExtractor.
   * The data will be converted into a dependency tree.
   *
   * @param filename The path of a file containing the output of a run
   *     of DependencyExtractor.
   * @throws IOException if the file cannot be opened or read
   */
  public DependencyAnalyzer(String filename) throws IOException {
    // try-with-resources closes the reader on the normal path and when
    // reading fails part-way through the file
    try (BufferedReader input = new BufferedReader(new FileReader(filename))) {
      Identifier curPackage = null;
      Identifier curClass = null;
      String line;
      while ((line = input.readLine()) != null) {
        Matcher matcher = pkgLine.matcher(line);
        if (matcher.matches()) {
          curPackage = canonicalIdentifier(matcher.group(1));
          curClass = null;
          continue;
        }

        matcher = classLine.matcher(line);
        if (matcher.matches()) {
          // A class line seen before any package line is treated as belonging
          // to the default package instead of failing on a null package.
          String pkgName = (curPackage == null) ? "" : curPackage.name;
          curClass = canonicalIdentifier(prependPackage(pkgName, matcher.group(1)));
          curClass.isClass = true;
          continue;
        }

        if (memberLine.matcher(line).matches()) {
          // Members are recognized only so that they are not reported as
          // unmatched; no Identifier is recorded for them.
          continue;
        }

        // For the three dependency kinds the target Identifier is always
        // registered, but an edge is only recorded while inside a class.
        matcher = inDepLine.matcher(line);
        if (matcher.matches()) {
          Identifier inDep = canonicalIdentifier(matcher.group(1));
          if (curClass != null) {
            curClass.ingoingDependencies.add(inDep);
          }
          continue;
        }

        matcher = outDepLine.matcher(line);
        if (matcher.matches()) {
          Identifier outDep = canonicalIdentifier(matcher.group(1));
          if (curClass != null) {
            curClass.outgoingDependencies.add(outDep);
          }
          continue;
        }

        matcher = bothDepLine.matcher(line);
        if (matcher.matches()) {
          Identifier dep = canonicalIdentifier(matcher.group(1));
          if (curClass != null) {
            curClass.ingoingDependencies.add(dep);
            curClass.outgoingDependencies.add(dep);
          }
          continue;
        }

        log.info("Found unmatching line: " + line);
      }
    }

    // After reading the dependencies, as a post-processing step we
    // connect all inner classes and outer classes with each other.
    connectInnerAndOuterClasses();
  }

  /**
   * Adds mutual ingoing and outgoing dependencies between every class whose
   * name contains {@code $} and the identifier registered under the part of
   * the name before the first {@code $}, when such an identifier exists.
   */
  private void connectInnerAndOuterClasses() {
    for (Map.Entry<String,Identifier> entry : identifiers.entrySet()) {
      Identifier classId = entry.getValue();
      if (!classId.isClass) {
        continue;
      }
      String className = entry.getKey();
      int baseIndex = className.indexOf('$');
      if (baseIndex < 0) {
        continue;
      }
      Identifier baseId = identifiers.get(className.substring(0, baseIndex));
      if (baseId == null) {
        continue;
      }
      baseId.ingoingDependencies.add(classId);
      baseId.outgoingDependencies.add(classId);
      classId.ingoingDependencies.add(baseId);
      classId.outgoingDependencies.add(baseId);
    }
  }

  /**
   * Returns the canonical Identifier with the given name.
   *
   * @param name The name of an Identifier.
   * @return The Identifier, which will have been newly created if it
   *     did not already exist.
   */
  private Identifier canonicalIdentifier(String name) {
    Identifier ident = identifiers.get(name);
    if (ident == null) {
      ident = new Identifier(name);
      identifiers.put(name, ident);
    }
    return ident;
  }

}