package LBJ2;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import LBJ2.IR.AST;
import LBJ2.IR.ClassifierAssignment;
import LBJ2.IR.ClassifierName;
import LBJ2.IR.CodedClassifier;
import LBJ2.IR.CompositeGenerator;
import LBJ2.IR.Conjunction;
import LBJ2.IR.Constant;
import LBJ2.IR.ConstraintDeclaration;
import LBJ2.IR.DeclarationList;
import LBJ2.IR.InferenceDeclaration;
import LBJ2.IR.InferenceInvocation;
import LBJ2.IR.LearningClassifierExpression;
import LBJ2.frontend.parser;
import LBJ2.frontend.Yylex;
import LBJ2.io.HexOutputStream;
import LBJ2.io.HexStringInputStream;
/**
* To be run after <code>SemanticAnalysis</code>, this pass determines which
* <code>CodeGenerator</code>s need to have their code generated and which
* classifiers need to be trained based on the revisions made to the LBJ
* source file.
*
* <p> A hard coded classifier, a constraint, or an inference named
* <code>foo</code> needs its code regenerated iff at least one of the
* following is true:
* <ul>
* <li> The file <code>foo.java</code> does not exist.
* <li>
* Using the comments at the top of <code>foo.java</code>, it is
* determined that the code specifying <code>foo</code> has been revised.
* </ul>
* If the comments at the top of <code>foo.java</code> do not exist, or if
* they don't have the expected form, the file will not be overwritten and an
* error will be generated.
*
* <p> All <code>CodeGenerator</code>s are also labeled as either "affected"
* (by a revision) or "unaffected". A <code>CodeGenerator</code> named
* <code>foo</code> is labeled "affected" iff at least one of the following
* is true:
* <ul>
* <li>
* <code>foo</code> is a hard coded classifier, a constraint, or an
* inference and either:
* <ul>
* <li> its code needed to be regenerated as described above or
* <li> it invokes another "affected" <code>CodeGenerator</code>.
* </ul>
* <li>
* <code>foo</code> is a learning classifier and at least one of its
* label or extractor classifiers is "affected".
* </ul>
*
* <p> A learning classifier named <code>foo</code> needs to have its code
* regenerated and retrained iff at least one of the following is true:
* <ul>
* <li> The file <code>foo.java</code> does not exist.
* <li>
* Using the comments at the top of <code>foo.java</code>, it is
* determined that the code specifying <code>foo</code> has been revised.
* <li> At least one of its label or extractor classifiers is "affected".
* </ul>
*
* @see LBJ2.SemanticAnalysis
* @author Nick Rizzolo
**/
public class RevisionAnalysis extends Pass
{
/** Constant representing the "unaffected" revision status. */
public static final Integer UNAFFECTED = new Integer(0);
/** Constant representing the "affected" revision status. */
public static final Integer AFFECTED = new Integer(1);
/** Constant representing the "revised" revision status. */
public static final Integer REVISED = new Integer(2);
/** The names of the three revision states. */
public static final String[] statusNames =
{ "unaffected", "affected", "revised" };
/**
* Keeps track of the names of classifiers whose revision status has been
* resolved.
**/
public static HashMap revisionStatus;
/**
* Set to <code>true</code> iff no code has changed since the compiler was
* last run.
**/
public static boolean noChanges;
/**
* Prints the contents of {@link #revisionStatus} to <code>STDOUT</code>.
**/
public static void printRevisionStatus() {
if (revisionStatus == null) {
System.out.println("No revision statuses.");
return;
}
for (Iterator I = revisionStatus.entrySet().iterator(); I.hasNext(); ) {
Map.Entry e = (Map.Entry) I.next();
String name = (String) e.getKey();
Integer status = (Integer) e.getValue();
System.out.println(name + ": " + statusToString(status));
Object classifierExpression =
SemanticAnalysis.representationTable.get(name);
if (classifierExpression instanceof LearningClassifierExpression) {
LearningClassifierExpression lce =
(LearningClassifierExpression) classifierExpression;
System.out.println(
" features: " + statusToString(lce.featuresStatus));
System.out.println(" pruning: " + statusToString(lce.pruneStatus));
System.out.println(
" learning: " + statusToString(lce.learningStatus));
System.out.println(
" only code generation: " + lce.onlyCodeGeneration);
}
}
}
/**
* Returns the name of a revision status, or <code>"no status"</code> if
* the status is <code>null</code>.
**/
public static String statusToString(Integer status) {
if (status == null) return "no status";
return statusNames[status.intValue()];
}
/**
* Read the second line from the specified classifier's generated code.
*
* @param name The name of the classifier.
* @param line The line number at which the classifier whose source we're
* reading is declared in its LBJ source file.
* @return The second line from the classifier's generated code without the
* opening comment marker (//), or <code>null</code> if the
* generated code doesn't exist or the file doesn't appear to be
* generated code.
**/
private static String readSecondLine(String name, int line) {
name += ".java";
if (Main.generatedSourceDirectory != null)
name = Main.generatedSourceDirectory + File.separator + name;
File javaSource = new File(name);
if (!javaSource.exists()) return null;
BufferedReader in = null;
try { in = new BufferedReader(new FileReader(javaSource)); }
catch (Exception e) {
System.err.println("Can't open '" + name + "' for input: " + e);
System.exit(1);
}
String line1 = "";
String line2 = "";
try {
line1 = in.readLine();
line2 = in.readLine();
}
catch (Exception e) {
System.err.println("Can't read from '" + name + "': " + e);
System.exit(1);
}
try { in.close(); }
catch (Exception e) {
System.err.println("Can't close file '" + name + "': " + e);
System.exit(1);
}
if (line1 == null || line2 == null || !line2.startsWith("// ")
|| !TranslateToJava.disclaimer.equals(line1)) {
reportError(line,
"The file '" + name + "' does not appear to have been generated by "
+ "LBJ2, but LBJ2 needs to overwrite it. Either remove the file, "
+ "or change the name of the classifier in '" + Main.sourceFilename
+ "'.");
return null;
}
return line2.substring(3);
}
/**
* This method reads the comments at the top of the file containing the
* code corresponding to the specified code generating node to determine if
* the LBJ source describing that code generator has been modified since
* the LBJ2 compiler was last executed.
*
* @param node The code generating node.
* @param convert Whether or not the code is converted to hexadecimal
* compressed format.
* @return <code>true</code> iff the associated Java file did not exist or
* it contained the expected comments and those comments indicate
* that a revision has taken place.
**/
private static boolean codeRevision(CodeGenerator node, boolean convert) {
String name = node.getName();
String line2 = readSecondLine(name, node.getLine());
if (line2 == null) return true;
String expected = null;
if (convert) {
PrintStream converter = null;
ByteArrayOutputStream converted = new ByteArrayOutputStream();
try {
converter = new PrintStream(
new GZIPOutputStream(
new HexOutputStream(converted)));
}
catch (Exception e) {
System.err.println("Could not create converter stream.");
System.exit(1);
}
converter.print(node.shallow().toString());
converter.close();
expected = converted.toString();
}
else expected = node.shallow().toString();
return !line2.equals(expected);
}
/**
* Recursively propagates the information about which nodes are "affected".
*
* @param name The name of an affected node.
**/
private static void propagateAffected(String name) {
boolean isCompositeGenerator =
SemanticAnalysis.representationTable.get(name)
instanceof CompositeGenerator;
boolean isRevised = revisionStatus.get(name) == REVISED;
HashSet dependors = (HashSet) SemanticAnalysis.dependorGraph.get(name);
assert dependors != null : "null entry in dependorGraph for " + name;
for (Iterator I = dependors.iterator(); I.hasNext(); ) {
String dependor = (String) I.next();
if (SemanticAnalysis.representationTable.get(dependor)
instanceof LearningClassifierExpression) {
LearningClassifierExpression lce =
(LearningClassifierExpression)
SemanticAnalysis.representationTable.get(dependor);
if (lce.featuresStatus == null || lce.featuresStatus != REVISED)
lce.featuresStatus = AFFECTED;
if (lce.pruneStatus == null || lce.pruneStatus != REVISED)
lce.pruneStatus = AFFECTED;
if (lce.learningStatus == null || lce.learningStatus != REVISED)
lce.learningStatus = AFFECTED;
lce.startingRound = 1;
}
if (!revisionStatus.containsKey(dependor)) {
if (isCompositeGenerator && isRevised
&& SemanticAnalysis.representationTable.get(dependor)
instanceof LearningClassifierExpression)
revisionStatus.put(dependor, REVISED);
else revisionStatus.put(dependor, AFFECTED);
propagateAffected((String) dependor);
}
}
}
/**
* Instantiates a pass that runs on an entire <code>AST</code>.
*
* @param ast The program to run this pass on.
**/
public RevisionAnalysis(AST ast) {
super(ast);
revisionStatus = new HashMap();
}
/**
* Runs this pass on all nodes of the indicated type.
*
* @param list The node to process.
**/
public void run(DeclarationList list) {
noChanges = true;
if (list.size() == 0) return;
runOnChildren(list);
noChanges = revisionStatus.size() == 0;
String[] revised =
(String[]) revisionStatus.keySet().toArray(new String[0]);
for (int i = 0; i < revised.length; ++i)
propagateAffected(revised[i]);
for (Iterator I = SemanticAnalysis.dependorGraph.keySet().iterator();
I.hasNext(); ) {
Object name = I.next();
if (!revisionStatus.containsKey(name)) {
revisionStatus.put(name, UNAFFECTED);
if (SemanticAnalysis.representationTable.get(name)
instanceof LearningClassifierExpression) {
LearningClassifierExpression lce =
(LearningClassifierExpression)
SemanticAnalysis.representationTable.get(name);
if (lce.featuresStatus == null) lce.featuresStatus = UNAFFECTED;
if (lce.pruneStatus == null) lce.pruneStatus = UNAFFECTED;
if (lce.learningStatus == null) lce.learningStatus = UNAFFECTED;
}
}
}
}
/**
* Parses a learning classifier expression out of an encoded string using
* the automatically generated scanner and parser.
*
* @param s The string out of which the learning classifier expression
* will be parsed.
* @return The parsed learning classifier expression.
**/
private static LearningClassifierExpression parseLCE(String s) {
Reader reader = null;
try {
reader =
new BufferedReader(
new InputStreamReader(
new GZIPInputStream(
new HexStringInputStream(s))));
}
catch (Exception e) {
System.err.println(
"LBJ ERROR: Can't instantiate string parser for LCE:");
e.printStackTrace();
System.exit(1);
}
AST ast = null;
try { ast = (AST) new parser(new Yylex(reader)).parse().value; }
catch (Exception e) {
System.err.println("LBJ ERROR: Can't parse LCE from string:");
e.printStackTrace();
System.exit(1);
}
SemanticAnalysis.runAndRestore(ast);
ClassifierAssignment ca =
(ClassifierAssignment) ast.declarations.iterator().next();
return (LearningClassifierExpression) ca.expression;
}
/**
* Runs this pass on all nodes of the indicated type.
*
* @param lce The node to process.
**/
public void run(LearningClassifierExpression lce) {
runOnChildren(lce);
String lceName = lce.name.toString();
String line2 = readSecondLine(lce.getName(), lce.getLine());
if (line2 == null || line2.length() == 0 || line2.equals("rebuild")) {
revisionStatus.put(lceName, REVISED);
lce.featuresStatus = lce.pruneStatus = lce.learningStatus = AFFECTED;
return;
}
LearningClassifierExpression oldLCE = parseLCE(line2);
String exFilePath = lceName + ".ex";
if (Main.generatedSourceDirectory != null)
exFilePath =
Main.generatedSourceDirectory + File.separator + exFilePath;
String lexFilePath = lceName + ".lex";
String lcFilePath = lceName + ".lc";
if (Main.classDirectory != null) {
String prefix = Main.classDirectory + File.separator;
lexFilePath = prefix + lexFilePath;
lcFilePath = prefix + lcFilePath;
}
File exFile = new File(exFilePath);
File lexFile = new File(lexFilePath);
File lcFile = new File(lcFilePath);
boolean preExtractToDisk =
lce.preExtract.value.startsWith("\"disk")
|| lce.preExtract.value.equals("true")
|| lce.preExtract.value.equals("\"true\"");
boolean previousPreExtractToDisk =
oldLCE.preExtract.value.startsWith("\"disk")
|| oldLCE.preExtract.value.equals("true")
|| oldLCE.preExtract.value.equals("\"true\"");
if (!oldLCE.returnType.equals(lce.returnType)
|| !oldLCE.name.equals(lce.name)
|| !oldLCE.argument.equals(lce.argument)
|| (oldLCE.labeler == null
? lce.labeler != null
: lce.labeler == null
|| !oldLCE.labeler.name.equals(lce.labeler.name))
|| !oldLCE.extractor.name.equals(lce.extractor.name)
|| (oldLCE.parser == null ? lce.parser != null
: !oldLCE.parser.equals(lce.parser))
|| (oldLCE.featureEncoding == null
? lce.featureEncoding != null
: lce.featureEncoding == null
|| !oldLCE.featureEncoding.value
.equals(lce.featureEncoding.value))
|| preExtractToDisk && !previousPreExtractToDisk
|| (preExtractToDisk ? !exFile.exists() : !lcFile.exists())
|| !lexFile.exists()) {
revisionStatus.put(lceName, REVISED);
lce.featuresStatus = lce.pruneStatus = lce.learningStatus = AFFECTED;
return;
}
if ((oldLCE.pruneCountType == null
? lce.pruneCountType != null
: !oldLCE.pruneCountType.equals(lce.pruneCountType))
|| (oldLCE.pruneThresholdType == null
? lce.pruneThresholdType != null
: !oldLCE.pruneThresholdType.equals(lce.pruneThresholdType))
|| (oldLCE.pruneThreshold == null
? lce.pruneThreshold != null
: !oldLCE.pruneThreshold.equals(lce.pruneThreshold))) {
lce.featuresStatus = preExtractToDisk ? UNAFFECTED : REVISED;
lce.pruneStatus = REVISED;
lce.learningStatus = AFFECTED;
lce.previousPruneCountType = oldLCE.pruneCountType;
revisionStatus.put(lceName, AFFECTED);
return;
}
if ((oldLCE.learnerName == null
? lce.learnerName != null
: !oldLCE.learnerName.equals(lce.learnerName))
|| (oldLCE.learnerConstructor == null
? lce.learnerConstructor != null
: !oldLCE.learnerConstructor.equals(lce.learnerConstructor))
|| (oldLCE.learnerParameterBlock == null
? lce.learnerParameterBlock != null
: !oldLCE.learnerParameterBlock.toString()
.equals(lce.learnerParameterBlock.toString()))
|| (oldLCE.K == null ? lce.K != null : !oldLCE.K.equals(lce.K))
|| oldLCE.splitPolicy != lce.splitPolicy
|| (oldLCE.testingMetric == null
? lce.testingMetric != null
: !oldLCE.testingMetric.equals(lce.testingMetric))
|| !oldLCE.alpha.equals(lce.alpha)
|| !lcFile.exists()) {
lce.featuresStatus = lce.pruneStatus =
preExtractToDisk ? UNAFFECTED : REVISED;
lce.learningStatus = REVISED;
revisionStatus.put(lceName, AFFECTED);
return;
}
if (oldLCE.rounds == null ? lce.rounds != null
: !oldLCE.rounds.equals(lce.rounds)) {
lce.featuresStatus = lce.pruneStatus =
preExtractToDisk ? UNAFFECTED : REVISED;
lce.learningStatus = REVISED;
revisionStatus.put(lceName, AFFECTED);
if (lce.K == null && lce.parameterSets.size() == 0
&& lce.rounds instanceof Constant
&& oldLCE.rounds instanceof Constant) {
int rounds =
lce.rounds == null
? 1 : Integer.parseInt(((Constant) lce.rounds).value);
int oldRounds =
oldLCE.rounds == null
? 1 : Integer.parseInt(((Constant) oldLCE.rounds).value);
if (rounds > oldRounds) lce.startingRound = oldRounds + 1;
}
return;
}
lce.onlyCodeGeneration =
(oldLCE.comment == null ? lce.comment != null
: !oldLCE.comment.equals(lce.comment))
|| (oldLCE.cacheIn == null ? lce.cacheIn != null
: !oldLCE.cacheIn.equals(lce.cacheIn))
|| oldLCE.singleExampleCache != lce.singleExampleCache
|| (oldLCE.evaluation == null
? lce.evaluation != null
: !oldLCE.evaluation.equals(lce.evaluation));
if (lce.onlyCodeGeneration) {
revisionStatus.put(lceName, REVISED);
lce.featuresStatus = lce.pruneStatus = lce.learningStatus = UNAFFECTED;
}
}
/**
* Runs this pass on all nodes of the indicated type.
*
* @param cn The node to process.
**/
public void run(ClassifierName cn) {
if (cn.referent == cn.name) return;
if (codeRevision(cn, false))
revisionStatus.put(cn.name.toString(), REVISED);
}
/**
* Runs this pass on all nodes of the indicated type.
*
* @param cc The node to process.
**/
public void run(CodedClassifier cc) {
if (codeRevision(cc, true))
revisionStatus.put(cc.name.toString(), REVISED);
}
/**
* Runs this pass on all nodes of the indicated type.
*
* @param cg The node to process.
**/
public void run(CompositeGenerator cg) {
runOnChildren(cg);
if (codeRevision(cg, true))
revisionStatus.put(cg.name.toString(), REVISED);
}
/**
* Runs this pass on all nodes of the indicated type.
*
* @param c The node to process.
**/
public void run(Conjunction c) {
runOnChildren(c);
if (codeRevision(c, false))
revisionStatus.put(c.name.toString(), REVISED);
}
/**
* Runs this pass on all nodes of the indicated type.
*
* @param i The node to process.
**/
public void run(InferenceInvocation i) {
if (codeRevision(i, false))
revisionStatus.put(i.name.toString(), REVISED);
}
/**
* Runs this pass on all nodes of the indicated type.
*
* @param cd The node to process.
**/
public void run(ConstraintDeclaration cd) {
if (codeRevision(cd, true))
revisionStatus.put(cd.name.toString(), REVISED);
}
/**
* Runs this pass on all nodes of the indicated type.
*
* @param id The node to process.
**/
public void run(InferenceDeclaration id) {
if (codeRevision(id, true))
revisionStatus.put(id.name.toString(), REVISED);
run(id.constraint);
}
}