package org.ggp.base.util.gdl.transforms;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import org.ggp.base.util.concurrency.ConcurrencyUtils;
import org.ggp.base.util.gdl.GdlUtils;
import org.ggp.base.util.gdl.grammar.Gdl;
import org.ggp.base.util.gdl.grammar.GdlConstant;
import org.ggp.base.util.gdl.grammar.GdlDistinct;
import org.ggp.base.util.gdl.grammar.GdlLiteral;
import org.ggp.base.util.gdl.grammar.GdlNot;
import org.ggp.base.util.gdl.grammar.GdlPool;
import org.ggp.base.util.gdl.grammar.GdlRelation;
import org.ggp.base.util.gdl.grammar.GdlRule;
import org.ggp.base.util.gdl.grammar.GdlSentence;
import org.ggp.base.util.gdl.grammar.GdlTerm;
import org.ggp.base.util.gdl.grammar.GdlVariable;
import org.ggp.base.util.gdl.model.CartesianSentenceFormDomain;
import org.ggp.base.util.gdl.model.SentenceDomainModel;
import org.ggp.base.util.gdl.model.SentenceDomainModelFactory;
import org.ggp.base.util.gdl.model.SentenceDomainModelOptimizer;
import org.ggp.base.util.gdl.model.SentenceDomainModels;
import org.ggp.base.util.gdl.model.SentenceDomainModels.VarDomainOpts;
import org.ggp.base.util.gdl.model.SentenceForm;
import org.ggp.base.util.gdl.model.SentenceFormDomain;
import org.ggp.base.util.gdl.model.SentenceFormModel;
import org.ggp.base.util.gdl.model.SentenceForms;
import org.ggp.base.util.gdl.model.SentenceModelUtils;
import org.ggp.base.util.gdl.model.SimpleSentenceForm;
import org.ggp.base.util.gdl.model.assignments.AssignmentsImpl;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
/**
* The CondensationIsolator is a GDL transformation designed to split up
* rules in a way that results in smaller propnets. For example, we may
* have a rule as follows:
*
* <pre>
* (<= (foo ?x ?y)
* (bar ?x ?y)
* (baz ?y ?z))
* </pre>
*
* <p>In the propnet, this will result in one AND node for each combination
* of ?x, ?y, and ?z. The CondensationIsolator would split it up as follows:
*
* <pre>
* (<= (foo ?x ?y)
* (bar ?x ?y)
* (baz_tmp0 ?y))
* (<= (baz_tmp0 ?y)
* (baz ?y ?z))
* </pre>
*
* <p>In the propnet, there will now be one AND node for each combination of
* ?x and ?y and one new link for each combination of ?y and ?z, but there
* will not be a cross-product of the domains of all three.
*
* <p>"Condensation" refers to the type of rule generated, in which we simply
* ignore certain variables.
*
* @author Alex Landau
*
*/
public class CondensationIsolator {
// Not instantiable: the class only exposes static methods.
private CondensationIsolator() {
}
/**
 * Splits rules of a game description into smaller rules wherever the
 * condensation heuristic judges that doing so is worthwhile.
 *
 * <p>The description is first passed through {@code GdlCleaner},
 * {@code DeORer} and
 * {@code VariableConstrainer.replaceFunctionValuedVariables}. Every rule is
 * then examined in turn: rules that reference their own head name in the
 * body (see {@code isRecursive}) and rules whose head is in the group of
 * constant sentence forms are kept untouched; for the others a condensation
 * set is searched for and, if one is found, the rule is replaced by the two
 * rules produced by {@code applyCondensation}. Those two rules are put back
 * on the work queue so that they can be condensed further.
 *
 * @param description the game description to transform
 * @return a new list containing the non-rule entries of the preprocessed
 *         description followed by the resulting rules
 * @throws InterruptedException if an interruption is detected while the
 *         description is being processed
 */
public static List<Gdl> run(List<Gdl> description) throws InterruptedException {
    //This class is not put together in any "optimal" way, so it's left in
    //an unpolished state for now. A better version would use estimates of
    //the impact of breaking apart rules. (It also needs to stop itself from
    //making multiple new relations with the same meaning.)

    //This version will be rather advanced.
    //In particular, it will try to incorporate
    //1) More thorough scanning for condensations;
    //2) Condensations that are only safe to perform because of mutexes.

    //TODO: Don't perform condensations on stuff like (add _ _ _)...
    //In general, don't perform condensations where the headroom is huge?
    //Better yet... DON'T perform condensations on recursive functions!
    //As for headroom... maybe make sure that # of vars eliminated > # "kept"
    //Or make sure none are kept? Use directional connected components?

    description = GdlCleaner.run(description);
    description = DeORer.run(description);
    description = VariableConstrainer.replaceFunctionValuedVariables(description);

    //How do we define a condensation, and what needs to be true in it?
    //Definition: A condensation set is a set of conjuncts of a
    //sentence.
    //Restrictions:
    //1) There must be some variable not in the head of the sentence that
    //   appears exclusively in the condensation set. (This means we can
    //   easily find sets one of which must be a condensation set.)
    //2) For any variable appearing in a distinct or not conjunct in the set,
    //   there must be a positive conjunct in the set also containing that
    //   variable. This does apply to variables found in the head.
    //3) There must be at least one non-distinct literal outside the
    //   condensation set.

    //How mutexes work:
    //Say we have a rule
    //  (<= (r1 ?b)
    //      (r2 ?a ?b ?c)
    //      (r3 ?b ?c)
    //      (r4 ?a)
    //      (r5 ?c))
    //If we wanted to factor out ?a, we'd normally have to do
    //  (<= (r6 ?b ?c)
    //      (r2 ?a ?b ?c)
    //      (r4 ?a))
    //  (<= (r1 ?b)
    //      (r6 ?b ?c)
    //      (r3 ?b ?c)
    //      (r5 ?c))
    //But if we know r2 is a mutex, instead we can do (notice r2 splitting):
    //  (<= (r6 ?b)
    //      (r2 ?a ?b ?c)
    //      (r4 ?a))
    //  (<= (r1 ?b)
    //      (r2 ?a ?b ?c)
    //      (r6 ?b)
    //      (r3 ?b ?c)
    //      (r5 ?c))
    //Which in turn becomes:
    //  (<= (r6 ?b)
    //      (r2 ?a ?b ?c)
    //      (r4 ?a))
    //  (<= (r7 ?b)
    //      (r2 ?a ?b ?c)
    //      (r3 ?b ?c)
    //      (r5 ?c))
    //  (<= (r1 ?b)
    //      (r6 ?b)
    //      (r7 ?b))
    //Both r6 and r7 can be further condensed to ignore ?c and ?a,
    //respectively. What just happened?
    //1) The condensation set for ?a included the mutex r2.
    //2) r2 (by itself) would have required ?c to be included as an
    //   argument passed back to the original rule, which is undesirable.
    //   Instead, as it's a mutex, we leave a copy in the original rule
    //   and don't include the ?c.
    //
    //So, what kind of algorithm can we find to solve this task?

    //Non-rule entries go straight to the output; rules are queued for inspection
    List<Gdl> newDescription = new ArrayList<Gdl>();
    Queue<GdlRule> rulesToAdd = new LinkedList<GdlRule>();
    for (Gdl gdl : description) {
        if (gdl instanceof GdlRule) {
            rulesToAdd.add((GdlRule) gdl);
        } else {
            newDescription.add(gdl);
        }
    }

    //Don't use the model indiscriminately; it reflects the old description,
    //not necessarily the new one
    SentenceDomainModel model = SentenceDomainModelFactory.createWithCartesianDomains(description);
    model = SentenceDomainModelOptimizer.restrictDomainsToUsefulValues(model);
    UnusedSentenceNameSource sentenceNameSource = UnusedSentenceNameSource.create(model);
    ConstantChecker constantChecker = ConstantCheckerFactory.createWithForwardChaining(model);
    //Fetched up front: the augmented models created below do not support
    //getConstantSentenceForms()
    Set<SentenceForm> constantForms = model.getConstantSentenceForms();

    ConcurrencyUtils.checkForInterruption();

    while (!rulesToAdd.isEmpty()) {
        GdlRule curRule = rulesToAdd.remove();
        if (isRecursive(curRule)) {
            //Don't mess with it!
            newDescription.add(curRule);
            continue;
        }
        GdlSentence curRuleHead = curRule.getHead();
        if (SentenceModelUtils.inSentenceFormGroup(curRuleHead, constantForms)) {
            newDescription.add(curRule);
            continue;
        }

        Set<GdlLiteral> condensationSet = getCondensationSet(curRule, model, constantChecker, sentenceNameSource);
        ConcurrencyUtils.checkForInterruption();
        if (condensationSet == null) {
            //Nothing worth splitting off; the rule is final
            newDescription.add(curRule);
            continue;
        }

        //Both resulting rules are re-queued, as they may be condensable again
        List<GdlRule> newRules = applyCondensation(condensationSet, curRule, sentenceNameSource);
        rulesToAdd.addAll(newRules);
        //Since we're making only small changes, we can readjust
        //the model as we go, instead of recomputing it
        model = augmentModelWithNewForm(model, newRules);
    }
    return newDescription;
}
/**
 * Debugging aid: writes the description to a new file named
 * {@code ci<N>.kif} under {@code games/rulesheets}, where N is the smallest
 * positive integer for which no such file exists yet. Each entry is written
 * on its own line using its {@code toString()} form.
 *
 * <p>Useful for debugging chains of condensations to see which cause
 * decreased performance. I/O failures are reported on standard error and
 * otherwise ignored.
 *
 * @param description the description to dump
 */
@SuppressWarnings("unused")
private static void saveKif(List<Gdl> description) {
    //Find the first unused file name: ci1.kif, ci2.kif, ...
    File file;
    int filenum = 0;
    do {
        filenum++;
        file = new File("games/rulesheets", "ci" + filenum + ".kif");
    } while (file.exists());

    BufferedWriter out = null;
    try {
        out = new BufferedWriter(new FileWriter(file));
        for (Gdl gdl : description) {
            out.append(gdl.toString() + "\n");
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                //Closing flushes the buffer, so a failure here may mean
                //the file is incomplete; report it like any other failure
                e.printStackTrace();
            }
        }
    }
}
/**
 * Reports whether the rule refers to itself directly, i.e. whether some
 * body literal that is a sentence has the same name as the rule's head.
 * Only top-level sentence literals of the body are inspected, so this is
 * an approximation of recursion rather than an exact test.
 *
 * @param rule the rule to inspect
 * @return true if a sentence in the body shares the head's name
 */
private static boolean isRecursive(GdlRule rule) {
    GdlConstant headName = rule.getHead().getName();
    for (GdlLiteral conjunct : rule.getBody()) {
        if (!(conjunct instanceof GdlSentence)) {
            continue;
        }
        GdlSentence sentence = (GdlSentence) conjunct;
        if (sentence.getName().equals(headName)) {
            //A good approximation
            return true;
        }
    }
    return false;
}
/**
 * Hands out sentence names that have not been seen before. Every name
 * supplied at construction time and every name handed out afterwards is
 * remembered, so no name is returned twice.
 */
private static class UnusedSentenceNameSource {
    //Names that are taken, either initially or by an earlier request
    private final Set<String> takenNames;

    /**
     * @param initialNames names that must never be handed out
     */
    public UnusedSentenceNameSource(Collection<String> initialNames) {
        takenNames = Sets.newHashSet(initialNames);
    }

    /**
     * Builds a name source that treats the names of all sentence forms of
     * the given model as already taken.
     */
    public static UnusedSentenceNameSource create(SentenceFormModel model) {
        return new UnusedSentenceNameSource(SentenceForms.getNames(model.getSentenceForms()));
    }

    /**
     * Returns the constant named {@code <prefix>_tmp<i>} for the smallest
     * non-negative {@code i} whose name is not yet taken, and marks that
     * name as taken.
     */
    public GdlConstant getNameWithPrefix(GdlConstant prefix) {
        int suffix = 0;
        String candidateName = prefix + "_tmp" + suffix;
        //Set.add reports false when the name was already present
        while (!takenNames.add(candidateName)) {
            suffix++;
            candidateName = prefix + "_tmp" + suffix;
        }
        return GdlPool.getConstant(candidateName);
    }
}
/**
 * Splits a rule in two around the given condensation set.
 *
 * <p>A new "condenser" sentence is created whose name is obtained from
 * {@code sentenceNameSource} (using the rule head's name as prefix) and
 * whose arguments are the variables that occur both in the condensation
 * set and either in the rule's head or in a body literal outside the set.
 * If there are no such variables the condenser is a proposition.
 *
 * <p>Note that each call takes a fresh name from
 * {@code sentenceNameSource}, even if the caller discards the result.
 *
 * @param condensationSet the body literals to move into the condenser rule
 * @param rule the rule being split
 * @param sentenceNameSource supplier of the condenser sentence's name
 * @return a two-element list: at index 0 the condenser rule (condenser
 *         head, condensation set as body), at index 1 the original rule
 *         with the condensation set replaced by the condenser head, which
 *         is appended as the last body literal
 */
private static List<GdlRule> applyCondensation(
        Set<GdlLiteral> condensationSet, GdlRule rule,
        UnusedSentenceNameSource sentenceNameSource) {
    //Which vars do we "keep" (put in our new condensed literal)?
    //Vars that are both:
    //1) In the condensation set
    //2) Either in the head or somewhere else outside the condensation set
    Set<GdlVariable> varsToKeep = new HashSet<GdlVariable>();
    for (GdlLiteral literal : condensationSet) {
        varsToKeep.addAll(GdlUtils.getVariables(literal));
    }

    //One pass over the body collects both the literals that stay in the
    //original rule and the variables they use
    Set<GdlVariable> varsUsedOutside = new HashSet<GdlVariable>();
    varsUsedOutside.addAll(GdlUtils.getVariables(rule.getHead()));
    List<GdlLiteral> remainingLiterals = new ArrayList<GdlLiteral>();
    for (GdlLiteral literal : rule.getBody()) {
        if (!condensationSet.contains(literal)) {
            varsUsedOutside.addAll(GdlUtils.getVariables(literal));
            remainingLiterals.add(literal);
        }
    }
    varsToKeep.retainAll(varsUsedOutside);

    //Now we're ready to split it apart
    //Let's make the new rule
    List<GdlTerm> orderedVars = new ArrayList<GdlTerm>(varsToKeep);
    GdlConstant condenserName = sentenceNameSource.getNameWithPrefix(rule.getHead().getName());
    //Make the rule head
    GdlSentence condenserHead;
    if (orderedVars.isEmpty()) {
        condenserHead = GdlPool.getProposition(condenserName);
    } else {
        condenserHead = GdlPool.getRelation(condenserName, orderedVars);
    }
    List<GdlLiteral> condenserBody = new ArrayList<GdlLiteral>(condensationSet);
    GdlRule condenserRule = GdlPool.getRule(condenserHead, condenserBody);
    //TODO: Look for existing rules matching the new one

    //The original rule now refers to the condenser instead of the set
    remainingLiterals.add(condenserHead);
    GdlRule modifiedRule = GdlPool.getRule(rule.getHead(), remainingLiterals);

    List<GdlRule> newRules = new ArrayList<GdlRule>(2);
    newRules.add(condenserRule);
    newRules.add(modifiedRule);
    return newRules;
}
/**
 * Looks for a set of body literals of the rule that is worth splitting off
 * into a separate rule.
 *
 * <p>Each variable that occurs in the rule but not in its head is tried as
 * a starting point. The candidate set for a variable consists of all body
 * literals containing it. A candidate is skipped if some variable of one
 * of its {@code distinct} or {@code not} literals does not also occur in
 * one of its relation literals. The first remaining candidate accepted by
 * {@code goodCondensationSetByHeuristic} is returned.
 *
 * @param rule the rule to examine
 * @param model domain model passed on to the heuristic
 * @param checker constant checker passed on to the heuristic
 * @param sentenceNameSource name source passed on to the heuristic
 * @return the chosen condensation set, or {@code null} if no candidate is
 *         accepted
 * @throws InterruptedException if an interruption is detected while
 *         candidates are being evaluated
 */
private static Set<GdlLiteral> getCondensationSet(GdlRule rule,
        SentenceDomainModel model,
        ConstantChecker checker,
        UnusedSentenceNameSource sentenceNameSource) throws InterruptedException {
    //We use each variable as a starting point
    List<GdlVariable> varsNotInHead = new ArrayList<GdlVariable>(GdlUtils.getVariables(rule));
    varsNotInHead.removeAll(GdlUtils.getVariables(rule.getHead()));

    for (GdlVariable var : varsNotInHead) {
        ConcurrencyUtils.checkForInterruption();

        //Restriction #1: take every literal the variable appears in, and
        //record which variables those literals supply (relations) and
        //which they merely require (distinct/not)
        Set<GdlLiteral> minSet = new HashSet<GdlLiteral>();
        Set<GdlVariable> varsNeeded = new HashSet<GdlVariable>();
        Set<GdlVariable> varsSupplied = new HashSet<GdlVariable>();
        for (GdlLiteral literal : rule.getBody()) {
            Collection<GdlVariable> literalVars = GdlUtils.getVariables(literal);
            if (!literalVars.contains(var)) {
                continue;
            }
            minSet.add(literal);
            if (literal instanceof GdlRelation) {
                varsSupplied.addAll(literalVars);
            } else if (literal instanceof GdlDistinct || literal instanceof GdlNot) {
                varsNeeded.addAll(literalVars);
            }
        }

        //Restriction #2: every needed variable must be supplied from
        //within the set.
        //TODO: Instead of giving up, the set could be extended with
        //relations from the rest of the body that supply the missing
        //variables (minimizing either the number of literals or the
        //number of variables added). That is not implemented.
        if (!varsSupplied.containsAll(varsNeeded)) {
            continue;
        }

        if (goodCondensationSetByHeuristic(minSet, rule, model, checker, sentenceNameSource)) {
            return minSet;
        }
    }
    return null;
}
/**
 * Decides whether splitting the rule around the given set is estimated to
 * be cheaper than leaving the rule as it is.
 *
 * <p>The cost of a rule is estimated as A * L', where A is the number of
 * assignments estimated by {@code AssignmentsImpl.getNumAssignmentsEstimate}
 * and L' is the number of body literals, plus one when there is more than
 * one literal (they have to be "and"ed together). This roughly captures the
 * number of links that would be generated for the rule. The split is judged
 * good when the summed cost of the two rules produced by
 * {@code applyCondensation} is strictly lower than the cost of the original.
 *
 * <p>The split is performed on a trial basis here, which takes a name from
 * {@code sentenceNameSource}.
 *
 * @param minSet the candidate condensation set
 * @param rule the rule that would be split
 * @param model the current domain model
 * @param checker constant checker handed to the assignment estimate
 * @param sentenceNameSource name source used for the trial split
 * @return true if the split rules have the lower estimated cost
 * @throws InterruptedException declared for callers; propagated from the
 *         estimation if it is interrupted
 */
private static boolean goodCondensationSetByHeuristic(
        Set<GdlLiteral> minSet, GdlRule rule, SentenceDomainModel model,
        ConstantChecker checker,
        UnusedSentenceNameSource sentenceNameSource) throws InterruptedException {
    //Cost of the rule as-is. Note that even though constants will be
    //factored out, we're concerned here with getting through them in a
    //reasonable amount of time, so we do want to count them.
    //TODO: Not sure if they should be counted in L, though...
    long unsplitAssignments = AssignmentsImpl.getNumAssignmentsEstimate(
            rule,
            SentenceDomainModels.getVarDomains(rule, model, VarDomainOpts.INCLUDE_HEAD),
            checker);
    int unsplitLiterals = rule.arity();
    if (unsplitLiterals > 1) {
        unsplitLiterals += 1;
    }
    long unsplitCost = unsplitAssignments * unsplitLiterals;

    //Cost if we split: try the condensation and evaluate both halves
    //against a model that knows about the new sentence form
    List<GdlRule> splitRules = applyCondensation(minSet, rule, sentenceNameSource);
    GdlRule condenserRule = splitRules.get(0);
    GdlRule remainderRule = splitRules.get(1);
    SentenceDomainModel splitModel = augmentModelWithNewForm(model, splitRules);

    long condenserAssignments = AssignmentsImpl.getNumAssignmentsEstimate(
            condenserRule,
            SentenceDomainModels.getVarDomains(condenserRule, splitModel, VarDomainOpts.INCLUDE_HEAD),
            checker);
    long remainderAssignments = AssignmentsImpl.getNumAssignmentsEstimate(
            remainderRule,
            SentenceDomainModels.getVarDomains(remainderRule, splitModel, VarDomainOpts.INCLUDE_HEAD),
            checker);

    int condenserLiterals = condenserRule.arity();
    if (condenserLiterals > 1) {
        condenserLiterals += 1;
    }
    int remainderLiterals = remainderRule.arity();
    if (remainderLiterals > 1) {
        remainderLiterals += 1;
    }
    long splitCost = condenserAssignments * condenserLiterals
            + remainderAssignments * remainderLiterals;

    //Whether we split or not depends on what the two heuristics say
    return splitCost < unsplitCost;
}
/**
 * Wraps a domain model so that it additionally knows the domain of the
 * sentence form introduced by a condensation.
 *
 * <p>The new form is derived from the head of the first rule in
 * {@code newRules}, and its domain from that rule's body via
 * {@code getNewFormDomain}. The returned model answers
 * {@code getDomain} for the new form itself and delegates to
 * {@code oldModel} for every other form. All other operations of the
 * returned model throw {@link UnsupportedOperationException}.
 *
 * @param oldModel the model to extend
 * @param newRules the rules produced by a condensation; only the first
 *        element is read
 * @return a model view supporting {@code getDomain} only
 */
private static SentenceDomainModel augmentModelWithNewForm(
        final SentenceDomainModel oldModel, List<GdlRule> newRules) {
    GdlRule condenserRule = newRules.get(0);
    final SentenceForm newForm = SimpleSentenceForm.create(condenserRule.getHead());
    final SentenceFormDomain newFormDomain = getNewFormDomain(condenserRule, oldModel, newForm);

    return new SentenceDomainModel() {
        //The only supported query: the new form is answered locally,
        //everything else is looked up in the wrapped model
        @Override
        public SentenceFormDomain getDomain(SentenceForm form) {
            return form.equals(newForm) ? newFormDomain : oldModel.getDomain(form);
        }

        @Override
        public SentenceForm getSentenceForm(GdlSentence sentence) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Set<SentenceForm> getSentenceForms() {
            throw new UnsupportedOperationException();
        }

        @Override
        public Set<SentenceForm> getIndependentSentenceForms() {
            throw new UnsupportedOperationException();
        }

        @Override
        public Set<SentenceForm> getConstantSentenceForms() {
            throw new UnsupportedOperationException();
        }

        @Override
        public Multimap<SentenceForm, SentenceForm> getDependencyGraph() {
            throw new UnsupportedOperationException();
        }

        @Override
        public Set<GdlSentence> getSentencesListedAsTrue(SentenceForm form) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Set<GdlRule> getRules(SentenceForm form) {
            throw new UnsupportedOperationException();
        }

        @Override
        public List<Gdl> getDescription() {
            throw new UnsupportedOperationException();
        }
    };
}
/**
 * Computes a Cartesian domain for the sentence form defined by a
 * condensing rule: each slot of the rule's head gets the domain that the
 * rule's body assigns to the variable in that slot (as computed against
 * {@code oldModel} with {@code VarDomainOpts.BODY_ONLY}).
 *
 * @param condensingRule the rule defining the new sentence form
 * @param oldModel model used to look up the domains of the body's variables
 * @param newForm the sentence form of the rule's head
 * @return the domain created by {@code CartesianSentenceFormDomain.create}
 * @throws RuntimeException if a slot of the head is not a variable
 */
private static SentenceFormDomain getNewFormDomain(GdlRule condensingRule,
        SentenceDomainModel oldModel, SentenceForm newForm) {
    Map<GdlVariable, Set<GdlConstant>> bodyVarDomains = SentenceDomainModels.getVarDomains(
            condensingRule, oldModel, VarDomainOpts.BODY_ONLY);

    List<Set<GdlConstant>> slotDomains = new ArrayList<Set<GdlConstant>>();
    for (GdlTerm slot : GdlUtils.getTupleFromSentence(condensingRule.getHead())) {
        if (slot instanceof GdlVariable) {
            slotDomains.add(bodyVarDomains.get(slot));
            continue;
        }
        //Condenser heads are built from variables only, so anything else
        //indicates the rule did not come from a condensation
        throw new RuntimeException("Expected all slots in the head of a condensing rule to be variables, but the rule was: " + condensingRule);
    }
    return CartesianSentenceFormDomain.create(newForm, slotDomains);
}
}