/******************************************************************************* * Copyright (C) 2006-2012 Dominik Jain. * * This file is part of ProbCog. * * ProbCog is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * ProbCog is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ProbCog. If not, see <http://www.gnu.org/licenses/>. ******************************************************************************/ package probcog.srldb.datadict; import java.io.PrintStream; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Map.Entry; import java.util.Set; import java.util.Vector; import probcog.bayesnets.core.BeliefNetworkEx; import probcog.srldb.ConstantArgument; import probcog.srldb.Database; import probcog.srldb.IRelationArgument; import probcog.srldb.IdentifierNamer; import probcog.srldb.Item; import probcog.srldb.Link; import probcog.srldb.Object; import probcog.srldb.datadict.domain.BooleanDomain; import probcog.srldb.datadict.domain.Domain; import edu.ksu.cis.bnj.ver3.core.BeliefNode; /** * Represents a data dictionary for a relational database. * @author Dominik Jain */ public class DataDictionary implements java.io.Serializable { private static final long serialVersionUID = 1L; protected HashMap<String, DDObject> objects; protected HashMap<String, DDRelation> relations; protected HashMap<String, DDAttribute> attributes; /** * !!! this map is not guaranteed to contain all relevant entries (is not certain to be in sync) */ protected HashMap<String, Domain<?>> domains; public DataDictionary() { objects = new HashMap<String, DDObject>(); attributes = new HashMap<String, DDAttribute>(); relations = new HashMap<String, DDRelation>(); domains = new HashMap<String, Domain<?>>(); } public void addObject(DDObject obj) throws DDException { objects.put(obj.getName(), obj); addAttributes(obj); } /** * adds the attributes of a given item (object or relation) to the global list of attributes * @param item * @throws DDException */ protected void addAttributes(DDItem item) throws DDException { for(DDAttribute attr : item.getAttributes().values()) { addAttribute(attr); } } /** * adds the given attribute to the data dictionary. This function should not be called * unless an attribute is added to an item (object or link) after the item was added to * the data dictionary, as all of an item's attributes are added automatically when * addObject or addRelation is called. * @param attr * @throws DDException if an attribute with the same name was already in the data dictionary */ public void addAttribute(DDAttribute attr) throws DDException { if(attributes.containsKey(attr.getName())) { throw new DDException("Duplicate attribute " + attr.getName() + "; already defined for item " + attr.getOwner().getName()); } attributes.put(attr.getName(), attr); domains.put(attr.getDomain().getName(), attr.getDomain()); } public void addRelation(DDRelation rel) throws DDException { relations.put(rel.getName(), rel); addAttributes(rel); //System.out.println("datadict now contains " + relations.size()); } public Collection<DDAttribute> getAttributes() { return attributes.values(); } public DDObject getObject(String name) throws DDException { return objects.get(name); } public DDRelation getRelation(String name) { return relations.get(name); } public DDAttribute getAttribute(String name) throws DDException { return attributes.get(name); } public Domain<?> getDomain(String name) { return domains.get(name); } public Collection<DDObject> getObjects() { return objects.values(); } public Collection<DDRelation> getRelations() { return relations.values(); } protected class DomainData { public Domain<?> domain; public Vector<DDAttribute> occurrences = new Vector<DDAttribute>(); public String[] values; public boolean wasReplaced = false; public DomainData(Domain<?> domain) { this.domain = domain; values = domain.getValues(); } } /** * Checks the data dictionary for overlapping domains. Each value must be unique (i.e. * in two (or more) domains, all references to the second domain are replaced by the first. * It is assumed that the first domain can be substituted - no merging takes place. * Moreover, it is ensured that attribute names do not coincide with link names - * as both are eventually used as predicate symbols in the context of MLNs. */ public void check() throws DDException { // get a hash map of all domains used HashMap<String, DomainData> domains = new HashMap<String,DomainData>(); for(DDAttribute attrib : this.attributes.values()) { Domain<?> domain = attrib.getDomain(); String domName = domain.getName(); DomainData dd; if(!domains.containsKey(domName)) domains.put(domName, dd = new DomainData(domain)); else dd = domains.get(domName); dd.occurrences.add(attrib); } // check attribute domain DomainData[] dd = new DomainData[domains.size()]; domains.values().toArray(dd); domains = null; for(int i = 0; i < dd.length; i++) { if(dd[i].wasReplaced) continue; // check whether the domain is actually boolean if(dd[i].domain.isBoolean()) { for(DDAttribute attrib : dd[i].occurrences) attrib.setDomain(BooleanDomain.getInstance()); dd[i].wasReplaced = true; continue; } // check all of the following domains for overlaps for(int j = i+1; j < dd.length; j++) { if(dd[j].wasReplaced) continue; // check whether any of the values in the first domain is in the other domain... for(int k = 0; k < dd[i].values.length; k++) { // ...and if so, replace all occurrences of the second domain with the first if(dd[j].domain.containsString(dd[i].values[k])) { System.err.println("Warning: domain " + dd[i].domain.getName() + " already contains value '" + dd[i].values[k] + "' of domain " + dd[j].domain.getName() + "; replacing all occurrences of " + dd[j].domain.getName() + "!"); for(DDAttribute attrib : dd[j].occurrences) attrib.setDomain(dd[i].domain); dd[j].wasReplaced = true; /*// if it's an automatic domain, merge if(dd[i].domain instanceof AutomaticDomain) { AutomaticDomain adom = (AutomaticDomain) dd[i].domain; for(int l = 0; l < dd[j].values.length; l++) adom.addValue(dd[j].values[l]); }*/ break; } } } } // ensure that attribute names do not coincide with link names Set<String> attrNames = this.attributes.keySet(); Set<String> linkNames = new HashSet<String>(this.relations.keySet()); linkNames.retainAll(attrNames); if(!linkNames.isEmpty()) { throw new DDException("Error: Duplicate predicate name(s); the name(s) " + linkNames.toString() + " cannot be used for attributes and links simultaneously!"); } } /** * outputs an attribute list for each type of object and relation * @param out the stream to write to */ public void outputAttributeLists(PrintStream out) { for(DDObject obj : getObjects()) { obj.outputAttributeList(out); } for(DDRelation rel : getRelations()) { rel.outputAttributeList(out); } } /** * outputs a comma-separated list of all attribute names, regardless of the item to which the attributes belong * @param out the stream to write to */ public void outputAttributeList(PrintStream out) { int i = 0; for(DDAttribute attr : this.attributes.values()) { if(attr.isDiscarded()) continue; if(i++ > 0) out.print(","); out.print(Database.stdAttribName(attr.getName())); } } public void checkObject(Object obj) throws DDException { DDObject ddobj = getObject(obj.objType()); if(ddobj == null) throw new DDException("Unknown object type " + obj.objType() + "; not in data dictionary!"); checkItemAttributes(obj, ddobj); } @SuppressWarnings("unchecked") public void checkLink(Link link) throws DDException, Exception { // check existence of corresponding link type in data dictionary DDRelation ddlink = getRelation(link.getName()); if(ddlink == null) throw new DDException("Unknown relation " + link.getName() + "; not in data dictionary!"); // check number of arguments if(link.getArguments().length != ddlink.getArguments().length) throw new DDException("The link " + link.toString() + " has the wrong number of parameters!"); // check argument types int i = 0; for(IRelationArgument arg : link.getArguments()) { IDDRelationArgument argtype = ddlink.getArguments()[i]; if(arg instanceof Object) { Object o = (Object)arg; if(o.objType() != argtype.getDomainName()) throw new DDException(String.format("Type mismatch for the %dth argument of %s; should be %s!", i+1, link.toString(), argtype.getDomainName())); } else { if(!(arg instanceof ConstantArgument)) { throw new DDException(String.format("Type mismatch for argument %d of %s; expected a constant argument!", i+1, link.toString())); } DDConstantArgument ddconst = (DDConstantArgument) argtype; if(!((Domain<String>)ddconst.getDomain()).contains(arg.getConstantName())) throw new DDException(String.format("Domain of argument %d of %s does not contain %s!", i+1, link.toString(), arg.getConstantName())); } i++; } // check attributes checkItemAttributes(link, ddlink); } /** * checks compatibility of an item's attributes with the corresponding data dictionary item * @param item * @param ddItem * @throws DDException */ protected void checkItemAttributes(Item item, DDItem ddItem) throws DDException { Set<String> allowedAttributes = ddItem.getAttributes().keySet(); for(Entry<String,String> attr : item.getAttributes().entrySet()) { String attribName = attr.getKey(); if(!allowedAttributes.contains(attribName)) throw new DDException("Undefined attribute '" + attribName + "' for item type '" + ddItem.getName() + "' or the attribute was applied to more than one type of object."); if(getAttribute(attribName).isDiscarded()) continue; Domain<?> domain = ddItem.getAttributes().get(attribName).getDomain(); String value = attr.getValue(); if(!domain.containsString(value)) throw new DDException("Invalid value " + value + " for attribute " + attribName + " of item " + ddItem.getName() + "; not in domain " + domain.getName()); } } public void onCommitObject(Object o) throws DDException {} public void onCommitLink(Link l) throws DDException {} /** * outputs the basic MLN for this data dictionary, which contains domain definitions and predicate declarations * @param out the stream to write to */ public void writeBasicMLN(PrintStream out) { DataDictionary datadict = this; out.println("// Markov Logic Network\n\n"); IdentifierNamer idNamer = new IdentifierNamer(datadict); // domains out.println("// ***************\n// domains\n// ***************\n"); HashSet<String> printedDomains = new HashSet<String>(); // the names of domains that have already been printed // - check all attributes for finite domains for(DDAttribute attrib : datadict.getAttributes()) { if(attrib.isDiscarded()) continue; Domain<?> domain = attrib.getDomain(); if(domain == null || attrib.isBoolean() || !domain.isFinite()) // boolean domains aren't handled because a boolean attribute value is not specified as a constant but rather using negation of the entire predicate continue; // we have a finite domain -> output this domain if it hasn't already been printed String name = domain.getName(); if(!printedDomains.contains(name)) { // check if the domain is empty String[] values = domain.getValues(); if(values.length == 0) { System.err.println("Warning: Domain " + domain.getName() + " is empty!"); continue; } // print the domain name String domIdentifier = idNamer.getLongIdentifier("domain", domain.getName()); out.print(domIdentifier + " = {"); // print the values (must start with upper-case letter) for(int i = 0; i < values.length; i++) { if(i > 0) out.print(", "); out.print(Database.stdAttribStringValue(values[i])); } out.println("}"); printedDomains.add(name); } } // predicate declarations out.println("\n\n// *************************\n// predicate declarations\n// *************************\n"); for(DDObject obj : datadict.getObjects()) { obj.MLNprintPredicateDeclarations(idNamer, out); } out.println("// Relations"); for(DDRelation rel : datadict.getRelations()) { rel.MLNprintPredicateDeclarations(idNamer, out); } // rules out.println("\n\n// ******************\n// rules\n// ******************\n"); /* for(DDObject obj : datadict.getObjects()) { obj.MLNprintRules(idNamer, out); } out.println("\n// mutual exclusiveness and exhaustiveness: relations"); for(DDRelation rel : datadict.getRelations()) { rel.MLNprintRules(idNamer, out); }*/ // unit clauses out.println("\n// unit clauses"); for(DDObject obj : datadict.getObjects()) { obj.MLNprintUnitClauses(idNamer, out); } for(DDRelation rel : datadict.getRelations()) { rel.MLNprintUnitClauses(idNamer, out); } } /** * outputs the basic BLOG model for this data dictionary, which contains domain definitions and predicate declarations * @param out the stream to write to */ public void writeBasicBLOGModel(PrintStream out) { out.println("// ABL Model\n\n"); IdentifierNamer idNamer = new IdentifierNamer(this); // object types out.println("// ***************\n// object types\n// ***************\n"); for(DDObject ddo : this.getObjects()) out.printf("type %s;\n", idNamer.getLongIdentifier("domain", ddo.getDomainName())); // fixed domains out.println("\n// ***************\n// domains\n// ***************\n"); HashSet<String> handledDomainTypes = new HashSet<String>(); for(DDAttribute dda : this.getAttributes()) { Domain<?> dom = dda.getDomain(); if(dda.isDiscarded() || dda.isBoolean() || handledDomainTypes.contains(dom)) continue; handledDomainTypes.add(dom.getName()); // print type declaration String domIdentifier = idNamer.getLongIdentifier("domain", dom.getName()); out.printf("type %s;\n", domIdentifier); if(dom == null || !dom.isFinite()) continue; // check if the domain is empty String[] values = dom.getValues(); if(values.length == 0) { System.err.println("Warning: Domain " + dom.getName() + " is empty!"); continue; } // print the domain out.print("guaranteed " + domIdentifier + " "); for(int i = 0; i < values.length; i++) { if(i > 0) out.print(", "); out.print(Database.stdAttribStringValue(values[i])); } out.println(";"); } // predicate declarations out.println("\n\n// *************************\n// function/predicate declarations\n// *************************\n"); for(DDObject obj : this.getObjects()) { obj.BLNprintPredicateDeclarations(idNamer, out); } out.println("// Relations"); for(DDRelation rel : this.getRelations()) { rel.BLNprintPredicateDeclarations(idNamer, out); } } public static class BLNStructure { public BeliefNetworkEx bn; protected HashMap<java.lang.Object,BeliefNode> dd2node; public BLNStructure(BeliefNetworkEx bn, HashMap<java.lang.Object,BeliefNode> dd2node) { this.bn = bn; this.dd2node = dd2node; } public BeliefNode getNode(DDAttribute attr) { return dd2node.get(attr); } public BeliefNode getNode(DDRelation rel) { return dd2node.get(rel); } public void connect(java.lang.Object ddAttributeOrRelation_Parent, java.lang.Object ddAttributeOrRelation_Child) { bn.bn.connect(dd2node.get(ddAttributeOrRelation_Parent), dd2node.get(ddAttributeOrRelation_Child)); } public void disconnect(java.lang.Object ddAttributeOrRelation_Parent, java.lang.Object ddAttributeOrRelation_Child) { bn.bn.disconnect(dd2node.get(ddAttributeOrRelation_Parent), dd2node.get(ddAttributeOrRelation_Child)); } } public BLNStructure createBasicBLNStructure() { BeliefNetworkEx bn = new BeliefNetworkEx(); HashMap<java.lang.Object, BeliefNode> dd2node = new HashMap<java.lang.Object, BeliefNode>(); IdentifierNamer namer = new IdentifierNamer(this); // attribute nodes for(DDAttribute attr : this.getAttributes()) { if(attr.isDiscarded()) continue; String nodeName = String.format("%s(%s)", attr.getName(), namer.getShortIdentifier("object", attr.getOwner().getName())); dd2node.put(attr, bn.addNode(nodeName)); } // relation nodes for(probcog.srldb.datadict.DDRelation rel : this.getRelations()) { StringBuffer nodeName = new StringBuffer(rel.getName() + "("); IDDRelationArgument[] relargs = rel.getArguments(); for(int i = 0; i < relargs.length; i++) { if(i > 0) nodeName.append(','); nodeName.append(namer.getShortIdentifier(rel.getName(), relargs[i].getDomainName())); } nodeName.append(')'); dd2node.put(rel, bn.addNode(nodeName.toString())); } return new BLNStructure(bn, dd2node); } public String toString() { StringBuffer sb = new StringBuffer("DataDictionary:\n"); for(DDObject ddo : this.objects.values()) { sb.append(ddo); sb.append('\n'); } for(DDRelation ddr : this.relations.values()) { sb.append(ddr); sb.append('\n'); } return sb.toString(); } /** * cleans up stale domain references */ public void cleanUp() { domains.clear(); for(DDAttribute attr : attributes.values()) domains.put(attr.getDomain().getName(), attr.getDomain()); } }