package com.openMap1.mapper.util;

import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;
import java.util.List;

import org.eclipse.emf.ecore.EAttribute;
import org.eclipse.emf.ecore.EClass;
import org.eclipse.emf.ecore.EObject;
import org.eclipse.emf.ecore.EReference;
import org.eclipse.emf.ecore.EStructuralFeature;
import org.eclipse.emf.ecore.EDataType;
import org.eclipse.emf.common.util.EList;

import com.openMap1.mapper.core.MapperException;
import com.openMap1.mapper.core.SemanticMismatch;
import com.openMap1.mapper.reader.XOReader;

/**
 * Methods in this class find and evaluate the best possible match
 * between two Ecore model instances, where the child nodes may be in different orders
 * in the two instances.
 *
 * <p>Matching works in two layers. A cheap "feature bag" comparison (a multiset of
 * {@code feature$value@depth} strings) is used as a heuristic to pair up sibling objects;
 * the paired objects are then compared structurally, and every local difference is
 * recorded as a {@link SemanticMismatch}.
 *
 * @author robert
 */
public class EcoreMatcher {

	private EObject eObj1, eObj2;

	private String sourceCode, resultCode;

	private XOReader sourceReader, resultReader;

	/** when true, {@link #trace(String)} writes progress messages to standard output */
	private boolean tracing = false;

	//-------------------------------------------------------------------------------------
	//                                   constructor
	//-------------------------------------------------------------------------------------

	/**
	 * @param eObj1 root of the first instance (the translation source)
	 * @param eObj2 root of the second instance (the translation result)
	 * @param sourceCode passed through unchanged to every SemanticMismatch created
	 * @param resultCode passed through unchanged to every SemanticMismatch created
	 * @param sourceReader passed to SemanticMismatch.checkMissingMapping
	 * @param resultReader passed to SemanticMismatch.checkMissingMapping
	 */
	public EcoreMatcher(EObject eObj1, EObject eObj2, String sourceCode, String resultCode,
			XOReader sourceReader, XOReader resultReader)
	{
		this.eObj1 = eObj1;
		this.eObj2 = eObj2;
		this.sourceCode = sourceCode;
		this.resultCode = resultCode;
		this.sourceReader = sourceReader;
		this.resultReader = resultReader;
	}

	//-------------------------------------------------------------------------------------
	//                                 public access methods
	//-------------------------------------------------------------------------------------

	/**
	 * Compute the three headline scores for the two instances given to the constructor.
	 *
	 * @return an array of three ints: [0] the size of the first instance's feature bag,
	 * [1] the size of the second instance's feature bag, [2] the structured tree match
	 * score between the two instances
	 * @throws MapperException if the structured match fails
	 */
	public int[] scores() throws MapperException
	{
		int[] scores = new int[3];
		Hashtable<String,Integer> fBag1 = featureBag(eObj1, true);
		Hashtable<String,Integer> fBag2 = featureBag(eObj2, true);

		// matching a bag against itself sums all its occurrence counts, i.e. gives its size
		scores[0] = bagMatch(fBag1, fBag1);
		scores[1] = bagMatch(fBag2, fBag2);

		// the mismatches collected here are discarded; only the score is reported
		Hashtable<String, SemanticMismatch> mismatches = new Hashtable<String, SemanticMismatch>();
		scores[2] = treeMatch(eObj1, eObj2, mismatches);
		return scores;
	}

	//-------------------------------------------------------------------------------------
	//                                 structured matching
	//-------------------------------------------------------------------------------------

	/**
	 * match the structures beneath the EObjects as well as possible
	 * (using a heuristic bag-matching to guide the search)
	 * and build up the table of all mismatches.
	 * @param e1 an EObject
	 * @param e2 another EObject which should match it partially
	 * @param mismatches the set of all mismatches found so far, to be extended
	 * @return the number of matching features.
	 * @throws MapperException
	 */
	public int treeMatch(EObject e1, EObject e2, Hashtable<String, SemanticMismatch> mismatches)
	throws MapperException
	{
		return doTreeMatch(e1, e2, mismatches, "");
	}

	/**
	 * Recursive worker for {@link #treeMatch}.
	 *
	 * @param e1 an EObject
	 * @param e2 another EObject which should match it partially
	 * @param mismatches the set of all mismatches found so far, to be extended
	 * @param path the path of containment associations from the root EObject
	 * of each tree to e1 and e2 - it was to be used in semantic mismatch messages,
	 * but it turns out to be unnecessary, as the path can be deduced from either EObject.
	 * @return the number of matching nodes in the trees below the two EObjects
	 * @throws MapperException
	 */
	private int doTreeMatch(EObject e1, EObject e2, Hashtable<String, SemanticMismatch> mismatches, String path)
	throws MapperException
	{
		Hashtable<String,Integer> fBag1 = featureBag(e1, false);
		Hashtable<String,Integer> fBag2 = featureBag(e2, false);

		// initially, just the match on the top node - includes attributes and non-containment features
		int score = bagMatch(fBag1, fBag2);

		// local mismatches are only recorded when the two nodes have something in common
		if (score > 0) recordMismatches(e1, e2, mismatches);

		EClass ec1 = e1.eClass(); // assume e1 and e2 are of the same eClass
		EClass ec2 = e2.eClass(); // needed because the two EObjects don't like each others' EStructuralFeatures
		trace("Matching " + ec1.getName());

		// match child objects only within each containment feature
		for (EStructuralFeature ef1 : ec1.getEAllStructuralFeatures())
		{
			if (!(ef1 instanceof EReference)) continue;
			EReference ref1 = (EReference)ef1;
			if (!ref1.isContainment()) continue;

			String refName = ef1.getName();
			String newPath = path + "/" + refName;
			Object o1 = e1.eGet(ef1);

			// get the corresponding feature of the other EObject, looked up by name
			Object o2 = null;
			EStructuralFeature ef2 = ec2.getEStructuralFeature(refName);
			if (ef2 != null) o2 = e2.eGet(ef2);

			if ((o1 == null) || (o2 == null)) continue;

			// single child - match only if they are of the same class
			// NOTE(review): this compares Java implementation classes, whereas lists of
			// children are split by eClass().getName() - confirm this is intended
			if ((ref1.getUpperBound() == 1) &&
					(o1.getClass().getName().equals(o2.getClass().getName())))
			{
				score = score + 1 + doTreeMatch((EObject)o1, (EObject)o2, mismatches, newPath);
			}
			// lists of children - may contain several classes
			else if (ref1.getUpperBound() == -1)
			{
				score = score + featureMatch(o1, o2, mismatches);
			}
		}
		return score;
	}

	/**
	 * o1 and o2 are Lists of EObjects reached by some EReference feature.
	 * Split them into sub-lists of the same class, and match the sub-lists
	 * @param o1 the child list from the first instance
	 * @param o2 the child list from the second instance
	 * @param mismatches the set of all mismatches found so far, to be extended
	 * @return the sum of the class-by-class match scores
	 */
	private int featureMatch(Object o1, Object o2, Hashtable<String, SemanticMismatch> mismatches)
	throws MapperException
	{
		int score = 0;
		Hashtable<String,Vector<BaggedEObject>> lbc1 = listsByClass(o1);
		Hashtable<String,Vector<BaggedEObject>> lbc2 = listsByClass(o2);

		// only classes present in both lists can contribute to the score
		for (String className : lbc1.keySet())
		{
			Vector<BaggedEObject> v1 = lbc1.get(className);
			Vector<BaggedEObject> v2 = lbc2.get(className);
			if ((v1 != null) && (v2 != null)) score = score + classMatch(v1, v2, mismatches);
		}
		return score;
	}

	/**
	 * split a List of objects of different classes into
	 * separate Lists for each class
	 * @param o expected to be a List; anything else yields an empty table
	 * @return table from EClass name to the EObjects of that class, in list order;
	 * list members that are not EObjects are ignored
	 */
	private Hashtable<String,Vector<BaggedEObject>> listsByClass(Object o)
	{
		Hashtable<String,Vector<BaggedEObject>> lbc = new Hashtable<String,Vector<BaggedEObject>>();
		if (!(o instanceof List<?>)) return lbc;

		for (Object next : (List<?>)o)
		{
			if (!(next instanceof EObject)) continue;
			EObject eo = (EObject)next;
			String className = eo.eClass().getName();
			Vector<BaggedEObject> soFar = lbc.get(className);
			if (soFar == null)
			{
				soFar = new Vector<BaggedEObject>();
				lbc.put(className, soFar);
			}
			soFar.add(new BaggedEObject(eo));
		}
		return lbc;
	}

	/**
	 * Find the best match of two Vectors of EObjects, all of the same class.
	 * Compute all pairwise matches, then pick off the best matches in order,
	 * making sure that no EObject from either list is ever used more than once.
	 * @param v1 objects from the first instance
	 * @param v2 objects from the second instance
	 * @param mismatches the set of all mismatches found so far, to be extended
	 * @return the sum of the structure matches of the chosen pairs
	 */
	private int classMatch(Vector<BaggedEObject> v1, Vector<BaggedEObject> v2, Hashtable<String, SemanticMismatch> mismatches)
	throws MapperException
	{
		// compute all pairwise bag matches
		int[][] bagMatch = new int[v1.size()][v2.size()];
		trace("class match " + v1.size() + " " + v2.size());
		for (int i1 = 0; i1 < v1.size(); i1++)
		{
			BaggedEObject b1 = v1.get(i1);
			for (int i2 = 0; i2 < v2.size(); i2++)
			{
				BaggedEObject b2 = v2.get(i2);
				bagMatch[i1][i2] = bagMatch(b1.fBag, b2.fBag);
			}
		}

		// pick out the best of the pairwise matches, in order of descending score
		int score = 0;
		// the maximum number of matches - when the shorter list is exhausted.
		// (This previously took min(v1.size(), v1.size()); the surplus iterations found
		// no unmatched pair and scored 0, so the result is the same but work is saved.)
		int maxMatches = Math.min(v1.size(), v2.size());
		for (int m = 0; m < maxMatches; m++)
			score = score + bestRemainingMatch(bagMatch, v1, v2, mismatches);

		return score;
	}

	/**
	 * Each time this method is called, it picks out the best bag match in the matrix
	 * between BaggedEObjects that are not yet matched, marks them as matched
	 * so they will not match again, and computes their structure match
	 * @param bagMatch array of bag match scores, indexed [v1 index][v2 index]
	 * @param v1 objects from the first instance
	 * @param v2 objects from the second instance
	 * @param mismatches the set of all mismatches found so far, to be extended
	 * @return the structure match of the best match pair, or 0 if no unmatched pair
	 * has a bag match above zero
	 */
	private int bestRemainingMatch(int[][] bagMatch, Vector<BaggedEObject> v1, Vector<BaggedEObject> v2,
			Hashtable<String, SemanticMismatch> mismatches)
	throws MapperException
	{
		// find the best remaining bag match; ties go to the first pair in row-major order
		int iBest = 0;
		int jBest = 0;
		int bestMatch = 0;
		for (int i = 0; i < v1.size(); i++)
		{
			if (v1.get(i).matched) continue;
			for (int j = 0; j < v2.size(); j++)
			{
				if ((!v2.get(j).matched) && (bagMatch[i][j] > bestMatch))
				{
					bestMatch = bagMatch[i][j];
					iBest = i;
					jBest = j;
				}
			}
		}

		// record that the best pair are both matched, and find their structure match
		int score = 0;
		if (bestMatch > 0)
		{
			BaggedEObject best1 = v1.get(iBest);
			BaggedEObject best2 = v2.get(jBest);
			best1.matched = true;
			best2.matched = true;
			score = treeMatch(best1.obj, best2.obj, mismatches);
		}
		return score;
	}

	/**
	 * Inner class of an EObject and the extra information
	 * needed to find a best tree match
	 * @author robert
	 */
	class BaggedEObject
	{
		/** feature bag of the object and all its contained descendants */
		Hashtable<String,Integer> fBag;

		EObject obj;

		/** set once this object has been paired with one from the other instance */
		boolean matched;

		BaggedEObject(EObject obj)
		{
			this.obj = obj;
			fBag = featureBag(obj, true);
			matched = false;
		}

		/** @return the total number of occurrences in this object's feature bag */
		int size() {return bagMatch(fBag, fBag);}
	}

	//---------------------------------------------------------------------------------------------------
	//                     recording mismatches between two Ecore instances
	//---------------------------------------------------------------------------------------------------

	/**
	 * record the mismatches between two Ecore structures
	 * that occur locally on this node.
	 * Mismatches that occur on descendant nodes will be done by recursive calls to this
	 * method
	 * @param e1 one of the EObjects - the one from the source of a translation, which
	 * is generally expected to have more features
	 * @param e2 the other EObject that is expected to match e1; this comes from the
	 * result of a test translation, so is generally expected to have fewer features.
	 * @param mismatches a Hashtable of SemanticMismatch objects found so far -
	 * to be extended
	 */
	private void recordMismatches(EObject e1, EObject e2, Hashtable<String, SemanticMismatch> mismatches)
	throws MapperException
	{
		EClass ec1 = e1.eClass(); // assume e1 and e2 are of the same eClass
		EClass ec2 = e2.eClass(); // needed because the two EObjects don't like each others' EStructuralFeatures

		// look at all structural features, inherited or not
		for (EStructuralFeature ef1 : ec1.getEAllStructuralFeatures())
		{
			Object o1 = e1.eGet(ef1);

			// get the corresponding feature of the other EObject, looked up by name
			Object o2 = null;
			EStructuralFeature ef2 = ec2.getEStructuralFeature(ef1.getName());
			if (ef2 != null) o2 = e2.eGet(ef2);

			boolean isAttribute = (ef1 instanceof EAttribute);
			if ((isAttribute) && !(ef2 instanceof EAttribute))
			{
				String message = "Feature " + ef1.getName() + " of class " + ec1.getName()
					+ " is both an attribute and an association";
				// FIXME - do something useful with the message
				// NOTE(review): this also fires when ef2 is null, and the return abandons
				// all remaining features of this node - confirm that is intended
				System.out.println(message);
				return;
			}

			// no mismatch if both features are missing
			if ((o1 == null) && (o2 == null)) {}

			// common case - a feature present in the source, not in the result
			else if (o2 == null)
			{
				recordMissingFeature(e1, ef1, o1, mismatches, false);
			}

			// bizarre case - a feature absent in the source, but present in the result.
			// The value must come from the result side (o2): o1 is null here, and passing
			// it meant that extra links and classes could never be recorded.
			else if (o1 == null)
			{
				recordMissingFeature(e1, ef1, o2, mismatches, true);
			}

			// both features are present; need to check if their values differ
			else if (isAttribute)
			{
				recordAttributeValueMismatch(e1, (EAttribute)ef1, (EAttribute)ef2, o1, o2, mismatches);
			}

			// an EReference present in both source and translation result
			else
			{
				recordReferenceMismatches(e1, ef1, o1, o2, mismatches);
			}
		}
	}

	/**
	 * record a mismatch if an attribute present on both sides has different string values.
	 * Attributes whose source data type is not serializable are not compared.
	 */
	private void recordAttributeValueMismatch(EObject e1, EAttribute ea1, EAttribute ea2,
			Object o1, Object o2, Hashtable<String, SemanticMismatch> mismatches)
	throws MapperException
	{
		EDataType et1 = ea1.getEAttributeType();
		EDataType et2 = ea2.getEAttributeType();
		if (!et1.isSerializable()) return;

		String s1 = stringValue(o1, et1);
		String s2 = stringValue(o2, et2);
		// a value that could not be converted to a string (null) counts as a mismatch
		boolean matched = ((s1 != null) && (s2 != null) && (s1.equals(s2)));
		if (!matched)
		{
			int nature = SemanticMismatch.SEMANTIC_INCORRECT_PROPERTY_VALUE;
			SemanticMismatch sm = new SemanticMismatch
				(nature, s1, s2, e1, ef(ea1), null, sourceCode, resultCode);
			storeMismatch(sm, mismatches);
		}
	}

	/** widen an attribute to the EStructuralFeature type used when creating SemanticMismatches */
	private EStructuralFeature ef(EAttribute ea) {return ea;}

	/**
	 * record mismatches for a reference which has a value on both sides:
	 * a wrong target class for a single-valued reference, or missing objects, extra
	 * objects and differing numbers of objects per class for an unbounded reference.
	 */
	private void recordReferenceMismatches(EObject e1, EStructuralFeature ef1,
			Object o1, Object o2, Hashtable<String, SemanticMismatch> mismatches)
	throws MapperException
	{
		int upperBound = ((EReference)ef1).getUpperBound();

		// single child - mismatch if they are not of the same class
		if ((upperBound == 1) &&
				(!o1.getClass().getName().equals(o2.getClass().getName())))
		{
			int nature = SemanticMismatch.SEMANTIC_INCORRECT_TARGET_CLASS;
			SemanticMismatch sm = new SemanticMismatch
				(nature, o1.getClass().getName(), o2.getClass().getName(),
					e1, ef1, null, sourceCode, resultCode);
			storeMismatch(sm, mismatches);
		}

		// lists of children - may contain several classes
		else if (upperBound == -1)
		{
			Hashtable<String,Vector<BaggedEObject>> l1 = listsByClass(o1);
			Hashtable<String,Vector<BaggedEObject>> l2 = listsByClass(o2);

			for (String targetClass : l1.keySet())
			{
				Vector<BaggedEObject> v1 = l1.get(targetClass);
				Vector<BaggedEObject> v2 = l2.get(targetClass);

				// common case - links and objects missing in translation
				if (v2 == null)
				{
					for (BaggedEObject missing : v1)
						recordMissingAssociationOrClass(e1, ef1, missing.obj, mismatches, false);
				}

				// both source and target have the association, but numbers differ
				else if (v1.size() != v2.size())
				{
					EObject target = v1.get(0).obj;
					int nature = SemanticMismatch.SEMANTIC_INCORRECT_LINK_CARDINALITY;
					SemanticMismatch sm = new SemanticMismatch
						(nature, Integer.toString(v1.size()), Integer.toString(v2.size()),
							e1, ef1, target, sourceCode, resultCode);
					storeMismatch(sm, mismatches);
				}
			}

			for (String targetClass : l2.keySet())
			{
				// bizarre case - links and objects missing in source, present in translation
				if (l1.get(targetClass) == null)
				{
					for (BaggedEObject extra : l2.get(targetClass))
						recordMissingAssociationOrClass(e1, ef1, extra.obj, mismatches, true);
				}
			}
		}
	}

	/**
	 * record that the StructuralFeature ef1 on the EObject e1
	 * has a value on one side of the translation but no value on the other
	 * @param e1 the object owning the feature
	 * @param ef1 the feature
	 * @param o1 the value of the feature, taken from whichever side has one
	 * @param mismatches set of SemanticMismatch objects to build up
	 * @param bizarre if true, the value is 'extra' in the result rather than 'missing' from it
	 */
	private void recordMissingFeature(EObject e1, EStructuralFeature ef1, Object o1,
			Hashtable<String, SemanticMismatch> mismatches, boolean bizarre)
	throws MapperException
	{
		// record a missing attribute
		if (ef1 instanceof EAttribute)
		{
			int nature = bizarre
				? SemanticMismatch.SEMANTIC_EXTRA_PROPERTY_VALUE
				: SemanticMismatch.SEMANTIC_MISSING_PROPERTY_VALUE;
			SemanticMismatch sm = new SemanticMismatch
				(nature, "", "", e1, ef1, null, sourceCode, resultCode);
			sm.checkMissingMapping(sourceReader, resultReader);
			storeMismatch(sm, mismatches);
		}

		// record one missing class or missing link
		else if ((ef1.getUpperBound() == 1) && (o1 instanceof EObject))
		{
			recordMissingAssociationOrClass(e1, ef1, (EObject)o1, mismatches, bizarre);
		}

		// record several missing classes or missing links
		else if ((ef1.getUpperBound() == -1) && (o1 instanceof List<?>))
		{
			for (Object next : (List<?>)o1)
			{
				if (next instanceof EObject)
					recordMissingAssociationOrClass(e1, ef1, (EObject)next, mismatches, bizarre);
			}
		}
	}

	/**
	 * record a link or class which is missing (usually) or (if bizarre = true)
	 * extra, in the translation result compared to the source
	 * @param e1 the object owning the link
	 * @param ef1 the structural feature for the link
	 * @param target object at the end of the link
	 * @param mismatches set of SemanticMismatch object to build up
	 * @param bizarre if true, the error is 'extra' rather than 'missing'
	 */
	private void recordMissingAssociationOrClass(EObject e1, EStructuralFeature ef1, EObject target,
			Hashtable<String, SemanticMismatch> mismatches, boolean bizarre)
	throws MapperException
	{
		SemanticMismatch sm = null;
		boolean containment = ((EReference)ef1).isContainment();

		/* for containments, record the missing target objects by class;
		 * for non-containment association, record the missing links
		 * by role name and target class. */
		if (containment)
		{
			int nature = bizarre
				? SemanticMismatch.SEMANTIC_EXTRA_CLASS
				: SemanticMismatch.SEMANTIC_MISSING_CLASS;
			sm = new SemanticMismatch(nature, "", "", target, null, null, sourceCode, resultCode);
		}
		else
		{
			int nature = bizarre
				? SemanticMismatch.SEMANTIC_EXTRA_LINK
				: SemanticMismatch.SEMANTIC_MISSING_LINK;
			sm = new SemanticMismatch(nature, "", "", e1, ef1, target, sourceCode, resultCode);
		}
		sm.checkMissingMapping(sourceReader, resultReader);
		storeMismatch(sm, mismatches);
	}

	/**
	 * store a SemanticMismatch - or if it duplicates an
	 * existing one, increase the appropriate occurrence
	 * count for that mismatch
	 * @param sm the mismatch to store
	 * @param mismatches table of mismatches, keyed by SemanticMismatch.key()
	 */
	private void storeMismatch(SemanticMismatch sm, Hashtable<String, SemanticMismatch> mismatches)
	{
		String key = sm.key();
		SemanticMismatch existing = mismatches.get(key);
		if (existing != null) existing.addOccurrence();
		else mismatches.put(key, sm);
	}

	//-------------------------------------------------------------------------------------
	//                                 bag matching
	//-------------------------------------------------------------------------------------

	/**
	 * compute the bag match between two EObject structures:
	 * for every triplet present in both bags, the smaller of its two occurrence counts
	 * @param fBag1 feature bag of one structure
	 * @param fBag2 feature bag of the other structure
	 * @return integer bag match score
	 */
	private int bagMatch(Hashtable<String,Integer> fBag1, Hashtable<String,Integer> fBag2)
	{
		trace("bag match " + fBag1.size() + " " + fBag2.size());
		int match = 0;
		for (String triplet : fBag1.keySet())
		{
			Integer i2 = fBag2.get(triplet);
			if (i2 != null)
			{
				int occ1 = fBag1.get(triplet).intValue();
				int occ2 = i2.intValue();
				match = match + Math.min(occ1, occ2);
			}
		}
		return match;
	}

	/**
	 * For a first match of two EObjects and all their descendants, regard each
	 * object as a bag of features.
	 * This first match is used to pair up child nodes of any node in a full
	 * structure match.
	 * featureBag has key = [feature-stringValue-depth] triplet
	 * and Value = Integer number of occurrences
	 * @param eo any EObject
	 * @param drillDown if true, drill down to child nodes
	 * @return its feature bag
	 */
	private Hashtable<String,Integer> featureBag(EObject eo, boolean drillDown)
	{
		Hashtable<String,Integer> fBag = new Hashtable<String,Integer>();
		buildFeatureBag(fBag, eo, 0, drillDown);
		return fBag;
	}

	/**
	 * recursive building of a feature bag for an EObject
	 * @param fBag feature bag being built up
	 * @param eo object at this depth of recursion
	 * @param depth depth of nesting in original EObject tree
	 * @param drillDown if true, drill down to child nodes
	 */
	private void buildFeatureBag(Hashtable<String,Integer> fBag, EObject eo, int depth, boolean drillDown)
	{
		for (EStructuralFeature ef : eo.eClass().getEAllStructuralFeatures())
		{
			Object oValue = eo.eGet(ef);
			if (oValue == null) continue;
			String fName = ef.getName();

			if (ef instanceof EAttribute)
			{
				EDataType type = ((EAttribute)ef).getEAttributeType();
				if (type.isSerializable()) // i.e if there is some 'toString' method for the type
				{
					String sValue = stringValue(oValue, type);
					if (sValue != null) addToBag(fName + "$" + sValue + "@" + depth, fBag);
				}
			}

			else if (ef instanceof EReference)
			{
				EReference er = (EReference)ef;
				if (er.getUpperBound() == -1) // unbounded
				{
					EList<?> values = (EList<?>)oValue;
					for (Object next : values)
					{
						if (next instanceof EObject)
							addReferenceToBag(fBag, fName, (EObject)next, er, depth, drillDown);
					}
				}
				else if ((er.getUpperBound() == 1) && (oValue instanceof EObject)) // bounded
				{
					addReferenceToBag(fBag, fName, (EObject)oValue, er, depth, drillDown);
				}
			}
		}
	}

	/**
	 * add the triplet for one referenced object to the bag, and - for containment
	 * references when drilling down - the triplets of the object's own subtree.
	 */
	private void addReferenceToBag(Hashtable<String,Integer> fBag, String fName, EObject reffed,
			EReference er, int depth, boolean drillDown)
	{
		addToBag(fName + "$" + reffed.eClass().getName() + "@" + depth, fBag);

		// recursion to lower levels of the structure
		if ((er.isContainment()) && drillDown) buildFeatureBag(fBag, reffed, depth + 1, true);
	}

	/**
	 * record a triplet in the feature bag, incrementing its occurrence count
	 * @param triplet the key to count
	 * @param fBag the bag to update
	 */
	private void addToBag(String triplet, Hashtable<String,Integer> fBag)
	{
		Integer soFar = fBag.get(triplet);
		int occurrences = (soFar == null) ? 1 : soFar.intValue() + 1;
		fBag.put(triplet, Integer.valueOf(occurrences));
	}

	/**
	 * convert an attribute value to a String, for the data types EString, EInt,
	 * EBoolean and EFloat only.
	 * @param oValue the attribute value; may be null
	 * @param type the Ecore data type of the attribute
	 * @return the string form of the value, or null if the value is null or the data type
	 * is not one of those handled (in which case a message is written to standard output)
	 */
	private String stringValue(Object oValue, EDataType type)
	{
		if (oValue == null) return null;

		String typeName = type.getName();
		String sValue = null;
		if (typeName.equals("EString")) sValue = (String)oValue;
		else if (typeName.equals("EInt")) sValue = ((Integer)oValue).toString();
		else if (typeName.equals("EBoolean")) sValue = ((Boolean)oValue).toString();
		else if (typeName.equals("EFloat")) sValue = ((Float)oValue).toString();
		else {System.out.println("Unrecognised data type: " + typeName);}
		return sValue;
	}

	//---------------------------------------------------------------------------------------------------------
	//                                 Equalising text keys of objects
	//---------------------------------------------------------------------------------------------------------

	/**
	 * Some EObjects, made by using mappings to read a CDA XML into Ecore after the XML has been through an in-wrapper transform,
	 * have objects of class 'Text' or 'Text_1' etc, with attributes 'textContent' or 'textContent_1' etc, where the
	 * values of the attributes are keys like 'key_1' etc.
	 *
	 * These keys are generated by the in-wrapper transform as keys for html-like subtrees (rendered text in the CDA) stored by the wrapper transform.
	 * But the assignment of keys by the wrapper transform is arbitrary, so that two text subtrees in different instances, which
	 * are identical and should match, get assigned different keys and fail to match.
	 *
	 * To avoid such failure to match, this method unsets all such attributes throughout the
	 * containment tree below the given object, so that the matching done by this class
	 * can get 100% matches if all other values match.
	 *
	 * @param eo root of the tree to be modified in place
	 */
	public static void equaliseTextKeys(EObject eo)
	{
		EClass ec = eo.eClass();
		for (EStructuralFeature ef : ec.getEAllStructuralFeatures())
		{
			/* Any attribute whose name begins with 'textContent', of an object whose class name begins with 'Text',
			 * has values like 'Key_5' set by an in-wrapper transform (the value is a key pointing to a text subtree) ,
			 * where the index is determined at random and will fail to match.
			 * Remove the attribute value so there will be no failure (and no spurious extra score) */
			if (ef instanceof EAttribute)
			{
				if ((ec.getName().startsWith("Text")) && (ef.getName().startsWith("textContent")))
					eo.eUnset(ef);
			}

			// recurse downward through containment relations
			else if (ef instanceof EReference)
			{
				Object val = eo.eGet(ef);
				EReference er = (EReference)ef;
				if (!er.isContainment()) continue;

				// follow to the single EObject at the end of the association
				if (er.getUpperBound() == 1)
				{
					if (val instanceof EObject) equaliseTextKeys((EObject)val);
				}

				// follow to multiple EObjects at the end of the association
				else if ((er.getUpperBound() == -1) && (val instanceof List<?>))
				{
					for (Object next : (List<?>)val)
					{
						if (next instanceof EObject) equaliseTextKeys((EObject)next);
					}
				}
			}
		}
	}

	/** write a progress message to standard output, but only if tracing is switched on */
	private void trace(String s) {if (tracing) System.out.println(s);}

}