package husacct.analyse.task.reconstruct.mojo; import java.io.*; import java.util.*; public class MoJoCalculator { /* File info */ private String sourceFile, targetFile, relFile; private BufferedReader br_s, br_t, br_r; public MoJoCalculator(String sf, String tf, String rf) { sourceFile = sf; targetFile = tf; relFile = rf; } /* The mapping between objects and clusters in B */ private Map<String, String> mapObjectClusterInB = new Hashtable<String, String>(); /* The mappings of clusters to tags in both A and B */ private Map<String, Integer> mapClusterTagA = new Hashtable<String, Integer>(); private Map<String, Integer> mapClusterTagB = new Hashtable<String, Integer>(); /* Mapping between edges and their edgecost */ private Hashtable<String, Double> tableR = new Hashtable<String, Double>(); /* use for store the name of each items */ private Vector<String> clusterNamesInA = new Vector<String>(); // Stores the number of objects in each cluster in partition B // Used in calculating the max distance from partition B private Vector<Integer> cardinalitiesInB = new Vector<Integer>(); /* This vector contains a vector for each cluster in A */ private Vector<Vector<String>> partitionA = new Vector<Vector<String>>(); private int l = 0; /* number of clusters in A */ private int m = 0; /* number of clusters in B */ private long numberOfObjectsInA; private Cluster A[] = null; private boolean verbose = false; /* * record the capacity of each group, if the group is empty ,the count is * zero, otherwise >= 1 */ private int groupscount[] = null; /* * after join operations, each group will have only one cluster left, we use * grouptags[i] to indicate the remain cluster in group i */ private Cluster grouptags[] = null; /* * every none empty group have a tag * point to a cluster in A */ public long mojoplus() { commonPrep(); /* tag assigment */ tagAssignment("MoJoPlus"); /* draw graph and matching */ maxbipartiteMatching(); /* Calculate total cost */ return calculateCost(); } public double mojofm() { double returnValue = 0; commonPrep(); /* tag assigment */ tagAssignment("MoJo"); /* draw graph and matching */ maxbipartiteMatching(); /* Calculate MoJoFM value */ returnValue = mojofmValue(cardinalitiesInB, numberOfObjectsInA, calculateCost()); return returnValue; } public double edgemojo() { /* In EdgeMoJo mode, we read the relationship file first */ if (relFile != null) readRelationRSFfile(); commonPrep(); /* tag assigment */ tagAssignment("MoJo"); /* draw graph and matching */ maxbipartiteMatching(); /* Calculate total cost */ double result = calculateCost(); /* perform the edgeMoJo */ result = result + edgeCost(); return result; } public void setVerbose(boolean v) { verbose = v; } public long mojo() { commonPrep(); /* tag assigment */ tagAssignment("MoJo"); /* draw graph and matching */ maxbipartiteMatching(); /* Calculate total cost */ return calculateCost(); } private void commonPrep() { numberOfObjectsInA = 0; /* Read target file first to update mapObjectClusterInB */ if (isBunch(targetFile)) readTargetBunchFile(); else readTargetRSFFile(); /* Read source file */ if (isBunch(sourceFile)) readSourceBunchFile(); else readSourceRSFfile(); l = mapClusterTagA.size(); /* number of clusters in A */ m = mapClusterTagB.size(); /* number of clusters in B */ A = new Cluster[l]; /* create A */ groupscount = new int[m]; /* the count of each group, 0 if empty */ grouptags = new Cluster[m]; /* * the first cluster in each group, null if * empty */ /* init group tags */ for (int j = 0; j < m; j++) { grouptags[j] = null; } /* create each cluster in A */ for (int i = 0; i < l; i++) { A[i] = new Cluster(i, l, m); } } private double edgeCost() { /* Perform join operation first */ for (int j = 0; j < m; j++) { if (groupscount[j] > 1) { for (int i = 0; i < l; i++) { if (A[i].getGroup() == j) { if (grouptags[j].getNo() != i) { grouptags[j].merge(A[i]); }; } } } } /* Calculate the additional edge cost */ double result = 0; for (int j = 0; j < m; j++) { if (grouptags[j] != null) result += grouptags[j].edgeCost(tableR, grouptags, null); } return result; } /* public void showSequence() { commonPrep(); System.out.println("Join operations"); for (int j = 0; j < m; j++) { if (groupscount[j] > 1) { for (int i = 0; i < l; i++) { if (A[i].getGroup() == j) { if (grouptags[j].getNo() != i) { grouptags[j].merge(A[i]); System.out.println("Join clusters " + clusterNamesInA.elementAt(grouptags[j].getNo()) + " and " + clusterNamesInA.elementAt(i)); } } } } } System.out.println("Move operations"); int newClusterIndex = l; for (int j = 0; j < m; j++) { if (grouptags[j] != null) { for (int i = 0; i < m; i++) { if (i != j && grouptags[j].objectList.elementAt(i).size() > 0) { System.out.print("Move " + grouptags[j].objectList.elementAt(i) + " from A" + (grouptags[j].getNo() + 1)); if (grouptags[i] != null) // the group is not empty { System.out.println(" to cluster A" + (grouptags[i].getNo() + 1)); } else { grouptags[i] = new Cluster(newClusterIndex++, l, m); // create a new group System.out.println(" to newly created cluster A" + (grouptags[i].getNo() + 1) + "(G" + (i + 1) + ")"); } grouptags[j].move(i, grouptags[i]); } } } } } */ private void maxbipartiteMatching() { /* Create the graph and add all the edges */ BipartiteGraph bgraph = new BipartiteGraph(l + m, l, m); for (int i = 0; i < l; i++) { for (int j = 0; j < A[i].groupList.size(); j++) { bgraph.addedge(i, l + A[i].groupList.elementAt(j).intValue()); } } /* Use maximum bipartite matching to calculate the groups */ bgraph.matching(); /* * Assign group after matching, for each Ai in matching, assign the * corresponding group, for other cluster in A, just leave them alone */ for (int i = l; i < l + m; i++) { if (bgraph.vertex[i].matched) { int index = bgraph.adjacentList.elementAt(i).elementAt(0).intValue(); A[index].setGroup(i - l); } } } /* * Calculates the MoJoFM value, using the formula MoJoFM(M) = 1 - mno(A,B)/ * max(mno(any_A,B)) * 100% */ private double mojofmValue(@SuppressWarnings("rawtypes") Vector number_of_B, long obj_number, long totalCost) { /* * This method is edited specifically for HUSACCT to prevent that incompatible intended architecture, * with one overlapping module (e.g. ExternalSystems, or xLibraries) receive a result of 100%. Due to the * following characteristic of MoJo (see ReadME in mojo.jar): "If the two decompositions do not refer to the * same set of clustered objects, only the intersection of the two sets will be considered." */ double mojofmValue = 0; long maxDis = maxDistanceTo(number_of_B, obj_number); if (totalCost == 0) { if (obj_number > 1) { mojofmValue = Math.rint((1 - (double) totalCost / (double) maxDis) * 10000) / 100; } } else { mojofmValue = Math.rint((1 - (double) totalCost / (double) maxDis) * 10000) / 100; } return mojofmValue; } /* calculate the max(mno(B, any_A)), which is also the max(mno(any_A, B)) */ private long maxDistanceTo(@SuppressWarnings("rawtypes") Vector number_of_B, long obj_number) { int group_number = 0; int[] B = new int[number_of_B.size()]; for (int i = 0; i < B.length; i++) { B[i] = ((Integer) number_of_B.elementAt(i)).intValue(); } /* sort the array in ascending order */ java.util.Arrays.sort(B); for (int i = 0; i < B.length; i++) { /* calculate the minimum maximum possible groups for partition B */ /* * after sort the B_i in ascending order B_i: 1, 2, 3, 4, 5, 6, 7, * 8, 10, 10, 10, 15 we can calculate g in this way g: 1, 2, 3, 4, * 5, 6, 7, 8, 9, 10, 10, 11 */ if (group_number < B[i]) group_number++; } /* return n - l + l - g = n - g */ return obj_number - group_number; } private long calculateCost() { int moves = 0; /* total number of move operations */ int no_of_nonempty_group = 0; /* number of total noneempty groups */ long totalCost = 0; /* total cost of MoJo */ /* find none empty groups and find total number of moves */ for (int i = 0; i < l; i++) { /* caculate the count of nonempty groups */ /* * when we found that a group was set to empty but in fact is not * empty, we increase the number of noneempty group by 1 */ if (groupscount[A[i].getGroup()] == 0) { no_of_nonempty_group += 1; } /* assign group tags */ /* if this group has no tag, then we assign A[i] to its tag */ if (grouptags[A[i].getGroup()] == null) { grouptags[A[i].getGroup()] = A[i]; } /* assign the group count */ groupscount[A[i].getGroup()] += 1; /* calculate the number of move opts for each cluster */ moves += A[i].gettotalTags() - A[i].getMaxtag(); } totalCost = moves + l - no_of_nonempty_group; return totalCost; } private void tagAssignment(String mode) { for (int i = 0; i < l; i++) { int tag = -1; String clusterName = ""; for (int j = 0; j < partitionA.elementAt(i).size(); j++) { String objName = partitionA.elementAt(i).elementAt(j); clusterName = mapObjectClusterInB.get(objName); tag = mapClusterTagB.get(clusterName).intValue(); A[i].addobject(tag, objName, mode); } } } private void readSourceBunchFile() { try { br_s = new BufferedReader(new FileReader(sourceFile)); } catch (FileNotFoundException e) { throw new RuntimeException("Could not open " + sourceFile); } try { for (String line = br_s.readLine(); line != null; line = br_s.readLine()) { int equalMark = line.indexOf("="); String strClusterA = line.substring(0, equalMark).trim(); String objList = line.substring(equalMark + 1).trim(); StringTokenizer st = new StringTokenizer(objList, ","); int objNumber = st.countTokens(); numberOfObjectsInA += objNumber; int index = mapClusterTagA.size(); clusterNamesInA.addElement(strClusterA); mapClusterTagA.put(strClusterA, new Integer(index)); partitionA.addElement(new Vector<String>()); for (int i = 0; i < objNumber; i++) { String obj = st.nextToken().trim(); partitionA.elementAt(index).addElement(obj); } } } catch (IOException e) { throw new RuntimeException("Could not read from " + sourceFile); } try { br_s.close(); } catch (IOException e) { throw new RuntimeException("Could not close " + sourceFile); } } private void readSourceRSFfile() { try { br_s = new BufferedReader(new FileReader(sourceFile)); } catch (FileNotFoundException e) { throw new RuntimeException("Could not open " + sourceFile); } long extraInA = 0; try { for (String line = br_s.readLine(); line != null; line = br_s.readLine()) { StringTokenizer st = new StringTokenizer(line); if (st.countTokens() != 3) { String message = "Incorrect RSF format in " + sourceFile + " in the following line:\n" + line; throw new RuntimeException(message); } // Ignore lines that do not start with contain if (!st.nextToken().toLowerCase().equals("contain")) continue; int index = -1; String clusterName = st.nextToken(); String objectName = st.nextToken(); if (mapObjectClusterInB.keySet().contains(objectName)) { numberOfObjectsInA++; Integer objectIndex = mapClusterTagA.get(clusterName); if (objectIndex == null) { index = mapClusterTagA.size(); clusterNamesInA.addElement(clusterName); mapClusterTagA.put(clusterName, new Integer(index)); partitionA.addElement(new Vector<String>()); } else { index = objectIndex.intValue(); } partitionA.elementAt(index).addElement(objectName); } else extraInA++; } } catch (IOException e) { throw new RuntimeException("Could not read from " + sourceFile); } try { br_s.close(); } catch (IOException e) { throw new RuntimeException("Could not close " + sourceFile); } if (extraInA > 0) put("Warning: " + extraInA + " objects in " + sourceFile + " were not found in " + targetFile + ". They will be ignored."); long extraInB = mapObjectClusterInB.keySet().size() - numberOfObjectsInA; if (extraInB > 0) put("Warning: " + extraInB + " objects in " + targetFile + " were not found in " + sourceFile + ". They will be ignored."); } private void readRelationRSFfile() { try { br_r = new BufferedReader(new FileReader(relFile)); } catch (FileNotFoundException e) { throw new RuntimeException("Could not open " + relFile); } try { for (String line = br_r.readLine(); line != null; line = br_r.readLine()) { StringTokenizer st = new StringTokenizer(line); if (st.countTokens() != 3) { String message = "Incorrect RSF format in " + relFile + " in the following line:\n" + line; throw new RuntimeException(message); } // The relation name is not used st.nextToken(); String obj1 = st.nextToken(); String obj2 = st.nextToken(); /* * we use obj1+"%@$"+obj2 as the key, store it into hash table, * for all kinds of relationship we consider them with same * connection strength */ /* * for example, if this time we see a call obj1 obj2, we store * obj1+"%@$"+obj2 with value 1, next time we see a ref obj1 * obj2, we store obj1+"%@$"+obj2 with value 2 */ if (tableR.get(obj1 + "%@$" + obj2) == null) tableR.put(obj1 + "%@$" + obj2, new Double(1)); else { double previous_value = (tableR.get(obj1 + "%@$" + obj2)).doubleValue(); tableR.put(obj1 + "%@$" + obj2, new Double(previous_value + 1)); } } } catch (IOException e) { throw new RuntimeException("Could not read from " + relFile); } try { br_r.close(); } catch (IOException e) { throw new RuntimeException("Could not close " + relFile); } } private void readTargetBunchFile() { try { br_t = new BufferedReader(new FileReader(targetFile)); } catch (FileNotFoundException e) { throw new RuntimeException("Could not open " + targetFile); } try { for (String line = br_t.readLine(); line != null; line = br_t.readLine()) { int equalMark = line.indexOf("="); String strClusterB = line.substring(0, equalMark).trim(); String objList = line.substring(equalMark + 1, line.length()).trim(); StringTokenizer st = new StringTokenizer(objList, ","); int objNumber = st.countTokens(); int index = mapClusterTagB.size(); cardinalitiesInB.addElement(new Integer(objNumber)); mapClusterTagB.put(strClusterB, new Integer(index)); for (int i = 0; i < objNumber; i++) { String obj = st.nextToken().trim(); mapObjectClusterInB.put(obj, strClusterB); } } } catch (IOException e) { throw new RuntimeException("Could not read from " + targetFile); } try { br_t.close(); } catch (IOException e) { throw new RuntimeException("Could not close " + targetFile); } } private void readTargetRSFFile() { try { br_t = new BufferedReader(new FileReader(targetFile)); } catch (FileNotFoundException e) { throw new RuntimeException("Could not open " + targetFile); } try { for (String line = br_t.readLine(); line != null; line = br_t.readLine()) { StringTokenizer st = new StringTokenizer(line); if (st.countTokens() != 3) { String message = "Incorrect RSF format in " + targetFile + " in the following line:\n" + line; throw new RuntimeException(message); } // Ignore lines that do not start with contain if (!st.nextToken().toLowerCase().equals("contain")) continue; String clusterName = st.nextToken(); /* Remove quotes from the token */ int first_quote_index = clusterName.indexOf("\""); if (first_quote_index == 0 && clusterName.indexOf("\"", first_quote_index + 1) == clusterName.length() - 1) clusterName = clusterName.substring(first_quote_index + 1, clusterName.length() - 1); String objectName = st.nextToken(); int index = -1; /* Search for the cluster name in mapClusterTagB */ Integer objectIndex = mapClusterTagB.get(clusterName); if (objectIndex == null) { // This cluster is not in mapClusterTagB yet index = mapClusterTagB.size(); // Since it is a new cluster, it currently contains 1 object cardinalitiesInB.addElement(new Integer(1)); mapClusterTagB.put(clusterName, new Integer(index)); } else { index = objectIndex.intValue(); // Increase the cluster's cardinality in vector // cardinalitiesInB int newCardinality = 1 + cardinalitiesInB.elementAt(index).intValue(); cardinalitiesInB.setElementAt(new Integer(newCardinality), index); } mapObjectClusterInB.put(objectName, clusterName); } } catch (IOException e) { throw new RuntimeException("Could not read from " + targetFile); } try { br_t.close(); } catch (IOException e) { throw new RuntimeException("Could not close " + targetFile); } } private boolean isBunch(String file) { int dot = file.lastIndexOf("."); if (dot < 0) return false; String extend = file.substring(dot + 1).trim(); if (extend.equalsIgnoreCase("bunch")) return true; return false; } private void put(String message) { if (verbose) System.out.println(message); } }