package br.ufmg.dcc.labsoft.aserg.modularitycheck.enhancements.processing.data;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;

import data.handler.CarryFileMemory;
import br.ufmg.dcc.labsoft.aserg.modularitycheck.enhancements.properties.util.Properties;
import br.ufmg.dcc.labsoft.aserg.modularitycheck.enhancements.properties.util.Utils;

/**
 * Post-processes a set of cluster files using the itemsets listed in an
 * Apriori result file.
 *
 * <p>
 * All working state (the cluster/class matrix, the itemset/class matrix and
 * the size arrays) is kept in static fields that are filled in, step by step,
 * by {@link #readAprioriRules()}. The private helpers therefore only work when
 * called in the order used by that method.
 *
 * <p>
 * Cluster files are read with {@code CarryFileMemory}; line 0 of every file is
 * treated as a header and the remaining non-empty lines as class names.
 */
public class AssociationRule {

    /** Rows: cluster files; columns: entries of {@link #clusterClasses}. 1 = cluster contains the class. */
    private static byte[][] term_cluster_matrix;
    /** Classes found in the cluster files that do NOT occur in any itemset. */
    private static ArrayList<String> clusterClasses;
    /** Classes that occur in at least one itemset of the Apriori file. */
    private static ArrayList<String> classesName;
    /** Never allocated by the active code path; only touched by the unused {@link #removeFromTermMatrix(String)}. */
    private static byte[] intersection;
    /** Indexed like {@link #classesName}; 1 = the class occurs in more than one itemset. */
    private static byte[] intersectionItemSet;
    /** Number of distinct non-itemset classes currently assigned to each cluster. */
    private static int[] clusterSize;
    /** Rows: itemsets; columns: entries of {@link #classesName}. 1 = itemset contains the class. */
    private static byte[][] itemSet_cluster_matrix;
    /** Files found in {@link #path}, in the order returned by {@link File#listFiles()}. */
    private static File[] clusters;

    private static final int MIN_SIZE = 2;
    public static final String SUB_TITLE = "ItemSet of Number ";

    // NOTE(review): machine-specific absolute paths; they should probably be
    // supplied by the caller or by configuration.
    private static String path = "C:\\Users\\Luciana\\Dropbox\\Testes\\GeronimoNew\\Results\\Clusters\\";
    private static String apriori = "C:\\Users\\Luciana\\Dropbox\\Testes\\GeronimoNew\\Results\\Apriori\\regras.txt";

    /**
     * Writes the Apriori input data set: one line per cluster file, holding the
     * distinct class names of that cluster separated by
     * {@code Properties.COMMA}.
     *
     * <p>
     * Overall workflow this method belongs to: (1) create a file where each
     * line contains the classes of one cluster, (2) apply Apriori, (3) select
     * one cluster to keep the rule and remove the rule from the remaining
     * clusters.
     *
     * <p>
     * Clusters without any class are skipped, so they neither produce an empty
     * transaction nor corrupt the previous line.
     *
     * @param path2
     *            currently ignored; the hard-coded cluster directory is used.
     *            NOTE(review): this looks unintended - confirm with callers
     *            before switching to the parameter.
     * @throws FileNotFoundException
     *             if the cluster directory cannot be listed
     * @throws IOException
     *             if a cluster file cannot be read or the data set cannot be
     *             written
     */
    public static void setClusterDataSet(String path2)
            throws FileNotFoundException, IOException {
        File[] clusterFiles = listClusterFiles();
        StringBuilder dataSet = new StringBuilder();
        for (File cluster : clusterFiles) {
            String[] clusterContent = new CarryFileMemory(
                    cluster.getAbsolutePath()).carryCompleteFile();
            ArrayList<String> content = new ArrayList<String>();
            // Line 0 is the header; only the remaining lines are class names.
            for (int i = 1; i < clusterContent.length; i++) {
                String term = clusterContent[i].trim();
                if (clusterContent[i].length() > 1 && !content.contains(term)) {
                    content.add(term);
                }
            }
            if (content.isEmpty()) {
                continue;
            }
            // Separator goes between terms, so no trailing comma has to be
            // removed afterwards.
            boolean first = true;
            for (String term : content) {
                if (!first) {
                    dataSet.append(Properties.COMMA);
                }
                dataSet.append(term);
                first = false;
            }
            dataSet.append(Properties.NEW_LINE);
        }
        Utils.writeFile(
                dataSet.toString(),
                "C:\\Users\\Luciana\\Dropbox\\Testes\\GeronimoNew\\Results\\Apriori\\dataAprioriLSICosine.txt");
    }

    /**
     * Reads the Apriori itemsets and the cluster files, removes classes shared
     * by several clusters, writes the derived cluster files (extra, unitary,
     * joined, miscellaneous), rewrites the surviving cluster files and deletes
     * the cluster files whose size dropped to zero.
     *
     * @throws FileNotFoundException
     *             if the cluster directory cannot be listed
     * @throws IOException
     *             if any file cannot be read or written
     */
    public static void readAprioriRules() throws FileNotFoundException,
            IOException {
        ArrayList<String[]> itemSets = new ArrayList<String[]>();

        retrieveItems(itemSets);
        retrieveClasses();
        createMatrixOfClusters();
        intersectClusters();
        int zeroClusters = countZeroClusters();

        int[] itemSetSize = new int[itemSets.size()];
        createMatrixOfItemSets(itemSets, itemSetSize);
        intersectItemSets();

        if (zeroClusters > 0) {
            saveExtraClusters(zeroClusters, itemSets, itemSetSize);
            // Up to here the number of clusters tends to return to normal.
            createUnitaryItemsetCluster(itemSetSize, itemSets);
            ArrayList<String> miscellaneousList = getClusterMiscellaneous();
            if (miscellaneousList.size() > 0) {
                createNewClusters();
                saveClusterMiscellaneous(miscellaneousList);
            }
        } else {
            createUnitaryItemsetCluster(itemSetSize, itemSets);
            createNewClusters();
            ArrayList<String> miscellaneousList = getClusterMiscellaneous();
            if (miscellaneousList.size() > 0) {
                createNewClusters();
                saveClusterMiscellaneous(miscellaneousList);
            }
            // An alternative strategy based on getSmallestClusters(),
            // readContent() and unionBetweenTwoClusters() is currently
            // disabled; those helpers are kept but unused.
        }
        removeItemsFromClusterFiles();
        deleteFiles();
    }

    /**
     * Lists the files of the cluster directory.
     *
     * @return the directory entries, never {@code null}
     * @throws FileNotFoundException
     *             if {@link File#listFiles()} returns {@code null} (the path
     *             is not a directory or cannot be read)
     */
    private static File[] listClusterFiles() throws FileNotFoundException {
        File[] files = new File(path).listFiles();
        if (files == null) {
            throw new FileNotFoundException(
                    "Cluster directory not found or not readable: " + path);
        }
        return files;
    }

    /**
     * Merges the two smallest non-empty clusters into a new file named
     * {@code joinedClusters<id of the smallest>} and empties both source
     * clusters (their size becomes 0, so {@link #deleteFiles()} removes them).
     *
     * <p>
     * Does nothing when no non-empty cluster exists.
     *
     * @throws IOException
     *             if the joined cluster file cannot be written
     */
    private static void createNewClusters() throws IOException {
        int[] selectedClusters = getTwoSmallClusters();
        if (selectedClusters[0] < 0) {
            return; // no non-empty cluster left to join
        }
        StringBuilder items = new StringBuilder();
        items.append(Properties.NEW_LINE);
        for (int clusterId : selectedClusters) {
            if (clusterId < 0) {
                continue; // only one non-empty cluster was available
            }
            for (int item = 0; item < term_cluster_matrix[clusterId].length; item++) {
                if (term_cluster_matrix[clusterId][item] == 1) {
                    items.append(clusterClasses.get(item)).append(
                            Properties.NEW_LINE);
                    term_cluster_matrix[clusterId][item] = 0;
                }
            }
            clusterSize[clusterId] = 0;
        }
        Utils.writeFile(items.toString(), path + "joinedClusters"
                + selectedClusters[0]);
    }

    /**
     * Finds the two smallest clusters whose size is greater than zero.
     *
     * @return a two-element array of cluster ids, smallest first; a slot holds
     *         -1 when no (further) non-empty cluster exists
     */
    private static int[] getTwoSmallClusters() {
        int alreadyChosen = -1;
        int[] theSmallestClusters = new int[2];
        for (int count = 0; count < 2; count++) {
            int smallest = Integer.MAX_VALUE;
            int index = -1;
            for (int value = 0; value < clusterSize.length; value++) {
                if (clusterSize[value] < smallest && alreadyChosen != value
                        && clusterSize[value] > 0) {
                    smallest = clusterSize[value];
                    index = value;
                }
            }
            alreadyChosen = index;
            theSmallestClusters[count] = index;
        }
        return theSmallestClusters;
    }

    /**
     * Writes the classes of the given clusters into a single
     * {@code joinedClusters} file and clears them from the cluster matrix.
     * Unlike {@link #createNewClusters()} it does not touch
     * {@link #clusterSize}. Currently unused.
     *
     * @param selectedClusters
     *            ids (row indexes) of the clusters to merge
     * @throws IOException
     *             if the file cannot be written
     */
    @SuppressWarnings("unused")
    private static void createSpreadClassCluster(int[] selectedClusters)
            throws IOException {
        StringBuilder items = new StringBuilder();
        items.append(Properties.NEW_LINE);
        for (int clusterId : selectedClusters) {
            for (int item = 0; item < term_cluster_matrix[0].length; item++) {
                if (term_cluster_matrix[clusterId][item] == 1) {
                    items.append(clusterClasses.get(item)).append(
                            Properties.NEW_LINE);
                    term_cluster_matrix[clusterId][item] = 0;
                }
            }
        }
        Utils.writeFile(items.toString(), path + "joinedClusters");
    }

    /**
     * Writes the {@code UnitaryItemsets} file from the itemsets whose current
     * size is exactly 1.
     *
     * <p>
     * For each such itemset (processed from the last to the first) its first
     * remaining class is examined. If the class is shared with a larger
     * itemset it stays there and is not written; occurrences in other itemsets
     * that are not larger are cleared and those itemsets shrink by one. Every
     * processed unitary itemset ends with size 0.
     *
     * @param itemSetSize
     *            current size of each itemset; updated in place
     * @param itemSets
     *            the itemsets as read from the Apriori file (not used by the
     *            body)
     * @throws IOException
     *             if the file cannot be written
     */
    private static void createUnitaryItemsetCluster(int[] itemSetSize,
            ArrayList<String[]> itemSets) throws IOException {
        StringBuilder items = new StringBuilder();
        String temp = null;
        items.append(Properties.NEW_LINE);
        for (int i = itemSetSize.length - 1; i >= 0; i--) {
            if (itemSetSize[i] == 1) {
                for (int j = 0; j < itemSet_cluster_matrix[0].length; j++) {
                    if (itemSet_cluster_matrix[i][j] == 1) {
                        boolean canTakeIt = true;
                        temp = classesName.get(j);
                        itemSet_cluster_matrix[i][j] = 0;
                        if (intersectionItemSet[j] == 1) {
                            // The class is shared: search the other itemsets.
                            for (int isfm = 0; isfm < itemSet_cluster_matrix.length; isfm++) {
                                if (isfm != i
                                        && itemSet_cluster_matrix[isfm][j] == 1
                                        && itemSetSize[isfm] > itemSetSize[i]) {
                                    // A larger itemset owns the class.
                                    canTakeIt = false;
                                } else if (isfm != i
                                        && itemSet_cluster_matrix[isfm][j] == 1) {
                                    itemSet_cluster_matrix[isfm][j] = 0;
                                    itemSetSize[isfm] = itemSetSize[isfm] - 1;
                                }
                            }
                        }
                        if (canTakeIt) {
                            items.append(temp).append(Properties.NEW_LINE);
                            intersectionItemSet[j] = 0;
                        }
                        // Only the first remaining class of the itemset is
                        // handled.
                        break;
                    }
                }
                itemSetSize[i] = 0;
            }
        }
        Utils.writeFile(items.toString(), path + "UnitaryItemsets");
    }

    /**
     * Removes a class from every cluster of the term matrix. Only clusters
     * that actually contained the class have their size decremented. Unknown
     * names are ignored. Currently unused.
     *
     * @param name
     *            class name as stored in {@link #clusterClasses}
     */
    @SuppressWarnings("unused")
    private static void removeFromTermMatrix(String name) {
        int item = clusterClasses.indexOf(name);
        if (item < 0) {
            return;
        }
        for (int clusterId = 0; clusterId < term_cluster_matrix.length; clusterId++) {
            if (term_cluster_matrix[clusterId][item] == 1) {
                term_cluster_matrix[clusterId][item] = 0;
                clusterSize[clusterId] = clusterSize[clusterId] - 1;
            }
        }
        // The intersection array is not allocated by the active code path.
        if (intersection != null) {
            intersection[item] = 0;
        }
    }

    /**
     * Flags, in {@link #intersectionItemSet}, every class that occurs in more
     * than one itemset.
     */
    private static void intersectItemSets() {
        int classCount = classesName.size();
        intersectionItemSet = new byte[classCount];
        for (int item = 0; item < classCount; item++) {
            // The counter must start at zero for every class; otherwise the
            // occurrences of earlier classes leak into later ones.
            int occurrences = 0;
            for (int itemSetId = 0; itemSetId < itemSet_cluster_matrix.length; itemSetId++) {
                if (itemSet_cluster_matrix[itemSetId][item] == 1) {
                    occurrences++;
                }
            }
            if (occurrences > 1) {
                intersectionItemSet[item] = 1;
            }
        }
    }

    /**
     * Builds the itemset/class matrix and records the initial size of each
     * itemset.
     *
     * @param itemSets
     *            itemsets read from the Apriori file
     * @param itemSetSize
     *            output array, one slot per itemset
     */
    private static void createMatrixOfItemSets(ArrayList<String[]> itemSets,
            int[] itemSetSize) {
        itemSet_cluster_matrix = new byte[itemSets.size()][classesName.size()];
        for (int itemSet = 0; itemSet < itemSets.size(); itemSet++) {
            String[] items = itemSets.get(itemSet);
            itemSetSize[itemSet] = items.length;
            for (String item : items) {
                itemSet_cluster_matrix[itemSet][classesName.indexOf(item
                        .trim())] = 1;
            }
        }
    }

    /**
     * Reads the Apriori result file. For every line, the text before the first
     * space is split on {@code Properties.COMMA} into one itemset; every
     * distinct trimmed item is also recorded in {@link #classesName}.
     *
     * @param itemSets
     *            list that receives one array per line of the file
     * @throws FileNotFoundException
     *             presumably when the Apriori file is missing - depends on
     *             {@code CarryFileMemory}
     * @throws IOException
     *             if the file cannot be read
     */
    private static void retrieveItems(ArrayList<String[]> itemSets)
            throws FileNotFoundException, IOException {
        classesName = new ArrayList<String>();
        String[] maximalItemset = new CarryFileMemory(apriori)
                .carryCompleteFile();
        for (String line : maximalItemset) {
            String[] items = line.split(" ")[0].split(Properties.COMMA);
            itemSets.add(items);
            for (String item : items) {
                String name = item.trim();
                if (!classesName.contains(name)) {
                    classesName.add(name);
                }
            }
        }
    }

    /**
     * Lists the cluster files and collects, in {@link #clusterClasses}, the
     * distinct class names that do not belong to any itemset.
     *
     * @throws FileNotFoundException
     *             if the cluster directory cannot be listed
     * @throws IOException
     *             if a cluster file cannot be read
     */
    private static void retrieveClasses() throws FileNotFoundException,
            IOException {
        clusterClasses = new ArrayList<String>();
        clusters = listClusterFiles();
        for (File cluster : clusters) {
            String[] clusterContent = new CarryFileMemory(
                    cluster.getAbsolutePath()).carryCompleteFile();
            // Line 0 is the header.
            for (int i = 1; i < clusterContent.length; i++) {
                String term = clusterContent[i].trim();
                if (!term.isEmpty() && !clusterClasses.contains(term)
                        && !classesName.contains(term)) {
                    clusterClasses.add(term);
                }
            }
        }
    }

    /**
     * Builds the cluster/class matrix and computes the size of each cluster
     * (number of distinct non-itemset classes it contains).
     *
     * @throws FileNotFoundException
     *             presumably when a cluster file disappeared - depends on
     *             {@code CarryFileMemory}
     * @throws IOException
     *             if a cluster file cannot be read
     */
    private static void createMatrixOfClusters() throws FileNotFoundException,
            IOException {
        term_cluster_matrix = new byte[clusters.length][clusterClasses.size()];
        clusterSize = new int[clusters.length];
        int clusterId = 0;
        for (File cluster : clusters) {
            String[] clusterContent = new CarryFileMemory(
                    cluster.getAbsolutePath()).carryCompleteFile();
            ArrayList<String> classes = new ArrayList<String>();
            for (int i = 1; i < clusterContent.length; i++) {
                String term = clusterContent[i].trim();
                if (!term.isEmpty()) {
                    int column = clusterClasses.indexOf(term);
                    if (column != -1) {
                        if (!classes.contains(term)) {
                            classes.add(term);
                        }
                        term_cluster_matrix[clusterId][column] = 1;
                    }
                }
            }
            clusterSize[clusterId] = classes.size();
            clusterId++;
        }
    }

    /**
     * Resolves classes that appear in more than one cluster: the class stays
     * in the cluster chosen by
     * {@link #removeSpreadIntersection(ArrayList)} and is removed from all
     * other clusters, whose sizes are decremented.
     */
    private static void intersectClusters() {
        int classCount = clusterClasses.size();
        for (int item = 0; item < classCount; item++) {
            ArrayList<Integer> clusterIdIntersection = new ArrayList<Integer>();
            for (int clusterId = 0; clusterId < term_cluster_matrix.length; clusterId++) {
                if (term_cluster_matrix[clusterId][item] == 1) {
                    clusterIdIntersection.add(clusterId);
                }
            }
            if (clusterIdIntersection.size() > 1) {
                // Drops the keeper from the list; what remains loses the class.
                removeSpreadIntersection(clusterIdIntersection);
                for (int cluster : clusterIdIntersection) {
                    term_cluster_matrix[cluster][item] = 0;
                    clusterSize[cluster] = clusterSize[cluster] - 1;
                }
            }
        }
    }

    /**
     * Removes from the list the id of the smallest non-empty cluster, i.e. the
     * cluster that keeps the shared class. The first cluster of the list is
     * the initial candidate.
     *
     * @param clusterIdIntersection
     *            ids of the clusters sharing one class; modified in place
     */
    private static void removeSpreadIntersection(
            ArrayList<Integer> clusterIdIntersection) {
        int smallestCluster = clusterIdIntersection.get(0);
        int size = clusterSize[smallestCluster];
        for (int position = 1; position < clusterIdIntersection.size(); position++) {
            int candidate = clusterIdIntersection.get(position);
            if (clusterSize[candidate] < size && clusterSize[candidate] > 0) {
                smallestCluster = candidate;
                // Track the size of the candidate cluster itself, not of the
                // cluster whose id happens to equal the list position.
                size = clusterSize[candidate];
            }
        }
        // Remove by value (cluster id), not by list index.
        clusterIdIntersection.remove(Integer.valueOf(smallestCluster));
    }

    /**
     * Counts how many clusters are empty.
     *
     * @return number of entries of {@link #clusterSize} equal to zero
     */
    private static int countZeroClusters() {
        int count = 0;
        for (int value : clusterSize) {
            if (value == 0) {
                count++;
            }
        }
        return count;
    }

    /**
     * Saves {@code zeroClusters - 1} extra clusters, each taken from the
     * largest itemset not yet used, and removes the saved classes from the
     * other itemsets.
     *
     * @param zeroClusters
     *            number of empty clusters
     * @param itemSets
     *            itemsets read from the Apriori file
     * @param itemSetSize
     *            current size of each itemset; updated in place
     * @throws IOException
     *             if an extra cluster file cannot be written
     */
    private static void saveExtraClusters(int zeroClusters,
            ArrayList<String[]> itemSets, int[] itemSetSize)
            throws IOException {
        if (itemSetSize.length == 0) {
            return; // no itemset available to become an extra cluster
        }
        while (zeroClusters > 1) {
            int index = getTheLargestItemSet(itemSetSize);
            saveNewCluster(index);
            updateItemSetMatrix(itemSetSize, index, itemSets.get(index));
            zeroClusters--;
        }
    }

    /**
     * Clears every shared class of the given itemset from all itemsets
     * (including the given one), decrements the affected sizes and resets the
     * class' shared flag.
     *
     * @param itemSetSize
     *            current size of each itemset; updated in place
     * @param itemSetIndex
     *            index of the saved itemset (not used by the body)
     * @param itemSet
     *            raw items of the saved itemset
     */
    private static void updateItemSetMatrix(int[] itemSetSize,
            int itemSetIndex, String[] itemSet) {
        for (String item : itemSet) {
            // classesName stores trimmed names, so the lookup must trim too.
            int index = classesName.indexOf(item.trim());
            if (index < 0) {
                continue;
            }
            if (intersectionItemSet[index] == 1) {
                for (int itemSetId = 0; itemSetId < itemSet_cluster_matrix.length; itemSetId++) {
                    if (itemSet_cluster_matrix[itemSetId][index] == 1) {
                        itemSet_cluster_matrix[itemSetId][index] = 0;
                        itemSetSize[itemSetId] = itemSetSize[itemSetId] - 1;
                    }
                }
                intersectionItemSet[index] = 0;
            }
        }
    }

    /**
     * Returns the index of the largest itemset and marks it as used by setting
     * its size to -1. The array must not be empty.
     *
     * @param itemSetSize
     *            current size of each itemset; updated in place
     * @return index of the first itemset with the maximum size
     */
    private static int getTheLargestItemSet(int[] itemSetSize) {
        int theLargest = itemSetSize[0];
        int index = 0;
        for (int i = 1; i < itemSetSize.length; i++) {
            if (itemSetSize[i] > theLargest) {
                theLargest = itemSetSize[i];
                index = i;
            }
        }
        itemSetSize[index] = -1;
        return index;
    }

    /**
     * Writes the classes still assigned to the given itemset into the file
     * {@code extra<id>}.
     *
     * @param id
     *            itemset index
     * @throws IOException
     *             if the file cannot be written
     */
    private static void saveNewCluster(int id) throws IOException {
        StringBuilder content = new StringBuilder();
        content.append(Properties.NEW_LINE);
        for (int item = 0; item < itemSet_cluster_matrix[id].length; item++) {
            if (itemSet_cluster_matrix[id][item] == 1) {
                content.append(classesName.get(item)).append(
                        Properties.NEW_LINE);
            }
        }
        Utils.writeFile(content.toString(), path + "extra" + id);
    }

    /**
     * Writes the {@code Miscellaneous} file. The given classes are grouped by
     * itemset: each itemset that still contains at least one of them gets a
     * {@link #SUB_TITLE} heading followed by its classes.
     *
     * @param miscellaneousList
     *            classes still flagged as shared between itemsets
     * @throws IOException
     *             if the file cannot be written
     */
    private static void saveClusterMiscellaneous(
            ArrayList<String> miscellaneousList) throws IOException {
        StringBuilder miscellaneous = new StringBuilder();
        miscellaneous.append(Properties.NEW_LINE);
        for (int itemSetId = 0; itemSetId < itemSet_cluster_matrix.length; itemSetId++) {
            boolean headingWritten = false;
            for (String term : miscellaneousList) {
                if (itemSet_cluster_matrix[itemSetId][classesName
                        .indexOf(term)] == 1) {
                    if (!headingWritten) {
                        miscellaneous.append(Properties.NEW_LINE)
                                .append(SUB_TITLE + itemSetId)
                                .append(Properties.NEW_LINE);
                        headingWritten = true;
                    }
                    miscellaneous.append(term).append(Properties.NEW_LINE);
                }
            }
        }
        Utils.writeFile(miscellaneous.toString(), path + "Miscellaneous");
    }

    /**
     * Collects the classes that are still flagged as shared between itemsets.
     *
     * @return names of the classes whose {@link #intersectionItemSet} flag is 1
     * @throws IOException
     *             never thrown by the current body; kept for signature
     *             compatibility
     */
    private static ArrayList<String> getClusterMiscellaneous()
            throws IOException {
        ArrayList<String> miscellaneous = new ArrayList<String>();
        for (int index = 0; index < intersectionItemSet.length; index++) {
            if (intersectionItemSet[index] == 1) {
                miscellaneous.add(classesName.get(index));
            }
        }
        return miscellaneous;
    }

    /**
     * Returns up to two clusters whose size is between 1 and {@link #MIN_SIZE};
     * when none exists, returns the smallest cluster. Unused slots hold -1.
     * Currently unused.
     *
     * <p>
     * NOTE(review): the search for small clusters starts at index 1, so
     * cluster 0 is never reported by the first pass - confirm whether that is
     * intended.
     *
     * @return a two-element array of cluster ids
     */
    @SuppressWarnings("unused")
    private static int[] getSmallestClusters() {
        int smallest = clusterSize[0];
        int index = 0;
        int[] theSmallestClusters = new int[2];
        int count = 0;
        for (int value = 1; value < clusterSize.length; value++) {
            if (count == 2) {
                break;
            } else if (clusterSize[value] <= MIN_SIZE
                    && clusterSize[value] > 0) {
                theSmallestClusters[count] = value; // the cluster id
                count++;
            }
        }
        if (count == 1) {
            // Second slot is unused; index 1 is the last valid position.
            theSmallestClusters[1] = -1;
        } else if (count == 0) {
            for (int value = 1; value < clusterSize.length; value++) {
                if (clusterSize[value] < smallest) {
                    smallest = clusterSize[value];
                    index = value;
                }
            }
            theSmallestClusters[0] = index;
            theSmallestClusters[1] = -1;
        }
        return theSmallestClusters;
    }

    /**
     * Returns the classes of one cluster, one per line. Currently unused.
     *
     * @param clusterId
     *            row index in the cluster matrix
     * @return the class names, each followed by {@code Properties.NEW_LINE}
     */
    @SuppressWarnings("unused")
    private static StringBuilder readContent(int clusterId) {
        StringBuilder data = new StringBuilder();
        for (int item = 0; item < term_cluster_matrix[0].length; item++) {
            if (term_cluster_matrix[clusterId][item] == 1) {
                data.append(clusterClasses.get(item)).append(
                        Properties.NEW_LINE);
            }
        }
        return data;
    }

    /**
     * Merges two clusters into the file of the first one and updates
     * {@link #clusterSize} (the first receives the sum, the second becomes 0).
     * Currently unused.
     *
     * @param selectedClusters
     *            two valid cluster ids
     * @throws IOException
     *             if the file cannot be written
     */
    @SuppressWarnings("unused")
    private static void unionBetweenTwoClusters(int[] selectedClusters)
            throws IOException {
        StringBuilder data = new StringBuilder();
        data.append(Properties.NEW_LINE);
        for (int clusterId : selectedClusters) {
            for (int item = 0; item < term_cluster_matrix[0].length; item++) {
                if (term_cluster_matrix[clusterId][item] == 1) {
                    data.append(clusterClasses.get(item)).append(
                            Properties.NEW_LINE);
                    term_cluster_matrix[clusterId][item] = 0;
                }
            }
        }
        clusterSize[selectedClusters[0]] = clusterSize[selectedClusters[0]]
                + clusterSize[selectedClusters[1]];
        clusterSize[selectedClusters[1]] = 0;
        // Use the bare file name: concatenating the File itself would append
        // its full path to the directory path.
        Utils.writeFile(data.toString(), path
                + clusters[selectedClusters[0]].getName());
    }

    /**
     * Deletes the cluster files whose size is zero.
     *
     * @throws IOException
     *             never thrown by the current body; kept for signature
     *             compatibility
     */
    private static void deleteFiles() throws IOException {
        // NOTE(review): presumably a workaround to release file handles that
        // are still open before deleting - confirm whether it is still needed.
        System.gc();
        for (int index = 0; index < clusterSize.length; index++) {
            if (clusterSize[index] == 0) {
                System.out.println("Deleting ... " + clusters[index].getName());
                if (!clusters[index].delete()) {
                    System.err.println("Could not delete "
                            + clusters[index].getName());
                }
            }
        }
    }

    /**
     * Rewrites every non-empty cluster file so that it contains its original
     * header line followed only by the classes still assigned to the cluster
     * in the matrix.
     *
     * @throws FileNotFoundException
     *             presumably when a cluster file disappeared - depends on
     *             {@code CarryFileMemory}
     * @throws IOException
     *             if a cluster file cannot be read or written
     */
    private static void removeItemsFromClusterFiles()
            throws FileNotFoundException, IOException {
        for (int clusterId = 0; clusterId < term_cluster_matrix.length; clusterId++) {
            if (clusterSize[clusterId] > 0) {
                StringBuilder cleanedCluster = new StringBuilder();
                String[] clusterContent = new CarryFileMemory(
                        clusters[clusterId].getAbsolutePath())
                        .carryCompleteFile();
                cleanedCluster.append(clusterContent[0].trim()).append(
                        Properties.NEW_LINE);
                for (int item = 0; item < term_cluster_matrix[clusterId].length; item++) {
                    if (term_cluster_matrix[clusterId][item] == 1) {
                        cleanedCluster.append(clusterClasses.get(item))
                                .append(Properties.NEW_LINE);
                    }
                }
                Utils.writeFile(cleanedCluster.toString(),
                        clusters[clusterId].getAbsolutePath());
            }
        }
    }
}