package br.ufmg.dcc.labsoft.aserg.modularitycheck.enhancements.processing.data;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import data.handler.CarryFileMemory;
import br.ufmg.dcc.labsoft.aserg.modularitycheck.enhancements.properties.util.MathImpl;
import br.ufmg.dcc.labsoft.aserg.modularitycheck.enhancements.properties.util.Properties;
import br.ufmg.dcc.labsoft.aserg.modularitycheck.enhancements.properties.util.Quicksort;
import br.ufmg.dcc.labsoft.aserg.modularitycheck.enhancements.properties.util.Utils;
/**
 * Builds a "Miscellaneous" cluster out of the entries of
 * {@code TestData.getHashFiles()} that rank first after sorting by list size,
 * and rewrites every cluster file without those entries.
 *
 * <p>
 * The keys of {@code TestData.getHashFiles()} are treated as class names and
 * the values as the names of the cluster files each class belongs to
 * (presumably produced by an earlier clustering step - confirm against
 * {@code TestData}).
 */
public class MiscellaneousCluster {

    /**
     * Keys of {@code TestData.getHashFiles()}, reordered by
     * {@code Quicksort.sort} in lockstep with {@link #count}.
     */
    private String[] topTen;

    /** For each entry of {@link #topTen}, the size of the list mapped to it. */
    private int[] count;

    /** Number of classes in each cluster, sorted ascending by {@code Arrays.sort}. */
    private int[] clusterSize;

    private int median;
    private int mean;

    /**
     * Last index of {@link #topTen} considered "frequent"; {@code -1} (or lower)
     * means no class is selected.
     */
    private int theChosenValue;

    private int deviation;

    /** Cluster name mapped to the classes that list this cluster. */
    Map<String, ArrayList<String>> hashClusters;

    /** Size of the largest cluster seen by the last {@link #searchMean()} run. */
    private int biggest;

    public MiscellaneousCluster() {
        median = 0;
        mean = 0;
        biggest = 0;
    }

    /**
     * Inverts {@code TestData.getHashFiles()} into {@link #hashClusters}: for
     * every class in {@link #topTen}, the class is registered under each cluster
     * it is mapped to.
     *
     * <p>
     * The most recently processed class is placed at the front of the list,
     * which is the order later written back by {@link #cleanClusters(int)}.
     *
     * @throws FileNotFoundException
     *             declared for callers; not raised by the visible code
     * @throws IOException
     *             declared for callers; not raised by the visible code
     */
    private void getClusterSize() throws FileNotFoundException, IOException {
        hashClusters = new HashMap<String, ArrayList<String>>();
        for (int i = 0; i < topTen.length; i++) {
            ArrayList<String> clusters = TestData.getHashFiles().get(topTen[i]);
            for (String cluster : clusters) {
                ArrayList<String> classesName = hashClusters.get(cluster);
                if (classesName == null) {
                    classesName = new ArrayList<String>();
                    hashClusters.put(cluster, classesName);
                }
                // Insert at the front so the newest class precedes the older ones.
                classesName.add(0, topTen[i]);
            }
        }
    }

    /**
     * Records the size of every cluster in {@link #clusterSize}, tracks the
     * largest one, sorts the sizes and computes their mean.
     */
    private void searchMean() {
        clusterSize = new int[hashClusters.size()];
        // mean is used as the running total until assessMean() replaces it.
        mean = 0;
        // Reset together with mean so repeated runs do not keep a stale maximum.
        biggest = 0;
        int index = 0;
        for (Map.Entry<String, ArrayList<String>> entry : hashClusters.entrySet()) {
            int size = entry.getValue().size();
            clusterSize[index] = size;
            mean += size;
            // The median is not computed yet at this point, so it is not reported.
            System.out.println("Cluster: " + entry.getKey() + " Size: " + size);
            if (size > biggest) {
                biggest = size;
            }
            index++;
        }
        Arrays.sort(clusterSize);
        assessMean();
    }

    /**
     * Replaces the total accumulated in {@link #mean} with the value returned
     * by {@code MathImpl.getMean(total, clusterSize.length)}, truncated to int.
     */
    private void assessMean() {
        mean = (int) MathImpl.getMean(mean, clusterSize.length);
    }

    /**
     * Stores {@code MathImpl.getMedian(clusterSize)}, truncated to int.
     */
    private void assessMedian() {
        median = (int) MathImpl.getMedian(clusterSize);
    }

    /**
     * Stores {@code MathImpl.getDeviation(clusterSize)}, truncated to int.
     */
    private void assessDeviation() {
        deviation = (int) MathImpl.getDeviation(clusterSize);
    }

    /**
     * Fills {@link #topTen} with every key of {@code TestData.getHashFiles()}
     * and {@link #count} with the size of the list mapped to it, then hands
     * both arrays to {@code Quicksort.sort}.
     */
    private void getClasseFrequency() {
        int top = TestData.getHashFiles().keySet().size();
        topTen = new String[top];
        count = new int[top];
        int index = 0;
        for (String key : TestData.getHashFiles().keySet()) {
            topTen[index] = key;
            count[index] = TestData.getHashFiles().get(key).size();
            index++;
        }
        // NOTE(review): the loops below read the arrays as if the highest
        // counts came first - confirm the ordering produced by Quicksort.sort.
        Quicksort.sort(count, topTen);
    }

    /**
     * Runs the whole pipeline: rank the classes, group them by cluster, compute
     * the cluster-size statistics, write the Miscellaneous file and rewrite the
     * cluster files without the selected classes.
     *
     * @param beginInFile
     *            1 for pam clusters and 2 for lda
     * @throws FileNotFoundException
     *             propagated from reading a cluster file
     * @throws IOException
     *             propagated from reading or writing a file
     */
    public void createMiscellaneousCluster(int beginInFile)
            throws FileNotFoundException, IOException {
        getClasseFrequency();
        getClusterSize();
        searchMean();
        assessMedian();
        assessDeviation();
        printClusterMiscellaneous();
        cleanClusters(beginInFile);
    }

    /**
     * Chooses the cut-off index and writes the selected class names, one per
     * line after a leading blank line, to the "Miscellaneous" file under
     * {@code Properties.getClusterPath()}.
     *
     * @throws IOException
     *             propagated from {@code Utils.writeFile}
     */
    private void printClusterMiscellaneous() throws IOException {
        StringBuilder finalReport = new StringBuilder();
        finalReport.append(Properties.NEW_LINE);
        theChosenValue = selectCutoffIndex(mean + deviation);
        // NOTE(review): entries whose count is <= 1 are not written here, but
        // removeFrequentClasses() does not apply that guard - confirm intent.
        for (int i = 0; i <= theChosenValue && count[i] > 1; i++) {
            finalReport.append(topTen[i]).append(Properties.NEW_LINE);
        }
        Utils.writeFile(finalReport.toString(), Properties.getClusterPath()
                + "Miscellaneous");
    }

    /**
     * Moves a candidate index to an edge of the run of equal values that
     * surrounds it in {@link #count}.
     *
     * <p>
     * The candidate is first clamped into the valid index range, because it is
     * derived from cluster sizes and is not guaranteed to be a valid position
     * in {@link #count}.
     *
     * @param candidate
     *            proposed cut-off index (mean plus deviation of cluster sizes)
     * @return the index before the run, or the last index of the run when the
     *         candidate lies further from the run start than from its end;
     *         {@code -1} when {@link #count} is empty
     */
    private int selectCutoffIndex(int candidate) {
        if (count.length == 0) {
            return -1;
        }
        int pivot = Math.min(Math.max(candidate, 0), count.length - 1);
        int pivotCount = count[pivot];

        // Walk left to the first position whose count differs from the pivot's.
        int before = pivot - 1;
        while (before >= 0 && count[before] == pivotCount) {
            before--;
        }
        int distanceToStart = pivot - before;

        // NOTE(review): the forward scan starts one position before the pivot
        // (kept from the original); pivot + 1 may have been intended. The start
        // is clamped so that pivot == 0 does not read index -1.
        int after = Math.max(pivot - 1, 0);
        while (after < count.length && count[after] == pivotCount) {
            after++;
        }

        if (distanceToStart > (after - pivot)) {
            return after - 1;
        }
        return before;
    }

    /**
     * Tells whether a class is among the selected frequent classes, i.e. is
     * stored in {@link #topTen} at an index up to {@link #theChosenValue}.
     *
     * @param className
     *            class name to look up
     * @return {@code true} when the class must be dropped from the clusters
     */
    private boolean removeFrequentClasses(String className) {
        for (int i = 0; i <= theChosenValue; i++) {
            if (className.equals(topTen[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Rewrites every cluster file: the header lines of the existing file are
     * kept and followed by the classes of the cluster that are not frequent.
     *
     * @param index
     *            1 keeps one header line, any other value keeps two
     * @throws FileNotFoundException
     *             propagated from reading the cluster file
     * @throws IOException
     *             propagated from reading or writing the cluster file
     */
    private void cleanClusters(int index) throws FileNotFoundException,
            IOException {
        int headerLines = (index == 1) ? 1 : 2;
        for (Map.Entry<String, ArrayList<String>> entry : hashClusters.entrySet()) {
            String clusterFile = Properties.getClusterPath() + entry.getKey();
            String[] clusterContent = new CarryFileMemory(clusterFile)
                    .carryCompleteFile();
            StringBuilder filteredClasses = new StringBuilder();

            // Copy only the header lines that exist, so a short or empty file
            // does not raise ArrayIndexOutOfBoundsException.
            int available = (clusterContent == null) ? 0 : clusterContent.length;
            for (int line = 0; line < headerLines && line < available; line++) {
                filteredClasses.append(clusterContent[line]).append(
                        Properties.NEW_LINE);
            }

            for (String className : entry.getValue()) {
                if (!removeFrequentClasses(className)) {
                    filteredClasses.append(className).append(
                            Properties.NEW_LINE);
                }
            }
            Utils.writeFile(filteredClasses.toString(), clusterFile);
        }
    }
}