package i5.las2peer.services.ocd.algorithms; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.ListIterator; import java.util.Iterator; import java.util.Map; import java.util.Set; import org.apache.commons.math3.linear.Array2DRowRealMatrix; import org.apache.commons.math3.linear.ArrayRealVector; import org.la4j.matrix.Matrix; import i5.las2peer.services.ocd.algorithms.utils.Cluster; import i5.las2peer.services.ocd.algorithms.utils.Clustering; import i5.las2peer.services.ocd.algorithms.utils.OcdAlgorithmException; import i5.las2peer.services.ocd.algorithms.utils.Point; import i5.las2peer.services.ocd.algorithms.utils.Termmatrix; import i5.las2peer.services.ocd.graphs.Cover; import i5.las2peer.services.ocd.graphs.CoverCreationType; import i5.las2peer.services.ocd.graphs.CustomGraph; import i5.las2peer.services.ocd.graphs.GraphType; import i5.las2peer.services.ocd.metrics.ExecutionTime; import y.base.Node; public class WordClusteringRefinementAlgorithm implements OcdAlgorithm{ ///////////////// ////Variables//// ///////////////// /** * Coefficient to determine if two clusters should be merged. */ private double overlappCoef = 0.5; private boolean svd = false; //////////////////////// //// Parameter Names//// //////////////////////// public static final String OVERLAPP_COEF_NAME = "overlappingCoefficient"; public static final String SVD_NAME = "svd"; //////////////////// //// Constructor//// //////////////////// public WordClusteringRefinementAlgorithm(){ } @Override public Set<GraphType> compatibleGraphTypes() { Set<GraphType> compatibilities = new HashSet<GraphType>(); compatibilities.add(GraphType.CONTENT_UNLINKED); return compatibilities; } @Override public CoverCreationType getAlgorithmType() { return CoverCreationType.WORD_CLUSTERING_REF_ALGORITHM; } @Override public Map<String, String> getParameters() { Map<String, String> parameters = new HashMap<String, String>(); parameters.put(OVERLAPP_COEF_NAME, Double.toString(overlappCoef)); parameters.put(SVD_NAME, Boolean.toString(svd)); return parameters; } @Override public void setParameters(Map<String, String> parameters) throws IllegalArgumentException { if(parameters.containsKey(OVERLAPP_COEF_NAME)){ overlappCoef = Double.parseDouble(parameters.get(OVERLAPP_COEF_NAME)); if(overlappCoef < 0 || overlappCoef > 1 ){ throw new IllegalArgumentException(); } parameters.remove(OVERLAPP_COEF_NAME); } if(parameters.containsKey(SVD_NAME)){ svd = Boolean.parseBoolean(parameters.get(SVD_NAME)); parameters.remove(SVD_NAME); } if(parameters.size() > 0) { throw new IllegalArgumentException(); } } @Override public Cover detectOverlappingCommunities(CustomGraph graph) throws OcdAlgorithmException,InterruptedException { Termmatrix termMat = new Termmatrix(graph); graph.setTermMatrix(termMat); ExecutionTime time = new ExecutionTime(); time.start(); if(svd){ double min = 0; Array2DRowRealMatrix m = new Array2DRowRealMatrix(termMat.SVD().getData()); for(int ind = 0; ind < m.getRowDimension(); ind++){ double temp = m.getRowVector(ind).getMinValue(); if(temp < min){ min = temp; } } for(int j = 0; j < m.getRowDimension(); j++){ m.setRowVector(j,(ArrayRealVector) m.getRowVector(j).mapAdd(min)); } termMat.setMatrix((Array2DRowRealMatrix) m); } LinkedList<Cluster> resClust = new LinkedList<Cluster>(); Cluster[] temp = new Cluster[2]; Clustering opt = new Clustering(); // resClust = initializeClustering(termMat); temp = findMergeCandidates(resClust); while(temp != null){ resClust = mergeClusters(temp, resClust, termMat.getNodeIdList()); temp = findMergeCandidates(resClust); } opt.setClustering(resClust); Matrix membershipMatrix = opt.createMembershipMatrixNode(graph); Cover res = new Cover(graph,membershipMatrix); time.stop(); time.setCoverExecutionTime(res); return res; } private Cluster[] findMergeCandidates(LinkedList<Cluster> resClust) { //Cluster[] res = new LinkedList<Cluster[]>(); Cluster[] match = new Cluster[2]; //double max = 0.0; //check each cluster with the rest of the clustering for(Iterator<Cluster> it = resClust.iterator(); it.hasNext();){ Cluster c = it.next(); //List<Cluster> rest = resClust.subList(resClust.indexOf(c) + 1, resClust.size()); for(ListIterator<Cluster> li = resClust.listIterator(resClust.indexOf(c) +1); li.hasNext();){ Cluster cr = li.next(); //compute overlapping coefficient double oc = overlappingCoefficient(c,cr); /*if oc of the clusters is greater equal to the threashold and the maximum of the fixed cluster compared to the restlist add the clusters to the merging candidates*/ if(oc >= overlappCoef /*&& oc > max*/){ //max = oc; match[0] = c; match[1] = cr; return match; } } //res.add(match); } return null; } private LinkedList<Cluster> mergeClusters(Cluster[] candidates, LinkedList<Cluster> list, LinkedList<Node> nodeList) { int index; //merging each entry in the candidates list and adding the new cluster to the result list //for(Cluster[] entry : candidates){ LinkedList<Node> n = candidates[1].getNodes(); Cluster cl = candidates[0]; list.remove(candidates[0]); list.remove(candidates[1]); ArrayRealVector temp = cl.getCentroid(); //check each point in the second cluster, if needs to be added for(int i = 0; i <n.size(); i++){ Node node = n.get(i); //index of the tf-idf value of the node in the matrix index = nodeList.indexOf(node); if(!cl.getNodes().contains(node)){ //add node to the cluster //add tf-idf value to the merged clusters vector temp.setEntry(index,candidates[1].getCentroid().getEntry(index)); cl.addNode(node); } } cl.setCentroid(temp); list.add(cl); return list; } private double overlappingCoefficient(Cluster c, Cluster cr) { double max = 0; double cut = 0; double res; LinkedList<Node> n = c.getNodes(); LinkedList<Node> n1 = cr.getNodes(); ArrayRealVector c1 = c.getCentroid(); ArrayRealVector c2 = cr.getCentroid(); int sizeC1 = n.size(); int sizeC2 = n1.size(); LinkedList<Node> temp = new LinkedList<Node>(n); /*for(int i = 0; i < c1.getDimension(); i++){ if(c1.getEntry(i) != 0){ sizeC1++; if(c2.getEntry(i) != 0){ sizeC2++; cut++; } }else{ if(c2.getEntry(i) != 0){ sizeC2++; } } }*/ if(sizeC1 > sizeC2){ max = sizeC1; }else{ max = sizeC2; } /*if(n.size() < n1.size()){ min = n.size(); }else{ min = n1.size(); }*/ //temp now only contains elements that are in p and p1 temp.retainAll(n1); cut = temp.size(); res = cut/max; return res; } private LinkedList<Cluster> initializeClustering(Termmatrix termMat) { LinkedList<Cluster> res = new LinkedList<Cluster>(); Array2DRowRealMatrix matrix = termMat.getMatrix(); /*for(int i = 0; i < matrix.getRowDimension(); i++){ Cluster c = new Cluster(); //retrieve column vector as centroid of the cluster ArrayRealVector cent = (ArrayRealVector) matrix.getRowVector(i); c.setCentroid(cent); //add corresponding node to the cluster Node n = termMat.getNodeIdList().get(i); c.addNode(n); //add cluster to the result res.add(c); }*/ for(int i = 0; i < matrix.getColumnDimension(); i++){ Cluster c = new Cluster(); //retrieve column vector as centroid of the cluster ArrayRealVector cent = (ArrayRealVector) matrix.getColumnVector(i); c.setCentroid(cent); //add nodes to the cluster for(int j = 0; j < cent.getDimension(); j++){ if(cent.getEntry(j) != 0){ c.addNode(termMat.getNodeIdList().get(j)); } } //add cluster to the result res.add(c); } return res; } }