package ids.utils;
import ids.clustering.model.ConstraintType;
import ids.clustering.model.Domain;
import ids.clustering.model.Pair;
import ids.clustering.utils.ClusterUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
import cern.colt.matrix.DoubleMatrix2D;
import cern.colt.matrix.impl.SparseDoubleMatrix2D;
/**
 * Helpers for generating, parsing and propagating pairwise clustering
 * constraints (must-link / cannot-link).
 *
 * <p>The transitive-closure methods keep their working state in the
 * {@code tc_*} instance fields; {@link #getNeighborhood()} exposes the labels
 * of the most recent run. Because of that shared state a single instance
 * should not run two closures at the same time.
 */
public class ConstraintsUtils {

    private final boolean verbose;
    private final Logger log;
    private final ClusterUtils clusterUtils;
    private final CommonUtils commonUtils;

    // Transitive closure working state (shared by the dense and sparse variants).
    int tc_n = 0;      // number of nodes of the last processed matrix
    int[] tc_done;     // 1 once a node has been labelled, 0 otherwise
    double[][] tc_A;   // dense adjacency matrix of the last dense run
    int[] tc_c;        // neighborhood (component) label of every node

    // Sparse transitive closure: adjacency matrix of the last sparse run.
    DoubleMatrix2D tc_sA;

    /**
     * @param verbose when {@code true}, progress messages are printed / logged
     */
    public ConstraintsUtils(boolean verbose) {
        this.verbose = verbose;
        // Always create the logger so that a log call can never hit a null reference.
        this.log = Logger.getLogger(getClass().getName());
        this.clusterUtils = new ClusterUtils(verbose);
        this.commonUtils = new CommonUtils(verbose);
    }

    /**
     * Generates pairwise constraints from the clustering stored in {@code domain}.
     *
     * <p>For every cluster {@code 0 .. domain.k-1} the points are ordered by the
     * {@code ArrayIndexComparator} built on their distance to the cluster
     * centroid (presumably closest first - confirm against that comparator) and
     * the first {@code domain.number_constraints} of them are kept. Every
     * unordered pair of kept points then yields one row
     * {@code {pointA + 1, pointB + 1, type}}, where {@code type} is {@code 1}
     * (must-link) if both points come from the same cluster and {@code 2}
     * (cannot-link) otherwise. Point IDs in the result are 1-based.
     *
     * @param domain clustering result to derive the constraints from
     * @return the constraint rows, or {@code null} when
     *         {@code domain.number_constraints == 0}
     * @throws IllegalArgumentException if the number of pairs does not fit in an array
     */
    public double[][] getConstraintList(Domain domain) {
        if (domain.number_constraints == 0) {
            return null;
        }
        List<PointClusterPair> selected = selectRepresentativePoints(domain);
        if (verbose) {
            System.out.println("Constraints Utils: Number of constraints: " + selected.size());
        }
        return buildPairwiseConstraints(selected);
    }

    /** Collects, per cluster, the first {@code number_constraints} points in comparator order. */
    private List<PointClusterPair> selectRepresentativePoints(Domain domain) {
        List<PointClusterPair> selected = new ArrayList<PointClusterPair>();
        for (int cluster = 0; cluster < domain.k; cluster++) {
            SearchResult<Double> sr = clusterUtils.getClusterData(domain.data, domain.idx, cluster);
            List<Integer> points = sr.getIndices();
            if (points == null) {
                System.out.println("Empty cluster found.");
                continue;
            }

            // Distance of every cluster member to the centroid of its cluster.
            double[] pd = commonUtils.getDistance(sr.getData(), domain.centroids[cluster], domain.distance);

            // Sort the local (within-cluster) indices by that distance.
            ArrayIndexComparator comp = new ArrayIndexComparator(pd);
            Integer[] index = comp.createIndex();
            Arrays.sort(index, comp);

            // Translate local indices into indices of the full data set.
            int[] globalIndex = commonUtils.getElementsByIndeces(points, index);

            if (verbose) {
                System.out.println("Cluster: " + cluster);
                for (int j = 0; j < globalIndex.length; j++) {
                    double real_pd = commonUtils.getDistance(domain.data[globalIndex[j]], domain.centroids[cluster], domain.distance);
                    System.out.println(index[j].toString() + ": distance: " + pd[index[j]] + ": global index: " + globalIndex[j] + ": real distance: " + real_pd);
                }
            }

            // A cluster smaller than number_constraints contributes all of its points.
            int limit = Math.min(domain.number_constraints, globalIndex.length);
            for (int j = 0; j < limit; j++) {
                selected.add(new PointClusterPair(globalIndex[j], cluster));
            }
        }
        return selected;
    }

    /** Builds one constraint row for every unordered pair of the given points. */
    private double[][] buildPairwiseConstraints(List<PointClusterPair> selected) {
        int n = selected.size();
        // Compute in long: n * (n - 1) overflows int once n exceeds 46341.
        long pairCount = (long) n * (n - 1) / 2;
        if (pairCount > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Too many constraint pairs (" + pairCount + ") for " + n + " points");
        }
        double[][] res = new double[(int) pairCount][3];
        int row = 0;
        for (int i = 0; i < n; i++) {
            PointClusterPair first = selected.get(i);
            for (int j = i + 1; j < n; j++) {
                PointClusterPair second = selected.get(j);
                res[row][0] = first.point_index + 1; // constraint IDs are 1-based
                res[row][1] = second.point_index + 1;
                // 1 = must-link (same cluster), 2 = cannot-link (different clusters)
                res[row][2] = (first.cluster_index == second.cluster_index) ? 1 : 2;
                row++;
            }
        }
        return res;
    }

    /** A data point (index into the full data set) together with its cluster index. */
    private static final class PointClusterPair {
        final int point_index;
        final int cluster_index;

        PointClusterPair(int point, int cluster) {
            this.point_index = point;
            this.cluster_index = cluster;
        }
    }

    /**
     * Propagates cannot-link constraints between lambda neighborhoods: as soon
     * as one member of neighborhood {@code i} has a cannot-link entry with one
     * member of neighborhood {@code j}, every cross pair of the two
     * neighborhoods is marked as cannot-link (in both directions) in {@code C}.
     *
     * @param C       cannot-link matrix, updated in place; only the
     *                {@code (i, j)} direction is inspected when testing, so it
     *                is expected to be symmetric
     * @param nLambda neighborhood label of every point
     * @param lambda  number of neighborhoods (labels {@code 0 .. lambda-1})
     */
    public void inferCannotLinkConstraints(DoubleMatrix2D C, int[] nLambda, int lambda) {
        // The member sets depend only on nLambda, so compute each of them once.
        List<Set<Integer>> neighborhoods = new ArrayList<Set<Integer>>();
        for (int i = 0; i < lambda; i++) {
            neighborhoods.add(commonUtils.getIndicesByValue(nLambda, i));
        }

        for (int i = 0; i < lambda; i++) {
            Set<Integer> set_i = neighborhoods.get(i);
            for (int j = i + 1; j < lambda; j++) {
                Set<Integer> set_j = neighborhoods.get(j);
                // Two singletons: the only possible pair is the explicit
                // constraint itself, so there is nothing to infer. Skip just
                // this pair - later neighborhoods may still be larger.
                if (set_i.size() == 1 && set_j.size() == 1) {
                    continue;
                }
                if (testForCannotLinkConstraints(set_i, set_j, C)) {
                    inferCannotLinksInN(set_i, set_j, C);
                }
            }
        }
    }

    /** Returns {@code true} if any cross pair of the two sets is marked {@code 1.0} in {@code C}. */
    private boolean testForCannotLinkConstraints(Set<Integer> set_i, Set<Integer> set_j, DoubleMatrix2D C) {
        for (Integer i : set_i) {
            for (Integer j : set_j) {
                if (C.getQuick(i, j) == 1.0) {
                    return true;
                }
            }
        }
        return false;
    }

    /** Marks every cross pair of the two sets as cannot-link, symmetrically, in {@code C}. */
    private void inferCannotLinksInN(Set<Integer> set_i, Set<Integer> set_j, DoubleMatrix2D C) {
        for (Integer i : set_i) {
            for (Integer j : set_j) {
                C.setQuick(i, j, 1.0);
                C.setQuick(j, i, 1.0);
                if (verbose) {
                    log.info("Creating cannot-link constraints between points " + i + " and " + j);
                }
            }
        }
    }

    /**
     * Dense transitive closure. Nodes are grouped by depth-first search over
     * the entries of {@code A} that equal {@code 1}; groups are labelled
     * {@code 0, 1, ...} in order of their lowest node index.
     *
     * @param A square adjacency matrix ({@code A[r][c] == 1} means an edge)
     * @return an {@code n x n} matrix holding {@code 1} on the diagonal and for
     *         every pair of nodes that received the same label, {@code 0} elsewhere
     */
    public double[][] TransitiveClosure(double[][] A) {
        initTC(A);

        // One stack is enough for the whole run: every node is pushed at most once.
        int[] stack = new int[tc_n];
        int label = -1; // incremented before use, so the first label is 0
        for (int i = 0; i < tc_n; i++) {
            if (tc_done[i] == 0) {
                label++;
                labelDenseComponent(i, label, stack);
            }
        }

        double[][] tc = new double[tc_n][tc_n];
        for (int i = 0; i < tc_n; i++) {
            tc[i][i] = 1; // diagonal
            for (int j = i + 1; j < tc_n; j++) {
                if (tc_c[i] == tc_c[j]) {
                    tc[i][j] = 1;
                    tc[j][i] = 1;
                }
            }
        }
        return tc;
    }

    /**
     * @return the neighborhood label of every node as computed by the most
     *         recent transitive-closure call (the internal array, not a copy);
     *         {@code null} if no closure has been computed yet
     */
    public int[] getNeighborhood() {
        return tc_c;
    }

    /**
     * Labels every not-yet-labelled node reachable from {@code start} in the
     * dense matrix. Iterative so that long chains cannot overflow the call stack.
     */
    private void labelDenseComponent(int start, int label, int[] stack) {
        int top = 0;
        tc_done[start] = 1;
        tc_c[start] = label;
        stack[top++] = start;
        while (top > 0) {
            int row = stack[--top];
            for (int i = 0; i < tc_n; i++) {
                if (tc_done[i] == 0 && tc_A[row][i] == 1) {
                    // Mark on push so a node can never be pushed twice.
                    tc_done[i] = 1;
                    tc_c[i] = label;
                    stack[top++] = i;
                }
            }
        }
    }

    /** Resets the working state for a dense run over {@code A}. */
    private void initTC(double[][] A) {
        tc_n = A.length;
        tc_done = new int[tc_n];
        tc_A = A.clone(); // shallow copy: the rows are shared with the caller and only read
        tc_c = new int[tc_n];
    }

    /**
     * Sparse transitive closure; same algorithm and labelling as the dense
     * variant, operating on Colt matrices.
     *
     * @param A square adjacency matrix (an entry of {@code 1.0} means an edge)
     * @return a sparse {@code n x n} matrix holding {@code 1.0} on the diagonal
     *         and for every pair of nodes that received the same label
     */
    public DoubleMatrix2D TransitiveClosure(DoubleMatrix2D A) {
        initSTC(A);

        int[] stack = new int[tc_n];
        int label = -1; // incremented before use, so the first neighborhood is 0
        for (int i = 0; i < tc_n; i++) {
            if (tc_done[i] == 0) {
                label++;
                labelSparseComponent(i, label, stack);
            }
        }

        DoubleMatrix2D tc = new SparseDoubleMatrix2D(tc_n, tc_n);
        for (int i = 0; i < tc_n; i++) {
            tc.setQuick(i, i, 1.0); // diagonal
            for (int j = i + 1; j < tc_n; j++) {
                if (tc_c[i] == tc_c[j]) {
                    tc.setQuick(i, j, 1.0);
                    tc.setQuick(j, i, 1.0);
                }
            }
        }
        return tc;
    }

    /**
     * Labels every not-yet-labelled node reachable from {@code start} in the
     * sparse matrix. Iterative so that long chains cannot overflow the call stack.
     */
    private void labelSparseComponent(int start, int label, int[] stack) {
        int top = 0;
        tc_done[start] = 1;
        tc_c[start] = label;
        stack[top++] = start;
        while (top > 0) {
            int row = stack[--top];
            for (int i = 0; i < tc_n; i++) {
                // Cheap array test first; the matrix lookup is only done for open nodes.
                if (tc_done[i] == 0 && tc_sA.getQuick(row, i) == 1.0) {
                    tc_done[i] = 1;
                    tc_c[i] = label;
                    stack[top++] = i;
                }
            }
        }
    }

    /** Resets the working state for a sparse run over {@code A}. */
    private void initSTC(DoubleMatrix2D A) {
        tc_n = A.rows();
        tc_done = new int[tc_n];
        tc_sA = A.copy();
        tc_c = new int[tc_n];
    }

    /**
     * Converts the first {@code n} rows of a constraint matrix into
     * {@code Pair} objects. Each row is
     * {@code <point_index_a> <point_index_b> <constraint_type>} with type
     * {@code 1} = must-link and {@code 2} = cannot-link. The point indices are
     * handed to {@code Pair} as they are (no index shift is applied here).
     *
     * @param constraints constraint rows
     * @param n           number of leading rows of {@code constraints} to convert
     * @return one {@code Pair} per converted row, in row order
     * @throws IllegalArgumentException if a row has a type other than 1 or 2
     * @throws Exception declared for backward compatibility with existing callers
     */
    public ArrayList<Pair> ParseConstraints(double[][] constraints, int n) throws Exception {
        ArrayList<Pair> res = new ArrayList<Pair>();
        for (int i = 0; i < n; i++) {
            int a = (int) constraints[i][0];
            int b = (int) constraints[i][1];
            int code = (int) constraints[i][2];

            ConstraintType type;
            if (code == 1) {
                type = ConstraintType.MUST_LINK;
                if (verbose) {
                    log.info("Creating must-link constraint between object " + a + " and " + b);
                }
            } else if (code == 2) {
                type = ConstraintType.CANNOT_LINK;
                if (verbose) {
                    log.info("Creating cannot-link constraint between object " + a + " and " + b);
                }
            } else {
                throw new IllegalArgumentException("Cannot find constraint type");
            }
            res.add(new Pair(a, b, type));
        }
        return res;
    }

    /**
     * Parse constraint from double[][] matrix in format
     * <point_index_a> <point_index_b> <constraint_type>
     * if "constraint_type" = 1 then this is a must-link constraint
     * if "constraint_type" = 2 then this is a cannot-link constraint
     *
     * <p>Only rows whose type matches {@code type} are stored; for every other
     * row a notice is printed and the row is skipped. Point IDs are 1-based in
     * the input and are shifted to 0-based matrix indices. Each stored
     * constraint sets both {@code (a, b)} and {@code (b, a)} to {@code 1.0}.
     *
     * @param constraints - double[][] matrix of constraints
     * @param n - number of points in data set
     * @param type - the target constraint type
     * @return constraint matrix in sparse format, or {@code null} when
     *         {@code constraints} is {@code null} or empty
     * @throws IllegalArgumentException if a stored constraint references a point
     *         outside {@code 1 .. n}
     */
    public DoubleMatrix2D ParseConstraints(double[][] constraints, int n, ConstraintType type) {
        if (constraints == null || constraints.length == 0) {
            return null;
        }
        if (verbose) {
            System.out.println("Parsing constraints..");
        }

        DoubleMatrix2D res = new SparseDoubleMatrix2D(n, n);
        int counter = 0;
        for (int i = 0; i < constraints.length; i++) {
            int a = (int) constraints[i][0] - 1; // -1: IDs start at 1 in the constraint data, at 0 in the matrix
            int b = (int) constraints[i][1] - 1;
            int code = (int) constraints[i][2];

            boolean mustLink = code == 1 && type == ConstraintType.MUST_LINK;
            boolean cannotLink = code == 2 && type == ConstraintType.CANNOT_LINK;
            if (!mustLink && !cannotLink) {
                System.out.println("Cannot find constraints type or not a target constraint type");
                continue;
            }

            // setQuick performs no range check; an out-of-range index would
            // silently land in an unrelated cell, so reject it explicitly.
            if (a < 0 || a >= n || b < 0 || b >= n) {
                throw new IllegalArgumentException("Constraint row " + i + " references objects "
                        + (a + 1) + " and " + (b + 1) + ", expected IDs in 1.." + n);
            }

            counter++;
            res.setQuick(a, b, 1.0);
            res.setQuick(b, a, 1.0);
            if (verbose) {
                System.out.println("Creating a " + (mustLink ? "must-link" : "cannot-link")
                        + " constraint between object " + a + " and " + b);
            }
        }

        if (verbose) {
            // Every constraint is stored in both directions, hence the factor of two.
            System.out.printf("Done. %d constraints has been parsed.\n", counter * 2);
        }
        return res;
    }
}