package de.tud.inf.operator.learner.clustering.clusterer;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.MissingIOObjectException;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.clustering.Cluster;
import com.rapidminer.operator.learner.clustering.ClusterModel;
import com.rapidminer.operator.learner.clustering.DefaultCluster;
import com.rapidminer.operator.learner.clustering.FlatClusterModel;
import com.rapidminer.operator.learner.clustering.FlatCrispClusterModel;
import com.rapidminer.operator.learner.clustering.IdUtils;
import com.rapidminer.operator.learner.clustering.clusterer.AbstractFlatClusterer;
import com.rapidminer.operator.learner.clustering.clusterer.FuzzyMembershipModel;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.UndefinedParameterError;
/**
 * Flat clustering operator that combines several fuzzy clusterings of the same
 * example set into one crisp clustering.
 *
 * <p>For every pair of examples each input {@link FuzzyMembershipModel} casts one
 * vote, derived from the two membership vectors: a+ (positive filtered case value),
 * a- (negative filtered case value) or a? (filtered case value of zero, or NaN).
 * The votes are counted and the pair is placed in the same cluster when a+ is the
 * strict majority, or when the vote is a? / tied and the parameter
 * {@link #PARAMETER_UNDECIDABLE_CASE_MAPPING} maps undecidable cases to a+.
 * Examples that end up without any partner are collected in one noise cluster.</p>
 */
public class SignificanceAwareMultiCaseClustering extends AbstractFlatClusterer {

    /** Class of the input objects that are collected as the clusterings to combine. */
    public static final Class<? extends IOObject> inputModelsClass = FuzzyMembershipModel.class;

    /** Parameter key of the threshold applied by {@link #filter(double)}. */
    public static final String PARAMETER_T = "t";

    /** Parameter key of the slope used inside the tanh of {@link #filter(double)}. */
    public static final String PARAMETER_ALPHA = "alpha";

    /** Parameter key selecting how an undecidable (a?) global vote is resolved. */
    public static final String PARAMETER_UNDECIDABLE_CASE_MAPPING = "a? mapping";

    /** Category labels of {@link #PARAMETER_UNDECIDABLE_CASE_MAPPING}; the index is the parameter value. */
    public static final String[] UNDECIDABLE_CASE_MAPPINGS = {"a+","a-"};

    /** Parameter value: undecidable votes are treated as a+. */
    public static final int UNDECIDABLE_CASE_TO_PLUS = 0;

    /** Parameter value: undecidable votes are treated as a-. */
    public static final int UNDECIDABLE_CASE_TO_MINUS = 1;

    /* Positions of the three vote counters inside the global assignment vector. */
    private static final int CASE_PLUS = 0;
    private static final int CASE_UNDECIDABLE = 1;
    private static final int CASE_MINUS = 2;

    /** Fuzzy clusterings taken from the operator input by {@link #addInputModels()}. */
    private List<FuzzyMembershipModel> fuzzyInputModels;

    /** Value of parameter "t": filtered case values with an absolute value up to this count as zero. */
    private double threshold;

    /** Value of parameter "alpha": factor applied before the tanh in {@link #filter(double)}. */
    private double alpha;

    /** Value of the "a? mapping" parameter, one of the UNDECIDABLE_CASE_TO_* constants. */
    private int undecidableCaseMapping;

    /** Model under construction; also searched by {@link #getClusterForId(String)}. */
    private FlatCrispClusterModel clusterModel;

    public SignificanceAwareMultiCaseClustering(OperatorDescription description) {
        super(description);
    }

    /**
     * Builds the crisp cluster model for the given example set.
     *
     * <p>All pairs (i, j) with i &lt; j are visited. A pair that is decided to belong
     * together is handled as follows: if neither example is clustered yet a new cluster
     * is created for both; if only one is clustered the other one joins that cluster;
     * if both are clustered in different clusters, the cluster of j is merged into the
     * cluster of i and removed from the model. An example that still has no cluster
     * after all its pairs were visited is added to a single, lazily created noise
     * cluster.</p>
     *
     * @param exampleSet the examples to cluster
     * @return the model that was built; the same object is kept in the field used by
     *         {@link #getClusterForId(String)}
     * @throws OperatorException if a parameter cannot be read or two membership vectors
     *         of one input model have different sizes
     */
    @Override
    public ClusterModel createClusterModel(ExampleSet exampleSet)
            throws OperatorException {
        addInputModels();
        setParameters();
        clusterModel = new FlatCrispClusterModel();

        final int exampleCount = exampleSet.size();
        final int[] globalAssignment = new int[3];
        DefaultCluster noiseCluster = null;
        int clusterID = 0;

        for (int i = 0; i < exampleCount; i++) {
            Example exampleI = exampleSet.getExample(i);
            String viId = IdUtils.getIdFromExample(exampleI);

            /* The example may already have been clustered as the partner of an earlier one.
             * If not, it stays unassigned for now; the noise cluster below catches it. */
            DefaultCluster cluster = getClusterForId(viId);

            for (int j = i + 1; j < exampleCount; j++) {
                Example exampleJ = exampleSet.getExample(j);
                String vjId = IdUtils.getIdFromExample(exampleJ);

                countAssignmentCases(exampleI, exampleJ, globalAssignment);
                if (!belongTogether(globalAssignment)) {
                    continue;
                }

                DefaultCluster clusterNew = getClusterForId(vjId);
                if (cluster == null && clusterNew == null) {
                    /* neither example is clustered yet: open a new cluster for both */
                    cluster = new DefaultCluster(String.valueOf(++clusterID));
                    cluster.addObject(viId);
                    cluster.addObject(vjId);
                    clusterModel.addCluster(cluster);
                } else if (clusterNew == null) {
                    /* only the first example is clustered */
                    cluster.addObject(vjId);
                } else if (cluster == null) {
                    /* only the second example is clustered */
                    clusterNew.addObject(viId);
                    cluster = clusterNew;
                } else if (!clusterNew.getId().equals(cluster.getId())) {
                    /* both are clustered, but in different clusters: merge */
                    cluster.addAll(clusterNew);
                    clusterModel.removeCluster(clusterNew);
                }
            }

            /* if the example could not be inserted into a cluster -> add it to the noise cluster */
            if (cluster == null) {
                if (noiseCluster == null) {
                    noiseCluster = new DefaultCluster(String.valueOf(++clusterID));
                    clusterModel.addCluster(noiseCluster);
                }
                noiseCluster.addObject(viId);
            }
        }
        return clusterModel;
    }

    /**
     * Lets every input model vote on one pair of examples and stores the vote counts.
     *
     * @param exampleI first example of the pair
     * @param exampleJ second example of the pair
     * @param counts array of length 3 that is reset and then filled with the number of
     *        a+, a? and a- votes (in this order)
     * @throws OperatorException if the two membership vectors of a model differ in size
     */
    private void countAssignmentCases(Example exampleI, Example exampleJ, int[] counts)
            throws OperatorException {
        counts[CASE_PLUS] = 0;
        counts[CASE_UNDECIDABLE] = 0;
        counts[CASE_MINUS] = 0;

        for (FuzzyMembershipModel model : fuzzyInputModels) {
            Vector<Double> vi = model.getMembership(exampleI);
            Vector<Double> vj = model.getMembership(exampleJ);
            double normalizedCase = filter(normalizedCasePlus(vi, vj));
            if (normalizedCase > 0) {
                counts[CASE_PLUS]++;
            } else if (normalizedCase < 0) {
                counts[CASE_MINUS]++;
            } else {
                /* exactly zero, or NaN (which is neither above nor below zero) */
                counts[CASE_UNDECIDABLE]++;
            }
        }
    }

    /**
     * Decides from the vote counts whether a pair of examples shares a cluster.
     *
     * <p>The pair belongs together if a+ holds the strict majority. A tie between the
     * largest counters, or a majority for a?, is undecidable and follows the
     * "a? mapping" parameter. A tie is always treated as undecidable, even when a+ is
     * one of the tied counters; otherwise such ties would be merged although the
     * mapping asks for a-.</p>
     *
     * @param counts number of a+, a? and a- votes (in this order)
     * @return true if the two examples have to be placed in the same cluster
     */
    private boolean belongTogether(int[] counts) {
        boolean tie = balanced(counts);
        int dominantCase = maxIndex(counts);
        if (!tie && dominantCase == CASE_PLUS) {
            return true;
        }
        boolean undecidable = tie || dominantCase == CASE_UNDECIDABLE;
        return undecidable && undecidableCaseMapping == UNDECIDABLE_CASE_TO_PLUS;
    }

    /**
     * Extends the inherited parameter list by the threshold "t", the slope "alpha" and
     * the "a? mapping" category; all three are marked as non-expert parameters.
     *
     * @return the list returned by the super class with the three parameters appended
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();

        ParameterType type = new ParameterTypeDouble(PARAMETER_T,"The treshold value",0.0,1.0,0.1);
        type.setExpert(false);
        parameterTypes.add(type);

        type = new ParameterTypeDouble(PARAMETER_ALPHA,"The slope for the tanh-function",1.0,Double.MAX_VALUE,1.0);
        type.setExpert(false);
        parameterTypes.add(type);

        type = new ParameterTypeCategory(PARAMETER_UNDECIDABLE_CASE_MAPPING,"The mapping of the a? case of the global clustering",UNDECIDABLE_CASE_MAPPINGS,0);
        type.setExpert(false);
        parameterTypes.add(type);

        return parameterTypes;
    }

    /**
     * @return the input classes of this operator: an example set and a fuzzy membership model
     */
    @Override
    public Class<?>[] getInputClasses() {
        return new Class<?>[] { ExampleSet.class, FuzzyMembershipModel.class};
    }

    /**
     * Removes every {@link FuzzyMembershipModel} from the operator input and stores
     * them, in removal order, as the clusterings to combine. Any previously stored
     * models are discarded.
     */
    public void addInputModels() {
        IOContainer inputContainer = this.getInput();
        fuzzyInputModels = new ArrayList<FuzzyMembershipModel>();
        while (inputContainer.contains(inputModelsClass)) {
            try {
                fuzzyInputModels.add(inputContainer.remove(FuzzyMembershipModel.class));
            } catch (MissingIOObjectException e) {
                log("This Exception should never occur. Must be something wrong with the IOContainer implementation");
                /* The container still reports a model it cannot hand out;
                 * retrying would never terminate, so stop collecting. */
                break;
            }
        }
    }

    /**
     * Checks whether both vectors reach their running maximum at the same component.
     *
     * <p>Both running maxima start at 0.0 and a component that equals the running
     * maximum replaces it, so among equal values the last one wins.</p>
     *
     * @param vi first membership vector
     * @param vj second membership vector
     * @return 1 if the last update of the two running maxima happened at the same
     *         component for both vectors, -1 otherwise (also for empty vectors)
     * @throws OperatorException if the vectors differ in size
     */
    public int c(Vector<Double> vi, Vector<Double> vj) throws OperatorException {
        if (vi.size() != vj.size()) {
            throw new OperatorException("Assingnment Vectors differ");
        }
        int ret = -1;
        double maxTempValueI = 0.0;
        double maxTempValueJ = 0.0;
        for (int i = 0; i < vi.size(); i++) {
            double valueI = vi.get(i);
            double valueJ = vj.get(i);
            boolean reachesMaxI = valueI >= maxTempValueI;
            boolean reachesMaxJ = valueJ >= maxTempValueJ;
            if (reachesMaxI && reachesMaxJ) {
                /* both vectors peak (so far) at this component */
                ret = 1;
                maxTempValueI = valueI;
                maxTempValueJ = valueJ;
            } else if (reachesMaxI) {
                ret = -1;
                maxTempValueI = valueI;
            } else if (reachesMaxJ) {
                ret = -1;
                maxTempValueJ = valueJ;
            }
        }
        return ret;
    }

    /**
     * Tells whether the maximum of the vector is shared by several components.
     * The running maximum starts at 0.0, so components equal to 0.0 that precede the
     * first positive component are counted until a larger value resets the count.
     *
     * @param v membership vector
     * @return true if two or more components hold the maximum
     */
    public boolean balanced(Vector<Double> v) {
        double max = 0.0;
        int maxCount = 0;
        for (double value : v) {
            if (value > max) {
                /* new maximum */
                max = value;
                maxCount = 1;
            } else if (value == max) {
                /* one more component holds the maximum */
                maxCount++;
            }
        }
        return maxCount > 1;
    }

    /**
     * Finds the position of the largest component.
     *
     * @param v vector to search; the search starts with a maximum of -1
     * @return index of the first component that is larger than all components before
     *         it and not smaller than any later one; -1 if the vector is empty or has
     *         no component above -1
     */
    public int maxIndex(int[] v) {
        int max = -1;
        int maxIndex = -1;
        for (int i = 0; i < v.length; i++) {
            if (v[i] > max) {
                max = v[i];
                maxIndex = i;
            }
        }
        return maxIndex;
    }

    /**
     * Integer variant of {@link #balanced(Vector)}; the running maximum starts at 0.
     *
     * @param v vector of counters
     * @return true if two or more components hold the maximum
     */
    public boolean balanced(int[] v) {
        int max = 0;
        int maxCount = 0;
        for (int value : v) {
            if (value > max) {
                /* new maximum */
                max = value;
                maxCount = 1;
            } else if (value == max) {
                /* one more component holds the maximum */
                maxCount++;
            }
        }
        return maxCount > 1;
    }

    /**
     * Determines the pairwise assignment case of two membership vectors.
     *
     * @param vi first membership vector
     * @param vj second membership vector
     * @return 1 if {@link #c(Vector, Vector)} is 1 and neither vector is balanced,
     *         -1 if {@link #c(Vector, Vector)} is -1, and 0 otherwise
     * @throws OperatorException if the vectors differ in size
     */
    public int paCase(Vector<Double> vi, Vector<Double> vj) throws OperatorException {
        int cValue = c(vi, vj);
        if (cValue == -1) {
            return -1;
        }
        boolean anyBalanced = balanced(vi) || balanced(vj);
        return (cValue == 1 && !anyBalanced) ? 1 : 0;
    }

    /**
     * Sums, over all components, the product of both vectors' absolute deviations
     * from 1/size, where size is the number of components of {@code vi}.
     *
     * @param vi first membership vector; its size determines the number of components read
     * @param vj second membership vector; must have at least as many components as {@code vi}
     * @return the accumulated significance, 0.0 for an empty vector
     */
    public double significance(Vector<Double> vi, Vector<Double> vj) {
        int kl = vi.size();
        double uniformMembership = 1.0 / (double) kl;
        double significance = 0.0;
        for (int i = 0; i < kl; i++) {
            double deviationI = Math.abs(vi.get(i) - uniformMembership);
            double deviationJ = Math.abs(vj.get(i) - uniformMembership);
            significance += deviationI * deviationJ;
        }
        return significance;
    }

    /**
     * Weights an assignment case with the significance of the two vectors.
     *
     * @param vi first membership vector
     * @param vj second membership vector
     * @param paCase assignment case as returned by {@link #paCase(Vector, Vector)}
     * @return {@code paCase} multiplied by {@link #significance(Vector, Vector)}
     */
    public double casePlus(Vector<Double> vi, Vector<Double> vj, int paCase) throws OperatorException {
        return paCase * significance(vi, vj);
    }

    /**
     * Returns the divisor used to normalize a weighted case value.
     * NOTE(review): the formulas presumably bound the significance for the respective
     * case; this cannot be confirmed from this file.
     *
     * @param numberOfCluster number of components of the membership vectors
     * @param caseOutcome assignment case (1, -1 or 0)
     * @return 1 - 1/k for case 1, -4/k^2 + 3/k for case -1, 1 for case 0 and 0 for any
     *         other value, with k = {@code numberOfCluster}
     */
    public double norm(double numberOfCluster, int caseOutcome) {
        switch (caseOutcome) {
        case 1:
            return 1.0 - (1.0 / numberOfCluster);
        case -1:
            return (-4.0 / (numberOfCluster * numberOfCluster)) + (3.0 / numberOfCluster);
        case 0:
            return 1;
        default:
            return 0;
        }
    }

    /**
     * Computes the weighted assignment case of two vectors divided by its norm.
     * The division is not guarded: where {@link #norm(double, int)} returns 0 (for
     * example case 1 with a single component) the result is NaN or infinite.
     *
     * @param vi first membership vector
     * @param vj second membership vector
     * @return {@link #casePlus(Vector, Vector, int)} divided by {@link #norm(double, int)}
     * @throws OperatorException if the vectors differ in size
     */
    public double normalizedCasePlus(Vector<Double> vi, Vector<Double> vj) throws OperatorException {
        int paCase = paCase(vi, vj);
        double weightedCase = casePlus(vi, vj, paCase);
        return weightedCase / norm(vi.size(), paCase);
    }

    /**
     * Applies tanh(alpha * x) to the value and suppresses results whose absolute value
     * does not exceed the threshold.
     *
     * @param normalizedCaseOutcome value to filter
     * @return 0 if the absolute tanh value is at most the threshold, the tanh value
     *         otherwise (NaN is passed through)
     */
    public double filter(double normalizedCaseOutcome) {
        double squashed = Math.tanh(alpha * normalizedCaseOutcome);
        if (Math.abs(squashed) <= this.threshold) {
            return 0;
        }
        return squashed;
    }

    /**
     * Searches the clusters of the model under construction, in order, for an example id.
     *
     * @param exampleId id of the example to look up
     * @return the first cluster that contains the id, or null if no cluster contains it
     */
    public DefaultCluster getClusterForId(String exampleId) {
        int numberOfClusters = clusterModel.getNumberOfClusters();
        for (int i = 0; i < numberOfClusters; i++) {
            Cluster candidate = clusterModel.getClusterAt(i);
            if (candidate.contains(exampleId)) {
                return (DefaultCluster) candidate;
            }
        }
        return null;
    }

    /**
     * Reads the parameters "t", "alpha" and "a? mapping" into the corresponding fields.
     *
     * @throws OperatorException if a parameter value cannot be obtained
     */
    private void setParameters() throws OperatorException{
        threshold = getParameterAsDouble(PARAMETER_T);
        alpha = getParameterAsDouble(PARAMETER_ALPHA);
        undecidableCaseMapping = getParameterAsInt(PARAMETER_UNDECIDABLE_CASE_MAPPING);
    }
}