package de.tud.inf.operator.learner.clustering.clusterer;
import java.util.ArrayList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.learner.clustering.IdUtils;
import com.rapidminer.operator.learner.clustering.clusterer.uncertain.DBScanEAClustering;
import com.rapidminer.operator.similarity.attributebased.uncertain.ProbabilityDensityFunction;
import com.rapidminer.tools.Ontology;
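/**
* Extends DBSCAN^EA: instead of using one global fuzziness, every uncertain
* attribute is sampled with its own probability density function
* (attribute-specific uncertainty regions).
* @author Antje Gruner
*
*/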
public class DBScanEAClusteringWithPdf extends DBScanEAClustering{

    /**
     * Creates the operator; all setup is delegated to the superclass.
     *
     * @param description operator description handed to the superclass constructor
     */
    public DBScanEAClusteringWithPdf(OperatorDescription description) {
        super(description);
    }

    /**
     * Returns the sample points of the example with the given id.
     * <p>
     * Results are cached in {@code sampleCache}; on a cache miss the example is
     * looked up, one block of sub-dimensional samples is built per attribute and
     * the blocks are concatenated into full-dimensional sample points.
     * Instead of using a global fuzziness, uncertain (possibly correlated) values
     * are sampled with the pdf attached to the attribute itself.
     *
     * @param id id of the example to sample
     * @return sample points, one row per sample and one column per dimension
     */
    @Override
    protected Double[][] getSamples(String id) {
        // cache hit -> nothing to compute
        if (sampleCache.containsKey(id)) {
            return sampleCache.get(id);
        }

        Example ex = IdUtils.getExampleFromId(es, id);
        List<Double[][]> resList = new ArrayList<Double[][]>();
        for (Attribute att : ex.getAttributes()) {
            if (att.isComplex() && Ontology.ATTRIBUTE_VALUE_TYPE.isA(att.getValueType(), Ontology.UNCERTAIN)) {
                // uncertain value -> let the sample strategy draw from the attribute's own pdf
                ProbabilityDensityFunction pdf = ex.getUncertainValue(att);
                sampleStrategy.setPdf(pdf);
                sampleStrategy.setValue(pdf.getValue());
                resList.add(sampleStrategy.getSamples());
            } else {
                // certain value -> sampleRate times the same point;
                // since the attribute is not complex, its dimension is 1
                Double certainValue = ex.getValue(att);
                Double[][] pDim = new Double[sampleStrategy.getSampleRate()][1];
                for (int i = 0; i < pDim.length; i++) {
                    pDim[i][0] = certainValue;
                }
                resList.add(pDim);
            }
        }

        // finally put the sub-dimensional sample points together
        Double[][] res = concatSubDimPoints(resList);
        sampleCache.put(id, res);
        return res;
    }

    /**
     * Concatenates a list of sub-dimensional sample points into full-dimensional
     * sample points: row {@code j} of the result is row {@code j} of every list
     * entry, written one after another in list order.
     * <p>
     * The number of rows is taken from the first list entry. An empty list, or a
     * list whose entries contain no samples, yields a matrix without rows instead
     * of an index error.
     * NOTE(review): all entries are assumed to hold the same number of samples
     * (the same assumption the original code made) - confirm against the sample
     * strategy implementations.
     *
     * @param list sub-dimensional samples, one entry per attribute
     * @return matrix with one row per sample and the summed sub-dimensions as columns
     */
    private Double[][] concatSubDimPoints(List<Double[][]> list) {
        // no attributes at all -> no sample points
        if (list.isEmpty()) {
            return new Double[0][0];
        }

        // total dimension = sum of the sub-dimensions, read from the first
        // sample point of every entry (should be the same for all its rows)
        int dimPoint = 0;
        for (Double[][] subSamples : list) {
            if (subSamples.length > 0) {
                dimPoint += subSamples[0].length;
            }
        }

        int sampleRate = list.get(0).length;
        Double[][] res = new Double[sampleRate][dimPoint];

        int offset = 0; // first free column in every result row
        for (Double[][] subSamples : list) {
            if (subSamples.length == 0) {
                // entry without samples contributes no columns; reading
                // subSamples[0] below would otherwise fail
                continue;
            }
            for (int j = 0; j < subSamples.length; j++) {
                Double[] subPoint = subSamples[j];
                System.arraycopy(subPoint, 0, res[j], offset, subPoint.length);
            }
            offset += subSamples[0].length;
        }
        return res;
    }
}