package com.yahoo.labs.samoa.moa.clusterers; /* * #%L * SAMOA * %% * Copyright (C) 2010 RWTH Aachen University, Germany * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import java.util.ArrayList; import java.util.Arrays; import java.util.Random; import com.github.javacliparser.FloatOption; import com.github.javacliparser.IntOption; import com.yahoo.labs.samoa.moa.cluster.Clustering; import com.yahoo.labs.samoa.moa.cluster.SphereCluster; import com.yahoo.labs.samoa.moa.core.Measurement; import com.yahoo.labs.samoa.moa.core.DataPoint; import com.yahoo.labs.samoa.instances.Instance; public class ClusterGenerator extends AbstractClusterer{ private static final long serialVersionUID = 1L; public IntOption timeWindowOption = new IntOption("timeWindow", 't', "Rang of the window.", 1000); public FloatOption radiusDecreaseOption = new FloatOption("radiusDecrease", 'r', "The average radii of the centroids in the model.", 0, 0, 1); public FloatOption radiusIncreaseOption = new FloatOption("radiusIncrease", 'R', "The average radii of the centroids in the model.", 0, 0, 1); public FloatOption positionOffsetOption = new FloatOption("positionOffset", 'p', "The average radii of the centroids in the model.", 0, 0, 1); public FloatOption clusterRemoveOption = new FloatOption("clusterRemove", 'D', "Deletes complete clusters from the clustering.", 0, 0, 1); public FloatOption joinClustersOption = new FloatOption("joinClusters", 'j', "Join two clusters if their hull distance is less minRadius times this factor.", 0, 0, 1); public FloatOption clusterAddOption = new FloatOption("clusterAdd", 'A', "Adds additional clusters.", 0, 0, 1); private static double err_intervall_width = 0.0; private ArrayList<DataPoint> points; private int instanceCounter; private int windowCounter; private Random random; private Clustering sourceClustering = null; @Override public void resetLearningImpl() { points = new ArrayList<DataPoint>(); instanceCounter = 0; windowCounter = 0; random = new Random(227); //joinClustersOption.set(); //evaluateMicroClusteringOption.set(); } @Override public void trainOnInstanceImpl(Instance inst) { if(windowCounter >= timeWindowOption.getValue()){ points.clear(); windowCounter = 0; } windowCounter++; instanceCounter++; points.add( new DataPoint(inst,instanceCounter)); } @Override public boolean implementsMicroClusterer() { return true; } public void setSourceClustering(Clustering source){ sourceClustering = source; } @Override public Clustering getMicroClusteringResult() { //System.out.println("Numcluster:"+clustering.size()+" / "+num); //Clustering source_clustering = new Clustering(points, overlapThreshold, microInitMinPoints); if(sourceClustering == null){ System.out.println("You need to set a source clustering for the ClusterGenerator to work"); return null; } return alterClustering(sourceClustering); } public Clustering getClusteringResult(){ sourceClustering = new Clustering(points); // if(sourceClustering == null){ // System.out.println("You need to set a source clustering for the ClusterGenerator to work"); // return null; // } return alterClustering(sourceClustering); } private Clustering alterClustering(Clustering scclustering){ //percentage of the radius that will be cut off //0: no changes to radius //1: radius of 0 double errLevelRadiusDecrease = radiusDecreaseOption.getValue(); //0: no changes to radius //1: radius 100% bigger double errLevelRadiusIncrease = radiusIncreaseOption.getValue(); //0: no changes //1: distance between centers is 2 * original radius double errLevelPosition = positionOffsetOption.getValue(); int numRemoveCluster = (int)(clusterRemoveOption.getValue()*scclustering.size()); int numAddCluster = (int)(clusterAddOption.getValue()*scclustering.size()); for (int c = 0; c < numRemoveCluster; c++) { int delId = random.nextInt(scclustering.size()); scclustering.remove(delId); } int numCluster = scclustering.size(); double[] err_seeds = new double[numCluster]; double err_seed_sum = 0.0; double tmp_seed; for (int i = 0; i < numCluster; i++) { tmp_seed = random.nextDouble(); err_seeds[i] = err_seed_sum + tmp_seed; err_seed_sum+= tmp_seed; } double sumWeight = 0; for (int i = 0; i <numCluster; i++) { sumWeight+= scclustering.get(i).getWeight(); } Clustering clustering = new Clustering(); for (int i = 0; i <numCluster; i++) { if(!(scclustering.get(i) instanceof SphereCluster)){ System.out.println("Not a Sphere Cluster"); continue; } SphereCluster sourceCluster = (SphereCluster)scclustering.get(i); double[] center = Arrays.copyOf(sourceCluster.getCenter(),sourceCluster.getCenter().length); double weight = sourceCluster.getWeight(); double radius = sourceCluster.getRadius(); //move cluster center if(errLevelPosition >0){ double errOffset = random.nextDouble()*err_intervall_width/2.0; double errOffsetDirection = ((random.nextBoolean())? 1 : -1); double level = errLevelPosition + errOffsetDirection * errOffset; double[] vector = new double[center.length]; double vectorLength = 0; for (int d = 0; d < center.length; d++) { vector[d] = (random.nextBoolean()?1:-1)*random.nextDouble(); vectorLength += Math.pow(vector[d],2); } vectorLength = Math.sqrt(vectorLength); //max is when clusters are next to each other double length = 2 * radius * level; for (int d = 0; d < center.length; d++) { //normalize length and then strecht to reach error position vector[d]=vector[d]/vectorLength*length; } // System.out.println("Center "+Arrays.toString(center)); // System.out.println("Vector "+Arrays.toString(vector)); //check if error position is within bounds double [] newCenter = new double[center.length]; for (int d = 0; d < center.length; d++) { //check bounds, otherwise flip vector if(center[d] + vector[d] >= 0 && center[d] + vector[d] <= 1){ newCenter[d] = center[d] + vector[d]; } else{ newCenter[d] = center[d] + (-1)*vector[d]; } } center = newCenter; for (int d = 0; d < center.length; d++) { if(newCenter[d] >= 0 && newCenter[d] <= 1){ } else{ System.out.println("This shouldnt have happend, Cluster center out of bounds:"+Arrays.toString(newCenter)); } } //System.out.println("new Center "+Arrays.toString(newCenter)); } //alter radius if(errLevelRadiusDecrease > 0 || errLevelRadiusIncrease > 0){ double errOffset = random.nextDouble()*err_intervall_width/2.0; int errOffsetDirection = ((random.nextBoolean())? 1 : -1); if(errLevelRadiusDecrease > 0 && (errLevelRadiusIncrease == 0 || random.nextBoolean())){ double level = (errLevelRadiusDecrease + errOffsetDirection * errOffset);//*sourceCluster.getWeight()/sumWeight; level = (level<0)?0:level; level = (level>1)?1:level; radius*=(1-level); } else{ double level = errLevelRadiusIncrease + errOffsetDirection * errOffset; level = (level<0)?0:level; level = (level>1)?1:level; radius+=radius*level; } } SphereCluster newCluster = new SphereCluster(center, radius, weight); newCluster.setMeasureValue("Source Cluster", "C"+sourceCluster.getId()); clustering.add(newCluster); } if(joinClustersOption.getValue() > 0){ clustering = joinClusters(clustering); } //add new clusters by copying clusters and set a random center for (int c = 0; c < numAddCluster; c++) { int copyId = random.nextInt(clustering.size()); SphereCluster scorg = (SphereCluster)clustering.get(copyId); int dim = scorg.getCenter().length; double[] center = new double [dim]; double radius = scorg.getRadius(); boolean outofbounds = true; int tryCounter = 0; while(outofbounds && tryCounter < 20){ tryCounter++; outofbounds = false; for (int j = 0; j < center.length; j++) { center[j] = random.nextDouble(); if(center[j]- radius < 0 || center[j] + radius > 1){ outofbounds = true; break; } } } if(outofbounds){ System.out.println("Coludn't place additional cluster"); } else{ SphereCluster scnew = new SphereCluster(center, radius, scorg.getWeight()/2); scorg.setWeight(scorg.getWeight()-scnew.getWeight()); clustering.add(scnew); } } return clustering; } private Clustering joinClusters(Clustering clustering){ double radiusFactor = joinClustersOption.getValue(); boolean[] merged = new boolean[clustering.size()]; Clustering mclustering = new Clustering(); if(radiusFactor >0){ for (int c1 = 0; c1 < clustering.size(); c1++) { SphereCluster sc1 = (SphereCluster) clustering.get(c1); double minDist = Double.MAX_VALUE; double minOver = 1; int maxindexCon = -1; int maxindexOver = -1; for (int c2 = 0; c2 < clustering.size(); c2++) { SphereCluster sc2 = (SphereCluster) clustering.get(c2); // double over = sc1.overlapRadiusDegree(sc2); // if(over > 0 && over < minOver){ // minOver = over; // maxindexOver = c2; // } double dist = sc1.getHullDistance(sc2); double threshold = Math.min(sc1.getRadius(), sc2.getRadius())*radiusFactor; if(dist > 0 && dist < minDist && dist < threshold){ minDist = dist; maxindexCon = c2; } } int maxindex = -1; if(maxindexOver!=-1) maxindex = maxindexOver; else maxindex = maxindexCon; if(maxindex!=-1 && !merged[c1]){ merged[c1]=true; merged[maxindex]=true; SphereCluster scnew = new SphereCluster(sc1.getCenter(),sc1.getRadius(),sc1.getWeight()); SphereCluster sc2 = (SphereCluster) clustering.get(maxindex); scnew.merge(sc2); mclustering.add(scnew); } } } for (int i = 0; i < merged.length; i++) { if(!merged[i]) mclustering.add(clustering.get(i)); } return mclustering; } @Override protected Measurement[] getModelMeasurementsImpl() { throw new UnsupportedOperationException("Not supported yet."); } @Override public void getModelDescription(StringBuilder out, int indent) { throw new UnsupportedOperationException("Not supported yet."); } @Override public boolean isRandomizable() { return false; } @Override public boolean keepClassLabel(){ return true; } public double[] getVotesForInstance(Instance inst) { return null; } }