/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.clustering.display;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.canopy.CanopyDriver;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import java.awt.BasicStroke;
import java.awt.Color;
import java.awt.Graphics;
import java.awt.Graphics2D;
import java.util.List;
/**
* Java desktop graphics class that runs canopy clustering and displays the results.
* This class generates random data and clusters it.
*/
public class DisplayCanopy extends DisplayClustering {
DisplayCanopy() {
initialize();
this.setTitle("Canopy Clusters (>" + (int) (significance * 100) + "% of population)");
}
@Override
public void paint(Graphics g) {
plotSampleData((Graphics2D) g);
plotClusters((Graphics2D) g);
}
protected static void plotClusters(Graphics2D g2) {
int cx = CLUSTERS.size() - 1;
for (List<Cluster> clusters : CLUSTERS) {
for (Cluster cluster : clusters) {
if (isSignificant(cluster)) {
g2.setStroke(new BasicStroke(1));
g2.setColor(Color.BLUE);
double[] t1 = {T1, T1};
plotEllipse(g2, cluster.getCenter(), new DenseVector(t1));
double[] t2 = {T2, T2};
plotEllipse(g2, cluster.getCenter(), new DenseVector(t2));
g2.setColor(COLORS[Math.min(DisplayClustering.COLORS.length - 1, cx)]);
g2.setStroke(new BasicStroke(cx == 0 ? 3 : 1));
plotEllipse(g2, cluster.getCenter(), cluster.getRadius().times(3));
}
}
cx--;
}
}
public static void main(String[] args) throws Exception {
Path samples = new Path("samples");
Path output = new Path("output");
Configuration conf = new Configuration();
HadoopUtil.delete(conf, samples);
HadoopUtil.delete(conf, output);
RandomUtils.useTestSeed();
generateSamples();
writeSampleData(samples);
//boolean b = true;
//if (b) {
CanopyDriver.buildClusters(conf, samples, output, new ManhattanDistanceMeasure(), T1, T2, 0, true);
loadClusters(output, new PathFilter() {
@Override
public boolean accept(Path path) {
String pathString = path.toString();
return pathString.contains("/clusters-");
}
});
//} else {
// List<Vector> points = new ArrayList<Vector>();
// for (VectorWritable sample : SAMPLE_DATA) {
// points.add(sample.get());
// }
// List<Canopy> canopies = CanopyClusterer.createCanopies(points, new ManhattanDistanceMeasure(), T1, T2);
// CanopyClusterer.updateCentroids(canopies);
// List<Cluster> clusters = new ArrayList<Cluster>();
// for (Canopy canopy : canopies) {
// clusters.add(canopy);
// }
// CLUSTERS.add(clusters);
//}
new DisplayCanopy();
}
}