package org.apache.mahout.clustering;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;

import org.apache.mahout.math.Vector;
import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.VectorWritable;
/**
 * Mapper for iterative clustering: classifies each input vector against the
 * current set of cluster models, lets the clustering policy choose which
 * cluster(s) to update, trains those models, and finally emits every trained
 * model keyed by its cluster index so a reducer can merge partial models.
 */
public class CIMapper extends
    Mapper<WritableComparable<?>, VectorWritable, IntWritable, Cluster> {

  /** Holds the cluster models and performs classify/train over them. */
  private ClusterClassifier classifier;

  /** Chooses which cluster(s) a classified vector should train. */
  private ClusteringPolicy policy;

  /**
   * Initializes the classifier and clustering policy for this mapper task.
   *
   * @param context the Hadoop task context
   * @throws IOException if the framework's setup fails
   * @throws InterruptedException if the task is interrupted
   */
  @Override
  protected void setup(Context context) throws IOException,
      InterruptedException {
    super.setup(context);
    // FIXME(review): the original code passed a null models list here, which
    // guarantees an NPE as soon as the classifier is used. An empty list is a
    // safe placeholder, but the real prior models presumably need to be loaded
    // from the job Configuration / distributed cache — confirm and wire up.
    List<Cluster> models = new ArrayList<Cluster>();
    classifier = new ClusterClassifier(models);
    // NOTE(review): policy is hard-coded to k-means; consider making the
    // policy class a job-configurable parameter.
    policy = new KMeansClusteringPolicy();
  }

  /**
   * Classifies one input vector and trains the policy-selected cluster models.
   *
   * @param key ignored input key
   * @param value the input vector to classify and train with
   * @param context the Hadoop task context (unused here; output is deferred
   *        to {@link #cleanup(Context)})
   * @throws IOException never thrown here directly; declared by the contract
   * @throws InterruptedException if the task is interrupted
   */
  @Override
  protected void map(WritableComparable<?> key, VectorWritable value,
      Context context) throws IOException, InterruptedException {
    // Per-cluster membership probabilities for this vector.
    Vector probabilities = classifier.classify(value.get());
    // The policy returns non-zero weights only for the cluster(s) to update
    // (e.g. k-means selects exactly the single closest cluster).
    Vector selections = policy.select(probabilities);
    // iterateNonZero returns an Iterator (not Iterable), so a for-each loop
    // cannot be used here.
    for (Iterator<Element> it = selections.iterateNonZero(); it.hasNext();) {
      Element el = it.next();
      // Train the selected model with this vector, weighted by the selection.
      classifier.train(el.index(), value.get(), el.get());
    }
  }

  /**
   * Emits every trained cluster model keyed by its index, so the reducer can
   * merge the partial models produced by each mapper for the same cluster.
   *
   * @param context the Hadoop task context used to emit (index, model) pairs
   * @throws IOException if writing output fails
   * @throws InterruptedException if the task is interrupted
   */
  @Override
  protected void cleanup(Context context) throws IOException,
      InterruptedException {
    List<Cluster> clusters = classifier.getModels();
    for (int index = 0; index < clusters.size(); index++) {
      context.write(new IntWritable(index), clusters.get(index));
    }
    super.cleanup(context);
  }
}