package skywriting.examples.skyhout.kmeans;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.mahout.clustering.kmeans.Cluster;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.VectorWritable;
import skywriting.examples.skyhout.common.SkywritingTaskFileSystem;
import com.asgow.ciel.executor.Ciel;
import com.asgow.ciel.references.Reference;
import com.asgow.ciel.references.WritableReference;
import com.asgow.ciel.tasks.FirstClassJavaTask;
public class KMeansHead implements FirstClassJavaTask {
private final Reference dataPartitionRef;
private final int k;
private final int numDimensions;
public KMeansHead(Reference dataPartitionRef, int k, int numDimensions) {
this.dataPartitionRef = dataPartitionRef;
this.k = k;
this.numDimensions = numDimensions;
}
@Override
public Reference[] getDependencies() {
return new Reference[] { this.dataPartitionRef };
}
@Override
public void invoke() throws Exception {
DataInputStream dis = new DataInputStream(new FileInputStream(Ciel.RPC.getFilenameForReference(this.dataPartitionRef)));
WritableReference out = Ciel.RPC.getOutputFilename(0);
DataOutputStream dos = new DataOutputStream(out.open());
Configuration conf = new Configuration();
conf.setClassLoader(Ciel.CLASSLOADER);
SkywritingTaskFileSystem fs = new SkywritingTaskFileSystem(new InputStream[] { dis }, new OutputStream[] { dos }, conf);
SequenceFile.Reader mapReader = new SequenceFile.Reader(fs, new Path("/in/0"), fs.getConf());
SequenceFile.Writer writer = new SequenceFile.Writer(
fs, fs.getConf(), new Path("/out/0"), Text.class, Cluster.class);
Text currentID = new Text();
Text currentKey = new Text();
VectorWritable currentVector = mapReader.getValueClass().asSubclass(VectorWritable.class).newInstance();
for (int i = 0; i < this.k; ++i) {
mapReader.next(currentID, currentVector);
currentKey.set("CCC" + i);
Cluster cluster = new Cluster(currentVector.get(), i);
writer.append(currentKey, cluster);
}
writer.close();
//Ciel.RPC.closeOutput(0);
}
@Override
public void setup() {
// TODO Auto-generated method stub
}
}