package skywriting.examples.kmeans;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.util.ArrayList;

import com.asgow.ciel.executor.Ciel;
import com.asgow.ciel.references.Reference;
import com.asgow.ciel.references.WritableReference;
import com.asgow.ciel.tasks.FirstClassJavaTask;

/**
 * Reduce step of the iterative k-means example.
 *
 * <p>One invocation adds up the {@link KMeansMapperResult} objects referenced by
 * {@code partialSumsRefs}, normalises the total, and compares it with the
 * previous cluster centres read from {@code oldClustersRef}. While the error is
 * above {@code epsilon} and the iteration cap has not been passed, it writes the
 * new centres to a fresh "clusters" object, spawns one {@link KMeansMapper} per
 * data partition and tail-spawns another {@code KMeansReducer} for the next
 * iteration. Otherwise it returns the plain string {@code "Finished!"}.
 */
public class KMeansReducer implements FirstClassJavaTask {

    /** Buffer size, in bytes, used for the buffered cluster-file streams (1 MiB). */
    private static final int IO_BUFFER_SIZE = 1048576;

    /** Last iteration number that is still allowed to start another round. */
    private static final int MAX_ITERATION = 20;

    private final Reference[] partialSumsRefs;
    private final Reference oldClustersRef;
    private final int k;
    private final int numDimensions;
    private final double epsilon;
    private final Reference[] dataPartitionsRefs;
    private final int iteration;
    private final boolean doCache;

    /**
     * Creates the reducer for one iteration.
     *
     * @param partialSumsRefs    references to the serialised {@link KMeansMapperResult}
     *                           objects to be added together
     * @param oldClustersRef     reference to the previous centres, stored as
     *                           {@code k * numDimensions} consecutive doubles
     * @param k                  number of clusters
     * @param numDimensions      number of coordinates per cluster centre
     * @param epsilon            error threshold; iteration continues only while the
     *                           error is strictly greater than this value
     * @param dataPartitionsRefs references to the data partitions, one mapper is
     *                           spawned per entry when another iteration is needed
     * @param iteration          number of the current iteration (printed in the log
     *                           line and compared against the iteration cap)
     * @param doCache            flag passed through unchanged to each spawned
     *                           {@link KMeansMapper}
     */
    public KMeansReducer(Reference[] partialSumsRefs, Reference oldClustersRef, int k,
            int numDimensions, double epsilon, Reference[] dataPartitionsRefs,
            int iteration, boolean doCache) {
        this.partialSumsRefs = partialSumsRefs;
        this.oldClustersRef = oldClustersRef;
        this.k = k;
        this.numDimensions = numDimensions;
        this.epsilon = epsilon;
        this.dataPartitionsRefs = dataPartitionsRefs;
        this.iteration = iteration;
        this.doCache = doCache;
    }

    /**
     * Lists the references this task reads.
     *
     * @return every partial-sums reference, in order, followed by the old-clusters
     *         reference
     */
    @Override
    public Reference[] getDependencies() {
        int count = this.partialSumsRefs.length + 1;
        ArrayList<Reference> retList = new ArrayList<Reference>(count);
        for (Reference partialSumsRef : this.partialSumsRefs) {
            retList.add(partialSumsRef);
        }
        retList.add(this.oldClustersRef);
        return retList.toArray(new Reference[count]);
    }

    /**
     * Runs one reduce step: aggregate, normalise, measure the error against the
     * old centres, then either schedule the next iteration or finish.
     *
     * @throws Exception if reading an input, writing the new centres, or a CIEL
     *                   call fails
     */
    @Override
    public void invoke() throws Exception {
        KMeansMapperResult result = sumPartialResults();
        result.normalise();

        double[][] oldClusters = readOldClusters();
        double error = result.error(oldClusters);
        System.err.println("Iteration " + this.iteration + "; Error = " + error);

        if (error > this.epsilon && this.iteration <= MAX_ITERATION) {
            Reference newClustersRef = writeNewClusters(result);
            spawnNextIteration(newClustersRef);
        } else {
            Ciel.returnPlainString("Finished!");
            //Ciel.returnObject(result.sums);
        }
    }

    /** Deserialises every partial result and adds it into a single accumulator. */
    private KMeansMapperResult sumPartialResults() throws Exception {
        KMeansMapperResult total = new KMeansMapperResult(this.k, this.numDimensions);
        for (Reference pSumRef : this.partialSumsRefs) {
            FileInputStream fileIn = new FileInputStream(Ciel.RPC.getFilenameForReference(pSumRef));
            try {
                // The ObjectInputStream constructor itself reads from the stream and
                // can throw, so the underlying file stream is what gets closed in
                // the finally block.
                ObjectInputStream ois = new ObjectInputStream(fileIn);
                KMeansMapperResult pSum = (KMeansMapperResult) ois.readObject();
                total.add(pSum);
            } finally {
                fileIn.close();
            }
        }
        return total;
    }

    /** Reads the previous centres as a {@code k} x {@code numDimensions} matrix of doubles. */
    private double[][] readOldClusters() throws Exception {
        double[][] oldClusters = new double[this.k][this.numDimensions];
        DataInputStream oldClustersIn = new DataInputStream(new BufferedInputStream(
                new FileInputStream(Ciel.RPC.getFilenameForReference(this.oldClustersRef)),
                IO_BUFFER_SIZE));
        try {
            for (int i = 0; i < this.k; ++i) {
                for (int j = 0; j < this.numDimensions; ++j) {
                    oldClusters[i][j] = oldClustersIn.readDouble();
                }
            }
        } finally {
            oldClustersIn.close();
        }
        return oldClusters;
    }

    /**
     * Writes the (already normalised) sums of {@code result} row by row to a new
     * "clusters" object and returns its completed reference.
     */
    private Reference writeNewClusters(KMeansMapperResult result) throws Exception {
        WritableReference newClustersOut = Ciel.RPC.getNewObjectFilename("clusters");
        DataOutputStream dos = new DataOutputStream(
                new BufferedOutputStream(newClustersOut.open(), IO_BUFFER_SIZE));
        try {
            for (int i = 0; i < this.k; ++i) {
                for (int j = 0; j < result.sums[i].length; ++j) {
                    // result has been normalised.
                    dos.writeDouble(result.sums[i][j]);
                }
            }
        } finally {
            // close() also flushes the buffer; it must run before getCompletedRef().
            dos.close();
        }
        return newClustersOut.getCompletedRef();
    }

    /**
     * Spawns one mapper per data partition against the new centres, then
     * tail-spawns the reducer for the following iteration.
     */
    private void spawnNextIteration(Reference newClustersRef) throws Exception {
        Reference[] newPartialSumsRefs = new Reference[this.dataPartitionsRefs.length];
        for (int i = 0; i < newPartialSumsRefs.length; ++i) {
            KMeansMapper mapper = new KMeansMapper(this.dataPartitionsRefs[i], newClustersRef,
                    this.k, this.numDimensions, this.doCache);
            // Each mapper is spawned with a single output; keep that one reference.
            newPartialSumsRefs[i] = Ciel.spawn(mapper, null, 1)[0];
        }
        Ciel.tailSpawn(new KMeansReducer(newPartialSumsRefs, newClustersRef, this.k,
                this.numDimensions, this.epsilon, this.dataPartitionsRefs,
                this.iteration + 1, this.doCache), null);
    }

    /** No setup work is required for this task. */
    @Override
    public void setup() {
        // Intentionally empty.
    }
}