package skywriting.examples.kmeans;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import com.asgow.ciel.executor.Ciel;
import com.asgow.ciel.references.Reference;
import com.asgow.ciel.tasks.FirstClassJavaTask;
public class KMeansMapper implements FirstClassJavaTask {
public static double getSquaredDistance(double[] x, double[] y) {
double ret = 0.0;
for (int i = 0; i < x.length; ++i) {
ret += (y[i] - x[i]) * (y[i] - x[i]);
}
return ret;
}
private final Reference dataPartitionRef;
private final Reference clustersRef;
private final int k;
private final int numDimensions;
private final boolean doCache;
public KMeansMapper(Reference dataPartitionRef, Reference clustersRef, int k, int numDimensions, boolean doCache) {
this.dataPartitionRef = dataPartitionRef;
this.clustersRef = clustersRef;
this.k = k;
this.numDimensions = numDimensions;
this.doCache = doCache;
}
@Override
public Reference[] getDependencies() {
return new Reference[] { this.dataPartitionRef, this.clustersRef };
}
@Override
public void invoke() throws Exception {
DataInputStream clustersIn = new DataInputStream(new BufferedInputStream(Ciel.RPC.getStreamForReference(this.clustersRef, 1048576, false, false, false), 1048576));
double[][] clusters = new double[this.k][this.numDimensions];
for (int i = 0; i < this.k; ++i) {
for (int j = 0; j < this.numDimensions; ++j) {
clusters[i][j] = clustersIn.readDouble();
}
//System.err.println("Cluster " + i + " " + clusters[i][0] + " " + clusters[i][1]);
}
clustersIn.close();
KMeansMapperResult result = new KMeansMapperResult(this.k, this.numDimensions);
LinkedList<double[]> vectors;
boolean doRead;
DataInputStream dataIn;
Iterator<double[]> vectorIterator;
if (this.doCache) {
vectors = (LinkedList<double[]>) Ciel.softCache.tryGetCache("fastkmeansin", this.dataPartitionRef);
if (vectors == null) {
doRead = true;
vectors = new LinkedList<double[]>();
vectorIterator = null;
dataIn = new DataInputStream(new BufferedInputStream((Ciel.RPC.getStreamForReference(this.dataPartitionRef, 1048576, false, true, false)), 1048576));
} else {
doRead = false;
dataIn = null;
vectorIterator = vectors.iterator();
}
} else {
doRead = true;
dataIn = new DataInputStream(new BufferedInputStream((Ciel.RPC.getStreamForReference(this.dataPartitionRef, 1048576, false, true, false)), 1048576));
vectorIterator = null;
vectors = null;
}
double[] currentVector = new double[this.numDimensions];
int v = 0;
long start = System.currentTimeMillis();
try {
while (true) {
if (!doRead) {
if (vectorIterator.hasNext()) {
currentVector = vectorIterator.next();
} else {
break;
}
} else {
for (int j = 0; j < this.numDimensions; ++j) {
currentVector[j] = dataIn.readDouble();
}
if (doCache) {
vectors.addLast(currentVector);
}
}
int nearestCluster = -1;
double minDistance = Double.MAX_VALUE;
for (int i = 0; i < this.k; ++i) {
double distance = getSquaredDistance(currentVector, clusters[i]);
if (distance < minDistance) {
nearestCluster = i;
minDistance = distance;
}
}
++v;
//System.err.println("Vector " + currentVector[0] + " " + currentVector[1]);
result.add(nearestCluster, currentVector);
if (doRead && doCache) {
currentVector = new double[this.numDimensions];
}
}
} catch (EOFException eofe) {
;
}
long finish = System.currentTimeMillis();
System.err.println("*****>>>>> " + (doRead ? "From-disk" : "From-cache") + " loop with " + v + " vectors took " + (finish - start) + " ms");
if (doRead && doCache) {
Ciel.softCache.putCache(vectors, "fastkmeansin", this.dataPartitionRef);
}
if (dataIn != null) {
dataIn.close();
}
Ciel.returnObject(result);
}
@Override
public void setup() {
// TODO Auto-generated method stub
}
}