package kmeans;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.Random;
/**
* A class to encapsulate an input set of
* Points for use in the KMeans program and the
* reading/writing of a set of points to data files.
*/
public final class KMeansDataSet implements Serializable {
private static final int cookie = 0x2badfdc0;
private static final int version = 1;
public final int numPoints;
public final int numDimensions;
public final float[] points;
public KMeansDataSet(int np, int nd, float[] pts) {
assert np * nd == pts.length;
numPoints = np;
numDimensions = nd;
points = pts;
}
public final float getFloat(int point, int dim) {
return points[point*numDimensions + dim];
}
public final int getPointOffset(int point) {
return point*numDimensions;
}
/**
* Create numPoints random points each of dimension numDimensions.
*/
public static KMeansDataSet generateRandomPoints(int numPoints, int numDimensions) {
Random rnd = new Random(0);
float[] points = new float[numPoints*numDimensions];
for (int i=0; i<numPoints*numDimensions; i++) {
points[i] = rnd.nextFloat();
}
return new KMeansDataSet(numPoints, numDimensions, points);
}
/**
* Generate a set of random points and write them to a data file
* @param fileName the name of the file to create
* @param numPoints the number of points to write to the file
* @param numDimensions the number of dimensions each point should have
* @param seed a random number seed to generate the points.
* @return <code>true</code> on success, <code>false</code> on failure
*/
public static boolean generateRandomPointsToFile(String fileName, int numPoints, int numDimensions, int seed) {
try {
Random rand = new Random(seed);
File outputFile = new File(fileName);
DataOutputStream out = new DataOutputStream(new FileOutputStream(outputFile));
out.writeInt(cookie);
out.writeInt(version);
out.writeInt(numPoints);
out.writeInt(numDimensions);
int numFloats = numPoints * numDimensions;
for (int i=0; i<numFloats; i++) {
out.writeFloat(rand.nextFloat());
}
} catch (FileNotFoundException e) {
System.out.println("Unable to open file for writing "+fileName);
return false;
} catch (IOException e) {
System.out.println("Error writing data to "+fileName);
e.printStackTrace();
return false;
}
return true;
}
/**
* Write a set of points to a data file
* @param fileName the name of the file to create
* @param data the points to write
* @return <code>true</code> on success, <code>false</code> on failure
*/
public static boolean writePointsToFile(String fileName, KMeansDataSet data) {
int numPoints = data.numPoints;
if (numPoints == 0) return false;
int numDimensions = data.numPoints;
try {
File outputFile = new File(fileName);
DataOutputStream out = new DataOutputStream(new FileOutputStream(outputFile));
out.writeInt(cookie);
out.writeInt(version);
out.writeInt(numPoints);
out.writeInt(numDimensions);
for (int i=0; i<numPoints*numDimensions; i++) {
out.writeFloat(data.points[i]);
}
} catch (FileNotFoundException e) {
System.out.println("Unable to open file for writing "+fileName);
return false;
} catch (IOException e) {
e.printStackTrace();
return false;
}
return true;
}
/**
* Create numPoints random points each of dimension numDimensions.
* @param fileName the name of the data file containing the points
*/
public static KMeansDataSet readPointsFromFile(String fileName) {
int i = 0;
int j = 0;
int numDimensions = 0;
int numPoints = 0;
float[] points = null;
try {
DataInputStream data = new DataInputStream(new FileInputStream(new File(fileName)));
int fc = data.readInt();
if (fc != cookie) {
System.err.printf("Invalid cookie. Found %d but expected %d\n", fc, cookie);
}
int fv = data.readInt();
if (fv != version) {
System.err.printf("Invalid version. Found %d but expected %d\n", fc, cookie);
}
numPoints = data.readInt();
numDimensions = data.readInt();
points = new float[numPoints*numDimensions];
System.out.printf("Reading %d %d-dimensional points from %s\n", numPoints, numDimensions, fileName);
for (i=0; i<numPoints; i++) {
for (j=0; j<numDimensions; j++) {
points[i*numDimensions+ j] = data.readFloat();
}
}
} catch (FileNotFoundException e) {
System.err.println("Unable to open file "+fileName);
System.exit(-1);
} catch (IOException e) {
System.err.printf("File did not contain enough data for %d %d-dimenstional points\n", numPoints, numDimensions);
System.err.printf("Only found %d floats; expected to find %d\n", i*numDimensions+j, numPoints*numDimensions);
e.printStackTrace();
System.exit(-1);
}
return new KMeansDataSet(numPoints, numDimensions, points);
}
}