import mpi.MPI;
import mpi.MPIException;
import mpi.MpiOps;
import org.apache.commons.cli.*;
import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
import java.io.*;
import java.util.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
/**
 * Command-line tool that turns vector files into pairwise distance matrices.
 *
 * <p>In folder mode every regular file found in the input folder is read with
 * {@code Utils.readVectors}, the distance between every pair of vectors is
 * obtained from {@code VectorPoint.correlation}, and the resulting matrix is
 * written to a file of the same name inside the output folder. Each matrix
 * entry is divided by the largest distance found in the file and stored as a
 * {@code short} scaled by {@link Short#MAX_VALUE}.
 *
 * <p>When MPI is enabled together with the shared-input flag, the sorted file
 * list is dealt out round-robin so that each rank handles a disjoint subset.
 * In single-file mode ({@code -f}) exactly one file is processed and MPI is
 * never initialised.
 */
public class DistanceCalculator {
    /**
     * Maximum number of vectors requested from one input file: vectors are read
     * for the index range {@code 0 .. INC - 1} only.
     */
    private static final int INC = 7000;

    /**
     * Number of worker threads draining the file queue. The original comment
     * announced four threads, but the loop has always started exactly one.
     */
    private static final int WORKER_THREADS = 1;

    /** Number of buckets in each of the two diagnostic histograms. */
    private static final int HISTOGRAM_BINS = 100;

    /** Folder containing the input vector files (folder mode only). */
    private final String vectorFolder;
    /** Folder receiving one distance-matrix file per input file. */
    private final String distFolder;
    /** Value of the {@code -n} flag; stored but not read anywhere in this class. */
    private final boolean normalize;
    /** Whether MPI is used to find this process's rank and the process count. */
    private final boolean mpi;
    /** MPI helper, created in {@link #process()} only when {@link #mpi} is set. */
    private MpiOps mpiOps;
    /** Distance type forwarded unchanged to {@code VectorPoint.correlation}. */
    private final int distanceType;
    /** Whether all ranks see the same input folder and must split its files. */
    private final boolean sharedInput;

    /**
     * Creates a calculator.
     *
     * @param vectorFolder folder with the input vector files
     * @param distFolder   folder the distance matrices are written to
     * @param normalize    value of the normalize flag (currently unused)
     * @param mpi          whether to query MPI for rank and size
     * @param distanceType distance type passed to {@code VectorPoint.correlation}
     * @param sharedInput  whether the input folder is shared between MPI ranks
     */
    public DistanceCalculator(String vectorFolder, String distFolder, boolean normalize, boolean mpi, int distanceType, boolean sharedInput) {
        this.vectorFolder = vectorFolder;
        this.distFolder = distFolder;
        this.normalize = normalize;
        this.mpi = mpi;
        this.distanceType = distanceType;
        this.sharedInput = sharedInput;
    }

    /**
     * Parses the command line and runs either folder mode or single-file mode.
     *
     * <p>Options: {@code -v} input folder, {@code -d} output folder, {@code -n}
     * normalize, {@code -m} use MPI, {@code -t} distance type (integer),
     * {@code -s} shared input directory, {@code -f} process a single file.
     *
     * @param args command-line arguments; also handed to {@code MPI.Init}
     */
    public static void main(String[] args) {
        Options options = new Options();
        options.addOption("v", true, "Input Vector folder");
        options.addOption("d", true, "Distance matrix folder");
        options.addOption("n", false, "normalize");
        options.addOption("m", false, "mpi");
        options.addOption("t", true, "distance type");
        options.addOption("s", false, "shared input directory");
        options.addOption(Utils.createOption("f", true, "Single calc", false));
        CommandLineParser commandLineParser = new BasicParser();
        try {
            CommandLine cmd = commandLineParser.parse(options, args);
            String vectorDir = cmd.getOptionValue("v");
            String distDir = cmd.getOptionValue("d");
            boolean normalizeFlag = cmd.hasOption("n");
            boolean useMpi = cmd.hasOption("m");
            boolean sharedInput = cmd.hasOption("s");
            String singleFile = cmd.getOptionValue("f");

            // A missing or non-numeric -t used to escape as an uncaught
            // NumberFormatException; report it like any other usage error.
            String typeArg = cmd.getOptionValue("t");
            int distanceType;
            try {
                distanceType = Integer.parseInt(typeArg);
            } catch (NumberFormatException e) {
                System.out.println("Distance type (-t) must be an integer, got: " + typeArg);
                System.out.println(options.toString());
                return;
            }

            DistanceCalculator program =
                    new DistanceCalculator(vectorDir, distDir, normalizeFlag, useMpi, distanceType, sharedInput);
            if (singleFile != null) {
                program.processFile(new File(singleFile));
                return;
            }

            String print = "vector: " + vectorDir + " ,distance matrix folder: "
                    + distDir + " ,normalize: "
                    + normalizeFlag + " ,mpi: " + useMpi
                    + " ,distance type: " + distanceType
                    + " ,shared input: " + sharedInput;
            System.out.println(print);
            if (useMpi) {
                MPI.Init(args);
            }
            program.process();
            if (useMpi) {
                MPI.Finalize();
            }
        } catch (MPIException | ParseException e) {
            e.printStackTrace();
            System.out.println(options.toString());
        }
    }

    /**
     * Folder mode: selects the files this process is responsible for and runs
     * {@link #processFile(File)} on each of them from worker threads.
     *
     * @throws RuntimeException if the MPI rank or size cannot be obtained
     */
    private void process() {
        System.out.println("Starting Distance calculator...");
        File inFolder = new File(vectorFolder);
        if (!inFolder.isDirectory()) {
            System.out.println("In should be a folder: " + vectorFolder);
            return;
        }
        // create the out directory
        Utils.createDirectory(distFolder);

        int rank = 0;
        int size = 0;
        if (mpi) {
            try {
                mpiOps = new MpiOps();
                rank = mpiOps.getRank();
                size = mpiOps.getSize();
            } catch (MPIException e) {
                // Keep the cause so the underlying MPI failure stays visible.
                throw new RuntimeException("Failed to communicate", e);
            }
        }

        // listFiles() returns null on an I/O error (or if the folder vanished
        // after the isDirectory() check); this used to surface as an NPE.
        File[] listed = inFolder.listFiles();
        if (listed == null) {
            System.out.println("Failed to list files in: " + vectorFolder);
            return;
        }
        List<File> sortedFiles = new ArrayList<>(Arrays.asList(listed));
        Collections.sort(sortedFiles);

        BlockingQueue<File> files = new LinkedBlockingQueue<>();
        if (mpi && sharedInput) {
            // Deal the sorted files out round-robin: slot cycles 0..size-1 and
            // this rank keeps the files whose slot equals its own rank.
            int slot = 0;
            for (File candidate : sortedFiles) {
                if (slot == rank) {
                    files.add(candidate);
                }
                slot++;
                if (slot == size) {
                    slot = 0;
                }
            }
        } else {
            files.addAll(sortedFiles);
        }

        List<Thread> threads = new ArrayList<>();
        for (int i = 0; i < WORKER_THREADS; i++) {
            Thread t = new Thread(new Worker(files));
            t.start();
            threads.add(t);
        }
        for (Thread t : threads) {
            try {
                t.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
                // Restore the flag for callers and stop waiting; the work did
                // not finish, so the completion message is not printed.
                Thread.currentThread().interrupt();
                return;
            }
        }
        System.out.println("Distance calculator finished...");
    }

    /**
     * Drains a queue of files, running {@link #processFile(File)} on each one.
     * The queue is completely filled before any worker is started.
     */
    private class Worker implements Runnable {
        private final BlockingQueue<File> queue;

        private Worker(BlockingQueue<File> queue) {
            this.queue = queue;
        }

        @Override
        public void run() {
            // poll() instead of isEmpty()+take(): with several workers, another
            // thread may grab the last file between the two calls, and take()
            // would then block forever on a queue nobody refills.
            File next;
            while ((next = queue.poll()) != null) {
                processFile(next);
            }
        }
    }

    /**
     * Computes the distance matrix for one vector file and writes it to
     * {@code distFolder/<file name>}. Directories are skipped silently.
     *
     * <p>At most {@link #INC} vectors are read. The matrix is treated as
     * symmetric: a distance computed for (i, j) is reused for (j, i). Every row
     * written has as many columns as the input file has lines; columns beyond
     * the number of vectors read stay zero. Two histograms (vector length and
     * normalised distance) are printed to standard output for diagnostics.
     *
     * @param fileEntry the vector file to process
     * @throws RuntimeException if a normalised distance falls outside [0, 1]
     *                          or the file cannot be read
     */
    private void processFile(File fileEntry) {
        long start = System.currentTimeMillis();
        if (fileEntry.isDirectory()) {
            return;
        }
        String outFileName = distFolder + "/" + fileEntry.getName();
        System.out.println("Calculator vector file: " + fileEntry.getAbsolutePath() + " Output: " + outFileName);

        int[] histogram = new int[HISTOGRAM_BINS];
        double[] changeHistogram = new double[HISTOGRAM_BINS];
        // Double.MIN_VALUE is the smallest POSITIVE double, so it is not a valid
        // seed for a running maximum; start from the most negative finite value.
        double dmax = -Double.MAX_VALUE;
        double dmin = Double.MAX_VALUE;

        WriterWrapper writer = new WriterWrapper(outFileName, false);
        try {
            int lineCount = countLines(fileEntry);
            List<VectorPoint> vectors = Utils.readVectors(fileEntry, 0, INC - 1);
            int vectorCount = vectors.size();
            System.out.println("Reading second block: " + 0 + " : " + (INC - 1) + " read size: " + vectorCount);

            // Only rows 0..vectorCount-1 are ever touched, so size the matrices
            // to the data instead of always allocating INC rows.
            double[][] values = new double[vectorCount][lineCount];
            double[][] cachedValues = new double[vectorCount][lineCount];
            for (double[] row : cachedValues) {
                Arrays.fill(row, -1); // -1 marks "not computed yet"
            }

            for (int i = 0; i < vectorCount; i++) {
                VectorPoint sv = vectors.get(i);
                double v = VectorPoint.vectorLength(1, sv);
                for (int z = 0; z < HISTOGRAM_BINS; z++) {
                    if (v < (z + 1) * .1) {
                        changeHistogram[z]++;
                        break;
                    }
                }
                for (int j = 0; j < vectorCount; j++) {
                    // assume i,j is equal to j,i: reuse the mirrored entry if present
                    double cor = cachedValues[i][j];
                    if (cor == -1) {
                        cor = sv.correlation(vectors.get(j), distanceType);
                    }
                    if (cor > dmax) {
                        dmax = cor;
                    }
                    if (cor < dmin) {
                        dmin = cor;
                    }
                    values[j][i] = cor;
                    cachedValues[j][i] = cor;
                }
            }
            System.out.println("MAX distance is: " + dmax + " MIN Distance is: " + dmin);

            // With no positive maximum there is nothing to scale by. Dividing by
            // 1 keeps zeros at zero and still rejects negative distances below.
            double scale = dmax > 0 ? dmax : 1.0;
            for (int i = 0; i < vectorCount; i++) {
                for (int j = 0; j < values[i].length; j++) {
                    double normalized = values[i][j] / scale;
                    for (int k = 0; k < HISTOGRAM_BINS; k++) {
                        if (normalized < (k + 1.0) / 100) {
                            histogram[k]++;
                            break;
                        }
                    }
                    if (normalized < 0 || normalized > 1) {
                        System.out.println("*********************************ERROR, invalid distance*************************************");
                        throw new RuntimeException("Invalid distance");
                    }
                    writer.writeShort((short) (normalized * Short.MAX_VALUE));
                }
                writer.line();
            }
        } finally {
            // Close even when an invalid distance or a read failure aborts the run.
            writer.close();
        }

        System.out.println("MAX: " + VectorPoint.maxChange + " MIN: " + VectorPoint.minChange);
        System.out.println("Distance histo");
        for (int i = 0; i < HISTOGRAM_BINS; i++) {
            System.out.print(histogram[i] + ", ");
        }
        System.out.println();
        System.out.println("Ratio histo");
        for (int i = 0; i < HISTOGRAM_BINS; i++) {
            System.out.print(changeHistogram[i] + ", ");
        }
        System.out.println();
        System.out.println(dmax);
        long end = System.currentTimeMillis();
        System.out.println("Time: " + (end - start));
    }

    /**
     * Counts the lines of a text file.
     *
     * @param file the file to read
     * @return the number of lines returned by {@link BufferedReader#readLine()}
     * @throws RuntimeException if the file cannot be opened or read
     */
    private int countLines(File file) {
        // try-with-resources: the reader was previously never closed.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            int count = 0;
            while (reader.readLine() != null) {
                count++;
            }
            return count;
        } catch (IOException e) {
            throw new RuntimeException("Failed to read file", e);
        }
    }
}