package ids.utils;
import ids.clustering.model.Distance;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.logging.Logger;
import cern.colt.matrix.DoubleMatrix2D;
import cern.colt.matrix.impl.SparseDoubleMatrix2D;
/**
 * Collection of numeric helpers: frequency counting, basic statistics,
 * pairwise distances, small matrix utilities, and comma-separated file I/O.
 *
 * <p>Errors are generally reported by printing a message to standard output and
 * returning {@code null} / a sentinel value, which is the convention the
 * existing callers of this class rely on.
 */
@SuppressWarnings("serial")
public class CommonUtils implements Serializable {

    private Random rand;
    private boolean verbose = false;
    // Logger is not Serializable, so it is transient and re-created on demand by logger().
    private transient Logger log;

    /**
     * Creates the utility object.
     *
     * @param verbose when {@code true}, progress and search results are reported
     */
    public CommonUtils(boolean verbose) {
        rand = new Random();
        this.verbose = verbose;
        if (verbose) {
            log = Logger.getLogger(getClass().getName());
        }
    }

    /** Returns the logger, creating it lazily (e.g. after deserialization). */
    private Logger logger() {
        if (log == null) {
            log = Logger.getLogger(getClass().getName());
        }
        return log;
    }

    /** Closes a resource if it was opened, printing (not throwing) any I/O error. */
    private void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.out.println(e.getMessage());
        }
    }

    // ------------------------------------------------------------------
    // Unique values, sums, means, modes
    // ------------------------------------------------------------------

    /**
     * Finds the distinct values of an array and how often each one occurs.
     *
     * @param array input values
     * @return distinct values in ascending order together with their frequencies
     */
    public UniqueResult<Double> findUnique(double[] array) {
        Map<Double, Integer> counts = new HashMap<Double, Integer>();
        for (double value : array) {
            Integer seen = counts.get(value);
            counts.put(value, seen == null ? 1 : seen + 1);
        }
        Double[] domain = counts.keySet().toArray(new Double[counts.size()]);
        Arrays.sort(domain);
        int[] frequency = new int[domain.length];
        for (int i = 0; i < domain.length; i++) {
            frequency[i] = counts.get(domain[i]);
        }
        return new UniqueResult<Double>(domain, frequency);
    }

    /**
     * Finds the distinct values of an array and how often each one occurs.
     *
     * @param array input values
     * @return distinct values in ascending order together with their frequencies
     */
    public UniqueResult<Integer> findUnique(int[] array) {
        Map<Integer, Integer> counts = new HashMap<Integer, Integer>();
        for (int value : array) {
            Integer seen = counts.get(value);
            counts.put(value, seen == null ? 1 : seen + 1);
        }
        Integer[] domain = counts.keySet().toArray(new Integer[counts.size()]);
        Arrays.sort(domain);
        int[] frequency = new int[domain.length];
        for (int i = 0; i < domain.length; i++) {
            frequency[i] = counts.get(domain[i]);
        }
        return new UniqueResult<Integer>(domain, frequency);
    }

    /** Returns the sum of all elements (0 for an empty array). */
    public double findSum(double[] array) {
        double res = 0;
        for (double value : array) {
            res += value;
        }
        return res;
    }

    /** Returns the sum of all elements as a double (0 for an empty array). */
    public double findSum(int[] array) {
        double res = 0;
        for (int value : array) {
            res += value;
        }
        return res;
    }

    /** Returns the number of {@code true} elements. */
    public int findSum(boolean[] array) {
        int res = 0;
        for (boolean flag : array) {
            if (flag) {
                res++;
            }
        }
        return res;
    }

    /** Returns the arithmetic mean, or 0 for an empty array. */
    public double findMean(double[] array) {
        int n = array.length;
        if (n == 0) {
            return 0;
        }
        return findSum(array) / n;
    }

    /** Returns the arithmetic mean, or 0 for an empty array. */
    public double findMean(int[] array) {
        int n = array.length;
        if (n == 0) {
            return 0;
        }
        return findSum(array) / n;
    }

    /**
     * Returns the mode (most frequent value) of a vector. On ties the smallest
     * value wins, because the distinct values are scanned in ascending order.
     *
     * @param input input array; must be non-empty
     * @return mode
     */
    public double getMode(double[] input) {
        UniqueResult<Double> ur = findUnique(input);
        SearchResult<Integer> sr = getMaxValue(ur.frequency);
        return ur.domain[sr.getIndex()];
    }

    /**
     * Returns the mode (most frequent value) of a vector. On ties the smallest
     * value wins.
     *
     * @param input input array; must be non-empty
     * @return mode
     */
    public int getMode(int[] input) {
        UniqueResult<Integer> ur = findUnique(input);
        SearchResult<Integer> sr = getMaxValue(ur.frequency);
        return ur.domain[sr.getIndex()];
    }

    /**
     * Finds the column-wise mode of the matrix, i.e. the mode of each feature.
     *
     * @param data input matrix (rows are objects, columns are features)
     * @return mode of every column, or {@code null} if data is null or empty
     */
    public double[] getMode(double[][] data) {
        if (data == null) {
            return null;
        }
        int n = data.length;
        if (n == 0) {
            return null;
        }
        int m = data[0].length;
        double[] res = new double[m];
        for (int i = 0; i < m; i++) {
            res[i] = getMode(getColumn(data, i));
        }
        return res;
    }

    // ------------------------------------------------------------------
    // Distances
    // ------------------------------------------------------------------

    /**
     * Finds the maximum pairwise distance in a data set.
     *
     * <p>For {@code MATCH} the number of dimensions is returned and for
     * {@code COSINE} the value 1 is returned, without scanning the data.
     *
     * @param data input data set
     * @param distance distance measure
     * @return maximum distance in the data set, or 0 if data is null or empty
     */
    public double getMaxDistance(double[][] data, Distance distance) {
        if (data == null) {
            return 0;
        }
        int n = data.length;
        if (n == 0) {
            return 0;
        }
        int dim = data[0].length;
        if (distance == Distance.MATCH) {
            return dim;
        }
        if (distance == Distance.COSINE) {
            return 1;
        }
        // long arithmetic: n*(n-1) overflows int for n > 46341
        long pairs = (long) n * (n - 1) / 2;
        long step = (long) (pairs * 0.01);
        if (step == 0) {
            step = pairs;
        }
        long index = 0;
        double maxD = 0;
        for (int i = 0; i < n; i++) {
            for (int j = i + 1; j < n; j++) {
                index++;
                double currentD = getDistance(data[i], data[j], distance);
                if (currentD > maxD) {
                    maxD = currentD;
                }
                if (verbose && (index % step) == 0) {
                    System.out.println((double) index / pairs * 100 + "% is done.");
                }
            }
        }
        return maxD;
    }

    /**
     * Maps a distance name to the corresponding {@link Distance} constant.
     * The comparison ignores case and does not depend on the default locale.
     *
     * @param str one of MATCH, SQEUCLIDEAN, EUCLIDEAN, COSINE
     * @return the matching constant, or {@code null} if str is null or unknown
     */
    public Distance getDistanceEnum(String str) {
        if (str == null) {
            return null;
        }
        if (str.equalsIgnoreCase("MATCH")) {
            return Distance.MATCH;
        }
        if (str.equalsIgnoreCase("SQEUCLIDEAN")) {
            return Distance.SQEUCLIDEAN;
        }
        if (str.equalsIgnoreCase("EUCLIDEAN")) {
            return Distance.EUCLIDEAN;
        }
        if (str.equalsIgnoreCase("COSINE")) {
            return Distance.COSINE;
        }
        return null;
    }

    /**
     * Returns the distance between every pair of points in a data set with n objects.
     * Pairs are stored row by row: (0,1), (0,2), ..., (0,n-1), (1,2), ...
     *
     * @param data input data set with n objects
     * @param distance distance measure
     * @return distance vector of size n(n-1)/2, or {@code null} if data is empty
     * @throws IllegalArgumentException if n(n-1)/2 does not fit in a Java array
     */
    public float[] getDistance(double[][] data, Distance distance) {
        int n = data.length;
        if (n == 0) {
            return null;
        }
        long pairs = (long) n * (n - 1) / 2;
        if (pairs > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "Too many objects (" + n + ") for an in-memory distance vector");
        }
        int m = (int) pairs;
        double s = m * 32.0 / 8.0 / 1024.0 / 1000.0;
        System.out.printf("Trying to create a array of float of size %d (would take %6.2f MB)\n", m, s);
        float[] pd = new float[m];
        System.out.println("Calculating the distance..");
        // at least 1, otherwise "counter % step" divides by zero for fewer than 100 pairs
        int step = Math.max(1, (int) (m * 0.01));
        int counter = -1;
        for (int i = 0; i < n; i++) {
            for (int j = i + 1; j < n; j++) {
                counter++;
                pd[counter] = (float) getDistance(data[i], data[j], distance);
                if (verbose && (counter % step) == 0) {
                    System.out.printf("%d is done.\n", (int) (counter * 1.0 / m * 100));
                }
            }
        }
        System.out.println("Done.");
        return pd;
    }

    /**
     * Finds the distance between every pair of points in a data set with n
     * objects and writes one distance per line to a file. Useful for data sets
     * whose distance vector does not fit in memory.
     *
     * <p>Numbers are formatted with {@link Locale#ROOT} so the decimal separator
     * is always a dot. I/O errors are printed and abort the computation.
     *
     * @param data input data set with n objects
     * @param distance distance measure
     * @param filename output filename
     */
    public void getDistance(double[][] data, Distance distance, String filename) {
        int n = data.length;
        if (n == 0) {
            System.out.println("There is no element in data");
        }
        long pairs = (long) n * (n - 1) / 2;
        long step = Math.max(1L, (long) (pairs * 0.01));
        BufferedWriter out = null;
        try {
            out = new BufferedWriter(new FileWriter(filename));
            System.out.println("Calculating the distance..");
            long counter = -1;
            for (int i = 0; i < n; i++) {
                for (int j = i + 1; j < n; j++) {
                    counter++;
                    double pd = getDistance(data[i], data[j], distance);
                    out.write(String.format(Locale.ROOT, "%5.4f\n", pd));
                    if (verbose && (counter % step) == 0) {
                        System.out.printf("%d is done.\n", (int) (counter * 1.0 / pairs * 100));
                    }
                }
            }
            System.out.println("Done.");
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            closeAndReport(out);
        }
    }

    /**
     * Finds the distance between point j and every data point (including itself).
     *
     * @param j target point index
     * @param pd distance vector as produced by {@link #getDistance(double[][], Distance)}
     * @param n number of objects
     * @return array of n distances, or {@code null} if n is 0
     */
    public double[] getDistance(int j, float[] pd, int n) {
        if (n == 0) {
            return null;
        }
        double[] res = new double[n];
        for (int i = 0; i < n; i++) {
            res[i] = getDistanceFromDistanceVector(pd, i, j, n);
        }
        return res;
    }

    /**
     * Returns the distance between two objects, looked up in the distance vector pd.
     *
     * @param pd distance vector
     * @param row index of the first object (starts from 0)
     * @param col index of the second object (starts from 0)
     * @param n number of objects in the data set
     * @return distance; 0 when row equals col
     */
    public float getDistanceFromDistanceVector(float[] pd, int row, int col, int n) {
        if (row == col) {
            return 0;
        }
        int lower = Math.min(row, col);
        int upper = Math.max(row, col);
        return pd[getDistanceIndex(lower, upper, n)];
    }

    /**
     * Maps the pair (i, j) with i &lt; j to its position in the distance vector.
     * Returns -1 (after printing a message) if i is greater than j.
     */
    private int getDistanceIndex(int i, int j, int n) {
        if (i > j) {
            System.out.println("CommonUtils: getDistanceIndex: I is greater than J");
            return -1;
        }
        // long intermediates: (n-1)*i and i*(i-1) can exceed int even when the result fits
        long index = (long) (n - 1) * i - (long) i * (i - 1) / 2 + j - i - 1;
        return (int) index;
    }

    /**
     * Converts a distance vector to a symmetric distance matrix with zero diagonal.
     *
     * @param pd distance vector
     * @param n number of objects
     * @return n-by-n distance matrix, or {@code null} if n is 0 or pd is empty
     */
    public float[][] ConvertToSquareForm(float[] pd, int n) {
        if (n == 0) {
            return null;
        }
        if (pd.length == 0) {
            return null;
        }
        float[][] res = new float[n][n];
        for (int i = 0; i < n; i++) {
            for (int j = i + 1; j < n; j++) {
                float d = getDistanceFromDistanceVector(pd, i, j, n);
                res[i][j] = d;
                res[j][i] = d;
            }
        }
        return res;
    }

    /**
     * Returns the distance between every data point and a target vector.
     *
     * @param data input data
     * @param target target point
     * @param distance distance measure
     * @return one distance per row of data, or {@code null} if data is empty
     */
    public double[] getDistance(double[][] data, double[] target, Distance distance) {
        int n = data.length;
        if (n == 0) {
            return null;
        }
        double[] res = new double[n];
        for (int i = 0; i < n; i++) {
            res[i] = getDistance(data[i], target, distance);
        }
        return res;
    }

    /**
     * Returns the distance between two vectors.
     *
     * <p>MATCH counts the positions whose values differ after truncation to int.
     * COSINE is 1 minus the cosine similarity (NaN if either vector has zero norm).
     *
     * @param a first vector
     * @param b second vector
     * @param distance distance measure
     * @return the distance; -1 if the vectors differ in length or are empty;
     *         0 (after printing a message) if the measure is not recognised
     */
    public double getDistance(double[] a, double[] b, Distance distance) {
        int dim = a.length;
        if (dim != b.length) {
            System.out.println("Achtung! Input vectors have different size!");
            return -1;
        }
        if (dim == 0) {
            System.out.println("Achtung! One of the input vectors have size = 0!");
            return -1;
        }
        double pd = 0;
        if (distance == Distance.SQEUCLIDEAN || distance == Distance.EUCLIDEAN) {
            for (int i = 0; i < dim; i++) {
                double diff = a[i] - b[i];
                pd += diff * diff;
            }
            if (distance == Distance.EUCLIDEAN) {
                pd = Math.sqrt(pd);
            }
        } else if (distance == Distance.COSINE) {
            double aNorm = 0;
            double bNorm = 0;
            for (int i = 0; i < dim; i++) {
                pd += a[i] * b[i];
                aNorm += a[i] * a[i];
                bNorm += b[i] * b[i];
            }
            pd = 1 - pd / Math.sqrt(aNorm) / Math.sqrt(bNorm);
        } else if (distance == Distance.MATCH) {
            for (int i = 0; i < dim; i++) {
                if ((int) a[i] != (int) b[i]) {
                    pd++;
                }
            }
        } else {
            System.out.println("Distance is not set!");
        }
        return pd;
    }

    // ------------------------------------------------------------------
    // Column statistics
    // ------------------------------------------------------------------

    /**
     * Finds the mean of every column (dimension).
     *
     * @param data input matrix
     * @return mean vector, or {@code null} (after printing a message) if data is empty
     */
    public double[] findMeanVector(double[][] data) {
        int n = data.length;
        if (n == 0) {
            System.out.println("Error:findMeanVector: Division by zero");
            return null;
        }
        int dim = data[0].length;
        double[] mean = new double[dim];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < dim; j++) {
                mean[j] += data[i][j];
            }
        }
        for (int j = 0; j < dim; j++) {
            mean[j] = mean[j] / n;
        }
        return mean;
    }

    /**
     * Finds the sample variance (squared standard deviation, divisor n-1) of
     * every column. With a single row the divisor is 0, so the result is NaN.
     *
     * @param data input matrix
     * @param data_mean mean of every column
     * @return variance of every column, or {@code null} if data is empty
     */
    public double[] findSqStd(double[][] data, double[] data_mean) {
        int n = data.length;
        if (n == 0) {
            return null;
        }
        int m = data[0].length;
        double[] variance = new double[m];
        for (int j = 0; j < m; j++) {
            double acc = 0;
            for (int i = 0; i < n; i++) {
                double diff = data[i][j] - data_mean[j];
                acc += diff * diff;
            }
            variance[j] = acc / (n - 1);
        }
        return variance;
    }

    /**
     * Finds the sample variance of every column, computing the column means first.
     *
     * @param data input matrix
     * @return variance of every column, or {@code null} if data is empty
     */
    public double[] findSqStd(double[][] data) {
        int n = data.length;
        if (n == 0) {
            return null;
        }
        return findSqStd(data, findMeanVector(data));
    }

    /**
     * Finds the sample standard deviation of every column.
     *
     * @param data input matrix
     * @param mean_data mean of every column
     * @return standard deviation of every column, or {@code null} if data is empty
     */
    public double[] findStd(double[][] data, double[] mean_data) {
        int n = data.length;
        if (n == 0) {
            return null;
        }
        double[] res = findSqStd(data, mean_data);
        for (int j = 0; j < res.length; j++) {
            res[j] = Math.sqrt(res[j]);
        }
        return res;
    }

    /**
     * Finds the sample standard deviation of every column.
     *
     * @param data input matrix
     * @return standard deviation of every column, or {@code null} if data is empty
     */
    public double[] findStd(double[][] data) {
        int n = data.length;
        if (n == 0) {
            return null;
        }
        double[] res = findSqStd(data);
        for (int j = 0; j < res.length; j++) {
            res[j] = Math.sqrt(res[j]);
        }
        return res;
    }

    /**
     * Scales the first dim elements of a vector to unit Euclidean length.
     * A zero vector yields NaN elements (division by zero).
     *
     * @param input input vector
     * @param dim number of leading elements to use
     * @return normalized vector of length dim
     */
    public double[] findNorm(double[] input, int dim) {
        double sumSq = 0;
        for (int i = 0; i < dim; i++) {
            sumSq += input[i] * input[i];
        }
        double length = Math.sqrt(sumSq);
        double[] res = new double[dim];
        for (int i = 0; i < dim; i++) {
            res[i] = input[i] / length;
        }
        return res;
    }

    // ------------------------------------------------------------------
    // Searching
    // ------------------------------------------------------------------

    /**
     * Returns every key of the map whose value equals the given value.
     * A {@code null} value matches entries mapped to {@code null}.
     *
     * @param map map to search
     * @param value value to look for
     * @return set of matching keys (possibly empty)
     */
    public static <T, E> Set<T> getKeysByValue(Map<T, E> map, E value) {
        Set<T> keys = new HashSet<T>();
        for (Entry<T, E> entry : map.entrySet()) {
            E candidate = entry.getValue();
            boolean same = (value == null) ? candidate == null : value.equals(candidate);
            if (same) {
                keys.add(entry.getKey());
            }
        }
        return keys;
    }

    /**
     * Returns the set of indices i for which {@code input[i] == value}.
     *
     * @param input input vector
     * @param value value to look for
     * @return set of matching indices
     */
    public Set<Integer> getIndicesByValue(int[] input, int value) {
        Set<Integer> keys = new HashSet<Integer>();
        for (int i = 0; i < input.length; i++) {
            if (input[i] == value) {
                keys.add(i);
            }
        }
        return keys;
    }

    /**
     * Returns a mask that is {@code true} where the element equals the value.
     */
    public boolean[] getIndexByValue(int[] input, int value) {
        boolean[] mask = new boolean[input.length];
        for (int i = 0; i < input.length; i++) {
            mask[i] = input[i] == value;
        }
        return mask;
    }

    /**
     * Returns a mask that is {@code true} where the element equals the value
     * (exact floating-point comparison).
     */
    public boolean[] getIndexByValue(double[] input, double value) {
        boolean[] mask = new boolean[input.length];
        for (int i = 0; i < input.length; i++) {
            mask[i] = input[i] == value;
        }
        return mask;
    }

    /**
     * Returns true for element i if and only if {@code a <= input[i] < b}.
     *
     * @param input input array
     * @param a lower bound (inclusive)
     * @param b upper bound (exclusive)
     * @return mask, or {@code null} if input is empty
     */
    public boolean[] findValueBetweenAandB(double[] input, double a, double b) {
        int n = input.length;
        if (n == 0) {
            return null;
        }
        boolean[] res = new boolean[n];
        for (int i = 0; i < n; i++) {
            res[i] = a <= input[i] && input[i] < b;
        }
        return res;
    }

    /** Returns {@code input.get(index[i])} for every i. */
    public int[] getElementsByIndeces(List<Integer> input, Integer[] index) {
        int[] res = new int[index.length];
        for (int i = 0; i < index.length; i++) {
            res[i] = input.get(index[i]);
        }
        return res;
    }

    // ------------------------------------------------------------------
    // Maximum and minimum
    // ------------------------------------------------------------------

    /**
     * Finds the largest element and the index of its first occurrence.
     *
     * @param input input array
     * @return value and index; for an empty array the value is
     *         {@code Integer.MIN_VALUE} and the index is -1
     */
    public SearchResult<Integer> getMaxValue(int[] input) {
        int value = Integer.MIN_VALUE;
        int index = -1;
        for (int i = 0; i < input.length; i++) {
            // "index < 0" accepts the first element even when it equals the seed value
            if (index < 0 || input[i] > value) {
                value = input[i];
                index = i;
            }
        }
        if (verbose) {
            logger().info("Maximum value of array input[" + index + "] is " + value);
        }
        return new SearchResult<Integer>(value, index);
    }

    /**
     * Finds the largest element and the index of its first occurrence. NaN
     * elements are ignored.
     *
     * @param input input array
     * @return value and index; if there is no comparable element the value is
     *         negative infinity and the index is -1
     */
    public SearchResult<Double> getMaxValue(double[] input) {
        // Double.MIN_VALUE is the smallest POSITIVE double, so it must not be used as the seed.
        double value = Double.NEGATIVE_INFINITY;
        int index = -1;
        for (int i = 0; i < input.length; i++) {
            // second clause lets an array consisting only of -Infinity report index 0
            if (input[i] > value || (index < 0 && input[i] == value)) {
                value = input[i];
                index = i;
            }
        }
        if (verbose) {
            logger().info("Maximum value of array input[" + index + "] is " + value);
        }
        return new SearchResult<Double>(value, index);
    }

    /**
     * Finds the maximum value in each row of the input matrix.
     *
     * @param input input matrix
     * @return maximum of every row, or {@code null} if the matrix has no rows
     *         or its first row has no columns
     */
    public double[] getMaxValue(double[][] input) {
        int rows = input.length;
        if (rows == 0 || input[0].length == 0) {
            return null;
        }
        double[] res = new double[rows];
        for (int i = 0; i < rows; i++) {
            res[i] = getMaxValue(input[i]).getValue();
        }
        return res;
    }

    /**
     * Finds the maximum along a dimension of the matrix.
     *
     * @param input input matrix
     * @param dim 1 for the maximum of every column, 2 for the maximum of every row
     * @return vector of maxima, or {@code null} if dim is invalid or the matrix is empty
     */
    public double[] getMaxValue(double[][] input, int dim) {
        if (dim > 2 || dim < 1) {
            System.out.println("Cannot work with such dimensions: " + dim);
            return null;
        }
        int rows = input.length;
        if (rows == 0 || input[0].length == 0) {
            return null;
        }
        if (dim == 1) {
            return getMaxValue(transpose(input));
        }
        return getMaxValue(input);
    }

    /**
     * Finds the minimum value in the input matrix.
     *
     * @param data input matrix
     * @return minimum value; 0 if the matrix has no rows
     */
    public double getMinValue(double[][] data) {
        int n = data.length;
        if (n == 0) {
            return 0;
        }
        int dim = data[0].length;
        double res = Double.MAX_VALUE;
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < dim; j++) {
                if (data[i][j] < res) {
                    res = data[i][j];
                }
            }
        }
        return res;
    }

    /**
     * Finds the minimum value of each row of the input matrix.
     *
     * @param data input matrix; every row must be non-empty
     * @return array of row minima, or {@code null} if the matrix has no rows
     */
    public double[] getMinValueInRows(double[][] data) {
        int n = data.length;
        if (n == 0) {
            return null;
        }
        double[] res = new double[n];
        for (int i = 0; i < n; i++) {
            res[i] = getMinValue(data[i]).getValue();
        }
        return res;
    }

    /**
     * Finds the minimum value of a vector.
     *
     * @param input input vector
     * @return minimum; -1.0 (after printing a message) if input is null; 0 if it is empty
     */
    public double getMin(double[] input) {
        if (input == null) {
            System.out.println("CommonUtilities: getMinValue: The inpur vector is null");
            return -1.0;
        }
        if (input.length == 0) {
            return .0;
        }
        double minValue = Double.MAX_VALUE;
        for (double value : input) {
            if (value < minValue) {
                minValue = value;
            }
        }
        return minValue;
    }

    /**
     * Finds the smallest element and the index of its first occurrence. NaN
     * elements are ignored.
     *
     * @param input input array
     * @return value and index, or {@code null} if input is empty
     */
    public SearchResult<Double> getMinValue(double[] input) {
        int n = input.length;
        if (n == 0) {
            return null;
        }
        double value = Double.POSITIVE_INFINITY;
        int index = -1;
        for (int i = 0; i < n; i++) {
            // second clause lets an array consisting only of +Infinity report index 0
            if (input[i] < value || (index < 0 && input[i] == value)) {
                value = input[i];
                index = i;
            }
        }
        if (verbose) {
            logger().info("Minimum value of array input[" + index + "] is " + value);
        }
        return new SearchResult<Double>(value, index);
    }

    // ------------------------------------------------------------------
    // Element-wise vector operations
    // ------------------------------------------------------------------

    /**
     * Performs the element-wise logical AND of two boolean arrays.
     *
     * @param a first logical array
     * @param b second logical array
     * @return a AND b, or {@code null} if the lengths differ
     */
    public boolean[] findAandB(boolean[] a, boolean[] b) {
        int n = a.length;
        if (n != b.length) {
            return null;
        }
        boolean[] ab = new boolean[n];
        for (int i = 0; i < n; i++) {
            ab[i] = a[i] && b[i];
        }
        return ab;
    }

    /**
     * Adds the integer a to each element of the vector.
     *
     * @param input input vector
     * @param a value to add
     * @return new vector, or {@code null} if input is null or empty
     */
    public int[] addToVector(int[] input, int a) {
        if (input == null) {
            return null;
        }
        int n = input.length;
        if (n == 0) {
            return null;
        }
        int[] res = new int[n];
        for (int i = 0; i < n; i++) {
            res[i] = input[i] + a;
        }
        return res;
    }

    // ------------------------------------------------------------------
    // Matrix helpers
    // ------------------------------------------------------------------

    /**
     * Copies the selected columns of a matrix.
     *
     * @param data input matrix
     * @param indices column indices, in output order
     * @return matrix with the selected columns, or {@code null} (after printing
     *         a message) if data or indices is empty
     */
    public double[][] getColumns(double[][] data, int[] indices) {
        int n = data.length;
        if (n == 0) {
            System.out.println("CommonUtils: getColumns: No data records");
            return null;
        }
        int dim = indices.length;
        if (dim == 0) {
            System.out.println("CommonUtils: getColumns: No column indices");
            return null;
        }
        double[][] res = new double[n][dim];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < dim; j++) {
                res[i][j] = data[i][indices[j]];
            }
        }
        return res;
    }

    /**
     * Copies one column of a matrix.
     *
     * @param data input matrix
     * @param index index of the column
     * @return the column, or {@code null} (after printing a message) if data is
     *         empty or index is negative
     */
    public double[] getColumn(double[][] data, int index) {
        int n = data.length;
        if (n == 0) {
            System.out.println("CommonUtils: getColumn: No data records");
            return null;
        }
        if (index < 0) {
            System.out.println("CommonUtils: getColumn: Column index is not valid, less than 0");
            return null;
        }
        double[] res = new double[n];
        for (int i = 0; i < n; i++) {
            res[i] = data[i][index];
        }
        return res;
    }

    /**
     * Copies one column of a matrix.
     *
     * @param data input matrix
     * @param index index of the column
     * @return the column, or {@code null} (after printing a message) if data is
     *         empty or index is negative
     */
    public int[] getColumn(int[][] data, int index) {
        int n = data.length;
        if (n == 0) {
            System.out.println("CommonUtils: getColumn: No data records");
            return null;
        }
        if (index < 0) {
            System.out.println("CommonUtils: getColumn: Column index is not valid, less than 0");
            return null;
        }
        int[] res = new int[n];
        for (int i = 0; i < n; i++) {
            res[i] = data[i][index];
        }
        return res;
    }

    /**
     * Writes a vector into one column of a matrix, in place. If the lengths
     * differ a message is printed and only the rows present in both are filled.
     *
     * @param data matrix to modify
     * @param column values to write
     * @param index index of the column to overwrite
     */
    public void fillColumn(int[][] data, int[] column, int index) {
        int n = column.length;
        if (data.length != n) {
            System.out.println("Input matrix and data column has to be the same length");
        }
        int rows = Math.min(n, data.length);
        for (int i = 0; i < rows; i++) {
            data[i][index] = column[i];
        }
    }

    /**
     * Returns the rows selected by rowIndices. The returned rows are the same
     * array objects as in data (they are not copied).
     *
     * @param data input data
     * @param rowIndices array of row indices
     * @return selected rows, or {@code null} if rowIndices is null or data is empty
     */
    public double[][] getRows(double[][] data, int[] rowIndices) {
        if (rowIndices == null) {
            return null;
        }
        if (data.length == 0) {
            return null;
        }
        int nRows = rowIndices.length;
        // only the outer array is allocated; each slot references an existing row
        double[][] res = new double[nRows][];
        for (int i = 0; i < nRows; i++) {
            res[i] = data[rowIndices[i]];
        }
        return res;
    }

    /**
     * Transposes a rectangular matrix.
     *
     * @param input input matrix (n rows, m columns)
     * @return m-by-n transpose; an empty matrix if input has no rows
     */
    public double[][] transpose(double[][] input) {
        int n = input.length;
        if (n == 0) {
            return new double[0][0];
        }
        int m = input[0].length;
        double[][] res = new double[m][n];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < m; j++) {
                res[j][i] = input[i][j];
            }
        }
        return res;
    }

    /**
     * Inverts the matrix element-wise, so {@code y[i][j] = 1/x[i][j]}. Zero
     * elements produce positive infinity and a printed message.
     *
     * @param x input matrix
     * @return element-wise reciprocal, or {@code null} if x has no rows or columns
     */
    public double[][] invert(double[][] x) {
        int n = x.length;
        if (n == 0) {
            return null;
        }
        int dim = x[0].length;
        if (dim == 0) {
            return null;
        }
        double[][] y = new double[n][dim];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < dim; j++) {
                if (x[i][j] != 0) {
                    y[i][j] = 1 / x[i][j];
                } else {
                    System.out.println("CommonUtils: invert: Divizion by Zero!");
                    y[i][j] = Double.POSITIVE_INFINITY;
                }
            }
        }
        return y;
    }

    /**
     * Counts the values of x that fall between consecutive elements of edges
     * (which must be monotonically non-decreasing). {@code count[k]} is the
     * number of values with {@code edges[k] <= v < edges[k+1]}; the last bin
     * counts values equal to the last edge. Values outside the edges and NaN
     * values are not counted.
     *
     * @param x values to bin
     * @param edges bin edges
     * @return one count per edge, or {@code null} if x is empty
     */
    public int[] histc(double[] x, double[] edges) {
        int n = x.length;
        if (n == 0) {
            return null;
        }
        int nEdges = edges.length;
        int[] count = new int[nEdges];
        if (nEdges == 0) {
            return count;
        }
        double[] sorted = x.clone();
        Arrays.sort(sorted);
        int k = 0;
        for (int i = 0; i < n; i++) {
            double v = sorted[i];
            if (Double.isNaN(v)) {
                break; // Arrays.sort places NaN last, so nothing countable follows
            }
            if (v < edges[0]) {
                continue; // below the first edge: skip it, later values may still be in range
            }
            // values are ascending, so the bin index only ever moves forward
            while (k + 1 < nEdges && v >= edges[k + 1]) {
                k++;
            }
            if (k + 1 < nEdges) {
                count[k]++;
            } else if (v == edges[k]) {
                count[k]++;
            } else {
                break; // above the last edge, and so is everything after it
            }
        }
        return count;
    }

    /**
     * Generates 100 linearly spaced values from a to b.
     *
     * @param a first value
     * @param b last value
     * @return vector of 100 values
     */
    public double[] linspace(double a, double b) {
        return linspace(a, b, 100);
    }

    /**
     * Generates n linearly spaced values from a to b (both included).
     *
     * @param a first value
     * @param b last value
     * @param n number of values
     * @return vector of n values; {@code {a}} if n is 1; empty if n is not positive
     */
    public double[] linspace(double a, double b, int n) {
        if (n <= 0) {
            return new double[0];
        }
        double[] res = new double[n];
        res[0] = a;
        if (n == 1) {
            return res;
        }
        double step = (b - a) / (n - 1);
        // a + i*step avoids the rounding drift of repeatedly adding step
        for (int i = 1; i < n - 1; i++) {
            res[i] = a + i * step;
        }
        res[n - 1] = b;
        return res;
    }

    // ------------------------------------------------------------------
    // Permutations
    // ------------------------------------------------------------------

    /**
     * Draws k distinct integers from [0, n) in random order.
     *
     * @param n size of the range to draw from
     * @param k number of values to draw
     * @return k distinct values
     * @throws IllegalArgumentException if k is greater than n
     */
    public int[] getRandomPermutation(int n, int k) {
        if (k > n) {
            // without this check the rejection loop below could never finish
            throw new IllegalArgumentException(
                    "Cannot draw " + k + " distinct values from [0, " + n + ")");
        }
        int[] res = new int[k];
        Set<Integer> used = new HashSet<Integer>();
        for (int i = 0; i < k; i++) {
            int r;
            do {
                r = rand.nextInt(n);
            } while (!used.add(r)); // add() returns false if r was already drawn
            res[i] = r;
        }
        return res;
    }

    /**
     * Checks whether target occurs among the first n elements of the array.
     */
    public boolean isContained(int[] array, int n, int target) {
        for (int i = 0; i < n; i++) {
            if (array[i] == target) {
                return true;
            }
        }
        return false;
    }

    // ------------------------------------------------------------------
    // Input
    // ------------------------------------------------------------------

    /**
     * Reads a comma-separated data file.
     *
     * @param fileName name of the data file
     * @param n number of objects (lines) to read
     * @param dim number of dimensions (values per line) to read
     * @return data matrix double[n][dim]; rows that could not be read stay zero
     */
    public double[][] readDataFile(String fileName, int n, int dim) {
        double[][] res = new double[n][dim];
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(fileName));
            for (int i = 0; i < n; i++) {
                String line = in.readLine();
                if (line == null) {
                    System.out.println("CommonUtils: readDataFile: " + fileName
                            + " has only " + i + " lines, expected " + n);
                    break;
                }
                String[] items = line.split(",");
                for (int j = 0; j < dim; j++) {
                    res[i][j] = Double.parseDouble(items[j]);
                }
            }
            System.out.println("File has been read: " + fileName);
            System.out.println("n: " + n + ", dim: " + dim);
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            closeAndReport(in);
        }
        return res;
    }

    /**
     * Reads a whole comma-separated data file. The number of columns is taken
     * from the first line.
     *
     * @param fileName name of the data file
     * @return data matrix, or {@code null} if the file is empty or cannot be read
     */
    public double[][] readDataFile(String fileName) {
        double[][] res = null;
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(fileName));
            String line = in.readLine();
            if (line == null) {
                System.out.println("CommonUtils: readDataFile: File is empty: " + fileName);
                return null;
            }
            int dim = line.split(",").length;
            List<double[]> rows = new ArrayList<double[]>();
            while (line != null) {
                String[] items = line.split(",");
                double[] row = new double[dim];
                for (int j = 0; j < dim; j++) {
                    row[j] = Double.parseDouble(items[j]);
                }
                rows.add(row);
                line = in.readLine();
            }
            int n = rows.size();
            res = rows.toArray(new double[n][]);
            System.out.println("File has been read: " + fileName);
            System.out.println("n: " + n + ", dim: " + dim);
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            closeAndReport(in);
        }
        return res;
    }

    /**
     * Reads a file holding one integer per line. Surrounding whitespace and
     * blank lines are ignored.
     *
     * @param fileName name of the file
     * @return the values, or {@code null} if none were read
     */
    public int[] readVectorFromFile(String fileName) {
        List<Integer> values = new ArrayList<Integer>();
        int[] res = null;
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(fileName));
            String line;
            while ((line = in.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.length() == 0) {
                    continue;
                }
                values.add(Integer.parseInt(trimmed));
            }
            int n = values.size();
            if (n > 0) {
                res = new int[n];
                for (int i = 0; i < n; i++) {
                    res[i] = values.get(i);
                }
                System.out.println("File has been read: " + fileName);
                System.out.println("n: " + n);
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            closeAndReport(in);
        }
        return res;
    }

    /**
     * Reads a comma-separated data file into a sparse matrix, echoing every
     * value to standard output.
     *
     * @param fileName name of the data file
     * @param n number of lines to read
     * @param dim number of values per line to read
     * @return n-by-dim sparse matrix; rows that could not be read stay zero
     */
    public DoubleMatrix2D readSparseDataFile(String fileName, int n, int dim) {
        DoubleMatrix2D res = new SparseDoubleMatrix2D(n, dim);
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(fileName));
            for (int i = 0; i < n; i++) {
                String line = in.readLine();
                if (line == null) {
                    System.out.println("CommonUtils: readSparseDataFile: " + fileName
                            + " has only " + i + " lines, expected " + n);
                    break;
                }
                System.out.print((i + 1) + ": ");
                String[] items = line.split(",");
                for (int j = 0; j < dim; j++) {
                    double value = Double.parseDouble(items[j]);
                    res.set(i, j, value);
                    System.out.print(value + " ");
                }
                System.out.print("\n");
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            closeAndReport(in);
        }
        return res;
    }

    // ------------------------------------------------------------------
    // Output to the console
    // ------------------------------------------------------------------

    /** Prints every entry of the map as "key: value", in ascending key order. */
    public void printMap(Map<Integer, Double> a) {
        Integer[] keys = a.keySet().toArray(new Integer[a.size()]);
        Arrays.sort(keys);
        for (Integer key : keys) {
            System.out.println(key + ": " + a.get(key));
        }
    }

    /** Prints every element as "index: value". */
    public void printVector(int[] a) {
        for (int i = 0; i < a.length; i++) {
            System.out.println(i + ": " + a[i]);
        }
    }

    /** Prints every element as "index: value". */
    public void printVector(Integer[] a) {
        for (int i = 0; i < a.length; i++) {
            System.out.println(i + ": " + a[i]);
        }
    }

    /** Prints every element as "index: value". */
    public void printVector(float[] a) {
        for (int i = 0; i < a.length; i++) {
            System.out.println(i + ": " + a[i]);
        }
    }

    /** Prints every element as "index: value". */
    public void printVector(double[] a) {
        for (int i = 0; i < a.length; i++) {
            System.out.println(i + ": " + a[i]);
        }
    }

    /** Prints every element as "index: value". */
    public void printVector(Double[] a) {
        for (int i = 0; i < a.length; i++) {
            System.out.println(i + ": " + a[i]);
        }
    }

    /** Prints the top-left nrows-by-ncols part of the matrix, tab separated. */
    public void printMatrix(double[][] d, int nrows, int ncols) {
        for (int i = 0; i < nrows; i++) {
            for (int j = 0; j < ncols; j++) {
                System.out.printf("%5.4f\t", d[i][j]);
            }
            System.out.print("\n");
        }
    }

    /** Prints the whole matrix, tab separated; prints nothing if it is empty. */
    public void printMatrix(double[][] d) {
        int rows = d.length;
        if (rows == 0) {
            return;
        }
        int cols = d[0].length;
        if (cols == 0) {
            return;
        }
        printMatrix(d, rows, cols);
    }

    /** Prints the whole matrix, tab separated; prints nothing if it has no rows. */
    public void printMatrix(float[][] d) {
        int n = d.length;
        if (n == 0) {
            return;
        }
        int m = d[0].length;
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < m; j++) {
                System.out.printf("%5.4f\t", d[i][j]);
            }
            System.out.print("\n");
        }
    }

    /** Prints the top-left nrows-by-ncols part of the matrix, tab separated. */
    public void printMatrix(DoubleMatrix2D d, int nrows, int ncols) {
        for (int i = 0; i < nrows; i++) {
            for (int j = 0; j < ncols; j++) {
                System.out.print(d.getQuick(i, j) + "\t");
            }
            System.out.print("\n");
        }
    }

    /** Prints the whole matrix, tab separated; prints nothing if it is empty. */
    public void printMatrix(int[][] d) {
        int nrows = d.length;
        if (nrows == 0) {
            return;
        }
        int ncols = d[0].length;
        if (ncols == 0) {
            return;
        }
        for (int i = 0; i < nrows; i++) {
            for (int j = 0; j < ncols; j++) {
                System.out.printf("%d\t", d[i][j]);
            }
            System.out.print("\n");
        }
    }

    // ------------------------------------------------------------------
    // Data preparation
    // ------------------------------------------------------------------

    /**
     * Fills every element of the matrix with the value, in place.
     *
     * @param data matrix to fill
     * @param value value to store
     */
    public void fillMatrix(double[][] data, double value) {
        for (double[] row : data) {
            Arrays.fill(row, value);
        }
    }

    /**
     * Returns a copy of the data with its rows in random order.
     *
     * @param data input data
     * @return shuffled copy, or {@code null} if data is empty
     */
    public double[][] shuffleData(double[][] data) {
        int n = data.length;
        if (n == 0) {
            return null;
        }
        int[] order = getRandomPermutation(n, n);
        double[][] res = new double[n][];
        for (int i = 0; i < n; i++) {
            res[i] = data[order[i]].clone();
        }
        return res;
    }

    /**
     * Z-normalizes the data set column by column: {@code (x - mean) / std},
     * using the sample standard deviation. A column whose standard deviation
     * is exactly 0 (constant feature) is mapped to 0 instead of NaN.
     *
     * @param data input data
     * @return normalized copy, or {@code null} if data is empty
     */
    public double[][] znormData(double[][] data) {
        int n = data.length;
        if (n == 0) {
            return null;
        }
        int m = data[0].length;
        double[] mean = findMeanVector(data);
        double[] std = findStd(data, mean);
        double[][] res = new double[n][m];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < m; j++) {
                double centered = data[i][j] - mean[j];
                res[i][j] = (std[j] == 0) ? 0.0 : centered / std[j];
            }
        }
        return res;
    }

    // ------------------------------------------------------------------
    // Output to disk
    // ------------------------------------------------------------------

    /**
     * Writes a matrix as comma-separated text, one row per line, four decimals
     * per value. Numbers are formatted with {@link Locale#ROOT} so the decimal
     * separator never collides with the column separator. I/O errors are printed.
     *
     * @param data matrix to write
     * @param filename output file
     */
    public void saveToFile(double[][] data, String filename) {
        BufferedWriter out = null;
        try {
            out = new BufferedWriter(new FileWriter(filename));
            StringBuilder line = new StringBuilder();
            for (double[] row : data) {
                line.setLength(0);
                for (int j = 0; j < row.length; j++) {
                    if (j > 0) {
                        line.append(',');
                    }
                    line.append(String.format(Locale.ROOT, "%5.4f", row[j]));
                }
                line.append('\n');
                out.write(line.toString());
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            closeAndReport(out);
        }
    }

    /**
     * Writes a vector as text, one value per line with four decimals, using
     * {@link Locale#ROOT} number formatting. I/O errors are printed.
     *
     * @param data vector to write
     * @param filename output file
     */
    public void saveToFile(double[] data, String filename) {
        if (verbose) {
            System.out.println("Saving data to " + filename);
        }
        BufferedWriter out = null;
        try {
            out = new BufferedWriter(new FileWriter(filename));
            for (double value : data) {
                out.write(String.format(Locale.ROOT, "%5.4f", value) + "\n");
            }
            if (verbose) {
                System.out.println("Done saving data");
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            closeAndReport(out);
        }
    }

    /**
     * Writes a vector as text, one value per line with four decimals, using
     * {@link Locale#ROOT} number formatting. I/O errors are printed.
     *
     * @param data vector to write
     * @param filename output file
     */
    public void saveToFile(float[] data, String filename) {
        if (verbose) {
            System.out.println("Saving data to " + filename);
        }
        BufferedWriter out = null;
        try {
            out = new BufferedWriter(new FileWriter(filename));
            for (float value : data) {
                out.write(String.format(Locale.ROOT, "%5.4f", value) + "\n");
            }
            if (verbose) {
                System.out.println("Done saving data");
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            closeAndReport(out);
        }
    }
}