package it.unito.geosummly.tools;
import it.unito.geosummly.BoundingBox;
import it.unito.geosummly.utils.Pair;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.Vector;
import jp.ndca.similarity.distance.Jaccard;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.math3.distribution.NormalDistribution;
public class EvaluationTools {
/**
 * Creates an evaluation helper. The constructor body is empty, so nothing
 * is initialised here.
 */
public EvaluationTools() {
}
/**
 * Builds the matrix of aggregate (frequency) values from a list of CSV
 * records. The first record (the header) is skipped, and in every other
 * record the first three columns (timestamp, latitude and longitude
 * according to the original comments) are dropped.
 *
 * @param list the CSV records, header first
 * @return one row per non-header record, holding the remaining cells
 *         parsed as doubles
 * @throws NumberFormatException if a retained cell is not a valid double
 */
public ArrayList<ArrayList<Double>> buildAggregatesFromList(
        List<CSVRecord> list) {
    ArrayList<ArrayList<Double>> rows = new ArrayList<ArrayList<Double>>();
    // row 0 is the header, so start from row 1
    for (int row = 1; row < list.size(); row++) {
        CSVRecord current = list.get(row);
        ArrayList<Double> values = new ArrayList<Double>();
        // columns 0..2 are skipped, so start from column 3
        int col = 3;
        while (col < current.size()) {
            values.add(Double.parseDouble(current.get(col)));
            col++;
        }
        rows.add(values);
    }
    return rows;
}
/**
 * Builds the matrix of single venues from a list of CSV records. The
 * first record (the header) is skipped; every cell of the other records
 * is copied as a string.
 *
 * @param list the CSV records, header first
 * @return one row of strings per non-header record
 */
public ArrayList<ArrayList<String>> buildSinglesFromList(
        List<CSVRecord> list) {
    ArrayList<ArrayList<String>> rows = new ArrayList<ArrayList<String>>();
    // row 0 is the header, so start from row 1
    int row = 1;
    while (row < list.size()) {
        CSVRecord current = list.get(row);
        ArrayList<String> cells = new ArrayList<String>();
        for (int col = 0; col < current.size(); col++) {
            cells.add(current.get(col));
        }
        rows.add(cells);
        row++;
    }
    return rows;
}
/**
 * Collects the feature names from the first CSV record (the header),
 * skipping its first column (the timestamp according to the original
 * comments).
 *
 * @param list the CSV records; the first one must be the header
 * @return the header cells from column 1 onwards
 * @throws IndexOutOfBoundsException if {@code list} is empty
 */
public ArrayList<String> getFeaturesFromListC(List<CSVRecord> list) {
    CSVRecord header = list.get(0);
    ArrayList<String> names = new ArrayList<String>();
    // column 0 is skipped, so start from column 1
    int col = 1;
    while (col < header.size()) {
        names.add(header.get(col));
        col++;
    }
    return names;
}
/**
 * Builds a random matrix of frequency values. Each of the {@code size}
 * rows has one value per entry of {@code minArray}; the value in column
 * {@code c} is a random integer (stored as a double) drawn between the
 * integer parts of {@code minArray.get(c)} and {@code maxArray.get(c)},
 * both included.
 *
 * @param size     number of rows to generate
 * @param minArray per-column lower bounds (truncated to int)
 * @param maxArray per-column upper bounds (truncated to int); must have at
 *                 least as many entries as {@code minArray}
 * @return the generated matrix
 */
public ArrayList<ArrayList<Double>> buildFrequencyRandomMatrix(int size,
        ArrayList<Double> minArray, ArrayList<Double> maxArray) {
    ArrayList<ArrayList<Double>> randomMatrix = new ArrayList<ArrayList<Double>>();
    for (int row = 0; row < size; row++) {
        ArrayList<Double> randomRow = new ArrayList<Double>();
        for (int col = 0; col < minArray.size(); col++) {
            int low = minArray.get(col).intValue();
            int high = maxArray.get(col).intValue();
            // number of admissible integers in [low, high]
            int span = high - low + 1;
            int offset = (int) (Math.random() * span);
            randomRow.add((double) (low + offset));
        }
        randomMatrix.add(randomRow);
    }
    return randomMatrix;
}
/**
 * Collects the feature names from the first CSV record (the header),
 * skipping its first seven columns. According to the original comments
 * those are: timestamp, been here, venue id, venue latitude, venue
 * longitude, centroid latitude and centroid longitude.
 *
 * @param list the CSV records; the first one must be the header
 * @return the header cells from column 7 onwards
 * @throws IndexOutOfBoundsException if {@code list} is empty
 */
public ArrayList<String> getFeaturesFromListV(List<CSVRecord> list) {
    CSVRecord header = list.get(0);
    ArrayList<String> names = new ArrayList<String>();
    // columns 0..6 are skipped, so start from column 7
    int col = 7;
    while (col < header.size()) {
        names.add(header.get(col));
        col++;
    }
    return names;
}
/**
 * Copies every cell of the first CSV record (the header) into a list.
 *
 * @param list the CSV records; the first one must be the header
 * @return all header cells, in column order
 * @throws IndexOutOfBoundsException if {@code list} is empty
 */
public ArrayList<String> getHeaderFromList(List<CSVRecord> list) {
    CSVRecord headerRecord = list.get(0);
    ArrayList<String> cells = new ArrayList<String>();
    int col = 0;
    while (col < headerRecord.size()) {
        cells.add(headerRecord.get(col));
        col++;
    }
    return cells;
}
/**
 * Reads the first column of every non-header CSV record as a long
 * timestamp.
 *
 * @param list the CSV records, header first
 * @return one timestamp per non-header record
 * @throws NumberFormatException if a first-column cell is not a valid long
 */
public ArrayList<Long> getTimestampsFromList(List<CSVRecord> list) {
    ArrayList<Long> result = new ArrayList<Long>();
    // row 0 is the header, so start from row 1
    int row = 1;
    while (row < list.size()) {
        String cell = list.get(row).get(0);
        result.add(Long.parseLong(cell));
        row++;
    }
    return result;
}
/**
 * Creates the folds for cross-validation. The result is a list of
 * {@code fnum} matrices, each holding {@code matrix.size() / fnum} records
 * drawn at random from {@code matrix} without replacement, so no record
 * position is used twice and the folds are disjoint.
 *
 * <p>The drawn records are removed from {@code matrix} itself; after the
 * call it only contains the {@code matrix.size() % fnum} records that were
 * not assigned to any fold.
 *
 * @param matrix the records to split; modified in place
 * @param fnum   the number of folds
 * @return the list of folds
 * @throws ArithmeticException if {@code fnum} is zero
 */
public ArrayList<ArrayList<ArrayList<Double>>> createFolds(
        ArrayList<ArrayList<Double>> matrix, int fnum) {
    ArrayList<ArrayList<ArrayList<Double>>> allMatrices =
            new ArrayList<ArrayList<ArrayList<Double>>>();
    int dimension = matrix.size() / fnum;
    Random random = new Random();
    for (int i = 0; i < fnum; i++) {
        ArrayList<ArrayList<Double>> ithMatrix = new ArrayList<ArrayList<Double>>();
        for (int j = 0; j < dimension; j++) {
            // random index between 0 (included) and the current
            // matrix.size() (excluded)
            int randomValue = random.nextInt(matrix.size());
            // Remove the picked record right away: this prevents drawing it
            // again and, unlike removeAll, never drops other records that
            // merely have the same content.
            ithMatrix.add(matrix.remove(randomValue));
        }
        allMatrices.add(ithMatrix);
    }
    return allMatrices;
}
/**
 * Converts every record of every holdout set to doubles, dropping its
 * first five columns. According to the original comments those columns
 * are: timestamp, been_here, venue_id, venue_latitude and venue_longitude.
 *
 * @param holdout the holdout sets, each a list of string records
 * @return the holdout sets with the cells from column 5 onwards parsed as
 *         doubles; a record with five or fewer cells becomes empty
 * @throws NumberFormatException if a retained cell is not a valid double
 */
public ArrayList<ArrayList<ArrayList<Double>>> removeVenueInformations(
        ArrayList<ArrayList<ArrayList<String>>> holdout) {
    ArrayList<ArrayList<ArrayList<Double>>> converted =
            new ArrayList<ArrayList<ArrayList<Double>>>();
    for (int s = 0; s < holdout.size(); s++) {
        ArrayList<ArrayList<String>> rawSet = holdout.get(s);
        ArrayList<ArrayList<Double>> numericSet = new ArrayList<ArrayList<Double>>();
        for (int r = 0; r < rawSet.size(); r++) {
            ArrayList<String> rawRecord = rawSet.get(r);
            ArrayList<Double> numericRecord = new ArrayList<Double>();
            // columns 0..4 hold the venue information to discard
            int col = 5;
            while (col < rawRecord.size()) {
                numericRecord.add(Double.parseDouble(rawRecord.get(col)));
                col++;
            }
            numericSet.add(numericRecord);
        }
        converted.add(numericSet);
    }
    return converted;
}
/**Get (as bounding boxes) all the distinct focal coordinates of singles*/
/*public ArrayList<BoundingBox> getFocalPoints(ArrayList<ArrayList<String>> matrix) {
ArrayList<BoundingBox> bbox=new ArrayList<BoundingBox>();
BoundingBox b=new BoundingBox();
b.setCenterLat(new BigDecimal(matrix.get(0).get(5)));
b.setCenterLng(new BigDecimal(matrix.get(0).get(6)));
bbox.add(b);
BigDecimal lat;
BigDecimal lng;
for(int i=1;i<matrix.size();i++) {
lat = new BigDecimal(matrix.get(i).get(5));
lng = new BigDecimal(matrix.get(i).get(6));
//control only the previous venue because only
//consecutive venues have the same focal points
if(!(new BigDecimal(matrix.get(i-1).get(5)).equals(lat)) ||
!(new BigDecimal(matrix.get(i-1).get(6)).equals(lng))) {
b=new BoundingBox();
b.setCenterLat(lat);
b.setCenterLng(lng);
bbox.add(b);
}
}
return bbox;
}*/
/**
 * Haversine formula implementation. Returns the distance in kilometers
 * between two points given their latitude and longitude in degrees,
 * rounded to three decimal places (half-even).
 *
 * <p>The rounding is done numerically instead of formatting the value
 * with a default-locale {@code DecimalFormat} and parsing the text back,
 * so the result does not depend on the JVM default locale.
 *
 * @param lat1 latitude of the first point, in degrees
 * @param lng1 longitude of the first point, in degrees
 * @param lat2 latitude of the second point, in degrees
 * @param lng2 longitude of the second point, in degrees
 * @return the distance in kilometers, or NaN if the computation yields NaN
 */
public double getDistance(double lat1, double lng1, double lat2, double lng2) {
    final double earthRadius = 6371; // in km
    double dLat = Math.toRadians(lat2 - lat1);
    double dLng = Math.toRadians(lng2 - lng1);
    double a = Math.sin(dLat / 2) * Math.sin(dLat / 2)
            + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2))
            * Math.sin(dLng / 2) * Math.sin(dLng / 2);
    // Floating-point error can push a marginally outside [0, 1], which
    // would make one of the square roots below NaN. NaN itself passes
    // through min/max unchanged.
    a = Math.min(1.0, Math.max(0.0, a));
    double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
    double dist = earthRadius * c;
    if (Double.isNaN(dist) || Double.isInfinite(dist)) {
        // BigDecimal cannot represent these values
        return dist;
    }
    return BigDecimal.valueOf(dist).setScale(3, RoundingMode.HALF_EVEN).doubleValue();
}
/**
* Get all the areas in (squared kilometers) of the grid cells
* by considering only the focal points
*/
/*public ArrayList<Double> getAreasFromFocalPoints(ArrayList<BoundingBox> data,
int size) {
ArrayList<Double> areas=new ArrayList<Double>();
double edgeValue = getDistance(data.get(0).getCenterLat().doubleValue(),
data.get(0).getCenterLng().doubleValue(),
data.get(1).getCenterLat().doubleValue(),
data.get(1).getCenterLng().doubleValue());
double areaValue=Math.pow(edgeValue, 2);
for(int i=0; i<size ;i++)
areas.add(areaValue);
return areas;
}*/
/**
 * Collects the area of every bounding box as a double, in input order.
 *
 * @param data the bounding boxes
 * @return {@code getArea().doubleValue()} of each bounding box
 */
public ArrayList<Double> getAreas(ArrayList<BoundingBox> data) {
    ArrayList<Double> result = new ArrayList<Double>();
    for (int i = 0; i < data.size(); i++) {
        BoundingBox box = data.get(i);
        double area = box.getArea().doubleValue();
        result.add(area);
    }
    return result;
}
/**
 * Builds a mutable list with {@code size} elements, all equal to 0.0.
 *
 * @param size the number of zeros; zero or negative gives an empty list
 * @return the new list
 */
public ArrayList<Double> buildListZero(int size) {
    // clamp so that a negative size yields an empty list
    int count = Math.max(0, size);
    return new ArrayList<Double>(Collections.nCopies(count, 0.0));
}
/**
 * Groups the venue occurrences that belong to the focal point of one cell.
 * The result has as many columns as the first record of {@code matrix}:
 * columns 0 and 1 hold the centre latitude and longitude of {@code b}, and
 * every later column holds the sum of that column over the records whose
 * first two values equal those coordinates exactly.
 *
 * @param b      the cell whose centre identifies the records to group
 * @param matrix the venue records (latitude, longitude, then occurrences)
 * @return the aggregated record for the cell
 * @throws IndexOutOfBoundsException if {@code matrix} is empty or its
 *         first record has fewer than two columns
 */
public ArrayList<Double> groupSinglesToCell(BoundingBox b, ArrayList<ArrayList<Double>> matrix) {
    // focal coordinates of the cell
    double focalLat = b.getCenterLat().doubleValue();
    double focalLng = b.getCenterLng().doubleValue();
    ArrayList<Double> cell = buildListZero(matrix.get(0).size());
    cell.set(0, focalLat);
    cell.set(1, focalLng);
    for (ArrayList<Double> venue : matrix) {
        boolean sameCell = venue.get(0) == focalLat && venue.get(1) == focalLng;
        if (!sameCell) {
            continue;
        }
        // group by summing the occurrences column by column
        for (int col = 2; col < venue.size(); col++) {
            cell.set(col, cell.get(col) + venue.get(col));
        }
    }
    return cell;
}
/**
 * Groups every fold from single venues to cells: for each fold, one
 * aggregated record is produced per bounding box by
 * {@code groupSinglesToCell}.
 *
 * @param data        the cells
 * @param allMatrices the folds of single-venue records
 * @return for each fold, the list of per-cell aggregated records, in the
 *         order of {@code data}
 */
public ArrayList<ArrayList<ArrayList<Double>>> groupFolds(
        ArrayList<BoundingBox> data,
        ArrayList<ArrayList<ArrayList<Double>>> allMatrices) {
    ArrayList<ArrayList<ArrayList<Double>>> groupedFolds =
            new ArrayList<ArrayList<ArrayList<Double>>>();
    for (int f = 0; f < allMatrices.size(); f++) {
        ArrayList<ArrayList<Double>> fold = allMatrices.get(f);
        ArrayList<ArrayList<Double>> cells = new ArrayList<ArrayList<Double>>();
        for (int c = 0; c < data.size(); c++) {
            cells.add(groupSinglesToCell(data.get(c), fold));
        }
        groupedFolds.add(cells);
    }
    return groupedFolds;
}
/**
 * Makes sure every cell of {@code data} is present in every holdout set.
 * Each set is walked in parallel with {@code data}: when position
 * {@code i} of the set is past its end, or its first two values differ
 * from the centre of the i-th bounding box, a placeholder record is
 * inserted at position {@code i}. The placeholder holds the centre
 * latitude and longitude followed by ten zeros.
 *
 * <p>The sets are modified in place and the same set objects are returned.
 *
 * @param data        the cells, in the order expected in each set
 * @param initialSets the holdout sets to complete
 * @return a new list containing the (modified) input sets
 */
public ArrayList<ArrayList<ArrayList<Double>>> checkCells(ArrayList<BoundingBox> data,
        ArrayList<ArrayList<ArrayList<Double>>> initialSets) {
    ArrayList<ArrayList<ArrayList<Double>>> completed =
            new ArrayList<ArrayList<ArrayList<Double>>>();
    for (ArrayList<ArrayList<Double>> set : initialSets) {
        for (int i = 0; i < data.size(); i++) {
            double centerLat = data.get(i).getCenterLat().doubleValue();
            double centerLng = data.get(i).getCenterLng().doubleValue();
            // Once i reaches the end of the set, every remaining cell is
            // missing and gets appended (add(i, ...) with i == set.size()).
            boolean missing = i >= set.size()
                    || !(set.get(i).get(0) == centerLat
                            && set.get(i).get(1) == centerLng);
            if (missing) {
                // NOTE(review): the placeholder width is fixed at ten
                // occurrence columns - confirm it matches the real records.
                ArrayList<Double> placeholder = buildListZero(10);
                placeholder.add(0, centerLat);
                placeholder.add(1, centerLng);
                set.add(i, placeholder);
            }
        }
        completed.add(set);
    }
    return completed;
}
/**
 * Computes the Jaccard similarity coefficient between every unordered pair
 * of holdouts and renders a textual report.
 *
 * <p>For each pair (i, j) with i &lt; j the report lists the coefficient
 * computed on the two sets of cluster labels, the coefficient computed on
 * the objects of every cluster present in either holdout (an absent
 * cluster counts as empty), and the average of the latter. A "Totals"
 * section follows with the averages over all pairs and, per cluster, the
 * average over the pairs in which the cluster appeared.
 *
 * <p>Each coefficient is computed once and reused for the report line and
 * the running sums. With fewer than two holdouts no pair exists, and the
 * two global averages are printed as {@code NaN}.
 *
 * @param holdoutList the holdouts, each mapping a cluster label to its
 *                    object ids
 * @return the report
 */
public StringBuilder computeJaccard(
        List<HashMap<String, Vector<Integer>>> holdoutList) {
    StringBuilder builder = new StringBuilder();
    Jaccard jacc = new Jaccard();
    double jaccOnLabels = 0.0;
    double jaccOnSet = 0.0;
    // per-cluster coefficients collected over all pairs
    HashMap<String, Vector<Double>> jaccOnSets = new HashMap<>();
    int iterations = 0;
    for (int i = 0; i < holdoutList.size() - 1; i++) {
        HashMap<String, Vector<Integer>> ho1 = holdoutList.get(i);
        for (int j = i + 1; j < holdoutList.size(); j++) {
            HashMap<String, Vector<Integer>> ho2 = holdoutList.get(j);
            builder.append("pair (" + i + "," + j + ")\n");
            double labelScore = jacc.calc(ho1.keySet().toArray(),
                    ho2.keySet().toArray());
            builder.append("\tjaccard_labels=" + labelScore + "\n");
            // cluster names appearing in either holdout
            Set<String> clusterNames = new HashSet<String>();
            clusterNames.addAll(ho1.keySet());
            clusterNames.addAll(ho2.keySet());
            double jaccOnPair = 0.0;
            for (String name : clusterNames) {
                Vector<Integer> ho1Objects = (ho1.get(name) == null)
                        ? new Vector<Integer>() : ho1.get(name);
                Vector<Integer> ho2Objects = (ho2.get(name) == null)
                        ? new Vector<Integer>() : ho2.get(name);
                double score = jacc.calc(ho1Objects, ho2Objects);
                builder.append("\tjaccard_on_set(" + name + ")=" + score + "\n");
                Vector<Double> scores = jaccOnSets.get(name);
                if (scores == null) {
                    scores = new Vector<>();
                    jaccOnSets.put(name, scores);
                }
                scores.add(score);
                jaccOnPair += score;
            }
            double pairAverage = jaccOnPair / clusterNames.size();
            builder.append("\tjaccard_on_set_average=" + pairAverage + "\n");
            jaccOnLabels += labelScore;
            jaccOnSet += pairAverage;
            iterations++;
        }
    }
    builder.append("#####\n# Totals\n####\n");
    builder.append("avg_jaccard_labels=" + jaccOnLabels / iterations + "\n");
    builder.append("avg_jaccard_objects=" + jaccOnSet / iterations + "\n");
    for (Entry<String, Vector<Double>> entry : jaccOnSets.entrySet()) {
        double sum = 0.0;
        for (Double d : entry.getValue()) {
            sum += d;
        }
        builder.append("avg_jaccard_cluster(" + entry.getKey() + ")="
                + sum / entry.getValue().size() + "\n");
    }
    return builder;
}
/**
 * Computes the Jaccard similarity coefficient between the two holdouts of
 * every fold of an x-fold cross validation and renders a textual report.
 *
 * <p>Both elements of each pair are cast to
 * {@code HashMap<String, Vector<Integer>>} (cluster label to object ids).
 * For each fold the report lists the coefficient computed on the two sets
 * of cluster labels, the coefficient computed on the objects of every
 * cluster present in either holdout (an absent cluster counts as empty),
 * and the average of the latter. A "Totals" section follows with the
 * averages over all folds and, per cluster, the average over the folds in
 * which the cluster appeared.
 *
 * <p>Each coefficient is computed once and reused for the report line and
 * the running sums. With an empty {@code folds} list the two global
 * averages are printed as {@code NaN}.
 *
 * @param folds the (A, B) holdout pairs, one per fold
 * @return the report
 * @throws ClassCastException if a pair element is not a {@code HashMap}
 */
public StringBuilder computeJaccard2(List<Pair<?,?>> folds)
{
    StringBuilder builder = new StringBuilder();
    Jaccard jacc = new Jaccard();
    double jaccOnLabels = 0.0;
    double jaccOnSet = 0.0;
    // per-cluster coefficients collected over all folds
    HashMap<String, Vector<Double>> jaccOnSets = new HashMap<>();
    for (int i = 0; i < folds.size(); i++)
    {
        Pair<?,?> fold = folds.get(i);
        // set A of the fold
        @SuppressWarnings("unchecked")
        HashMap<String, Vector<Integer>> ho1 =
                (HashMap<String, Vector<Integer>>) fold.getFirst();
        // set B of the fold
        @SuppressWarnings("unchecked")
        HashMap<String, Vector<Integer>> ho2 =
                (HashMap<String, Vector<Integer>>) fold.getSecond();
        builder.append("pair (A,B) of fold=" + (i+1) + "\n");
        double labelScore = jacc.calc(ho1.keySet().toArray(),
                ho2.keySet().toArray());
        builder.append("\tjaccard_labels=" + labelScore + "\n");
        // cluster names appearing in either set
        Set<String> clusterNames = new HashSet<String>();
        clusterNames.addAll(ho1.keySet());
        clusterNames.addAll(ho2.keySet());
        double jaccOnPair = 0.0;
        for (String name : clusterNames)
        {
            Vector<Integer> ho1Objects = (ho1.get(name) == null)
                    ? new Vector<Integer>() : ho1.get(name);
            Vector<Integer> ho2Objects = (ho2.get(name) == null)
                    ? new Vector<Integer>() : ho2.get(name);
            double score = jacc.calc(ho1Objects, ho2Objects);
            builder.append("\tjaccard_on_set(" + name + ")=" + score + "\n");
            Vector<Double> scores = jaccOnSets.get(name);
            if (scores == null) {
                scores = new Vector<>();
                jaccOnSets.put(name, scores);
            }
            scores.add(score);
            jaccOnPair += score;
        }
        double pairAverage = jaccOnPair / clusterNames.size();
        builder.append("\tjaccard_on_set_average=" + pairAverage + "\n");
        jaccOnLabels += labelScore;
        jaccOnSet += pairAverage;
    }
    //end x folds
    builder.append("#####\n# Totals\n####\n");
    builder.append("avg_jaccard_labels=" + jaccOnLabels / folds.size() + "\n");
    builder.append("avg_jaccard_objects=" + jaccOnSet / folds.size() + "\n");
    for (Entry<String, Vector<Double>> entry : jaccOnSets.entrySet())
    {
        double sum = 0.0;
        for (Double d : entry.getValue()) {
            sum += d;
        }
        builder.append("avg_jaccard_cluster(" + entry.getKey() + ")="
                + sum / entry.getValue().size() + "\n");
    }
    return builder;
}
/**
 * Computes, as a percentage, the ratio between the SSE of the clustering
 * output on the entire dataset ({@code cl_sse}) and the minimum SSE of the
 * correctness experiment: {@code (cl_sse * 100) / min(SSEs)}.
 *
 * <p>{@code SSEs} is sorted in place (ascending) as a side effect.
 *
 * @param SSEs   the SSE values of the correctness experiment; must not be
 *               empty
 * @param cl_sse the SSE of the clustering on the entire dataset
 * @return the ratio in percentage
 * @throws IndexOutOfBoundsException if {@code SSEs} is empty
 */
public double getSSERatio(ArrayList<Double> SSEs, double cl_sse)
{
    Collections.sort(SSEs);
    // after the ascending sort the smallest SSE is in front
    double smallest = SSEs.get(0);
    return (cl_sse * 100) / smallest;
}
/**
 * Evaluates, at {@code cl_sse}, the density of the normal distribution
 * whose mean and standard deviation are estimated from {@code SSEs}.
 *
 * <p>NOTE(review): despite the method name, the returned value is the
 * probability density at {@code cl_sse}, not a tail probability - confirm
 * that this is what callers expect.
 *
 * <p>{@code SSEs} is sorted in place (ascending) as a side effect.
 *
 * @param SSEs   the sample of SSE values
 * @param cl_sse the SSE value at which the density is evaluated
 * @return the density of the fitted normal distribution at {@code cl_sse}
 */
public double getPvalue(ArrayList<Double> SSEs, double cl_sse) {
    Collections.sort(SSEs);
    Double[] sample = SSEs.toArray(new Double[] {});
    double sampleMean = getMean(sample);
    double sampleStd = getStd(getVariance(sample, sampleMean));
    /*
    Reference coverage of a normal distribution:
    68.3% = P{ μ - σ < X < μ + σ }
    95.0% = P{ μ - 1.96 σ < X < μ + 1.96 σ }
    95.5% = P{ μ - 2 σ < X < μ + 2 σ }
    99.0% = P{ μ - 2.58 σ < X < μ + 2.58 σ }
    99.7% = P{ μ - 3 σ < X < μ + 3 σ }
    */
    //http://commons.apache.org/proper/commons-math/apidocs/org/apache/commons/math3/distribution/NormalDistribution.html
    NormalDistribution fitted = new NormalDistribution(sampleMean, sampleStd);
    return fitted.density(cl_sse);
}
/**
 * Computes the population variance of the values around the given mean:
 * the sum of the squared deviations divided by the number of values.
 *
 * @param array the values
 * @param mean  the mean to measure deviations from
 * @return the variance; NaN for an empty array
 */
private double getVariance(Double[] array, double mean)
{
    double squaredDeviations = 0;
    for (int i = 0; i < array.length; i++) {
        double deviation = mean - array[i];
        squaredDeviations += deviation * deviation;
    }
    return squaredDeviations / array.length;
}
/**
 * Relabels every feature: the first match of {@code old} (interpreted as
 * a regular expression) is replaced with {@code last}, then all round
 * parentheses are removed.
 *
 * @param old      the regular expression to replace (first match only)
 * @param last     the replacement
 * @param features the feature labels to transform
 * @return a new list with the transformed labels, in the same order
 */
public ArrayList<String> changeFeaturesLabel(String old,
        String last,
        ArrayList<String> features) {
    ArrayList<String> relabeled = new ArrayList<String>();
    for (String feature : features) {
        String renamed = feature.replaceFirst(old, last);
        // strip the parentheses around the feature name
        String stripped = renamed.replace("(", "").replace(")", "");
        relabeled.add(stripped);
    }
    return relabeled;
}
/**
 * Builds the feature labels in the form {@code s(feature)}. For the
 * {@code NORM} and {@code NOTNORM} types the labels are preceded by
 * "Latitude" and "Longitude"; for {@code MISSING} only the feature labels
 * are returned; for any other type the result is empty.
 *
 * @param type     the coordinates normalization type
 * @param s        the prefix placed before the parenthesised feature name
 * @param features the feature names
 * @return the list of labels
 */
public ArrayList<String> getFeaturesLabelNoTimestamp(
        CoordinatesNormalizationType type,
        String s,
        ArrayList<String> features) {
    ArrayList<String> labels = new ArrayList<String>();
    boolean withCoordinates = type.equals(CoordinatesNormalizationType.NORM)
            || type.equals(CoordinatesNormalizationType.NOTNORM);
    if (!withCoordinates && !type.equals(CoordinatesNormalizationType.MISSING)) {
        // unknown type: nothing to label
        return labels;
    }
    if (withCoordinates) {
        labels.add("Latitude");
        labels.add("Longitude");
    }
    for (String feature : features) {
        labels.add(s + "(" + feature + ")");
    }
    return labels;
}
/**
 * Builds a matrix of doubles from a list of CSV records. The first record
 * (the header) is skipped, and in every other record the first column
 * (the timestamp according to the original comments) is dropped.
 *
 * @param list the CSV records, header first
 * @return one row per non-header record, holding the cells from column 1
 *         onwards parsed as doubles
 * @throws NumberFormatException if a retained cell is not a valid double
 */
public ArrayList<ArrayList<Double>> build(List<CSVRecord> list) {
    ArrayList<ArrayList<Double>> rows = new ArrayList<ArrayList<Double>>();
    // row 0 is the header, so start from row 1
    int row = 1;
    while (row < list.size()) {
        CSVRecord current = list.get(row);
        ArrayList<Double> values = new ArrayList<Double>();
        // column 0 is skipped, so start from column 1
        for (int col = 1; col < current.size(); col++) {
            values.add(Double.parseDouble(current.get(col)));
        }
        rows.add(values);
        row++;
    }
    return rows;
}
/**
 * Builds the feature labels: "Latitude" and "Longitude" followed by
 * {@code s(feature)} for every feature from index 2 onwards (the first
 * two entries of {@code features} are skipped as coordinates).
 *
 * @param s        the prefix placed before the parenthesised feature name
 * @param features the feature names, coordinates first
 * @return the list of labels
 */
public ArrayList<String> getFeaturesLabel(String s, ArrayList<String> features) {
    ArrayList<String> labels = new ArrayList<String>();
    labels.add("Latitude");
    labels.add("Longitude");
    // the first two features are latitude and longitude, so start from 2
    int index = 2;
    while (index < features.size()) {
        labels.add(s + "(" + features.get(index) + ")");
        index++;
    }
    return labels;
}
/**
 * Builds a random matrix that keeps the coordinates of the input records.
 * Every output record starts with the first two values of the matching
 * input record (latitude and longitude), followed by uniformly
 * distributed random values from {@code Random.nextDouble()}. The number
 * of random values is the column count of the first input record minus
 * two.
 *
 * @param matrix the original records; must not be empty
 * @return the random matrix, one record per input record
 * @throws IndexOutOfBoundsException if {@code matrix} is empty
 */
public ArrayList<ArrayList<Double>> buildNorm(ArrayList<ArrayList<Double>> matrix) {
    Random generator = new Random();
    // the two coordinate columns are not randomised
    int featureCount = matrix.get(0).size() - 2;
    ArrayList<ArrayList<Double>> result = new ArrayList<ArrayList<Double>>();
    for (int row = 0; row < matrix.size(); row++) {
        ArrayList<Double> source = matrix.get(row);
        ArrayList<Double> randomised = new ArrayList<Double>();
        randomised.add(source.get(0)); // latitude
        randomised.add(source.get(1)); // longitude
        // nextDouble() always lies in [0, 1), so every draw is accepted
        for (int k = 0; k < featureCount; k++) {
            randomised.add(generator.nextDouble());
        }
        result.add(randomised);
    }
    return result;
}
/**
 * Builds a matrix of uniformly distributed random values from
 * {@code Random.nextDouble()}. It has the same number of rows as
 * {@code matrix} and, in every row, as many columns as the first record
 * of {@code matrix}.
 *
 * @param matrix the matrix whose dimensions are reproduced; must not be
 *               empty
 * @return the random matrix
 * @throws IndexOutOfBoundsException if {@code matrix} is empty
 */
public ArrayList<ArrayList<Double>> buildNormalizedUniformly(ArrayList<ArrayList<Double>> matrix)
{
    Random generator = new Random();
    int rowCount = matrix.size();
    int columnCount = matrix.get(0).size();
    ArrayList<ArrayList<Double>> result = new ArrayList<ArrayList<Double>>();
    int row = 0;
    while (row < rowCount) {
        ArrayList<Double> values = new ArrayList<Double>();
        int col = 0;
        while (col < columnCount) {
            values.add(generator.nextDouble());
            col++;
        }
        result.add(values);
        row++;
    }
    return result;
}
/**
 * Computes the arithmetic mean of the values.
 *
 * @param array the values
 * @return the sum of the values divided by their count; NaN for an empty
 *         array
 */
public double getMean(Double[] array)
{
    double sum = 0;
    for (Double value : array) {
        sum += value;
    }
    return sum / array.length;
}
/**
 * Computes the standard deviation as the square root of the variance.
 *
 * @param variance the variance
 * @return {@code Math.sqrt(variance)}; NaN for a negative variance
 */
public double getStd(double variance) {
    final double standardDeviation = Math.sqrt(variance);
    return standardDeviation;
}
//TODO
//yet to complete
/**
 * Splits the records into {@code num} holdout sets. Each set holds
 * {@code matrix.size() / num} records drawn at random from {@code matrix}
 * without replacement, so no record position is used twice and the sets
 * are disjoint.
 *
 * <p>The drawn records are removed from {@code matrix} itself; after the
 * call it only contains the {@code matrix.size() % num} records that were
 * not assigned to any set.
 *
 * @param matrix the records to split; modified in place
 * @param num    the number of holdout sets
 * @return the list of holdout sets
 * @throws ArithmeticException if {@code num} is zero
 */
public ArrayList<ArrayList<ArrayList<String>>> doHoldOut(
        ArrayList<ArrayList<String>> matrix,
        int num) {
    ArrayList<ArrayList<ArrayList<String>>> result =
            new ArrayList<ArrayList<ArrayList<String>>>();
    int dimension = matrix.size() / num;
    Random random = new Random();
    for (int i = 0; i < num; i++) {
        ArrayList<ArrayList<String>> ithMatrix = new ArrayList<ArrayList<String>>();
        for (int j = 0; j < dimension; j++) {
            // random index between 0 (included) and the current
            // matrix.size() (excluded)
            int randomValue = random.nextInt(matrix.size());
            // Remove the picked record right away: this prevents drawing it
            // again and, unlike removeAll, never drops other records that
            // merely have the same content.
            ithMatrix.add(matrix.remove(randomValue));
        }
        result.add(ithMatrix);
    }
    return result;
}
/**
 * Splits a density matrix into two holdout sets A and B. Both sets keep
 * the first two values of every record (latitude and longitude). Every
 * other value {@code v} is split into {@code d = |sample * v|}, stored in
 * A, and {@code v - d}, stored in B, where {@code sample} is drawn from a
 * normal distribution with mean 0.5 and standard deviation 0.1.
 *
 * @param matrix the records to split (latitude, longitude, then values)
 * @return a list with two elements: set A followed by set B
 */
public ArrayList<ArrayList<ArrayList<Double>>>
        doHoldoutDensity(ArrayList<ArrayList<Double>> matrix) {
    // Normal(0.5, 0.1): samples fall approximately within [0, 1]
    NormalDistribution sampler = new NormalDistribution(0.5, 0.1);
    ArrayList<ArrayList<Double>> firstSet = new ArrayList<ArrayList<Double>>();
    ArrayList<ArrayList<Double>> secondSet = new ArrayList<ArrayList<Double>>();
    for (int row = 0; row < matrix.size(); row++) {
        ArrayList<Double> source = matrix.get(row);
        ArrayList<Double> firstRecord = new ArrayList<Double>();
        ArrayList<Double> secondRecord = new ArrayList<Double>();
        // both sets share the coordinates of the source record
        firstRecord.add(source.get(0));
        firstRecord.add(source.get(1));
        secondRecord.add(source.get(0));
        secondRecord.add(source.get(1));
        for (int col = 2; col < source.size(); col++) {
            // the original author notes that Math.abs may not be needed
            double share = Math.abs(sampler.sample() * source.get(col));
            firstRecord.add(share);
            secondRecord.add(source.get(col) - share);
        }
        firstSet.add(firstRecord);
        secondSet.add(secondRecord);
    }
    ArrayList<ArrayList<ArrayList<Double>>> halves =
            new ArrayList<ArrayList<ArrayList<Double>>>();
    halves.add(firstSet);
    halves.add(secondSet);
    return halves;
}
}