package it.unito.geosummly.tools;

import it.unito.geosummly.BoundingBox;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.csv.CSVRecord;

/**
 * Helpers used while importing a dataset of single venues: grouping the
 * venue occurrences per grid cell, turning the resulting frequencies into
 * densities, normalizing them in [0,1] and building the matching labels.
 * <p>
 * Matrices are lists of rows; each row is a list of {@code Double} values.
 * Unless stated otherwise, the first two columns of a row hold the focal
 * latitude and longitude and the remaining columns hold one value per
 * venue category.
 */
public class ImportTools {

    /**
     * Get a list with all elements equal to zero.
     *
     * @param size number of zeros to create; a value &lt;= 0 yields an empty list
     * @return a new mutable list containing {@code size} times the value 0.0
     */
    public ArrayList<Double> buildListZero(int size) {
        ArrayList<Double> zeros = new ArrayList<Double>();
        for (int i = 0; i < size; i++) {
            zeros.add(0.0);
        }
        return zeros;
    }

    /**
     * Get all the areas of the grid cells.
     *
     * @param data the grid cells
     * @return the value of {@code getArea()} of every cell, in the same order
     */
    public ArrayList<Double> getAreas(ArrayList<BoundingBox> data) {
        ArrayList<Double> areas = new ArrayList<Double>(data.size());
        for (BoundingBox cell : data) {
            areas.add(cell.getArea().doubleValue());
        }
        return areas;
    }

    /**
     * Get all the areas of the grid cells by considering only the focal points.
     * <p>
     * The edge of a cell is taken to be the distance (see
     * {@link #getDistance(double, double, double, double)}) between the centers
     * of the first two elements of {@code data}; the area is the square of that
     * edge and the same value is used for every cell.
     *
     * @param data focal points; must contain at least two elements, otherwise
     *             an {@link IndexOutOfBoundsException} is thrown
     * @param size number of areas to return
     * @return a list holding {@code size} times the computed area
     */
    public ArrayList<Double> getAreasFromFocalPoints(ArrayList<BoundingBox> data, int size) {
        BoundingBox first = data.get(0);
        BoundingBox second = data.get(1);
        double edgeValue = getDistance(
                first.getCenterLat().doubleValue(),
                first.getCenterLng().doubleValue(),
                second.getCenterLat().doubleValue(),
                second.getCenterLng().doubleValue());
        double areaValue = Math.pow(edgeValue, 2);

        ArrayList<Double> areas = new ArrayList<Double>();
        for (int i = 0; i < size; i++) {
            areas.add(areaValue);
        }
        return areas;
    }

    /**
     * Get (as bounding boxes) the focal coordinates of singles.
     * <p>
     * Columns 2 and 3 of each row are read as latitude and longitude. A new
     * bounding box is emitted every time the coordinates differ from those of
     * the previous row, so only consecutive duplicates are collapsed.
     * <p>
     * NOTE(review): {@link #groupSinglesToCell} reads the focal coordinates
     * from columns 0 and 1 instead; confirm that callers pass a matrix whose
     * layout matches the column indexes used here.
     *
     * @param matrix rows of single venues
     * @return one bounding box per run of equal coordinates; empty when
     *         {@code matrix} is empty
     */
    public ArrayList<BoundingBox> getFocalPoints(ArrayList<ArrayList<Double>> matrix) {
        ArrayList<BoundingBox> bbox = new ArrayList<BoundingBox>();
        if (matrix.isEmpty()) {
            return bbox;
        }

        BoundingBox b = new BoundingBox();
        b.setCenterLat(matrix.get(0).get(2));
        b.setCenterLng(matrix.get(0).get(3));
        bbox.add(b);

        double prevLat = matrix.get(0).get(2);
        double prevLng = matrix.get(0).get(3);
        for (int i = 1; i < matrix.size(); i++) {
            double lat = matrix.get(i).get(2);
            double lng = matrix.get(i).get(3);
            if (prevLat != lat || prevLng != lng) {
                b = new BoundingBox();
                b.setCenterLat(lat);
                b.setCenterLng(lng);
                bbox.add(b);
            }
            prevLat = lat;
            prevLng = lng;
        }
        return bbox;
    }

    /**
     * Group venues occurrences belonging to the same focal point.
     * <p>
     * The result has as many columns as the first row of {@code matrix}:
     * columns 0 and 1 hold the focal coordinates of {@code b}, every other
     * column holds the sum of that column over the rows whose first two values
     * are exactly equal to those coordinates.
     *
     * @param b      the cell whose center identifies the focal point
     * @param matrix rows of single venues; must not be empty and its first row
     *               must have at least two columns
     * @return the aggregated row for the cell
     */
    public ArrayList<Double> groupSinglesToCell(BoundingBox b, ArrayList<ArrayList<Double>> matrix) {
        // focal coordinates of the cell
        double cLat = b.getCenterLat().doubleValue();
        double cLng = b.getCenterLng().doubleValue();

        ArrayList<Double> toRet = buildListZero(matrix.get(0).size());
        toRet.set(0, cLat); // focal latitude of the cell
        toRet.set(1, cLng); // focal longitude of the cell

        for (ArrayList<Double> record : matrix) {
            // venues of the same cell
            if (record.get(0).doubleValue() == cLat && record.get(1).doubleValue() == cLng) {
                // grouping by summing the occurrences
                for (int j = 2; j < record.size(); j++) {
                    toRet.set(j, toRet.get(j) + record.get(j));
                }
            }
        }
        return toRet;
    }

    /**
     * Get a matrix with frequency values.
     *
     * @param data   the grid cells
     * @param venues rows of single venues
     * @return one aggregated row per cell, see {@link #groupSinglesToCell}
     */
    public ArrayList<ArrayList<Double>> buildFrequencyMatrix(ArrayList<BoundingBox> data,
            ArrayList<ArrayList<Double>> venues) {
        ArrayList<ArrayList<Double>> frequency = new ArrayList<ArrayList<Double>>(data.size());
        for (BoundingBox cell : data) {
            frequency.add(groupSinglesToCell(cell, venues));
        }
        return frequency;
    }

    /**
     * Get a matrix with density values.
     * <p>
     * Every feature value of row {@code i} is divided by {@code area.get(i)}.
     * For {@code NORM} and {@code NOTNORM} the first two columns (latitude and
     * longitude) are copied unchanged; for {@code MISSING} every column is
     * treated as a feature. Any other type yields an empty matrix.
     *
     * @param type   how coordinates are laid out in the matrix
     * @param matrix frequency matrix
     * @param area   area of the cell of each row
     * @return the density matrix
     */
    public ArrayList<ArrayList<Double>> buildDensityMatrix(CoordinatesNormalizationType type,
            ArrayList<ArrayList<Double>> matrix, ArrayList<Double> area) {
        ArrayList<ArrayList<Double>> densMatrix = new ArrayList<ArrayList<Double>>();

        // index of the first column that has to be divided by the area
        int firstFeature;
        switch (type) {
            case NORM:
            case NOTNORM:
                firstFeature = 2; // first 2 columns are latitude and longitude
                break;
            case MISSING:
                firstFeature = 0;
                break;
            default:
                return densMatrix;
        }

        for (int i = 0; i < matrix.size(); i++) {
            ArrayList<Double> row = matrix.get(i);
            ArrayList<Double> densRecord = new ArrayList<Double>();
            for (int j = 0; j < firstFeature; j++) {
                densRecord.add(row.get(j)); // latitude, longitude
            }
            for (int j = firstFeature; j < row.size(); j++) {
                densRecord.add(row.get(j) / area.get(i));
            }
            densMatrix.add(densRecord);
        }
        return densMatrix;
    }

    /**
     * Get a matrix normalized in [0,1]. Before normalization, densities are
     * intra-feature normalized (each value is divided by the sum of its column).
     * <p>
     * For {@code NORM} and {@code NOTNORM} the first two columns are not
     * intra-feature normalized; whether they are then shifted in [0,1] is
     * decided by {@link #normalizeRow}.
     *
     * @param type   how coordinates are laid out in the matrix
     * @param matrix density matrix
     * @return the normalized matrix; empty when {@code matrix} is empty
     */
    public ArrayList<ArrayList<Double>> buildNormalizedMatrix(CoordinatesNormalizationType type,
            ArrayList<ArrayList<Double>> matrix) {
        ArrayList<ArrayList<Double>> intraFeatureMatrix = new ArrayList<ArrayList<Double>>();
        ArrayList<ArrayList<Double>> normalizedMatrix = new ArrayList<ArrayList<Double>>();
        ArrayList<Double> sumArray;

        switch (type) {
            case MISSING:
                // sums of the feature values per column, no coordinates present
                sumArray = getSumArray(0, matrix);
                for (ArrayList<Double> record : matrix) {
                    intraFeatureMatrix.add(getIntraFeatureNormalizationNoCoord(record, sumArray));
                }
                break;
            case NORM:
            case NOTNORM:
                // sums start from index 2 because first two columns are lat and lng
                sumArray = getSumArray(2, matrix);
                for (ArrayList<Double> record : matrix) {
                    intraFeatureMatrix.add(getIntraFeatureNormalization(record, sumArray));
                }
                break;
            default:
                break;
        }

        // min and max values of every column
        ArrayList<Double> minArray = getMinArray(intraFeatureMatrix);
        ArrayList<Double> maxArray = getMaxArray(intraFeatureMatrix);

        // shift all the values in [0,1] according to min and max of the column
        for (ArrayList<Double> record : intraFeatureMatrix) {
            normalizedMatrix.add(normalizeRow(type, record, minArray, maxArray));
        }
        return normalizedMatrix;
    }

    /**
     * Get the total number of elements of all the categories.
     *
     * @param start  index of the first column to sum
     * @param matrix the matrix; the column count is taken from its first row
     * @return the sums of columns {@code start}, {@code start+1}, ...; empty
     *         when {@code matrix} is empty
     */
    public ArrayList<Double> getSumArray(int start, ArrayList<ArrayList<Double>> matrix) {
        ArrayList<Double> sumArray = new ArrayList<Double>();
        if (matrix.isEmpty()) {
            return sumArray;
        }
        int columns = matrix.get(0).size();
        for (int j = start; j < columns; j++) {
            sumArray.add(getSum(matrix, j));
        }
        return sumArray;
    }

    /**
     * Get the total number of elements of a specific category.
     *
     * @param matrix the matrix
     * @param index  the column to sum
     * @return the sum of column {@code index} over all rows (0 for no rows)
     */
    public double getSum(ArrayList<ArrayList<Double>> matrix, int index) {
        double sum = 0;
        for (ArrayList<Double> record : matrix) {
            sum += record.get(index);
        }
        return sum;
    }

    /**
     * Get an intra-feature normalized row of the matrix
     * without considering lat and lng coordinates.
     *
     * @param record   the row; every column is a feature
     * @param sumArray sum of each column, aligned with {@code record}
     * @return each value divided by its column sum, or 0 when the sum is not
     *         bigger than 0
     */
    public ArrayList<Double> getIntraFeatureNormalizationNoCoord(ArrayList<Double> record,
            ArrayList<Double> sumArray) {
        ArrayList<Double> normalizedRecord = new ArrayList<Double>(record.size());
        for (int j = 0; j < record.size(); j++) {
            normalizedRecord.add(divideOrZero(record.get(j), sumArray.get(j)));
        }
        return normalizedRecord;
    }

    /**
     * Get an intra-feature normalized row of the matrix.
     *
     * @param record   the row; columns 0 and 1 (latitude, longitude) are
     *                 copied as they are
     * @param sumArray sum of each feature column; element {@code k} refers to
     *                 column {@code k+2} of {@code record}
     * @return the coordinates followed by each feature divided by its column
     *         sum, or 0 when the sum is not bigger than 0
     */
    public ArrayList<Double> getIntraFeatureNormalization(ArrayList<Double> record,
            ArrayList<Double> sumArray) {
        ArrayList<Double> normalizedRecord = new ArrayList<Double>(record.size());
        normalizedRecord.add(record.get(0)); // latitude
        normalizedRecord.add(record.get(1)); // longitude
        for (int j = 2; j < record.size(); j++) {
            normalizedRecord.add(divideOrZero(record.get(j), sumArray.get(j - 2)));
        }
        return normalizedRecord;
    }

    /** Divide a value by the total of its column, giving 0 when the total is not bigger than 0. */
    private static double divideOrZero(double value, double total) {
        return total > 0 ? value / total : 0.0;
    }

    /**
     * Get the min value of a column.
     *
     * @param matrix the matrix
     * @param index  the column
     * @return the smallest value of the column, or {@code Double.MAX_VALUE}
     *         when the matrix has no rows
     */
    public double getMin(ArrayList<ArrayList<Double>> matrix, int index) {
        double min = Double.MAX_VALUE;
        for (ArrayList<Double> record : matrix) {
            double current = record.get(index);
            if (current < min) {
                min = current;
            }
        }
        return min;
    }

    /**
     * Get min values of all the columns of the matrix.
     *
     * @param matrix the matrix; the column count is taken from its first row
     * @return the min of every column; empty when {@code matrix} is empty
     */
    public ArrayList<Double> getMinArray(ArrayList<ArrayList<Double>> matrix) {
        ArrayList<Double> minArray = new ArrayList<Double>();
        if (matrix.isEmpty()) {
            return minArray;
        }
        int columns = matrix.get(0).size();
        for (int i = 0; i < columns; i++) {
            minArray.add(getMin(matrix, i)); // min value of column i
        }
        return minArray;
    }

    /**
     * Get the max value of a column.
     *
     * @param matrix the matrix
     * @param index  the column
     * @return the biggest value of the column, or {@code -Double.MAX_VALUE}
     *         when the matrix has no rows
     */
    public double getMax(ArrayList<ArrayList<Double>> matrix, int index) {
        double max = -Double.MAX_VALUE;
        for (ArrayList<Double> record : matrix) {
            double current = record.get(index);
            if (current > max) {
                max = current;
            }
        }
        return max;
    }

    /**
     * Get max values of all the columns of the matrix.
     *
     * @param matrix the matrix; the column count is taken from its first row
     * @return the max of every column; empty when {@code matrix} is empty
     */
    public ArrayList<Double> getMaxArray(ArrayList<ArrayList<Double>> matrix) {
        ArrayList<Double> maxArray = new ArrayList<Double>();
        if (matrix.isEmpty()) {
            return maxArray;
        }
        int columns = matrix.get(0).size();
        for (int i = 0; i < columns; i++) {
            maxArray.add(getMax(matrix, i)); // max value of column i
        }
        return maxArray;
    }

    /**
     * Normalize the values of a row in [0,1] with respect to
     * their own min and max values.
     * <p>
     * For {@code NOTNORM} the first two columns (latitude and longitude) are
     * copied unchanged; for {@code NORM} and {@code MISSING} every column is
     * normalized. Any other type yields an empty list.
     *
     * @param type     how coordinates are treated
     * @param array    the row to normalize
     * @param minArray min of every column, aligned with {@code array}
     * @param maxArray max of every column, aligned with {@code array}
     * @return the normalized row
     */
    public ArrayList<Double> normalizeRow(CoordinatesNormalizationType type, ArrayList<Double> array,
            ArrayList<Double> minArray, ArrayList<Double> maxArray) {
        ArrayList<Double> normalizedArray = new ArrayList<Double>();

        // index of the first column that has to be normalized
        int first;
        switch (type) {
            case NOTNORM:
                normalizedArray.add(array.get(0)); // latitude
                normalizedArray.add(array.get(1)); // longitude
                first = 2;
                break;
            case NORM:
            case MISSING:
                first = 0;
                break;
            default:
                return normalizedArray;
        }

        for (int i = first; i < array.size(); i++) {
            normalizedArray.add(normalizeValues(minArray.get(i), maxArray.get(i), array.get(i)));
        }
        return normalizedArray;
    }

    /**
     * Normalize a value in [0,1].
     *
     * @param min the min of the column
     * @param max the max of the column
     * @param c   the value to normalize
     * @return {@code (c-min)/(max-min)}, or 0 when {@code min} and {@code max}
     *         are equal (constant column), so that no division by zero occurs
     */
    public double normalizeValues(double min, double max, double c) {
        // a constant column has max == min: avoid 0/0 (NaN) and keep the value at 0
        if (max != min) {
            return (c - min) / (max - min);
        }
        return 0;
    }

    /**
     * Get the feature labeled either for
     * frequency, density or normalized density.
     * <p>
     * For {@code NORM} and {@code NOTNORM} the labels start with
     * "Timestamps(ms)", "Latitude" and "Longitude". In every handled case the
     * first two entries of {@code features} are skipped and each remaining
     * entry {@code f} becomes {@code s + "(" + f + ")"}. Any other type yields
     * an empty list.
     *
     * @param type     how coordinates are treated
     * @param s        the prefix of every feature label
     * @param features the feature names; the first 2 are lat and lng
     * @return the labels
     */
    public ArrayList<String> getFeaturesLabel(CoordinatesNormalizationType type, String s,
            ArrayList<String> features) {
        ArrayList<String> featuresLabel = new ArrayList<String>();

        switch (type) {
            case NORM:
            case NOTNORM:
                featuresLabel.add("Timestamps(ms)"); // Timestamps
                featuresLabel.add("Latitude");       // Latitude
                featuresLabel.add("Longitude");      // Longitude
                break;
            case MISSING:
                break;
            default:
                return featuresLabel;
        }

        // first 2 features are lat and lng so i=2
        for (int i = 2; i < features.size(); i++) {
            featuresLabel.add(s + "(" + features.get(i) + ")");
        }
        return featuresLabel;
    }

    /**
     * Get the header of the dataset of single venues.
     * The header will include only the values focal_latitude, focal_longitude, f1,...,fn.
     * f1,....,fn are the venue categories.
     *
     * @param list the CSV records; the first one is the header row
     * @return the values of the first record from column 5 on; empty when
     *         {@code list} is empty
     */
    public ArrayList<String> getHeader(List<CSVRecord> list) {
        ArrayList<String> header = new ArrayList<String>();
        if (list.isEmpty()) {
            return header;
        }
        CSVRecord first = list.get(0);
        // the first 5 columns are skipped, as in getValues
        for (int i = 5; i < first.size(); i++) {
            header.add(first.get(i));
        }
        return header;
    }

    /**
     * Get the values of the dataset of single venues.
     * The columns included for the values correspond to
     * focal_latitude, focal_longitude, f1,...,fn.
     * f1,....,fn are the venue categories.
     *
     * @param list the CSV records; the first one is the header row and is skipped
     * @return one row per record, holding columns 5.. parsed as doubles
     * @throws NumberFormatException if a value cannot be parsed as a double
     */
    public ArrayList<ArrayList<Double>> getValues(List<CSVRecord> list) {
        ArrayList<ArrayList<Double>> venues = new ArrayList<ArrayList<Double>>();
        // i=1 because of the header
        for (int i = 1; i < list.size(); i++) {
            CSVRecord line = list.get(i);
            ArrayList<Double> rec = new ArrayList<Double>();
            // we don't consider timestamp, been_here, venue_id, venue_lat, venue_lng
            for (int j = 5; j < line.size(); j++) {
                rec.add(Double.parseDouble(line.get(j)));
            }
            venues.add(rec);
        }
        return venues;
    }

    /**
     * Haversine formula implementation. It returns the distance between
     * two points given latitude and longitude values (in degrees) in Km.
     *
     * @param lat1 latitude of the first point
     * @param lng1 longitude of the first point
     * @param lat2 latitude of the second point
     * @param lng2 longitude of the second point
     * @return the distance in Km, truncated (not rounded) to the 3rd decimal digit
     */
    public double getDistance(double lat1, double lng1, double lat2, double lng2) {
        double earthRadius = 6372.8; // in Km
        double halfLatSin = Math.sin(Math.toRadians(lat2 - lat1) / 2);
        double halfLngSin = Math.sin(Math.toRadians(lng2 - lng1) / 2);
        double a = halfLatSin * halfLatSin
                + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2))
                * halfLngSin * halfLngSin;
        // floating point error can push a slightly outside [0,1] (e.g. for
        // antipodal points), which would make the square roots below NaN
        a = Math.min(1.0, Math.max(0.0, a));
        double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
        double dist = earthRadius * c;

        // keep only the first 3 decimal digits
        return Math.floor(dist * 1000) / 1000;
    }

    /** Print the distance between two sample points. */
    public static void main(String[] args) {
        ImportTools tools = new ImportTools();
        System.out.println(tools.getDistance(36.12, -86.67, 33.94, -118.40));
    }
}