/*
 *    Apache License
 *    Version 2.0, January 2004
 *    http://www.apache.org/licenses/
 *
 *    Copyright 2013 Aurelian Tutuianu
 *    Copyright 2014 Aurelian Tutuianu
 *    Copyright 2015 Aurelian Tutuianu
 *    Copyright 2016 Aurelian Tutuianu
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *
 */

package rapaio.core.correlation;

import rapaio.core.stat.Mean;
import rapaio.core.stat.Variance;
import rapaio.data.Frame;
import rapaio.data.Mapping;
import rapaio.data.Var;
import rapaio.printer.Printable;

import java.util.Arrays;
import java.util.List;
import java.util.stream.IntStream;

import static rapaio.sys.WS.*;

/**
 * Pearson product-moment correlation coefficient.
 * <p>
 * Holds the symmetric matrix of pairwise coefficients for a set of variables,
 * together with the variable names used when printing the summary.
 * <p>
 * See
 * http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient
 * <p>
 * User: <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a>
 */
public class CorrPearson implements Printable {

    /**
     * Computes the pairwise correlations between all variables of a frame.
     *
     * @param df frame whose variables are correlated with each other
     * @return the computed correlation object
     */
    public static CorrPearson from(Frame df) {
        return new CorrPearson(df);
    }

    /**
     * Computes the pairwise correlations between the given variables.
     *
     * @param vars variables to correlate; a variable with an empty name is labeled
     *             {@code "V" + index} in the summary
     * @return the computed correlation object
     */
    public static CorrPearson from(Var... vars) {
        return new CorrPearson(vars);
    }

    /** Variable labels, in the same order as the rows and columns of {@link #pearson}. */
    private final String[] names;
    /** Symmetric matrix of coefficients; the diagonal is fixed to 1. */
    private final double[][] pearson;

    private CorrPearson(Frame df) {
        List<Var> varList = df.varList();
        this.names = df.varNames();
        this.pearson = correlationMatrix(varList);
    }

    private CorrPearson(Var... vars) {
        this.names = new String[vars.length];
        for (int i = 0; i < names.length; i++) {
            String name = vars[i].name();
            // unnamed variables get a positional label so that the summary stays readable
            names[i] = name.isEmpty() ? "V" + i : name;
        }
        this.pearson = correlationMatrix(Arrays.asList(vars));
    }

    /**
     * Builds the full symmetric matrix: 1 on the diagonal, and each pair computed
     * once and mirrored across the diagonal.
     */
    private static double[][] correlationMatrix(List<Var> vars) {
        int n = vars.size();
        double[][] matrix = new double[n][n];
        for (int i = 0; i < n; i++) {
            matrix[i][i] = 1;
            for (int j = i + 1; j < n; j++) {
                double r = compute(vars.get(i), vars.get(j));
                matrix[i][j] = r;
                matrix[j][i] = r;
            }
        }
        return matrix;
    }

    /**
     * Computes the coefficient for one pair of variables.
     * <p>
     * Only the first {@code min(x.rowCount(), y.rowCount())} rows are considered, and
     * rows where either variable is missing are skipped.
     *
     * @return the coefficient, or {@code 0.0} when the product of the two standard
     * deviations is zero
     */
    private static double compute(Var x, Var y) {
        int len = Math.min(x.rowCount(), y.rowCount());
        Mapping map = Mapping.copy(IntStream.range(0, len)
                .filter(i -> !(x.missing(i) || y.missing(i)))
                .toArray());

        // build the complete-case views once and reuse them for all statistics
        Var xs = x.mapRows(map);
        Var ys = y.mapRows(map);
        double xMean = Mean.from(xs).value();
        double yMean = Mean.from(ys).value();
        double sdp = Variance.from(xs).sdValue() * Variance.from(ys).sdValue();

        double sum = 0;
        for (int i = 0; i < map.size(); i++) {
            int pos = map.get(i);
            sum += (x.value(pos) - xMean) * (y.value(pos) - yMean);
        }
        // NOTE(review): the (n - 1) divisor presumably matches a sample standard
        // deviation returned by sdValue() - confirm against Variance
        return sdp == 0 ? 0.0 : sum / (sdp * (map.size() - 1));
    }

    /**
     * @return the internal coefficient matrix itself, not a copy
     */
    public double[][] values() {
        return pearson;
    }

    /**
     * Returns the coefficient between the first two variables, or 1 when there is
     * exactly one variable.
     * <p>
     * Calling this on an instance built from no variables fails with
     * {@link ArrayIndexOutOfBoundsException}, since the matrix is then empty.
     *
     * @return coefficient of the first pair of variables
     */
    public double singleValue() {
        if (names.length == 1) {
            return 1;
        }
        return pearson[0][1];
    }

    /**
     * Builds the text summary; the layout depends on the number of variables.
     */
    @Override
    public String summary() {
        StringBuilder sb = new StringBuilder();
        switch (names.length) {
            case 1:
                summaryOne(sb);
                break;
            case 2:
                summaryTwo(sb);
                break;
            default:
                summaryMore(sb);
        }
        return sb.toString();
    }

    /** Summary for a single variable: the coefficient is trivially 1. */
    private void summaryOne(StringBuilder sb) {
        sb.append(String.format("\n" +
                "> pearson[%s] - Pearson product-moment correlation coefficient\n", names[0]));
        sb.append("1\n");
        sb.append("pearson correlation is 1 for identical vectors\n");
    }

    /** Summary for exactly two variables: prints the single coefficient. */
    private void summaryTwo(StringBuilder sb) {
        sb.append(String.format("\n" +
                        "> pearson[%s, %s] - Pearson product-moment correlation coefficient\n",
                names[0], names[1]));
        sb.append(formatFlex(pearson[0][1])).append("\n");
    }

    /**
     * Summary for any other number of variables: prints the coefficient matrix as a
     * text table, split into successive chunks of columns that fit the printer's
     * text width.
     */
    private void summaryMore(StringBuilder sb) {
        sb.append(String.format("\n" +
                        "> pearson[%s] - Pearson product-moment correlation coefficient\n",
                Arrays.deepToString(names)));

        int size = names.length + 1;
        String[][] table = new String[size][size];
        table[0][0] = "";
        for (int i = 1; i < size; i++) {
            table[0][i] = i + ".";
            table[i][0] = i + "." + names[i - 1];
            for (int j = 1; j < size; j++) {
                table[i][j] = (i == j) ? "x" : formatShort(pearson[i - 1][j - 1]);
            }
        }

        // The table is emitted transposed: table[i] becomes output column i, so the
        // width of a column is the longest cell found in table[i].
        int[] ws = new int[size];
        for (int i = 0; i < size; i++) {
            for (String cell : table[i]) {
                ws[i] = Math.max(ws[i], cell.length());
            }
        }

        int width = getPrinter().textWidth();
        int start = 0;
        int end = 0;
        while (start < size) {
            int w = 0;
            while (end < size - 1 && ws[end + 1] + w + 1 < width) {
                w += ws[end + 1] + 1;
                end++;
            }
            // A column wider than the text width never fits; emit it alone instead of
            // looping forever on an empty chunk.
            if (end < start) {
                end = start;
            }
            for (int j = 0; j < size; j++) {
                for (int i = start; i <= end; i++) {
                    sb.append(padLeft(table[i][j], ws[i])).append(" ");
                }
                sb.append("\n");
            }
            start = end + 1;
        }
    }

    /**
     * Right-aligns text within the given width. A zero width is returned as is,
     * because {@code "%0s"} is not a valid format specifier.
     */
    private static String padLeft(String text, int width) {
        return width > 0 ? String.format("%" + width + "s", text) : text;
    }
}