/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.core.correlation;
import rapaio.data.*;
import rapaio.data.filter.var.VFRefSort;
import rapaio.printer.Printable;
import java.util.Arrays;
import java.util.stream.IntStream;
import static rapaio.sys.WS.*;
/**
* Spearman's rank correlation coefficient.
* <p>
* You can compute the coefficient for multiple vectors at the same time.
* <p>
* See: http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
* <p>
* User: <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a>
*/
public class CorrSpearman implements Printable {

    /**
     * Builds the Spearman correlation for all variables of a frame.
     * Variable names are taken from the frame.
     *
     * @param df frame whose variables are correlated pairwise
     * @return new correlation instance
     */
    public static CorrSpearman from(Frame df) {
        return new CorrSpearman(df);
    }

    /**
     * Builds the Spearman correlation for the given variables.
     * Variables are labeled {@code V0}, {@code V1}, ... in the order they are given.
     *
     * @param vars variables which are correlated pairwise
     * @return new correlation instance
     */
    public static CorrSpearman from(Var... vars) {
        return new CorrSpearman(vars);
    }

    /** Labels used in the summary, one per variable. */
    private final String[] names;
    /** Input variables restricted to the rows kept by the missing value filter. */
    private final Var[] vars;
    /** Correlation matrix, as returned by Pearson correlation computed on ranks. */
    private final double[][] rho;

    private CorrSpearman(Var... vars) {
        // With no variables there is nothing to scan. Starting from Integer.MAX_VALUE
        // in that case would make the stream below try to materialize ~2^31 row
        // indices (no variable can reject a row), which exhausts memory.
        int rowCount = vars.length == 0 ? 0 : Integer.MAX_VALUE;
        for (Var var : vars) {
            rowCount = Math.min(var.rowCount(), rowCount);
        }

        // keep only rows, within the shortest variable, where no variable is missing
        Mapping map = Mapping.copy(IntStream.range(0, rowCount)
                .filter(row -> {
                    for (Var var : vars) {
                        if (var.missing(row)) {
                            return false;
                        }
                    }
                    return true;
                })
                .toArray());

        this.names = new String[vars.length];
        for (int i = 0; i < names.length; i++) {
            names[i] = "V" + i;
        }
        this.vars = new Var[vars.length];
        for (int i = 0; i < vars.length; i++) {
            this.vars[i] = vars[i].mapRows(map);
        }
        this.rho = compute();
    }

    private CorrSpearman(Frame df) {
        // keep only the rows for which the frame does not report a missing value
        Mapping map = Mapping.copy(IntStream.range(0, df.rowCount())
                .filter(row -> !df.missing(row))
                .toArray());
        this.names = df.varNames();
        this.vars = new Var[df.varCount()];
        for (int i = 0; i < df.varCount(); i++) {
            vars[i] = df.var(i).mapRows(map);
        }
        this.rho = compute();
    }

    /**
     * Replaces each variable with its ranks (ties receive the average of the
     * positions they span) and delegates to Pearson correlation on those ranks.
     *
     * @return correlation matrix produced by {@code CorrPearson} on the ranks
     */
    private double[][] compute() {
        Var[] sorted = new Var[vars.length];
        Var[] ranks = new Var[vars.length];
        for (int i = 0; i < sorted.length; i++) {
            // NOTE(review): presumably sorts row indexes ascending by the values of vars[i] - confirm
            Index index = Index.seq(vars[i].rowCount());
            sorted[i] = new VFRefSort(RowComparators.numeric(vars[i], true)).fitApply(index);
            ranks[i] = Numeric.fill(vars[i].rowCount());
        }

        // compute ranks
        for (int i = 0; i < sorted.length; i++) {
            int start = 0;
            while (start < sorted[i].rowCount()) {
                // extend [start, end] over the run of equal consecutive values
                int end = start;
                while (end < sorted[i].rowCount() - 1 && sorted[i].value(end) == sorted[i].value(end + 1)) {
                    end++;
                }
                // 1-based average position of the run
                double value = 1 + (start + end) / 2.;
                for (int j = start; j <= end; j++) {
                    ranks[i].setValue(sorted[i].index(j), value);
                }
                start = end + 1;
            }
        }

        // compute Pearson on ranks
        return CorrPearson.from(ranks).values();
    }

    /**
     * Returns the computed correlation matrix.
     * The internal array is returned directly, not a copy.
     *
     * @return correlation matrix indexed by variable position
     */
    public double[][] values() {
        return rho;
    }

    /**
     * Builds a text summary; the layout depends on the number of variables
     * (one, two, or any other count).
     *
     * @return text summary of the correlation
     */
    @Override
    public String summary() {
        StringBuilder sb = new StringBuilder();
        switch (vars.length) {
            case 1:
                summaryOne(sb);
                break;
            case 2:
                summaryTwo(sb);
                break;
            default:
                summaryMore(sb);
        }
        return sb.toString();
    }

    /** Appends the summary for a single variable. */
    private void summaryOne(StringBuilder sb) {
        sb.append(String.format("\n" +
                "> spearman[%s] - Spearman's rank correlation coefficient\n", names[0]));
        sb.append("1\n");
        sb.append("spearman's rank correlation is 1 for identical vectors\n");
    }

    /** Appends the summary for two variables: a header and the single coefficient. */
    private void summaryTwo(StringBuilder sb) {
        sb.append(String.format("\n" +
                        "> spearman[%s, %s] - Spearman's rank correlation coefficient\n",
                names[0], names[1]));
        sb.append(formatFlex(rho[0][1])).append("\n");
    }

    /**
     * Appends the summary as a table of coefficients, split into chunks of
     * columns so that lines try to stay within the printer's text width.
     */
    private void summaryMore(StringBuilder sb) {
        sb.append(String.format("\n" +
                        "> spearman[%s] - Spearman's rank correlation coefficient\n",
                Arrays.deepToString(names)));

        String[][] table = new String[vars.length + 1][vars.length + 1];
        table[0][0] = "";
        for (int i = 1; i < vars.length + 1; i++) {
            table[0][i] = i + ".";
            table[i][0] = i + "." + names[i - 1];
            for (int j = 1; j < vars.length + 1; j++) {
                table[i][j] = formatFlex(rho[i - 1][j - 1]);
                if (i == j) {
                    table[i][j] = "x";
                }
            }
        }

        int width = getPrinter().textWidth();

        // The table is printed with its first index as the output column, so
        // ws[i] is the widest cell among table[i][*].
        int[] ws = new int[table[0].length];
        for (int i = 0; i < table.length; i++) {
            for (int j = 0; j < table[0].length; j++) {
                ws[i] = Math.max(ws[i], table[i][j].length());
            }
        }

        int start = 0;
        int end = start;
        while (start < vars.length + 1) {
            int w = 0;
            while ((end < (table[0].length - 1)) && ws[end + 1] + w + 1 < width) {
                w += ws[end + 1] + 1;
                end++;
            }
            // Always emit at least one column per chunk. Without this, a column
            // wider than the text width leaves end < start and the loop never advances.
            if (end < start) {
                end = start;
            }
            for (int j = 0; j < table.length; j++) {
                for (int i = start; i <= end; i++) {
                    sb.append(padLeft(table[i][j], ws[i])).append(" ");
                }
                sb.append("\n");
            }
            start = end + 1;
        }
    }

    /**
     * Right-aligns a cell to the given width. A zero width is returned as is,
     * since the format string {@code "%0s"} is rejected by the formatter.
     */
    private static String padLeft(String cell, int width) {
        if (width <= 0) {
            return cell;
        }
        return String.format("%" + width + "s", cell);
    }

    /**
     * Returns the coefficient between the first two variables.
     * When there is exactly one variable the result is 1.
     *
     * @return {@code rho[0][1]}, or 1 for a single variable
     */
    public double singleValue() {
        if (names.length == 1) {
            return 1;
        }
        return rho[0][1];
    }
}