/* * Apache License * Version 2.0, January 2004 * http://www.apache.org/licenses/ * * Copyright 2013 Aurelian Tutuianu * Copyright 2014 Aurelian Tutuianu * Copyright 2015 Aurelian Tutuianu * Copyright 2016 Aurelian Tutuianu * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package rapaio.core.stat; import rapaio.data.Mapping; import rapaio.data.Var; import rapaio.printer.Printable; import java.util.stream.IntStream; import static java.util.stream.Collectors.toList; import static rapaio.core.CoreTools.mean; import static rapaio.sys.WS.formatFlex; /** * Compute covariance of two variables * <p> * Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a> on 3/24/15. */ public class Covariance implements Printable { public static Covariance from(Var var1, Var var2) { return new Covariance(var1, var2); } private final String varName1; private final String varName2; private final double value; private int completeCount; private int missingCount; private Covariance(Var var1, Var var2) { this.varName1 = var1.name(); this.varName2 = var2.name(); this.value = compute(var1, var2); } private double compute(final Var x, final Var y) { Mapping map = Mapping.wrap(IntStream.range(0, Math.min(x.rowCount(), y.rowCount())).filter(row -> !x.missing(row) && !y.missing(row)).boxed().collect(toList())); completeCount = map.size(); missingCount = Math.max(x.rowCount(), y.rowCount()) - completeCount; if (map.size() < 2) { return 0; } Var xx = x.mapRows(map); Var yy = y.mapRows(map); double m1 = mean(xx).value(); double m2 = mean(yy).value(); double cov = 0; for (int i = 0; i < completeCount; i++) { cov += (xx.value(i) - m1) * (yy.value(i) - m2); } return cov / (completeCount - 1.0); } public double value() { return value; } @Override public String summary() { return "\n" + "> cov[" + varName1 + ", " + varName2 + "]\n" + "total rows: " + (completeCount + missingCount) + " (complete: " + completeCount + ", missing: " + missingCount + " )\n" + "covariance: " + formatFlex(value) + "\n"; } }