/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.experiment.ml.eval;
import rapaio.data.Numeric;
import rapaio.data.RowComparators;
import rapaio.data.Var;
import rapaio.data.filter.var.VFCumulativeSum;
import rapaio.data.filter.var.VFRefSort;
import java.util.Comparator;
import java.util.stream.IntStream;
/**
 * Normalized Gini score of a set of predictions measured against observed values.
 * <p>
 * The score is the ratio of two sums: the summed "Lorentz minus random" differences
 * obtained when the observed values are ordered by the predictions, divided by the same
 * quantity obtained when the observed values are ordered by themselves. The ratio is
 * computed once, in the constructor, and is read back through {@link #value()}.
 * <p>
 * No guard exists against a zero denominator or against observed values whose total is
 * zero; in those cases the floating-point divisions yield a non-finite result
 * (NaN or infinity).
 * <p>
 * Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a> on 7/17/15.
 */
@Deprecated
public class NormalizedGini {

    /*
    R reference implementation which this class follows:

    SumModelGini <- function(solution, submission) {
        df = data.frame(solution = solution, submission = submission)
        df <- df[order(df$submission, decreasing = TRUE),]
        df
        df$random = (1:nrow(df))/nrow(df)
        df
        totalPos <- sum(df$solution)
        df$cumPosFound <- cumsum(df$solution) # this will store the cumulative number of positive examples found (used for computing "Model Lorentz")
        df$Lorentz <- df$cumPosFound / totalPos # this will store the cumulative proportion of positive examples found ("Model Lorentz")
        df$Gini <- df$Lorentz - df$random # will store Lorentz minus random
        print(df)
        return(sum(df$Gini))
    }

    NormalizedGini <- function(solution, submission) {
        SumModelGini(solution, submission) / SumModelGini(solution, solution)
    }
    */

    /** Ratio computed at construction time. */
    private final double value;

    /**
     * Computes the normalized Gini score for the given pair of variables.
     *
     * @param solution   observed values
     * @param submission predicted scores used to order the observed values
     * @throws IllegalArgumentException if the two variables do not have the same number of rows
     */
    public NormalizedGini(Var solution, Var submission) {
        // The computation pairs the variables row by row, so differing lengths cannot
        // produce a meaningful result; fail early with a clear message.
        if (solution.rowCount() != submission.rowCount()) {
            throw new IllegalArgumentException(
                    "solution and submission must have the same number of rows: "
                            + solution.rowCount() + " != " + submission.rowCount());
        }
        value = sumModelGini(solution, submission) / sumModelGini(solution, solution);
    }

    /**
     * Sums, over all rows, the difference between the cumulative proportion of the
     * solution total reached so far (rows ordered by {@code submission}) and the
     * proportion {@code (row + 1) / n} of rows visited so far.
     *
     * @param solution   observed values
     * @param submission values which give the ordering of the rows
     * @return the sum of the per-row differences
     */
    private static double sumModelGini(Var solution, Var submission) {
        // The `false` flag is taken to request descending order, matching
        // `decreasing = TRUE` in the R reference -- confirm against RowComparators.
        Comparator<Integer> cmp = RowComparators.numeric(submission, false);
        Var sol = new VFRefSort(cmp).fitApply(solution);

        // Only the number of rows of the submission is needed; sorting it would not
        // change that number, so the variable is queried directly.
        int n = submission.rowCount();

        double totalPos = sol.stream().mapToDouble().sum();

        // A solid copy is handed to the filter, presumably because the cumulative sum
        // is written in place and must not reach the caller's data -- confirm.
        Var cumPosFound = new VFCumulativeSum().fitApply(sol.solidCopy());

        // Lorentz minus random, computed per row without intermediate vectors.
        return IntStream.range(0, n)
                .mapToDouble(row -> cumPosFound.value(row) / totalPos - (row + 1) / (double) n)
                .sum();
    }

    /**
     * Returns the normalized Gini score computed by the constructor.
     *
     * @return the ratio computed at construction time
     */
    public double value() {
        return value;
    }
}