/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.data.filter.var;
import rapaio.core.CoreTools;
import rapaio.data.Nominal;
import rapaio.data.Var;
import rapaio.sys.WS;
import rapaio.util.func.SPredicate;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Makes a numerical variable a nominal one, with intervals specified by quantiles.
 * <p>
 * Fitting computes the {@code k - 1} quantiles at probabilities
 * {@code 1/k, 2/k, ..., (k-1)/k} of the fitted variable and derives one level per
 * interval: {@code -Inf~q1}, {@code q1~q2}, ..., {@code q(k-1)~Inf}. Every interval is
 * open on the left and closed on the right, except the last one which is open on both
 * sides. Applying the filter maps each value to the label of the interval containing it.
 * <p>
 * Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a> on 1/18/16.
 */
public class VFQuantileDiscrete extends AbstractVF {

    private static final long serialVersionUID = -6702714518094848749L;

    /** Number of intervals to produce; always greater than 1. */
    private final int k;
    /** Interval labels in ascending interval order; rebuilt by every call to {@link #fit(Var...)}. */
    List<String> dict = new ArrayList<>();
    /** Membership test for each interval label; rebuilt by every call to {@link #fit(Var...)}. */
    Map<String, SPredicate<Double>> predicates = new HashMap<>();
    /** Quantile cut points computed by the last fit, {@code null} before the first fit. */
    double[] qv;

    /**
     * Builds a filter which splits a numeric variable into {@code k} quantile intervals.
     *
     * @param k number of intervals, must be greater than 1
     * @throws IllegalArgumentException if {@code k <= 1}
     */
    public VFQuantileDiscrete(int k) {
        if (k <= 1) {
            throw new IllegalArgumentException(String.format("k=%d should be greater than 1", k));
        }
        this.k = k;
    }

    /**
     * Computes the quantile cut points of the given variable and builds the interval
     * labels together with their membership predicates.
     * <p>
     * Any state from a previous fit is replaced, so the filter can be fitted again.
     *
     * @param vars exactly one variable (enforced by {@code checkSingleVar})
     */
    @Override
    public void fit(Var... vars) {
        super.checkSingleVar(vars);

        double len = 1.0 / k;
        double[] q = new double[k - 1];
        for (int i = 0; i < q.length; i++) {
            q[i] = len * (i + 1);
        }

        Var original = vars[0];
        double[] cuts = CoreTools.quantiles(original, q).values();

        // Build into fresh collections and publish them only at the end: a repeated fit
        // must not pile new levels on top of the ones produced by a previous fit.
        List<String> labels = new ArrayList<>();
        Map<String, SPredicate<Double>> tests = new HashMap<>();

        // first interval
        final double lowest = cuts[0];
        register(labels, tests, "-Inf~" + WS.formatFlexShort(lowest), x -> x <= lowest);

        // mid intervals
        for (int i = 1; i < cuts.length; i++) {
            final double lo = cuts[i - 1];
            final double hi = cuts[i];
            register(labels, tests,
                    WS.formatFlexShort(lo) + "~" + WS.formatFlexShort(hi),
                    x -> x > lo && x <= hi);
        }

        // last interval
        final double highest = cuts[cuts.length - 1];
        register(labels, tests, WS.formatFlexShort(highest) + "~Inf", x -> x > highest);

        qv = cuts;
        dict = labels;
        predicates = tests;
    }

    /**
     * Replaces each value of the given variable with the label of the quantile interval
     * which contains it. The result has one row for every row of the input: missing
     * input rows, and values which fall in no interval, are added as missing.
     *
     * @param vars exactly one variable (enforced by {@code checkSingleVar})
     * @return nominal variable named like the input, with interval labels as levels
     * @throws IllegalStateException if the filter was not fitted
     */
    @Override
    public Var apply(Var... vars) {
        super.checkSingleVar(vars);
        if (qv == null) {
            throw new IllegalStateException("filter must be fitted before it is applied");
        }

        Var original = vars[0];
        Nominal result = Nominal.empty(0, dict).withName(original.name());
        for (int i = 0; i < original.rowCount(); i++) {
            if (original.missing(i)) {
                // Stop here: the numeric value behind a missing row must not be tested
                // against the intervals, otherwise the row could be emitted twice.
                result.addMissing();
                continue;
            }
            String label = findLabel(original.value(i));
            if (label == null) {
                // No interval matched (e.g. NaN fails every comparison); keep the output
                // row-aligned with the input instead of silently dropping the row.
                result.addMissing();
            } else {
                result.addLabel(label);
            }
        }
        return result;
    }

    /** Returns the label of the first interval whose predicate accepts the value, or {@code null} if none does. */
    private String findLabel(double value) {
        for (Map.Entry<String, SPredicate<Double>> e : predicates.entrySet()) {
            if (e.getValue().test(value)) {
                return e.getKey();
            }
        }
        return null;
    }

    /**
     * Records an interval under its label. When two intervals format to the same label
     * (tied or nearly equal quantiles), the label is listed once and the interval tests
     * are combined, so that no interval is lost by overwriting the map entry.
     */
    private static void register(List<String> labels, Map<String, SPredicate<Double>> tests,
                                 String label, SPredicate<Double> test) {
        SPredicate<Double> previous = tests.get(label);
        if (previous == null) {
            labels.add(label);
            tests.put(label, test);
        } else {
            tests.put(label, x -> previous.test(x) || test.test(x));
        }
    }
}