/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.data.filter.frame;
import rapaio.core.SamplingTools;
import rapaio.core.distributions.Normal;
import rapaio.data.Frame;
import rapaio.data.SolidFrame;
import rapaio.data.VRange;
import rapaio.math.linear.RM;
import rapaio.math.linear.RV;
import rapaio.math.linear.dense.SolidRM;
import rapaio.math.linear.dense.SolidRV;
import java.util.stream.IntStream;
/**
* Builds a random projection of some give numeric features.
* <p>
* Created by <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a> on 1/28/16.
*/
public class FFRandomProjection extends AbstractFF {
private static final long serialVersionUID = -2790372378136065870L;
private final int k;
private final Method method;
private RM rp;
public static FFRandomProjection newGaussianSd(int k, VRange vRange) {
return new FFRandomProjection(k, gaussian(k), vRange);
}
public static FFRandomProjection newAchlioptas(int k, VRange vRange) {
return new FFRandomProjection(k, achlioptas(3), vRange);
}
public static FFRandomProjection newAchlioptas(int k, double s, VRange vRange) {
return new FFRandomProjection(k, achlioptas(s), vRange);
}
private FFRandomProjection(int k, Method method, VRange vRange) {
super(vRange);
this.k = k;
this.method = method;
}
@Override
public FFRandomProjection newInstance() {
return new FFRandomProjection(k, method, vRange);
}
@Override
public void train(Frame df) {
parse(df);
// build k random projections
rp = SolidRM.empty(varNames.length, k);
for (int i = 0; i < k; i++) {
RV v = method.projection(varNames.length);
for (int j = 0; j < varNames.length; j++) {
rp.set(j, i, v.get(j));
}
}
}
@Override
public Frame apply(Frame df) {
RM X = SolidRM.copy(df.mapVars(varNames));
RM p = X.dot(rp);
Frame non = df.removeVars(varNames);
Frame trans = SolidFrame.matrix(p, IntStream.range(1, k + 1).boxed().map(i -> "RP_" + i).toArray(String[]::new));
return non.bindVars(trans);
}
public interface Method {
RV projection(int rowCount);
}
private static Method gaussian(int k) {
return rowCount -> {
Normal norm = new Normal(0, 1);
RV v = SolidRV.empty(rowCount);
for (int i = 0; i < v.count(); i++) {
v.set(i, norm.sampleNext() / Math.sqrt(k));
}
v.normalize(2);
return v;
};
}
private static Method achlioptas(double s) {
double[] p = new double[3];
p[0] = 1 / (2 * s);
p[1] = 1 - 1 / s;
p[2] = 1 / (2 * s);
double sqrt = Math.sqrt(s);
return rowCount -> {
int[] sample = SamplingTools.sampleWeightedWR(rowCount, p);
RV v = SolidRV.empty(rowCount);
for (int i = 0; i < sample.length; i++) {
if (sample[i] == 0) {
v.set(i, -sqrt);
continue;
}
if (sample[i] == 1) {
v.set(i, 0);
continue;
}
v.set(i, sqrt);
}
return v.normalize(2);
};
}
}