/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.risk;
import org.deidentifier.arx.ARXPopulationModel;
import org.deidentifier.arx.common.WrappedBoolean;
import org.deidentifier.arx.common.WrappedInteger;
import de.linearbits.newtonraphson.Function;
import de.linearbits.newtonraphson.NewtonRaphson2D;
import de.linearbits.newtonraphson.NewtonRaphsonConfiguration;
import de.linearbits.newtonraphson.SquareMatrix2D;
import de.linearbits.newtonraphson.Vector2D;
/**
* This class implements the SNB model for population uniqueness, for details see Chen, 1998
*
* @author Fabian Prasser
* @author Michael Schneider
*
* @version 1.0
*/
class ModelSNB extends RiskModelPopulation {
/** The result */
private final double uniques;
/**
* Creates a new instance
*
* @param model
* @param histogram
* @param config
* @param stop
*/
ModelSNB(final ARXPopulationModel model,
final RiskModelHistogram histogram,
final NewtonRaphsonConfiguration<?> config,
final WrappedBoolean stop) {
super(histogram, model, stop, new WrappedInteger());
// Prepare
int[] _histogram = super.getHistogram().getHistogram();
double c1 = super.getNumClassesOfSize(1);
double c2 = super.getNumClassesOfSize(2);
double k = estimateNonEmptyEquivalenceClasses(_histogram, super.getNumClasses(),
c1, super.getSamplingFraction());
double f = getSamplingFraction();
// Solve the maximum likelihood estimates
Vector2D result = new NewtonRaphson2D(getObjectFunction(k, f, c1, c2),
getDerivatives(k, f, c1, c2))
.configure(config)
.solve();
// Compile and store
this.uniques = k * Math.pow(result.y, result.x);
}
/**
* Returns the number of uniques
*
* @return
*/
public double getNumUniques() {
return this.uniques;
}
/**
* @return Shlosser estimator for variable K, giving number of non zero
* classes in the population estimated according to Haas, 1998 and
* Shlosser
*/
private double estimateNonEmptyEquivalenceClasses(int[] histogram,
double n,
double n1,
double f) {
double var1 = 0, var2 = 0, var3 = 0, var4 = 0;
double var5 = f * f;
for (int i = 0; i < histogram.length; i += 2) {
double val0 = histogram[i];
double val1 = histogram[i + 1];
double val2 = Math.pow(1 - f, val0) * val1;
var1 += val0 * var5 * Math.pow(1 - var5, val0 - 1) * val1;
var2 += val2 * Math.pow(1 + f, val0) - 1;
var3 += val2;
var4 += val0 * f * Math.pow(1 - f, val0 - 1) * val1;
checkInterrupt();
}
return n + n1 * (var1 / var2) * (var3 / var4) * (var3 / var4);
}
/**
* Returns the derivatives
*
* @param k
* @param f
* @param c1
* @param c2
* @return
*/
private Function<Vector2D, SquareMatrix2D> getDerivatives(final double k,
final double f,
final double c1,
final double c2) {
return new Function<Vector2D, SquareMatrix2D>() {
public SquareMatrix2D evaluate(Vector2D input) {
// The derivation of the following formulas has been obtained using Matlab
final double a = input.x;
final double b = input.y;
final double val0 = (b - 1d) * (f - 1d);
final double val1 = val0 - 1d;
final double val2 = 1d - val0;
final double val3 = a * val0 / val1 - 1d;
final double val4 = Math.pow(-b / val1, a);
final double val6 = Math.pow(f, 2d);
final double val7 = Math.pow(b, a);
final double val8 = val7 * val6 * k;
final double val9 = a * val8;
final double val10 = 2d * Math.pow(val2, a + 2d);
final double val11 = Math.pow((val1), 2d);
final double val13 = f * k;
final double val14 = f - 1d;
final double val15 = a - 1d;
final double val16 = b - 1d;
final double val17 = val15 * val0;
final double val18 = val6 * k;
final double val19 = (val17 + 2d);
final double val20 = val18 * val19;
SquareMatrix2D result = new SquareMatrix2D();
// Formula 1d, d alpha
result.x1 = -val13 * Math.log(-b / val1) * val3 * val4 -
(val13 * val4 * val0) / val1;
// Formula 1d, d beta
result.x2 = a * val13 * (1d / val1 - (b * val14) / val11) *
val3 * Math.pow((-b / val1), val15) - val13 *
val4 * ((a * val14) / val1 - (a * val16 * Math.pow(val14, 2d)) / val11);
// Formula 2d, d alpha
result.y1 = (val9 * Math.log(val2) * val19 * val16) / (val10) -
(val9 * Math.pow(val16, 2d) * val14) / (val10) -
(val7 * val20 * val16) / (val10) -
(a * val7 * val18 * Math.log(b) * val19 * val16) /
(val10);
// Formula 2d, d beta
result.y2 = -(val9 * val19) / (val10) - (Math.pow(a, 2d) * Math.pow(b, val15) * val20 * val16) /
val10 - (a * val7 * val18 * val17) / val10 -
(a * val7 * val20 * (a + 2d) * val0) /
(2d * Math.pow((val2), (a + 3d)));
return result;
}
};
}
/**
* Returns the object function
*
* @param k
* @param f
* @param c1
* @param c2
* @return
*/
private Function<Vector2D, Vector2D> getObjectFunction(final double k,
final double f,
final double c1,
final double c2) {
return new Function<Vector2D, Vector2D>() {
public Vector2D evaluate(Vector2D input) {
// The derivatives of the following formulas have been obtained using Matlab
final double a = input.x;
final double b = input.y;
final double dividend = (1 - f) * (1 - b);
// Original equations to determine the value of the parameters alpha and beta in the SNB Model
Vector2D result = new Vector2D();
result.x = k * f * Math.pow(b / (1 - dividend), a) *
(((a * dividend) / (1 - dividend)) + 1) - c1;
result.y = k * a * Math.pow(b, a) * (f * f) * (1 - b) / 2 *
Math.pow(1 - dividend, a + 2) *
(2 - (1 - a) * dividend) - c2;
return result;
}
};
}
}