/** * Copyright (c) 2011, SOCIETIES Consortium (WATERFORD INSTITUTE OF TECHNOLOGY (TSSG), HERIOT-WATT UNIVERSITY (HWU), SOLUTA.NET * (SN), GERMAN AEROSPACE CENTRE (Deutsches Zentrum fuer Luft- und Raumfahrt e.V.) (DLR), Zavod za varnostne tehnologije * informacijske družbe in elektronsko poslovanje (SETCCE), INSTITUTE OF COMMUNICATION AND COMPUTER SYSTEMS (ICCS), LAKE * COMMUNICATIONS (LAKE), INTEL PERFORMANCE LEARNING SOLUTIONS LTD (INTEL), PORTUGAL TELECOM INOVAÇÃO, SA (PTIN), IBM Corp., * INSTITUT TELECOM (ITSUD), AMITEC DIACHYTI EFYIA PLIROFORIKI KAI EPIKINONIES ETERIA PERIORISMENIS EFTHINIS (AMITEC), TELECOM * ITALIA S.p.a.(TI), TRIALOG (TRIALOG), Stiftelsen SINTEF (SINTEF), NEC EUROPE LTD (NEC)) * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following * conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT * SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.societies.privacytrust.privacyprotection.assessment.logic; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Estimation of correlation between two events (data access and data transmission) based on size * difference between the data in events. * * The function itself is basically e^(-x^2), usually without x axis shift, with max value * of 1. * This results in a correlation that: <br/> * - is 1 if size of accessed data matches size of transmitted data. <br/> * - continuously decreases with size difference <br/> * - then asymptotically approaches a value greater than zero (multiple data can be accumulated and * sent at once in a bigger chunk; on the other hand data can be compressed and smaller transmitted * data does not necessarily mean the events are not correlated) <br/> * * @author Mitja Vardjan * */ public class CorrelationInData { private static Logger LOG = LoggerFactory.getLogger(CorrelationInData.class); private final double VALUE_AT_INF_DEFAULT = 0.1; private final double SIZE_SCALE_DEFAULT = 1; private double valueAtInf; private double xScaleLeft; private double xScaleRight; private double normalizationFactor; private double normalizationOffset; /** * Constructor with default values. */ public CorrelationInData() { valueAtInf = VALUE_AT_INF_DEFAULT; xScaleLeft = SIZE_SCALE_DEFAULT; xScaleRight = SIZE_SCALE_DEFAULT; calculateNormalizationParameters(); } /** * Constructor. * * @param valueAtInf Minimal correlation value for events that are most far apart. * * @param sizeScaleLeft x axis scaling factor for cases when data size difference is negative. * Negative difference can occur for example when data has been compressed before sending. * If greater than 1, the correlation function gets wider (less sensitive to size differences). * If smaller than 1, the function gets more narrow (more sensitive to size differences). * * @param sizeScaleRight x axis scaling factor for cases when data size difference is positive. * Positive difference can occur for example when multiple pieces of data have been accumulated * before sending everything in a single packet. * If greater than 1, the correlation function gets wider (less sensitive to size differences). * If smaller than 1, the function gets more narrow (more sensitive to size differences). */ public CorrelationInData(double valueAtInf, double sizeScaleLeft, double sizeScaleRight) { if (valueAtInf >= 1 || valueAtInf < 0) { LOG.warn("Unexpected value for valueAtInf: {}. Setting default value: {}", valueAtInf, VALUE_AT_INF_DEFAULT); this.valueAtInf = VALUE_AT_INF_DEFAULT; } else { this.valueAtInf = valueAtInf; } this.xScaleLeft = sizeScaleLeft; this.xScaleRight = sizeScaleRight; calculateNormalizationParameters(); } private double correlationUnnormalized(long deltaSize) { double c; double xScale; if (deltaSize < 0) { xScale = this.xScaleLeft; } else { xScale = this.xScaleRight; } c = Math.exp(-Math.pow(deltaSize / xScale, 2)); return c; } /** * Estimates correlation between two events (data access and data transmission) based on sizes * of data in both events. * * @param deltaSize Difference in size of data in bytes. * Size of transmitted data - size of accessed data. * * @return correlation based on difference in data sizes. */ public double correlation(long deltaSize) { double c; c = normalize(correlationUnnormalized(deltaSize)); return c; } /** * Normalize to interval [valueAtInf, 1] * * @param x The value to normalize * @return Normalized value */ private double normalize(double x) { return normalizationFactor * x + normalizationOffset; } private void calculateNormalizationParameters() { // Value of Math.exp(-Math.pow(0 / xScale, 2)) is always 1 // => no need to divide normalizationFactor with it this.normalizationFactor = (1 - valueAtInf); this.normalizationOffset = valueAtInf; } public double getMeanCorrelation() { return (1 - valueAtInf) / 2; } }