/*******************************************************************************
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package org.dkpro.bigdata.collocations;
import org.apache.commons.lang.NotImplementedException;
/**
 * Association measures for a 2x2 contingency table, following the formulas collected by
 * Stefan Evert at www.collocations.de.
 *
 * <p>
 * Usage: call {@link #init(long, long, long, long)} with the four observed cell counts, then
 * call any of the measure methods. The table layout is
 *
 * <pre>
 *          | w2  | !w2 |
 *     -----+-----+-----+----
 *      w1  | O11 | O12 | R1
 *     !w1  | O21 | O22 | R2
 *     -----+-----+-----+----
 *          | C1  | C2  | N
 * </pre>
 *
 * <p>
 * All arithmetic is done on doubles, so degenerate tables (e.g. empty rows or columns) yield
 * {@code NaN} or infinite values instead of throwing.
 *
 * @author zorn
 */
public class AssociationMetrics
{
    // observed frequencies
    double o11;
    double o12;
    double o21;
    double o22;
    // marginals
    double r1;
    double r2;
    double c1;
    double c2;
    double n;
    // expected values under independence
    double e11 = 0.0;
    double e12 = 0.0;
    double e21 = 0.0;
    double e22 = 0.0;
    // upper-case aliases of the observed frequencies (kept for code that reads them)
    double O11;
    double O12;
    double O21;
    double O22;
    // upper-case aliases of the marginals
    double R1;
    double R2;
    double C1;
    double C2;
    double N;

    /**
     * Loads a contingency table and derives marginals and expected frequencies from it.
     *
     * @param o11
     *            observed count of cell (1,1), i.e. the joint frequency
     * @param o12
     *            observed count of cell (1,2)
     * @param o21
     *            observed count of cell (2,1)
     * @param o22
     *            observed count of cell (2,2)
     */
    public void init(long o11, long o12, long o21, long o22)
    {
        this.o11 = o11;
        this.o12 = o12;
        this.o21 = o21;
        this.o22 = o22;
        // keep the upper-case aliases in sync; previously they were never assigned
        O11 = this.o11;
        O12 = this.o12;
        O21 = this.o21;
        O22 = this.o22;

        // marginals, summed as doubles so that very large counts cannot overflow a long
        r1 = this.o11 + this.o12;
        r2 = this.o21 + this.o22;
        c1 = this.o11 + this.o21;
        c2 = this.o12 + this.o22;
        n = r1 + r2;
        R1 = r1;
        R2 = r2;
        C1 = c1;
        C2 = c2;
        N = n;

        // expected frequencies under the independence hypothesis
        e11 = (r1 * c1) / n;
        e12 = (r1 * c2) / n;
        e21 = (r2 * c1) / n;
        e22 = (r2 * c2) / n;
    }

    /**
     * Pointwise mutual information in bits: {@code log2(O11 / E11)}.
     *
     * @return the base-2 logarithm of observed over expected joint frequency
     */
    public double pmi()
    {
        return Math.log(o11 / e11) / Math.log(2);
    }

    /**
     * Pearson's chi-squared statistic for a 2x2 table: {@code N * (O11 - E11)^2 / (E11 * E22)}.
     *
     * @return the chi-squared statistic
     */
    public double chisquared()
    {
        double deviation = o11 - e11;
        return (n * deviation * deviation) / (e11 * e22);
    }

    /**
     * Chi-squared statistic with Yates' continuity correction:
     * {@code N * (|O11*O22 - O12*O21| - N/2)^2 / (R1 * R2 * C1 * C2)}.
     *
     * @return the corrected chi-squared statistic
     */
    public double chisquared_corr()
    {
        double corrected = Math.abs(o11 * o22 - o12 * o21) - (n / 2);
        return (n * corrected * corrected) / (r1 * r2 * c1 * c2);
    }

    /**
     * Mutual information using the natural logarithm: {@code ln(O11 / E11)}.
     *
     * @return the natural logarithm of observed over expected joint frequency
     */
    public double mutual_information()
    {
        return Math.log(o11 / e11);
    }

    /**
     * Discounted odds ratio: {@code ((O11 + 0.5) * (O22 + 0.5)) / ((O12 + 0.5) * (O21 + 0.5))}.
     * The plain ratio is returned; no logarithm is applied.
     *
     * @return the discounted odds ratio
     */
    public double odds_ratio_discounted()
    {
        return ((o11 + 0.5) * (o22 + 0.5)) / ((o12 + 0.5) * (o21 + 0.5));
    }

    /**
     * Dice coefficient: {@code 2 * O11 / (R1 + C1)}.
     *
     * @return the Dice coefficient
     */
    public double dice()
    {
        return (2 * o11 / (r1 + c1));
    }

    /**
     * Minimum sensitivity: {@code min(O11 / R1, O11 / C1)}.
     *
     * @return the smaller of the two conditional relative frequencies
     */
    public double ms()
    {
        return Math.min(o11 / r1, o11 / c1);
    }

    /**
     * Geometric mean measure: {@code O11 / sqrt(N * E11)}, which equals
     * {@code O11 / sqrt(R1 * C1)}.
     *
     * @return the geometric mean association score
     */
    public double gmean()
    {
        return o11 / Math.sqrt(n * e11);
    }

    /**
     * Local mutual information: {@code O11 * ln(O11 / E11)}, using the natural logarithm like
     * {@link #mutual_information()}. A zero joint frequency contributes 0.
     *
     * @return the local mutual information
     */
    public double local_mi()
    {
        return weightedLogRatio(o11, e11);
    }

    /**
     * Average mutual information: the sum of {@code Oij * ln(Oij / Eij)} over all four cells,
     * using the natural logarithm. Empty cells contribute 0.
     *
     * @return the average mutual information
     */
    public double average_mi()
    {
        return weightedLogRatio(o11, e11) + weightedLogRatio(o12, e12)
                + weightedLogRatio(o21, e21) + weightedLogRatio(o22, e22);
    }

    /**
     * Computes {@code observed * ln(observed / expected)} with the convention
     * {@code 0 * ln(0) = 0}, which avoids the NaN that plain floating-point evaluation gives.
     */
    private static double weightedLogRatio(double observed, double expected)
    {
        if (observed == 0.0) {
            return 0.0;
        }
        return observed * Math.log(observed / expected);
    }

    // TODO: Fisher's exact test is not implemented yet; it needs a numerically stable
    // binomial coefficient to sum the hypergeometric probabilities.
}