/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
*/
package cc.mallet.util;
import java.util.logging.*;
import cc.mallet.util.MalletLogger;
// Obtained from http://www.stat.vt.edu/~sundar/java/code/Univariate.html
// August 2002
/** * @(#)Univariate.java * * DAMAGE (c) 2000 by Sundar Dorai-Raj
* * @author Sundar Dorai-Raj
* * Email: sdoraira@vt.edu
* * This program is free software; you can redistribute it and/or
* * modify it under the terms of the GNU General Public License
* * as published by the Free Software Foundation; either version 2
* * of the License, or (at your option) any later version,
* * provided that any use properly credits the author.
* * This program is distributed in the hope that it will be useful,
* * but WITHOUT ANY WARRANTY; without even the implied warranty of
* * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* * GNU General Public License for more details at http://www.gnu.org * * */
// Univariate: descriptive statistics (mean, sample variance, standard
// deviation, standard error, order statistics and quantiles) for a fixed
// sample of doubles. The sample is copied at construction time; the mean and
// variance are computed eagerly, the sorted copy lazily on first use.
public class Univariate {
    private static final Logger logger = MalletLogger.getLogger(Univariate.class.getName());

    /** Absolute tolerance used by {@link #indexOf(double)} when matching values. */
    private static final double MATCH_TOLERANCE = 1e-6;

    /** Private copy of the sample, in the caller's original order. */
    private final double[] x;
    /** Ascending copy of {@link #x}; only valid while {@link #isSorted} is true. */
    private double[] sortx;
    /** Buffer reused by every call to {@link #summary()}. */
    private final double[] summary = new double[6];
    private boolean isSorted = false;
    /** Kept for backward compatibility; this class never writes to it. */
    public double[] five = new double[5];
    /** Number of observations, i.e. {@code x.length}. */
    private final int n;
    private double mean, variance, stdev;

    /**
     * Creates the statistics for a sample.
     *
     * @param data the observations; the array is cloned, so later changes to
     *             the caller's array are not seen by this object
     * @throws NullPointerException if {@code data} is null
     */
    public Univariate(double[] data) {
        x = (double[]) data.clone();
        n = x.length;
        createSummaryStats();
    }

    /**
     * Computes the mean, the sample variance (divisor {@code n-1}) and the
     * standard deviation. The variance is accumulated from squared deviations
     * about the mean rather than from {@code sum(x^2) - n*mean^2}, because the
     * latter can come out slightly negative through cancellation and then
     * yields a NaN standard deviation. With fewer than two observations the
     * variance is 0; with none the mean is NaN.
     */
    private void createSummaryStats() {
        double total = 0;
        for (int i = 0; i < n; i++) {
            total += x[i];
        }
        mean = total / n;

        double squaredDeviations = 0;
        for (int i = 0; i < n; i++) {
            double deviation = x[i] - mean;
            squaredDeviations += deviation * deviation;
        }
        variance = (n > 1) ? squaredDeviations / (n - 1) : 0;
        stdev = Math.sqrt(variance);
    }

    /**
     * Returns the summary statistics as a six element array:
     * {@code [0]} sample size, {@code [1]} mean, {@code [2]} variance,
     * {@code [3]} standard deviation, {@code [4]} standard error of the mean,
     * {@code [5]} mean divided by its standard error.
     *
     * @return the same internal array on every call, refreshed in place
     */
    public double[] summary() {
        double standardError = Math.sqrt(variance / n);
        summary[0] = n;
        summary[1] = mean;
        summary[2] = variance;
        summary[3] = stdev;
        summary[4] = standardError;
        summary[5] = mean / standardError;
        return summary;
    }

    /** @return the arithmetic mean; NaN when the sample is empty */
    public double mean() {
        return mean;
    }

    /** @return the sample variance (divisor {@code n-1}); 0 when {@code n < 2} */
    public double variance() {
        return variance;
    }

    /** @return the square root of {@link #variance()} */
    public double stdev() {
        return stdev;
    }

    /** @return the standard error of the mean, {@code sqrt(variance / n)} */
    public double SE() {
        return Math.sqrt(variance / n);
    }

    /** @return the largest observation; NaN when the sample is empty */
    public double max() {
        if (n == 0) {
            return Double.NaN;
        }
        return sorted()[n - 1];
    }

    /** @return the smallest observation; NaN when the sample is empty */
    public double min() {
        if (n == 0) {
            return Double.NaN;
        }
        return sorted()[0];
    }

    /** @return the 0.5 quantile, see {@link #quant(double)} */
    public double median() {
        return quant(0.50);
    }

    /**
     * Returns the {@code q}-th sample quantile. The 1-based rank
     * {@code (n+1)*q} is clamped to {@code [1, n]} and the result is linearly
     * interpolated between the two neighbouring order statistics, weighted by
     * the fractional part of the rank.
     *
     * @param q the probability, expected in {@code [0, 1]}
     * @return the quantile; 0 when {@code q} is NaN or outside {@code [0, 1]};
     *         NaN when the sample is empty
     */
    public double quant(double q) {
        // Written as a negated conjunction so that a NaN q is rejected too.
        if (!(q >= 0 && q <= 1)) {
            return 0;
        }
        if (n == 0) {
            return Double.NaN;
        }
        double[] sorted = sorted();
        // Clamping keeps the rank on the array: (n+1)*q is below 1 for small q
        // and above n for large q.
        double rank = Math.max(1, Math.min(n, (n + 1) * q));
        int lower = (int) Math.floor(rank);
        double fraction = rank - lower;
        if (fraction == 0) {
            return sorted[lower - 1];
        }
        // fraction > 0 implies lower < n, so sorted[lower] is in range.
        return (1 - fraction) * sorted[lower - 1] + fraction * sorted[lower];
    }

    /**
     * Rebuilds the ascending copy of the data from the current contents of the
     * internal sample array and marks it valid.
     *
     * @return the internal sorted array (not a copy)
     */
    public double[] sort() {
        sortx = (double[]) x.clone();
        java.util.Arrays.sort(sortx);
        isSorted = true;
        return sortx;
    }

    /** Returns the sorted copy, building it on first use. */
    private double[] sorted() {
        if (!isSorted) {
            sortx = sort();
        }
        return sortx;
    }

    /** @return the internal sample array in original order (not a copy) */
    public double[] getData() {
        return x;
    }

    /** @return the number of observations */
    public int size() {
        return n;
    }

    /**
     * Returns the observation at a position in the original order.
     *
     * @param index 0-based position
     * @return the observation, or 0 (after logging at INFO level) when
     *         {@code index} is outside {@code [0, size())}
     */
    public double elementAt(int index) {
        if (index < 0 || index >= n) {
            logger.info ("Index "+ index +" does not exist in data.");
            return 0;
        }
        return x[index];
    }

    /**
     * Returns the observations at the given positions, in the given order.
     * Each position is resolved like {@link #elementAt(int)}: an invalid one
     * is logged and contributes 0 to the result.
     *
     * @param indices 0-based positions into the original data
     * @return a new array with one value per entry of {@code indices}
     * @throws NullPointerException if {@code indices} is null
     */
    public double[] subset(int[] indices) {
        double[] elements = new double[indices.length];
        for (int i = 0; i < indices.length; i++) {
            elements[i] = elementAt(indices[i]);
        }
        return elements;
    }

    /**
     * Finds the position, in ascending sorted order, of the first observation
     * strictly greater than {@code t}.
     *
     * @param t the threshold
     * @return that position, or {@code size() - 1} when no observation
     *         exceeds {@code t} (which is -1 for an empty sample)
     */
    public int compare(double t) {
        double[] sorted = sorted();
        for (int i = 0; i < n; i++) {
            if (sorted[i] > t) {
                return i;
            }
        }
        return n - 1;
    }

    /**
     * Applies {@link #compare(double)} to both thresholds.
     *
     * @return a two element array {@code {compare(t1), compare(t2)}}
     */
    public int[] between(double t1, double t2) {
        return new int[] { compare(t1), compare(t2) };
    }

    /**
     * Searches the data in original order for a value within 1e-6 of
     * {@code element}.
     *
     * @return the position of the last such value, or -1 when there is none
     */
    public int indexOf(double element) {
        // Scanning from the end returns the last match, as callers have always seen.
        for (int i = n - 1; i >= 0; i--) {
            if (Math.abs(x[i] - element) < MATCH_TOLERANCE) {
                return i;
            }
        }
        return -1;
    }
}