package hu.u_szeged.utils;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.nio.charset.Charset;
import java.util.AbstractSequentialList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * Helper methods for simple statistics, index-based selection and sorting, tolerant floating-point
 * comparison, line-oriented text reading, Java serialization and string joining.
 *
 * <p>The reading methods share one static charset (initially UTF-8). Every overload that accepts a
 * {@link Charset} overwrites that shared value, so the choice also applies to all later calls that
 * do not pass a charset. This shared state is not synchronized.
 */
public class NLPUtils {

  private static final String DEFAULT_CHARSET = "UTF-8";

  /** Charset used by the reading methods; replaced by every overload that takes a charset. */
  private static Charset charset = Charset.forName(DEFAULT_CHARSET);

  /** Natural logarithm of 2, the divisor used by {@link #log2(double)}. */
  public static double log2 = Math.log(2);

  /** Tolerance used by {@link #eq(double, double)} and {@link #grOrEq(double, double)}. */
  public static double SMALL = 1e-6;

  /**
   * Computes the mean for an array of Numbers.
   *
   * @param vector
   *            the array
   * @return the mean, or 0 for an empty array
   */
  public static/* @pure@ */double mean(Number[] vector) {
    if (vector.length == 0) {
      return 0;
    }
    double sum = 0;
    for (Number value : vector) {
      sum += value.doubleValue();
    }
    return sum / (double) vector.length;
  }

  /**
   * Computes the mean for a collection of numbers.
   *
   * @param vector
   *            the values; any element type extending {@link Number} is accepted
   * @return the mean, or 0 for an empty collection
   */
  public static/* @pure@ */double mean(Collection<? extends Number> vector) {
    return mean(vector.toArray(new Number[vector.size()]));
  }

  /**
   * Computes the mean for an array of doubles.
   *
   * @param vector
   *            the array
   * @return the mean, or 0 for an empty array
   */
  public static/* @pure@ */double mean(double[] vector) {
    if (vector.length == 0) {
      return 0;
    }
    double sum = 0;
    for (double value : vector) {
      sum += value;
    }
    return sum / (double) vector.length;
  }

  /**
   * Computes mean and biased (population) variance of a collection of numbers.
   *
   * @param vector
   *            the values
   * @return a two-element array {@code {mean, variance}}
   */
  public static/* @pure@ */double[] meanAndVariance(Collection<? extends Number> vector) {
    return meanAndVariance(vector.toArray(new Number[vector.size()]), true);
  }

  /**
   * Computes mean and variance of a collection of numbers.
   *
   * @param vector
   *            the values
   * @param biased
   *            {@code true} to divide by n, {@code false} to divide by n - 1
   * @return a two-element array {@code {mean, variance}}
   */
  public static/* @pure@ */double[] meanAndVariance(Collection<? extends Number> vector, boolean biased) {
    return meanAndVariance(vector.toArray(new Number[vector.size()]), biased);
  }

  /**
   * Computes mean and variance of an array of numbers.
   *
   * @param vector
   *            the values
   * @param biased
   *            {@code true} to divide by n, {@code false} to divide by n - 1
   * @return a two-element array {@code {mean, variance}}; {@code {0, 0}} for an empty array. The
   *         variance is never negative, and the unbiased variance of a single value is 0.
   */
  public static/* @pure@ */double[] meanAndVariance(Number[] vector, boolean biased) {
    int n = vector.length;
    if (n == 0) {
      return new double[] { 0.0d, 0.0d };
    }
    double sum = 0.0d;
    double squareSum = 0.0d;
    for (Number value : vector) {
      double v = value.doubleValue();
      sum += v;
      squareSum += v * v;
    }
    double mean = sum / (double) n;
    double variance = (squareSum / (double) n) - (mean * mean);
    if (!biased) {
      // With a single value the correction factor n / (n - 1) would be a division by zero
      // (yielding NaN); report 0 instead, as variance(double[]) does.
      variance = (n > 1) ? variance * (n / (double) (n - 1)) : 0.0d;
    }
    if (variance < 0) {
      // Rounding in E[x^2] - mean^2 can produce a tiny negative number.
      variance = 0.0d;
    }
    return new double[] { mean, variance };
  }

  /**
   * Returns the kth-smallest value in the array. The array itself is not modified.
   *
   * @param array
   *            the array of integers; must not be empty
   * @param k
   *            the 1-based rank ({@code 1} selects the smallest value); not range-checked
   * @return the kth-smallest value
   */
  public static double kthSmallestValue(int[] array, int k) {
    int[] index = identityIndex(array.length);
    return array[index[select(array, index, 0, array.length - 1, k)]];
  }

  /**
   * Returns the kth-smallest value in the array. The array itself is not modified.
   *
   * @param array
   *            the array of double; must not be empty
   * @param k
   *            the 1-based rank ({@code 1} selects the smallest value); not range-checked
   * @return the kth-smallest value
   */
  public static double kthSmallestValue(double[] array, int k) {
    int[] index = identityIndex(array.length);
    return array[index[select(array, index, 0, array.length - 1, k)]];
  }

  /** Creates the array {@code {0, 1, ..., length - 1}}. */
  private static int[] identityIndex(int length) {
    int[] index = new int[length];
    for (int i = 0; i < length; i++) {
      index[i] = i;
    }
    return index;
  }

  /**
   * Returns index of maximum element in a given array of doubles. First maximum is returned.
   *
   * @param doubles
   *            the array of doubles
   * @return the index of the maximum element; 0 for an empty array
   */
  public static/* @pure@ */int maxIndex(double[] doubles) {
    int best = 0;
    for (int i = 1; i < doubles.length; i++) {
      if (doubles[i] > doubles[best]) {
        best = i;
      }
    }
    return best;
  }

  /**
   * Returns index of maximum element in a given array of integers. First maximum is returned.
   *
   * @param ints
   *            the array of integers
   * @return the index of the maximum element; 0 for an empty array
   */
  public static/* @pure@ */int maxIndex(int[] ints) {
    int best = 0;
    for (int i = 1; i < ints.length; i++) {
      if (ints[i] > ints[best]) {
        best = i;
      }
    }
    return best;
  }

  /**
   * Returns index of minimum element in a given array of integers. First minimum is returned.
   *
   * @param ints
   *            the array of integers
   * @return the index of the minimum element; 0 for an empty array
   */
  public static/* @pure@ */int minIndex(int[] ints) {
    int best = 0;
    for (int i = 1; i < ints.length; i++) {
      if (ints[i] < ints[best]) {
        best = i;
      }
    }
    return best;
  }

  /**
   * Returns index of minimum element in a given array of doubles. First minimum is returned.
   *
   * @param doubles
   *            the array of doubles
   * @return the index of the minimum element; 0 for an empty array
   */
  public static/* @pure@ */int minIndex(double[] doubles) {
    int best = 0;
    for (int i = 1; i < doubles.length; i++) {
      if (doubles[i] < doubles[best]) {
        best = i;
      }
    }
    return best;
  }

  /**
   * Narrows {@code index[left..right]} by repeated partitioning until a single position remains.
   *
   * @return the position in {@code index} that refers to the kth-smallest value of the range
   */
  private static int select(/* @non_null@ */double[] array, /* @non_null@ */int[] index, int left, int right, int k) {
    while (left != right) {
      int middle = partition(array, index, left, right);
      int leftSize = middle - left + 1;
      if (leftSize >= k) {
        right = middle;
      } else {
        // The wanted element is in the right part; adjust the rank to that part.
        left = middle + 1;
        k -= leftSize;
      }
    }
    return left;
  }

  /**
   * Narrows {@code index[left..right]} by repeated partitioning until a single position remains.
   *
   * @return the position in {@code index} that refers to the kth-smallest value of the range
   */
  private static int select(/* @non_null@ */int[] array, /* @non_null@ */int[] index, int left, int right, int k) {
    while (left != right) {
      int middle = partition(array, index, left, right);
      int leftSize = middle - left + 1;
      if (leftSize >= k) {
        right = middle;
      } else {
        // The wanted element is in the right part; adjust the rank to that part.
        left = middle + 1;
        k -= leftSize;
      }
    }
    return left;
  }

  /**
   * Sorts the given array in place into ascending natural order.
   *
   * @param arr
   *            the array to sort; an empty array is left untouched
   */
  public <T extends Comparable<? super T>> void sort(T[] arr) {
    Arrays.sort(arr);
  }

  /**
   * Rearranges {@code index[l..r]} around the value referenced by the middle position. Only
   * {@code index} is modified. Callers continue on {@code [l, result]} and {@code [result + 1, r]}.
   *
   * @return the last position of the left part
   */
  private static <T extends Comparable<? super T>> int partition(T[] array, int[] index, int l, int r) {
    T pivot = array[index[(l + r) / 2]];
    int help;
    while (l < r) {
      while (array[index[l]].compareTo(pivot) < 0 && l < r) {
        l++;
      }
      while (array[index[r]].compareTo(pivot) > 0 && l < r) {
        r--;
      }
      if (l < r) {
        help = index[l];
        index[l] = index[r];
        index[r] = help;
        l++;
        r--;
      }
    }
    if (l == r && array[index[r]].compareTo(pivot) > 0) {
      r--;
    }
    return r;
  }

  /**
   * Rearranges {@code index[l..r]} around the value referenced by the middle position. Only
   * {@code index} is modified. Callers continue on {@code [l, result]} and {@code [result + 1, r]}.
   *
   * @return the last position of the left part
   */
  private static int partition(double[] array, int[] index, int l, int r) {
    double pivot = array[index[(l + r) / 2]];
    int help;
    while (l < r) {
      while ((array[index[l]] < pivot) && (l < r)) {
        l++;
      }
      while ((array[index[r]] > pivot) && (l < r)) {
        r--;
      }
      if (l < r) {
        help = index[l];
        index[l] = index[r];
        index[r] = help;
        l++;
        r--;
      }
    }
    if ((l == r) && (array[index[r]] > pivot)) {
      r--;
    }
    return r;
  }

  /**
   * Rearranges {@code index[l..r]} around the value referenced by the middle position. Only
   * {@code index} is modified. Callers continue on {@code [l, result]} and {@code [result + 1, r]}.
   *
   * @return the last position of the left part
   */
  private static int partition(int[] array, int[] index, int l, int r) {
    int pivot = array[index[(l + r) / 2]];
    int help;
    while (l < r) {
      while ((array[index[l]] < pivot) && (l < r)) {
        l++;
      }
      while ((array[index[r]] > pivot) && (l < r)) {
        r--;
      }
      if (l < r) {
        help = index[l];
        index[l] = index[r];
        index[r] = help;
        l++;
        r--;
      }
    }
    if ((l == r) && (array[index[r]] > pivot)) {
      r--;
    }
    return r;
  }

  /**
   * Computes {@code c * log2(c)}.
   *
   * @param c
   *            the value
   * @return {@code c * log2(c)}, or 0 when {@code c} is 0
   */
  public static/* @pure@ */double xlogx(double c) {
    if (c == 0) {
      return 0.0;
    }
    return c * log2(c);
  }

  /**
   * Computes the base-2 logarithm.
   *
   * @param a
   *            the value
   * @return {@code ln(a)} divided by the {@code log2} field
   */
  public static/* @pure@ */double log2(double a) {
    return Math.log(a) / log2;
  }

  /**
   * Computes the permutation that sorts the array into ascending natural order. Elements that
   * compare as equal keep their original relative order. The array is not modified.
   *
   * @param array
   *            the array to sort
   * @return the original positions of the elements, listed in sorted order
   */
  public static <T extends Comparable<? super T>> int[] stableSort(T[] array) {
    array = array.clone();
    int[] index = identityIndex(array.length);
    int[] newIndex = new int[array.length];
    quickSort(array, index, 0, array.length - 1);
    // Make sort stable: within each run of tied elements restore ascending original position.
    // Ties are detected with compareTo, the same relation the sort itself uses.
    int runStart = 0;
    while (runStart < index.length) {
      int runEnd = runStart + 1;
      while ((runEnd < index.length) && (array[index[runStart]].compareTo(array[index[runEnd]]) == 0)) {
        runEnd++;
      }
      copyRunInOriginalOrder(index, runStart, runEnd, newIndex);
      runStart = runEnd;
    }
    return newIndex;
  }

  /**
   * Computes the permutation that sorts the array into ascending order. NaN is treated as
   * {@code Double.MAX_VALUE}. Consecutive sorted values that {@link #eq(double, double)} considers
   * equal to the first value of their run are treated as ties and keep their original relative
   * order. The array is not modified.
   *
   * @param array
   *            the array to sort
   * @return the original positions of the elements, listed in sorted order
   */
  public static/* @pure@ */int[] stableSort(double[] array) {
    array = array.clone();
    int[] index = identityIndex(array.length);
    int[] newIndex = new int[array.length];
    for (int i = 0; i < array.length; i++) {
      // NaN does not order under < and >, so give it a definite place before partitioning.
      if (Double.isNaN(array[i])) {
        array[i] = Double.MAX_VALUE;
      }
    }
    quickSort(array, index, 0, array.length - 1);
    // Make sort stable: within each run of tied elements restore ascending original position.
    int runStart = 0;
    while (runStart < index.length) {
      int runEnd = runStart + 1;
      while ((runEnd < index.length) && eq(array[index[runStart]], array[index[runEnd]])) {
        runEnd++;
      }
      copyRunInOriginalOrder(index, runStart, runEnd, newIndex);
      runStart = runEnd;
    }
    return newIndex;
  }

  /**
   * Copies {@code index[from..to)} to the same positions of {@code target} in ascending order of
   * the stored original positions.
   */
  private static void copyRunInOriginalOrder(int[] index, int from, int to, int[] target) {
    System.arraycopy(index, from, target, from, to - from);
    Arrays.sort(target, from, to);
  }

  /** Sorts {@code index[left..right]} by the referenced values of {@code array}. */
  private static <T extends Comparable<? super T>> void quickSort(T[] array, int[] index, int left, int right) {
    if (left < right) {
      int middle = partition(array, index, left, right);
      quickSort(array, index, left, middle);
      quickSort(array, index, middle + 1, right);
    }
  }

  /** Sorts {@code index[left..right]} by the referenced values of {@code array}. */
  private static void quickSort(/* @non_null@ */double[] array, /* @non_null@ */int[] index, int left, int right) {
    if (left < right) {
      int middle = partition(array, index, left, right);
      quickSort(array, index, left, middle);
      quickSort(array, index, middle + 1, right);
    }
  }

  /**
   * Tests whether {@code a} is greater than or equal to {@code b}, allowing a shortfall smaller
   * than {@link #SMALL}.
   */
  public static/* @pure@ */boolean grOrEq(double a, double b) {
    return (b - a < SMALL);
  }

  /** Tests whether {@code a} and {@code b} differ by less than {@link #SMALL}. */
  public static/* @pure@ */boolean eq(double a, double b) {
    return (a - b < SMALL) && (b - a < SMALL);
  }

  /**
   * Computes the unbiased (n - 1) variance of an array of doubles.
   *
   * @param vector
   *            the values
   * @return the variance; 0 for fewer than two values and never negative
   */
  public static/* @pure@ */double variance(double[] vector) {
    if (vector.length <= 1) {
      return 0;
    }
    double sum = 0;
    double sumSquared = 0;
    for (double value : vector) {
      sum += value;
      sumSquared += (value * value);
    }
    double result = (sumSquared - (sum * sum / (double) vector.length)) / (double) (vector.length - 1);
    // We don't like negative variance
    return (result < 0) ? 0 : result;
  }

  /**
   * Reads a file as rows of fields using the given charset, which also becomes the charset of all
   * later reads that do not specify one.
   *
   * @see #readAsList(String, String)
   */
  public static List<List<String>> readAsList(String file, String delimiter, Charset characterSet) {
    charset = characterSet;
    return readAsList(file, delimiter);
  }

  /**
   * Reads a file line by line and splits every line with {@link String#split(String)}.
   *
   * @param file
   *            path of the file to read
   * @param delimiter
   *            regular expression separating the fields
   * @return one fixed-size list per line; on failure the stack trace is printed and the rows read
   *         so far are returned
   */
  public static List<List<String>> readAsList(String file, String delimiter) {
    List<List<String>> list = new LinkedList<List<String>>();
    FileInputStream in = null;
    BufferedReader br = null;
    try {
      in = new FileInputStream(file);
      br = new BufferedReader(new InputStreamReader(in, charset));
      String line;
      while ((line = br.readLine()) != null) {
        list.add(Arrays.asList(line.split(delimiter)));
      }
    } catch (Exception e) {
      // Deliberately broad: an invalid delimiter pattern is reported like an I/O failure.
      e.printStackTrace();
    } finally {
      close(br != null ? br : in);
    }
    return list;
  }

  /**
   * Appends the lines of a file to a collection using the given charset, which also becomes the
   * charset of all later reads that do not specify one.
   */
  public static void readDocToCollection(String file, Collection<String> collection, Charset chSet) {
    charset = chSet;
    readDocToCollection(file, collection);
  }

  /**
   * Appends the lines of a file to a collection. A missing file is reported on standard error.
   *
   * @param file
   *            path of the file to read
   * @param collection
   *            receives one element per line
   */
  public static void readDocToCollection(String file, Collection<String> collection) {
    try {
      readDocToCollection(new FileInputStream(new File(file)), collection, charset);
    } catch (FileNotFoundException e) {
      System.err.println("Error during reading input stream " + file);
    }
  }

  /**
   * Appends the lines of a file to a collection using the given charset, which also becomes the
   * charset of all later reads that do not specify one.
   */
  public static void readDocToCollection(File file, Collection<String> collection, Charset chSet) {
    charset = chSet;
    readDocToCollection(file, collection);
  }

  /**
   * Appends the lines of a file to a collection. A missing file is reported on standard error
   * together with the stack trace.
   *
   * @param file
   *            the file to read
   * @param collection
   *            receives one element per line
   */
  public static void readDocToCollection(File file, Collection<String> collection) {
    try {
      readDocToCollection(new FileInputStream(file), collection, charset);
    } catch (FileNotFoundException e) {
      System.err.println("Error during reading input stream " + file);
      e.printStackTrace();
    }
  }

  /**
   * Appends the lines of a stream to a collection and closes the stream, also when reading fails
   * or the collection is {@code null}. Problems are reported on standard error, not thrown.
   *
   * @param is
   *            the stream to read
   * @param collection
   *            receives one element per line
   */
  public static void readDocToCollection(InputStream is, Collection<String> collection) {
    if (collection == null) {
      System.err.println("Collection to add elements into not initialized...");
      close(is);
      return;
    }
    if (is == null) {
      System.err.println("Error during reading input stream " + is);
      return;
    }
    BufferedReader br = null;
    try {
      br = new BufferedReader(new InputStreamReader(is, charset));
      String line;
      while ((line = br.readLine()) != null) {
        collection.add(line);
      }
    } catch (IOException e) {
      System.err.println("Error during reading input stream " + is);
    } finally {
      close(br != null ? br : is);
    }
  }

  /**
   * Appends the lines of a stream to a collection using the given charset, which also becomes the
   * charset of all later reads that do not specify one.
   */
  public static void readDocToCollection(InputStream is, Collection<String> collection, Charset chSet) {
    charset = chSet;
    readDocToCollection(is, collection);
  }

  /**
   * Reads a tab-separated key/count file using the given charset, which also becomes the charset
   * of all later reads that do not specify one.
   *
   * @see #readDocToMap(String)
   */
  public static Map<String, Integer> readDocToMap(String file, Charset chSet) {
    charset = chSet;
    // Delegate to the single-argument overload; calling this overload again would never return.
    return readDocToMap(file);
  }

  /**
   * Reads a file whose lines have the form {@code key<TAB>integer}. Lines that do not split into
   * exactly two fields are skipped.
   *
   * @param file
   *            path of the file to read
   * @return the parsed entries; on any failure (including a non-numeric second field) a message is
   *         printed and the entries read so far are returned
   */
  public static Map<String, Integer> readDocToMap(String file) {
    Map<String, Integer> map = new HashMap<String, Integer>();
    FileInputStream in = null;
    BufferedReader br = null;
    try {
      in = new FileInputStream(file);
      br = new BufferedReader(new InputStreamReader(in, charset));
      String line;
      while ((line = br.readLine()) != null) {
        String[] split = line.split("\t");
        if (split.length == 2) {
          map.put(split[0], Integer.parseInt(split[1]));
        }
      }
    } catch (Exception e) {
      System.err.println("Error during reading file " + file);
    } finally {
      close(br != null ? br : in);
    }
    return map;
  }

  /**
   * Writes an object to a file with Java serialization, creating missing parent directories.
   * Failures are reported by printing the stack trace.
   *
   * @param obj
   *            the object to write
   * @param file
   *            path of the target file; both {@code /} and {@code \} are recognised as separators
   */
  public static void serialize(Object obj, String file) {
    FileOutputStream fileOut = null;
    ObjectOutputStream out = null;
    try {
      int lastDash = Math.max(file.lastIndexOf('\\'), file.lastIndexOf('/'));
      if (lastDash != -1) {
        new File(file.substring(0, lastDash)).mkdirs();
      }
      fileOut = new FileOutputStream(file);
      out = new ObjectOutputStream(new BufferedOutputStream(fileOut));
      out.writeObject(obj);
      out.flush();
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      close(out != null ? out : fileOut);
    }
  }

  /** Closes the resource if it is not {@code null}; a failure is printed, not propagated. */
  private static void close(java.io.Closeable resource) {
    if (resource == null) {
      return;
    }
    try {
      resource.close();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Joins an array of strings to a single string, separated by single spaces.
   */
  public static String join(String[] str) {
    return join(str, ' ');
  }

  /**
   * Joins an array of strings to a single string.
   *
   * @param str
   *            the parts
   * @param joiner
   *            the character placed between two consecutive parts
   * @return the joined string; empty for an empty array
   */
  public static String join(String[] str, char joiner) {
    if (str.length == 0) {
      return "";
    }
    StringBuilder result = new StringBuilder(str[0]);
    for (int i = 1; i < str.length; i++) {
      result.append(joiner).append(str[i]);
    }
    return result.toString();
  }

  /**
   * Joins a list of strings to a single string, separated by single spaces.
   */
  public static String join(AbstractSequentialList<String> str) {
    return join(str, ' ');
  }

  /**
   * Joins a list of strings to a single string.
   *
   * @param str
   *            the parts
   * @param joiner
   *            the character placed between two consecutive parts
   * @return the joined string; empty for an empty list
   */
  public static String join(AbstractSequentialList<String> str, char joiner) {
    return join(str.toArray(new String[str.size()]), joiner);
  }
}