package com.linkedin.thirdeye.dataframe;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.commons.math.stat.correlation.Covariance;
import org.apache.commons.math.stat.correlation.PearsonsCorrelation;
/**
* Series container for primitive double.
*/
public final class DoubleSeries extends TypedSeries<DoubleSeries> {
/** Marker for a missing value; NaN is what {@code isNull(double)} tests for. */
public static final double NULL = Double.NaN;
public static final double INFINITY = Double.POSITIVE_INFINITY;
public static final double POSITIVE_INFINITY = Double.POSITIVE_INFINITY;
public static final double NEGATIVE_INFINITY = Double.NEGATIVE_INFINITY;
/** Replacement value used by the no-argument {@code fillNull()}. */
public static final double DEFAULT = 0.0d;
public static final double MIN_VALUE = Double.MIN_VALUE;
public static final double MAX_VALUE = Double.MAX_VALUE;

// Shared, stateless function instances.
public static final DoubleFunction SUM = new DoubleSum();
public static final DoubleFunction PRODUCT = new DoubleProduct();
// FIRST was missing although DoubleFirst is defined alongside DoubleLast.
public static final DoubleFunction FIRST = new DoubleFirst();
public static final DoubleFunction LAST = new DoubleLast();
public static final DoubleFunction MIN = new DoubleMin();
public static final DoubleFunction MAX = new DoubleMax();
public static final DoubleFunction MEAN = new DoubleMean();
public static final DoubleFunction STD = new DoubleStandardDeviation();
public static final DoubleFunction NEGATIVE = new DoubleNegative();
/**
 * Function returning the sum of all input values, or {@code NULL} when the
 * input is empty.
 */
public static final class DoubleSum implements DoubleFunction {
  @Override
  public double apply(double[] values) {
    if (values.length == 0) {
      return NULL;
    }
    // TODO sort, add low to high for accuracy?
    double total = 0.0d;
    for (int i = 0; i < values.length; i++) {
      total += values[i];
    }
    return total;
  }
}
/**
 * Function returning the product of all input values, or {@code NULL} when
 * the input is empty.
 */
public static final class DoubleProduct implements DoubleFunction {
  @Override
  public double apply(double[] values) {
    if (values.length == 0) {
      return NULL;
    }
    // TODO sort for accuracy?
    double product = 1.0d;
    for (int i = 0; i < values.length; i++) {
      product *= values[i];
    }
    return product;
  }
}
/**
 * Function returning the arithmetic mean of all input values, or
 * {@code NULL} when the input is empty.
 */
public static final class DoubleMean implements DoubleFunction {
  @Override
  public double apply(double[] values) {
    if (values.length == 0) {
      return NULL;
    }
    // TODO sort, add low to high for accuracy?
    double total = 0.0d;
    for (int i = 0; i < values.length; i++) {
      total += values[i];
    }
    // every element is counted, so the divisor is simply the array length
    return total / values.length;
  }
}
/**
 * Function returning the first input value, or {@code NULL} when the input
 * is empty.
 */
public static final class DoubleFirst implements DoubleFunction {
  @Override
  public double apply(double[] values) {
    return values.length > 0 ? values[0] : NULL;
  }
}
/**
 * Function returning the last input value, or {@code NULL} when the input
 * is empty.
 */
public static final class DoubleLast implements DoubleFunction {
  @Override
  public double apply(double[] values) {
    final int lastIndex = values.length - 1;
    return lastIndex >= 0 ? values[lastIndex] : NULL;
  }
}
/**
 * Function returning the smallest input value (per {@link Math#min}), or
 * {@code NULL} when the input is empty.
 */
public static final class DoubleMin implements DoubleFunction {
  @Override
  public double apply(double[] values) {
    if (values.length == 0) {
      return NULL;
    }
    double smallest = values[0];
    for (int i = 1; i < values.length; i++) {
      smallest = Math.min(smallest, values[i]);
    }
    return smallest;
  }
}
/**
 * Function returning the largest input value (per {@link Math#max}), or
 * {@code NULL} when the input is empty.
 */
public static final class DoubleMax implements DoubleFunction {
  @Override
  public double apply(double[] values) {
    if (values.length == 0) {
      return NULL;
    }
    double largest = values[0];
    for (int i = 1; i < values.length; i++) {
      largest = Math.max(largest, values[i]);
    }
    return largest;
  }
}
/**
 * Function returning the negation of the first input value, or
 * {@code NULL} when the input is empty. Further inputs are ignored.
 */
public static final class DoubleNegative implements DoubleFunction {
  @Override
  public double apply(double... values) {
    return values.length > 0 ? -values[0] : NULL;
  }
}
/**
 * Function returning the sample standard deviation of the input values,
 * i.e. {@code sqrt(sum((v - mean)^2) / (n - 1))}.
 *
 * <p>Returns {@code NULL} for fewer than two values, where the sample
 * standard deviation is undefined.
 */
public static final class DoubleStandardDeviation implements DoubleFunction {
  @Override
  public double apply(double... values) {
    if (values.length <= 1) {
      return NULL;
    }
    final double mean = MEAN.apply(values);
    double sumOfSquares = 0.0;
    for (double v : values) {
      final double deviation = v - mean;
      sumOfSquares += deviation * deviation;
    }
    // Divide by (n - 1) (Bessel's correction); without this the result is
    // the root of the raw sum of squares and grows with the input size.
    return Math.sqrt(sumOfSquares / (values.length - 1));
  }
}
/**
 * Mapping function converting the first input value to its z-score
 * {@code (value - mean) / std} for a fixed mean and standard deviation.
 */
public static final class DoubleMapZScore implements DoubleFunction {
  final double mean;
  final double std;

  /**
   * @param mean mean to subtract from each value
   * @param std standard deviation to divide by
   * @throws IllegalArgumentException if {@code std <= 0}
   */
  public DoubleMapZScore(double mean, double std) {
    if (std <= 0.0d) {
      throw new IllegalArgumentException("std must be greater than 0");
    }
    this.mean = mean;
    this.std = std;
  }

  @Override
  public double apply(double... values) {
    final double centered = values[0] - this.mean;
    return centered / this.std;
  }
}
/**
 * Mapping function rescaling the first input value to
 * {@code (value - min) / (max - min)} for a fixed min and max.
 */
public static final class DoubleMapNormalize implements DoubleFunction {
  final double min;
  final double max;

  /**
   * @param min value mapped to 0
   * @param max value mapped to 1
   * @throws IllegalArgumentException if {@code min == max}
   */
  public DoubleMapNormalize(double min, double max) {
    if (min == max) {
      throw new IllegalArgumentException("min and max must be different");
    }
    this.min = min;
    this.max = max;
  }

  @Override
  public double apply(double... values) {
    final double offset = values[0] - this.min;
    final double range = this.max - this.min;
    return offset / range;
  }
}
/**
 * Builder collecting chunks of double values and concatenating them, in
 * insertion order, into a single {@code DoubleSeries}.
 */
public static class Builder extends Series.Builder {
  final List<double[]> arrays = new ArrayList<>();

  private Builder() {
    // left blank
  }

  /** Appends the given array as-is (the array is not copied here). */
  public Builder addValues(double... values) {
    this.arrays.add(values);
    return this;
  }

  /** Appends a single value. */
  public Builder addValues(double value) {
    return this.addValues(new double[] { value });
  }

  /** Appends boxed values; {@code null} entries become {@code NULL}. */
  public Builder addValues(Collection<Double> values) {
    double[] unboxed = new double[values.size()];
    int pos = 0;
    for (Double boxed : values) {
      unboxed[pos] = valueOf(boxed);
      pos++;
    }
    return this.addValues(unboxed);
  }

  /** Appends boxed values; {@code null} entries become {@code NULL}. */
  public Builder addValues(Double... values) {
    return this.addValues(Arrays.asList(values));
  }

  /** Appends a single boxed value; {@code null} becomes {@code NULL}. */
  public Builder addValues(Double value) {
    return this.addValues(new double[] { valueOf(value) });
  }

  /** Appends {@code count} copies of {@code value}. */
  public Builder fillValues(int count, double value) {
    double[] filled = new double[count];
    Arrays.fill(filled, value);
    return this.addValues(filled);
  }

  /** Appends {@code count} copies of {@code value}; {@code null} becomes {@code NULL}. */
  public Builder fillValues(int count, Double value) {
    return this.fillValues(count, valueOf(value));
  }

  /** Appends the double representation of each given series. */
  @Override
  public Builder addSeries(Collection<Series> series) {
    for (Series s : series) {
      DoubleSeries doubles = s.getDoubles();
      this.addValues(doubles.values);
    }
    return this;
  }

  /** Concatenates all collected chunks into one new series. */
  @Override
  public DoubleSeries build() {
    int totalSize = 0;
    for (int i = 0; i < this.arrays.size(); i++) {
      totalSize += this.arrays.get(i).length;
    }

    double[] merged = new double[totalSize];
    int offset = 0;
    for (int i = 0; i < this.arrays.size(); i++) {
      double[] chunk = this.arrays.get(i);
      System.arraycopy(chunk, 0, merged, offset, chunk.length);
      offset += chunk.length;
    }
    return new DoubleSeries(merged);
  }
}
/**
 * Returns a new, empty builder for {@code DoubleSeries}.
 */
public static Builder builder() {
return new Builder();
}
/**
 * Wraps the given values in a series. The array is passed to the
 * constructor directly, so it is not copied.
 */
public static DoubleSeries buildFrom(double... values) {
return new DoubleSeries(values);
}
/**
 * Returns a series with no elements.
 */
public static DoubleSeries empty() {
return new DoubleSeries();
}
/**
 * Returns a series of {@code size} elements, all {@code NULL}.
 */
public static DoubleSeries nulls(int size) {
  return fillValues(size, NULL);
}
/**
 * Returns a series of {@code size} elements, all {@code 0.0}.
 */
public static DoubleSeries zeros(int size) {
  return fillValues(size, 0.0d);
}
/**
 * Returns a series of {@code size} elements, all {@code 1.0}.
 */
public static DoubleSeries ones(int size) {
  return fillValues(size, 1.0d);
}
/**
 * Returns a series of {@code size} elements, all equal to {@code value}.
 */
public static DoubleSeries fillValues(int size, double value) {
  Builder filled = builder().fillValues(size, value);
  return filled.build();
}
// CAUTION: The array is final, but values are inherently modifiable
final double[] values;
/**
 * Creates a series backed directly by the given array; no copy is made.
 */
private DoubleSeries(double... values) {
this.values = values;
}
/**
 * Returns a new, empty {@code DoubleSeries} builder.
 */
@Override
public Builder getBuilder() {
return new Builder();
}
/**
 * Returns this series itself, since it already holds doubles.
 */
@Override
public DoubleSeries getDoubles() {
return this;
}
/**
 * Returns the element at {@code index} as a double.
 */
@Override
public double getDouble(int index) {
  final double raw = this.values[index];
  return getDouble(raw);
}
/**
 * Identity conversion: returns {@code value} unchanged.
 */
public static double getDouble(double value) {
return value;
}
/**
 * Returns the element at {@code index} converted to a long.
 */
@Override
public long getLong(int index) {
  final double raw = this.values[index];
  return getLong(raw);
}
/**
 * Converts a double to a long. Null maps to {@code LongSeries.NULL},
 * negative infinity maps to {@code LongSeries.MIN_VALUE}, and every other
 * value is narrowed with a {@code (long)} cast.
 */
public static long getLong(double value) {
  if (DoubleSeries.isNull(value)) {
    return LongSeries.NULL;
  } else if (value == NEGATIVE_INFINITY) {
    return LongSeries.MIN_VALUE;
  } else {
    return (long) value;
  }
}
/**
 * Returns the element at {@code index} converted to a boolean byte.
 */
@Override
public byte getBoolean(int index) {
  final double raw = this.values[index];
  return getBoolean(raw);
}
/**
 * Converts a double to a boolean byte. Null maps to
 * {@code BooleanSeries.NULL}; otherwise the result encodes whether the
 * value differs from zero.
 */
public static byte getBoolean(double value) {
  if (DoubleSeries.isNull(value)) {
    return BooleanSeries.NULL;
  }
  final boolean nonZero = value != 0.0d;
  return BooleanSeries.valueOf(nonZero);
}
/**
 * Returns the element at {@code index} converted to a string.
 */
@Override
public String getString(int index) {
  final double raw = this.values[index];
  return getString(raw);
}
/**
 * Converts a double to a string. Null maps to {@code StringSeries.NULL};
 * otherwise the standard decimal representation is returned.
 */
public static String getString(double value) {
  if (DoubleSeries.isNull(value)) {
    return StringSeries.NULL;
  } else {
    return Double.toString(value);
  }
}
/**
 * Returns whether the element at {@code index} is null (NaN).
 */
@Override
public boolean isNull(int index) {
  final double raw = this.values[index];
  return isNull(raw);
}
/**
 * Returns the number of elements, i.e. the length of the backing array.
 */
@Override
public int size() {
return this.values.length;
}
/**
 * Returns {@code SeriesType.DOUBLE}.
 */
@Override
public SeriesType type() {
return SeriesType.DOUBLE;
}
/**
 * Returns the backing array itself (not a copy), so changes made by the
 * caller are visible through this series.
 */
public double[] values() {
return this.values;
}
/**
 * Returns the only element of a single-element series.
 *
 * @throws IllegalStateException if the series does not contain exactly one element
 * @return the single element
 */
public double value() {
  if (this.values.length == 1) {
    return this.values[0];
  }
  throw new IllegalStateException("Series must contain exactly one element");
}
/**
 * Returns the value of the first element in the series.
 *
 * @throws IllegalStateException if the series is empty
 * @return first element in the series
 */
public double first() {
  // assertNotEmpty hands back the array it validated
  return assertNotEmpty(this.values)[0];
}
/**
 * Returns the value of the last element in the series.
 *
 * @throws IllegalStateException if the series is empty
 * @return last element in the series
 */
public double last() {
  final double[] checked = assertNotEmpty(this.values);
  return checked[checked.length - 1];
}
/**
 * Returns a new series holding a copy of the elements in the index range
 * {@code [from, to)}, with bounds handled as by {@link Arrays#copyOfRange}.
 */
@Override
public DoubleSeries slice(int from, int to) {
  final double[] window = Arrays.copyOfRange(this.values, from, to);
  return buildFrom(window);
}
/**
 * Renders the series as {@code DoubleSeries{v0 v1 ... }}, with each element
 * followed by a single space and nulls printed as {@code null}.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("DoubleSeries{");
  for (int i = 0; i < this.values.length; i++) {
    final double element = this.values[i];
    text.append(isNull(element) ? "null" : Double.toString(element));
    text.append(" ");
  }
  return text.append("}").toString();
}
/**
 * Renders the element at {@code index}, using {@code TOSTRING_NULL} for
 * null elements.
 */
@Override
public String toString(int index) {
  return this.isNull(index) ? TOSTRING_NULL : Double.toString(this.values[index]);
}
/**
 * Returns the single value produced by aggregating this series with
 * {@code MIN}.
 */
// NOTE(review): aggregate(...) is inherited and not visible here; presumably
// it delegates to the static aggregate, which yields NULL when nulls are present.
public double min() {
return this.aggregate(MIN).value();
}
/**
 * Returns the single value produced by aggregating this series with
 * {@code MAX}.
 */
// NOTE(review): aggregate(...) is inherited and not visible here; presumably
// it delegates to the static aggregate, which yields NULL when nulls are present.
public double max() {
return this.aggregate(MAX).value();
}
/**
 * Returns the single value produced by aggregating this series with
 * {@code SUM}.
 */
// NOTE(review): aggregate(...) is inherited and not visible here; presumably
// it delegates to the static aggregate, which yields NULL when nulls are present.
public double sum() {
return this.aggregate(SUM).value();
}
/**
 * Returns the single value produced by aggregating this series with
 * {@code PRODUCT}.
 */
// NOTE(review): aggregate(...) is inherited and not visible here; presumably
// it delegates to the static aggregate, which yields NULL when nulls are present.
public double product() {
return this.aggregate(PRODUCT).value();
}
/**
 * Returns the single value produced by aggregating this series with
 * {@code MEAN}.
 */
// NOTE(review): aggregate(...) is inherited and not visible here; presumably
// it delegates to the static aggregate, which yields NULL when nulls are present.
public double mean() {
return this.aggregate(MEAN).value();
}
/**
 * Returns the single value produced by aggregating this series with
 * {@code STD}.
 */
// NOTE(review): aggregate(...) is inherited and not visible here; presumably
// it delegates to the static aggregate, which yields NULL when nulls are present.
public double std() {
return this.aggregate(STD).value();
}
/**
 * Returns the correlation between this series and {@code other}, as
 * computed by the static {@code corr(Series, Series)}.
 */
public double corr(Series other) {
return corr(this, other);
}
/**
 * Returns the covariance between this series and {@code other}, as
 * computed by the static {@code cov(Series, Series)}.
 */
public double cov(Series other) {
return cov(this, other);
}
/**
 * Returns a copy rescaled with {@code DoubleMapNormalize} using this
 * series' min and max. If that fails with any exception (for example when
 * min equals max), an all-null series of the same size is returned.
 */
public DoubleSeries normalize() {
  try {
    final double lo = this.min();
    final double hi = this.max();
    return this.map(new DoubleMapNormalize(lo, hi));
  } catch (Exception e) {
    // best effort: an undefined normalization yields nulls, not an error
    return nulls(this.size());
  }
}
/**
 * Returns a copy converted to z-scores with {@code DoubleMapZScore} using
 * this series' mean and standard deviation. If that fails with any
 * exception (for example when the standard deviation is not positive), an
 * all-null series of the same size is returned.
 */
public DoubleSeries zscore() {
  try {
    final double mean = this.mean();
    final double std = this.std();
    return this.map(new DoubleMapZScore(mean, std));
  } catch (Exception e) {
    // best effort: an undefined z-score yields nulls, not an error
    return nulls(this.size());
  }
}
/**
 * Returns the element-wise sum of this series and {@code other}.
 */
public DoubleSeries add(Series other) {
  final DoubleFunction addition = new DoubleFunction() {
    @Override
    public double apply(double... values) {
      return values[0] + values[1];
    }
  };
  return map(addition, this, other);
}
/**
 * Returns a copy with {@code constant} added to every element. A null
 * constant yields an all-null series of the same size.
 */
public DoubleSeries add(final double constant) {
  if (isNull(constant)) {
    return nulls(this.size());
  }
  final DoubleFunction addConstant = new DoubleFunction() {
    @Override
    public double apply(double... values) {
      return values[0] + constant;
    }
  };
  return this.map(addConstant);
}
/**
 * Returns the element-wise difference {@code this - other}.
 */
public DoubleSeries subtract(Series other) {
  final DoubleFunction difference = new DoubleFunction() {
    @Override
    public double apply(double... values) {
      return values[0] - values[1];
    }
  };
  return map(difference, this, other);
}
/**
 * Returns a copy with {@code constant} subtracted from every element. A
 * null constant yields an all-null series of the same size.
 */
public DoubleSeries subtract(final double constant) {
  if (isNull(constant)) {
    return nulls(this.size());
  }
  final DoubleFunction subtractConstant = new DoubleFunction() {
    @Override
    public double apply(double... values) {
      return values[0] - constant;
    }
  };
  return this.map(subtractConstant);
}
/**
 * Returns the element-wise product of this series and {@code other}.
 */
public DoubleSeries multiply(Series other) {
  final DoubleFunction product = new DoubleFunction() {
    @Override
    public double apply(double... values) {
      return values[0] * values[1];
    }
  };
  return map(product, this, other);
}
/**
 * Returns a copy with every element multiplied by {@code constant}. A null
 * constant yields an all-null series of the same size.
 */
public DoubleSeries multiply(final double constant) {
  if (isNull(constant)) {
    return nulls(this.size());
  }
  final DoubleFunction scale = new DoubleFunction() {
    @Override
    public double apply(double... values) {
      return values[0] * constant;
    }
  };
  return this.map(scale);
}
/**
 * Returns the element-wise quotient {@code this / other}.
 *
 * @param other divisor series, converted to doubles
 * @throws ArithmeticException if any divisor element is zero (either
 *         {@code 0.0} or {@code -0.0})
 * @return series of quotients
 */
public DoubleSeries divide(Series other) {
  DoubleSeries o = other.getDoubles();
  for (double divisor : o.values) {
    // Use == rather than contains(0.0d): contains() compares via
    // Double.compare, which treats -0.0 as different from 0.0 and would let
    // a negative-zero divisor through, unlike divide(double).
    if (divisor == 0.0d) {
      throw new ArithmeticException("/ by zero");
    }
  }
  return map(new DoubleFunction() {
    @Override
    public double apply(double... values) {
      return values[0] / values[1];
    }
  }, this, o);
}
/**
 * Returns a copy with every element divided by {@code constant}. A null
 * constant yields an all-null series of the same size.
 *
 * @throws ArithmeticException if {@code constant} is zero
 */
public DoubleSeries divide(final double constant) {
  if (isNull(constant)) {
    return nulls(this.size());
  }
  if (constant == 0.0d) {
    throw new ArithmeticException("/ by zero");
  }
  final DoubleFunction divideByConstant = new DoubleFunction() {
    @Override
    public double apply(double... values) {
      return values[0] / constant;
    }
  };
  return this.map(divideByConstant);
}
/**
 * Compares this series with {@code other} element-wise for exact
 * ({@code ==}) equality.
 */
public BooleanSeries eq(Series other) {
  final DoubleConditional exactMatch = new DoubleConditional() {
    @Override
    public boolean apply(double... values) {
      return values[0] == values[1];
    }
  };
  return map(exactMatch, this, other);
}
/**
 * Compares every element with {@code constant} for exact ({@code ==})
 * equality. A null constant yields an all-null boolean series of the same
 * size.
 */
public BooleanSeries eq(final double constant) {
  if (isNull(constant)) {
    return BooleanSeries.nulls(this.size());
  }
  final DoubleConditional matchesConstant = new DoubleConditional() {
    @Override
    public boolean apply(double... values) {
      return values[0] == constant;
    }
  };
  return this.map(matchesConstant);
}
/**
 * Compares every element with {@code constant}, treating values within
 * {@code epsilon} of each other as equal.
 */
public BooleanSeries eq(final double constant, final double epsilon) {
  final DoubleSeries constants = fillValues(this.size(), constant);
  return this.eq(constants, epsilon);
}
/**
 * Compares this series with {@code other} element-wise, treating a pair as
 * equal when the other value lies within
 * {@code [value - epsilon, value + epsilon]}.
 */
public BooleanSeries eq(Series other, final double epsilon) {
  final DoubleConditional withinTolerance = new DoubleConditional() {
    @Override
    public boolean apply(double... values) {
      final double lower = values[0] - epsilon;
      final double upper = values[0] + epsilon;
      return lower <= values[1] && upper >= values[1];
    }
  };
  return map(withinTolerance, this, other);
}
/**
 * Returns a copy of the series in which every position flagged true in
 * {@code where} is replaced by {@code value}; all other positions keep
 * their original value.
 *
 * @param where selection mask; must have the same size as this series
 * @param value replacement value for selected positions
 * @throws IllegalArgumentException if {@code where} differs in size from this series
 * @return modified copy of the series
 */
public DoubleSeries set(BooleanSeries where, double value) {
  // A shorter mask used to leave trailing elements at 0.0 and a longer one
  // overran the output array; reject mismatched sizes up front.
  if (where.size() != this.values.length) {
    throw new IllegalArgumentException(String.format(
        "Series size mismatch: mask has %d elements, series has %d",
        where.size(), this.values.length));
  }
  double[] values = Arrays.copyOf(this.values, this.values.length);
  for (int i = 0; i < values.length; i++) {
    if (BooleanSeries.isTrue(where.getBoolean(i))) {
      values[i] = value;
    }
  }
  return buildFrom(values);
}
/**
 * Counts the elements equal to {@code value} under the null-safe
 * comparator: null matches null, and other values are compared with
 * {@link Double#compare}.
 */
public int count(double value) {
  int matches = 0;
  for (int i = 0; i < this.values.length; i++) {
    if (nullSafeDoubleComparator(this.values[i], value) == 0) {
      matches++;
    }
  }
  return matches;
}
/**
 * Returns whether at least one element equals {@code value}, as determined
 * by {@code count(double)}.
 */
public boolean contains(double value) {
  final int occurrences = this.count(value);
  return occurrences > 0;
}
/**
 * Returns a copy in which every element equal to {@code find} is replaced
 * by {@code by}. When {@code find} is null, null elements are replaced.
 */
public DoubleSeries replace(double find, double by) {
  return isNull(find)
      ? this.fillNull(by)
      : this.set(this.eq(find), by);
}
/**
 * Returns a same-sized copy in which the positions selected by the negated,
 * null-filled {@code filter} are set to {@code NULL}.
 */
// NOTE(review): presumably fillNull() turns null flags into false, so
// unflagged and null positions are the ones nulled out — confirm in BooleanSeries.
@Override
public DoubleSeries filter(BooleanSeries filter) {
return this.set(filter.fillNull().not(), NULL);
}
/**
 * Returns a copy of the series with all null values replaced by
 * {@code DEFAULT}.
 */
@Override
public DoubleSeries fillNull() {
return this.fillNull(DEFAULT);
}
/**
 * Return a copy of the series with all <b>null</b> values replaced by
 * {@code value}.
 *
 * @param value replacement value for <b>null</b>
 * @return series copy without nulls
 */
public DoubleSeries fillNull(double value) {
  final double[] filled = new double[this.values.length];
  for (int i = 0; i < filled.length; i++) {
    final double current = this.values[i];
    filled[i] = isNull(current) ? value : current;
  }
  return buildFrom(filled);
}
/**
 * Return a copy of the series with all infinite values (positive or
 * negative) replaced by {@code value}.
 *
 * @param value replacement value for infinite elements
 * @return series copy without infinite values
 */
public DoubleSeries fillInfinite(double value) {
  final double[] filled = new double[this.values.length];
  for (int i = 0; i < filled.length; i++) {
    final double current = this.values[i];
    filled[i] = Double.isInfinite(current) ? value : current;
  }
  return buildFrom(filled);
}
/**
 * Builds a new series whose i-th element is taken from position
 * {@code fromIndex[i]} of this series; an index of {@code -1} produces
 * {@code NULL}.
 */
@Override
DoubleSeries project(int[] fromIndex) {
  final double[] projected = new double[fromIndex.length];
  for (int i = 0; i < projected.length; i++) {
    final int source = fromIndex[i];
    projected[i] = (source == -1) ? NULL : this.values[source];
  }
  return buildFrom(projected);
}
/**
 * Two series are equal when they are of the same class and their backing
 * arrays are equal per {@link Arrays#equals(double[], double[])}.
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (o == null) {
    return false;
  }
  if (getClass() != o.getClass()) {
    return false;
  }
  final DoubleSeries other = (DoubleSeries) o;
  return Arrays.equals(this.values, other.values);
}
/**
 * Compares the element at {@code indexThis} of this series with the double
 * value at {@code indexThat} of {@code that}, ordering nulls first.
 */
@Override
int compare(Series that, int indexThis, int indexThat) {
  final double left = this.values[indexThis];
  final double right = that.getDouble(indexThat);
  return nullSafeDoubleComparator(left, right);
}
/**
 * Applies {@code function} row by row across the given series and returns
 * the results as a new series. Rows with a null in any input produce
 * {@code NULL}. With no input series an empty series is returned.
 *
 * @see DataFrame#map(Series.Function, Series...)
 */
public static DoubleSeries map(DoubleFunction function, Series... series) {
  if (series.length == 0) {
    return empty();
  }

  DataFrame.assertSameLength(series);

  // Note: code-specialization to help hot-spot vm
  switch (series.length) {
    case 1:
      return mapUnrolled(function, series[0]);
    case 2:
      return mapUnrolled(function, series[0], series[1]);
    case 3:
      return mapUnrolled(function, series[0], series[1], series[2]);
    default:
      break;
  }

  // generic path: one scratch buffer reused for every row
  final int rows = series[0].size();
  final double[] input = new double[series.length];
  final double[] output = new double[rows];
  for (int row = 0; row < rows; row++) {
    output[row] = mapRow(function, series, input, row);
  }
  return buildFrom(output);
}
/**
 * Evaluates {@code function} for one row. Values are gathered into
 * {@code input}; as soon as a null is met, {@code NULL} is returned without
 * calling the function.
 */
private static double mapRow(DoubleFunction function, Series[] series, double[] input, int row) {
  int column = 0;
  while (column < series.length) {
    final double cell = series[column].getDouble(row);
    if (isNull(cell)) {
      return NULL;
    }
    input[column] = cell;
    column++;
  }
  return function.apply(input);
}
/**
 * Single-series specialization of {@code map}: null elements stay
 * {@code NULL}, all others are passed to {@code function}.
 */
private static DoubleSeries mapUnrolled(DoubleFunction function, Series a) {
  final int rows = a.size();
  final double[] output = new double[rows];
  for (int row = 0; row < rows; row++) {
    output[row] = a.isNull(row) ? NULL : function.apply(a.getDouble(row));
  }
  return buildFrom(output);
}
/**
 * Two-series specialization of {@code map}: rows with a null in either
 * input yield {@code NULL}, all others are passed to {@code function}.
 */
private static DoubleSeries mapUnrolled(DoubleFunction function, Series a, Series b) {
  final int rows = a.size();
  final double[] output = new double[rows];
  for (int row = 0; row < rows; row++) {
    final boolean anyNull = a.isNull(row) || b.isNull(row);
    output[row] = anyNull ? NULL : function.apply(a.getDouble(row), b.getDouble(row));
  }
  return buildFrom(output);
}
/**
 * Three-series specialization of {@code map}: rows with a null in any
 * input yield {@code NULL}, all others are passed to {@code function}.
 */
private static DoubleSeries mapUnrolled(DoubleFunction function, Series a, Series b, Series c) {
  final int rows = a.size();
  final double[] output = new double[rows];
  for (int row = 0; row < rows; row++) {
    final boolean anyNull = a.isNull(row) || b.isNull(row) || c.isNull(row);
    output[row] = anyNull
        ? NULL
        : function.apply(a.getDouble(row), b.getDouble(row), c.getDouble(row));
  }
  return buildFrom(output);
}
/**
 * Applies {@code function} row by row across the given series and returns
 * the results as a boolean series. Rows with a null in any input produce a
 * null boolean. With no input series an empty boolean series is returned.
 *
 * @see DataFrame#map(Series.Function, Series...)
 */
public static BooleanSeries map(DoubleConditional function, Series... series) {
  if (series.length == 0) {
    return BooleanSeries.empty();
  }

  DataFrame.assertSameLength(series);

  // one scratch buffer reused for every row
  final int rows = series[0].size();
  final double[] input = new double[series.length];
  final byte[] output = new byte[rows];
  for (int row = 0; row < rows; row++) {
    output[row] = mapRow(function, series, input, row);
  }
  return BooleanSeries.buildFrom(output);
}
/**
 * Evaluates {@code function} for one row. Values are gathered into
 * {@code input}; as soon as a null is met, {@code BooleanSeries.NULL} is
 * returned without calling the function.
 */
private static byte mapRow(DoubleConditional function, Series[] series, double[] input, int row) {
  int column = 0;
  while (column < series.length) {
    final double cell = series[column].getDouble(row);
    if (isNull(cell)) {
      return BooleanSeries.NULL;
    }
    input[column] = cell;
    column++;
  }
  return BooleanSeries.valueOf(function.apply(input));
}
/**
 * Reduces {@code series} to a single-element series by applying
 * {@code function} to all of its double values. If the series contains any
 * null, the result is {@code NULL} and the function is not called.
 *
 * @see Series#aggregate(Function)
 */
public static DoubleSeries aggregate(DoubleFunction function, Series series) {
  final double result = series.hasNull()
      ? NULL
      : function.apply(series.getDoubles().values);
  return buildFrom(result);
}
/**
 * Reduces {@code series} to a single-element boolean series by applying
 * {@code function} to all of its double values. If the series contains any
 * null, the result is a null boolean and the function is not called.
 *
 * @see Series#aggregate(Function)
 */
public static BooleanSeries aggregate(DoubleConditional function, Series series) {
  if (series.hasNull()) {
    return BooleanSeries.buildFrom(BooleanSeries.NULL);
  }
  final boolean verdict = function.apply(series.getDoubles().values);
  return BooleanSeries.builder().addBooleanValues(verdict).build();
}
/**
 * Returns the Pearson correlation of the two series' double values, or
 * {@code NULL} if either series contains a null.
 */
public static double corr(Series a, Series b) {
  if (a.hasNull() || b.hasNull()) {
    return NULL;
  }
  final double[] x = a.getDoubles().values();
  final double[] y = b.getDoubles().values();
  return new PearsonsCorrelation().correlation(x, y);
}
/**
 * Returns the covariance of the two series' double values, or {@code NULL}
 * if either series contains a null.
 */
public static double cov(Series a, Series b) {
  if (a.hasNull() || b.hasNull()) {
    return NULL;
  }
  final double[] x = a.getDoubles().values();
  final double[] y = b.getDoubles().values();
  return new Covariance().covariance(x, y);
}
/**
 * Compares two doubles with nulls ordered before every other value and
 * equal to each other; non-null values use {@link Double#compare}.
 */
private static int nullSafeDoubleComparator(double a, double b) {
  final boolean aNull = isNull(a);
  final boolean bNull = isNull(b);
  if (aNull) {
    return bNull ? 0 : -1;
  }
  if (bNull) {
    return 1;
  }
  return Double.compare(a, b);
}
/**
 * Hash code derived from the backing array's contents, consistent with
 * {@code equals}.
 */
@Override
public int hashCode() {
return Arrays.hashCode(this.values);
}
/**
 * Unboxes {@code value}, mapping {@code null} to {@code NULL}.
 */
public static double valueOf(Double value) {
  return value == null ? NULL : value.doubleValue();
}
/**
 * Returns whether {@code value} represents null, i.e. is NaN.
 */
public static boolean isNull(double value) {
return Double.isNaN(value);
}
/**
 * Returns {@code values} unchanged after checking it holds at least one
 * element.
 *
 * @throws IllegalStateException if {@code values} is empty
 */
private static double[] assertNotEmpty(double[] values) {
  if (values.length == 0) {
    throw new IllegalStateException("Must contain at least one value");
  }
  return values;
}
/**
 * Returns a same-sized copy with all elements moved by {@code offset}
 * positions. A non-negative offset moves elements toward higher indices, a
 * negative one toward lower indices. Vacated positions are filled with
 * {@code NULL} and elements moved past either end are dropped.
 */
@Override
public DoubleSeries shift(int offset) {
double[] values = new double[this.values.length];
if(offset >= 0) {
// leading gap becomes NULL; Math.min caps it at the array length for large offsets
Arrays.fill(values, 0, Math.min(offset, values.length), NULL);
// copy the surviving prefix of the source behind the gap (length 0 if nothing survives)
System.arraycopy(this.values, 0, values, Math.min(offset, values.length), Math.max(values.length - offset, 0));
} else {
// copy the surviving suffix of the source to the front (offset is negative here)
System.arraycopy(this.values, Math.min(-offset, values.length), values, 0, Math.max(values.length + offset, 0));
// trailing gap becomes NULL
Arrays.fill(values, Math.max(values.length + offset, 0), values.length, NULL);
}
return buildFrom(values);
}
/**
 * Returns a sorted copy of the series in ascending order with all nulls
 * (NaN) placed first.
 */
@Override
public DoubleSeries sorted() {
  final double[] ordered = Arrays.copyOf(this.values, this.values.length);
  Arrays.sort(ordered);

  // order NaNs first: Arrays.sort leaves them at the tail, so count them there
  int nullCount = 0;
  for (int i = ordered.length - 1; i >= 0 && isNull(ordered[i]); i--) {
    nullCount++;
  }

  // nothing to move when there are no nulls or only nulls
  if (nullCount == 0 || nullCount == ordered.length) {
    return buildFrom(ordered);
  }

  final double[] nullsFirst = new double[ordered.length];
  Arrays.fill(nullsFirst, 0, nullCount, Double.NaN);
  System.arraycopy(ordered, 0, nullsFirst, nullCount, ordered.length - nullCount);
  return buildFrom(nullsFirst);
}
/**
 * Returns the element indices in the order that sorts the series
 * ascending with nulls first. The sort is stable, so equal values keep
 * their original relative order.
 */
@Override
int[] sortedIndex() {
  final int size = this.values.length;

  // pair every value with its original position
  List<DoubleSortTuple> tuples = new ArrayList<>(size);
  for (int i = 0; i < size; i++) {
    tuples.add(new DoubleSortTuple(this.values[i], i));
  }

  final Comparator<DoubleSortTuple> byValue = new Comparator<DoubleSortTuple>() {
    @Override
    public int compare(DoubleSortTuple a, DoubleSortTuple b) {
      return nullSafeDoubleComparator(a.value, b.value);
    }
  };
  Collections.sort(tuples, byValue);

  int[] fromIndex = new int[size];
  int pos = 0;
  for (DoubleSortTuple tuple : tuples) {
    fromIndex[pos++] = tuple.index;
  }
  return fromIndex;
}
/**
 * Immutable pair of a value and its original position in the series, used
 * by {@code sortedIndex()} to recover positions after sorting.
 */
static final class DoubleSortTuple {
final double value;
final int index;
DoubleSortTuple(double value, int index) {
this.value = value;
this.index = index;
}
}
}