package com.linkedin.thirdeye.dataframe;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import org.apache.commons.lang.math.NumberUtils;
/**
* Series container for String objects.
*/
public final class StringSeries extends TypedSeries<StringSeries> {
public static final String NULL = null;
public static final String DEFAULT = "";
public static final StringFunction CONCAT = new StringConcat();
public static final StringFunction FIRST = new StringFirst();
public static final StringFunction LAST = new StringLast();
public static final class StringConcat implements StringFunction {
final String delimiter;
public StringConcat() {
this.delimiter = "";
}
public StringConcat(String delimiter) {
this.delimiter = delimiter;
}
@Override
public String apply(String[] values) {
if(values.length <= 0)
return "";
StringBuilder builder = new StringBuilder();
for(int i=0; i<values.length - 1; i++) {
builder.append(values[i]);
builder.append(this.delimiter);
}
builder.append(values[values.length - 1]);
return builder.toString();
}
}
public static final class StringFirst implements StringFunction {
@Override
public String apply(String[] values) {
if(values.length <= 0)
return NULL;
return values[0];
}
}
public static final class StringLast implements StringFunction {
@Override
public String apply(String[] values) {
if(values.length <= 0)
return NULL;
return values[values.length-1];
}
}
public static class Builder extends Series.Builder {
final List<String> values = new ArrayList<>();
private Builder() {
// left blank
}
public Builder addValues(Collection<String> values) {
this.values.addAll(values);
return this;
}
public Builder addValues(String... values) {
return this.addValues(Arrays.asList(values));
}
@Override
public Builder addSeries(Collection<Series> series) {
for(Series s : series)
this.addValues(s.getStrings().values);
return this;
}
public Builder fillValues(int count, String value) {
String[] values = new String[count];
Arrays.fill(values, value);
return this.addValues(values);
}
@Override
public StringSeries build() {
return StringSeries.buildFrom(this.values.toArray(new String[this.values.size()]));
}
}
public static Builder builder() {
return new Builder();
}
public static StringSeries buildFrom(String... values) {
return new StringSeries(values);
}
public static StringSeries empty() {
return new StringSeries();
}
public static StringSeries nulls(int size) {
return builder().fillValues(size, NULL).build();
}
public static StringSeries fillValues(int size, String value) {
return builder().fillValues(size, value).build();
}
// CAUTION: The array is final, but values are inherently modifiable
final String[] values;
private StringSeries(String... values) {
this.values = values;
}
@Override
public Builder getBuilder() {
return new Builder();
}
@Override
public StringSeries getStrings() {
return this;
}
@Override
public double getDouble(int index) {
return getDouble(this.values[index]);
}
public static double getDouble(String value) {
if(StringSeries.isNull(value) || value.length() <= 0)
return DoubleSeries.NULL;
return Double.parseDouble(value);
}
@Override
public long getLong(int index) {
return getLong(this.values[index]);
}
public static long getLong(String value) {
if(StringSeries.isNull(value) || value.length() <= 0)
return LongSeries.NULL;
try {
return Long.parseLong(value);
} catch (NumberFormatException e) {
return (long) Double.parseDouble(value);
}
}
@Override
public byte getBoolean(int index) {
return getBoolean(this.values[index]);
}
public static byte getBoolean(String value) {
if(StringSeries.isNull(value) || value.length() <= 0)
return BooleanSeries.NULL;
if(NumberUtils.isNumber(value))
return BooleanSeries.valueOf(Double.parseDouble(value) != 0.0d);
return BooleanSeries.valueOf(Boolean.parseBoolean(value));
}
@Override
public String getString(int index) {
return getString(this.values[index]);
}
public static String getString(String string) {
return string;
}
@Override
public boolean isNull(int index) {
return isNull(this.values[index]);
}
@Override
public int size() {
return this.values.length;
}
@Override
public SeriesType type() {
return SeriesType.STRING;
}
public String[] values() {
return this.values;
}
public String value() {
if(this.size() != 1)
throw new IllegalStateException("Series must contain exactly one element");
return this.values[0];
}
@Override
public StringSeries unique() {
Set<String> uniques = new HashSet<>(Arrays.asList(this.values));
String[] values = new String[uniques.size()];
return StringSeries.buildFrom(uniques.toArray(values));
}
/**
* Returns the contents of the series wrapped as list.
*
* @return list of series elements
*/
public List<String> toList() {
return Arrays.asList(this.values);
}
/**
* Attempts to infer a tighter native series type based on pattern matching
* against individual values in the series.
*
* @return inferred series type
*/
public SeriesType inferType() {
if(this.isEmpty())
return SeriesType.STRING;
boolean isBoolean = true;
boolean isLong = true;
boolean isDouble = true;
for(String s : this.values) {
isBoolean &= (s == null) || (s.length() <= 0) || (s.compareToIgnoreCase("true") == 0 || s.compareToIgnoreCase("false") == 0);
isLong &= (s == null) || (s.length() <= 0) || (NumberUtils.isNumber(s) && !s.contains(".") && !s.contains("e"));
isDouble &= (s == null) || (s.length() <= 0) || NumberUtils.isNumber(s);
}
if(isBoolean)
return SeriesType.BOOLEAN;
if(isLong)
return SeriesType.LONG;
if(isDouble)
return SeriesType.DOUBLE;
return SeriesType.STRING;
}
/**
* Attempts to infer a tighter native series type based on pattern matching against individual
* values in the series. Returns a copy of the series with the inferred type.
*
* @return series copy of inferred type
*/
public Series toInferredType() {
return this.get(this.inferType());
}
/**
* Returns the value of the first element in the series
*
* @throws IllegalStateException if the series is empty
* @return first element in the series
*/
public String first() {
assertNotEmpty(this.values);
return this.values[0];
}
/**
* Returns the value of the last element in the series
*
* @throws IllegalStateException if the series is empty
* @return last element in the series
*/
public String last() {
assertNotEmpty(this.values);
return this.values[this.values.length-1];
}
@Override
public StringSeries slice(int from, int to) {
return StringSeries.buildFrom(Arrays.copyOfRange(this.values, from, to));
}
public String join() {
return this.aggregate(CONCAT).value();
}
public String join(String delimiter) {
return this.aggregate(new StringConcat(delimiter)).value();
}
public StringSeries concat(final String constant) {
if(isNull(constant))
return nulls(this.size());
return this.map(new StringFunction() {
@Override
public String apply(String... values) {
return values[0] + constant;
}
});
}
public StringSeries concat(Series other) {
return map(new StringFunction() {
@Override
public String apply(String... values) {
return values[0] + values[1];
}
}, this, other);
}
public BooleanSeries eq(final String constant) {
if(isNull(constant))
return BooleanSeries.nulls(this.size());
return this.map(new StringConditional() {
@Override
public boolean apply(String... values) {
return constant.equals(values[0]);
}
});
}
public BooleanSeries eq(Series other) {
return map(new StringConditional() {
@Override
public boolean apply(String... values) {
return values[1].equals(values[0]);
}
}, this, other);
}
public StringSeries set(BooleanSeries where, String value) {
String[] values = new String[this.values.length];
for(int i=0; i<where.size(); i++) {
if(BooleanSeries.isTrue(where.getBoolean(i))) {
values[i] = value;
} else {
values[i] = this.values[i];
}
}
return buildFrom(values);
}
public int count(String value) {
int count = 0;
for(String v : this.values)
if(nullSafeStringComparator(v, value) == 0)
count++;
return count;
}
public boolean contains(String value) {
return this.count(value) > 0;
}
public StringSeries replace(String find, String by) {
if(isNull(find))
return this.fillNull(by);
return this.set(this.eq(find), by);
}
@Override
public StringSeries filter(BooleanSeries filter) {
return this.set(filter.fillNull().not(), NULL);
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("StringSeries{");
for(String s : this.values) {
if(isNull(s)) {
builder.append("null ");
} else {
builder.append("'");
builder.append(s);
builder.append("' ");
}
}
builder.append("}");
return builder.toString();
}
@Override
public String toString(int index) {
if(this.isNull(index))
return TOSTRING_NULL;
return this.values[index];
}
@Override
public StringSeries fillNull() {
return this.fillNull(DEFAULT);
}
/**
* Return a copy of the series with all <b>null</b> values replaced by
* {@code value}.
*
* @param value replacement value for <b>null</b>
* @return series copy without nulls
*/
public StringSeries fillNull(String value) {
String[] values = Arrays.copyOf(this.values, this.values.length);
for(int i=0; i<values.length; i++) {
if(isNull(values[i])) {
values[i] = value;
}
}
return buildFrom(values);
}
@Override
StringSeries project(int[] fromIndex) {
String[] values = new String[fromIndex.length];
for(int i=0; i<fromIndex.length; i++) {
if(fromIndex[i] == -1) {
values[i] = NULL;
} else {
values[i] = this.values[fromIndex[i]];
}
}
return StringSeries.buildFrom(values);
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
StringSeries that = (StringSeries) o;
return Arrays.equals(this.values, that.values);
}
@Override
int compare(Series that, int indexThis, int indexThat) {
return nullSafeStringComparator(this.values[indexThis], that.getString(indexThat));
}
@Override
public int hashCode() {
return Arrays.hashCode(this.values);
}
/**
* @see DataFrame#map(Series.Function, Series...)
*/
public static StringSeries map(StringFunction function, Series... series) {
if(series.length <= 0)
return empty();
DataFrame.assertSameLength(series);
// Note: code-specialization to help hot-spot vm
if(series.length == 1)
return mapUnrolled(function, series[0]);
if(series.length == 2)
return mapUnrolled(function, series[0], series[1]);
if(series.length == 3)
return mapUnrolled(function, series[0], series[1], series[2]);
String[] input = new String[series.length];
String[] output = new String[series[0].size()];
for(int i=0; i<series[0].size(); i++) {
output[i] = mapRow(function, series, input, i);
}
return buildFrom(output);
}
private static String mapRow(StringFunction function, Series[] series, String[] input, int row) {
for(int j=0; j<series.length; j++) {
String value = series[j].getString(row);
if(isNull(value))
return NULL;
input[j] = value;
}
return function.apply(input);
}
private static StringSeries mapUnrolled(StringFunction function, Series a) {
String[] output = new String[a.size()];
for(int i=0; i<a.size(); i++) {
if(a.isNull(i)) {
output[i] = NULL;
} else {
output[i] = function.apply(a.getString(i));
}
}
return buildFrom(output);
}
private static StringSeries mapUnrolled(StringFunction function, Series a, Series b) {
String[] output = new String[a.size()];
for(int i=0; i<a.size(); i++) {
if(a.isNull(i) || b.isNull(i)) {
output[i] = NULL;
} else {
output[i] = function.apply(a.getString(i), b.getString(i));
}
}
return buildFrom(output);
}
private static StringSeries mapUnrolled(StringFunction function, Series a, Series b, Series c) {
String[] output = new String[a.size()];
for(int i=0; i<a.size(); i++) {
if(a.isNull(i) || b.isNull(i) || c.isNull(i)) {
output[i] = NULL;
} else {
output[i] = function.apply(a.getString(i), b.getString(i), c.getString(i));
}
}
return buildFrom(output);
}
/**
* @see DataFrame#map(Series.Function, Series...)
*/
public static BooleanSeries map(StringConditional function, Series... series) {
if(series.length <= 0)
return BooleanSeries.empty();
DataFrame.assertSameLength(series);
String[] input = new String[series.length];
byte[] output = new byte[series[0].size()];
for(int i=0; i<series[0].size(); i++) {
output[i] = mapRow(function, series, input, i);
}
return BooleanSeries.buildFrom(output);
}
private static byte mapRow(StringConditional function, Series[] series, String[] input, int row) {
for(int j=0; j<series.length; j++) {
String value = series[j].getString(row);
if(isNull(value))
return BooleanSeries.NULL;
input[j] = value;
}
return BooleanSeries.valueOf(function.apply(input));
}
/**
* @see Series#aggregate(Function)
*/
public static StringSeries aggregate(StringFunction function, Series series) {
if(series.hasNull())
return buildFrom(NULL);
return buildFrom(function.apply(series.getStrings().values));
}
/**
* @see Series#aggregate(Function)
*/
public static BooleanSeries aggregate(StringConditional function, Series series) {
if(series.hasNull())
return BooleanSeries.buildFrom(BooleanSeries.NULL);
return BooleanSeries.builder().addBooleanValues(function.apply(series.getStrings().values)).build();
}
public static boolean isNull(String value) {
return Objects.equals(value, NULL);
}
private static int nullSafeStringComparator(String a, String b) {
if (isNull(a) && isNull(b))
return 0;
if (isNull(a))
return -1;
if (isNull(b))
return 1;
return a.compareTo(b);
}
private static String[] assertNotEmpty(String[] values) {
if(values.length <= 0)
throw new IllegalStateException("Must contain at least one value");
return values;
}
@Override
public StringSeries shift(int offset) {
String[] values = new String[this.values.length];
if(offset >= 0) {
Arrays.fill(values, 0, Math.min(offset, values.length), NULL);
System.arraycopy(this.values, 0, values, Math.min(offset, values.length), Math.max(values.length - offset, 0));
} else {
System.arraycopy(this.values, Math.min(-offset, values.length), values, 0, Math.max(values.length + offset, 0));
Arrays.fill(values, Math.max(values.length + offset, 0), values.length, NULL);
}
return buildFrom(values);
}
@Override
public StringSeries sorted() {
String[] values = Arrays.copyOf(this.values, this.values.length);
Arrays.sort(values, new Comparator<String>() {
@Override
public int compare(String a, String b) {
return nullSafeStringComparator(a, b);
}
});
return buildFrom(values);
}
@Override
int[] sortedIndex() {
List<StringSortTuple> tuples = new ArrayList<>();
for (int i = 0; i < this.values.length; i++) {
tuples.add(new StringSortTuple(this.values[i], i));
}
Collections.sort(tuples, new Comparator<StringSortTuple>() {
@Override
public int compare(StringSortTuple a, StringSortTuple b) {
return nullSafeStringComparator(a.value, b.value);
}
});
int[] fromIndex = new int[tuples.size()];
for (int i = 0; i < tuples.size(); i++) {
fromIndex[i] = tuples.get(i).index;
}
return fromIndex;
}
static final class StringSortTuple {
final String value;
final int index;
StringSortTuple(String value, int index) {
this.value = value;
this.index = index;
}
}
}