DataFrame.java example

Explorer
pinot-master
package com.linkedin.thirdeye.dataframe;

import com.linkedin.pinot.client.ResultSet;
import com.linkedin.pinot.client.ResultSetGroup;
import com.udojava.evalex.Expression;
import java.io.IOException;
import java.io.Reader;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;


/**
 * Container class for a data frame with multiple typed series with equivalent row count.
 */
public class DataFrame {
  /**
   * Pattern a series name must match in full to be accepted by {@code addSeries}:
   * a letter or underscore followed by any number of word characters.
   * Declared {@code final} so that the validation rule cannot be swapped out at runtime.
   */
  public static final Pattern SERIES_NAME_PATTERN = Pattern.compile("([A-Za-z_]\\w*)");

  /** Name of the index series created by the index-building constructors. */
  public static final String COLUMN_INDEX_DEFAULT = "index";
  // NOTE(review): presumably suffixes for colliding column names in joins; usage is not visible here
  public static final String COLUMN_JOIN_LEFT = "_left";
  public static final String COLUMN_JOIN_RIGHT = "_right";
  // NOTE(review): presumably the column width limit for string rendering; usage is not visible here
  public static final int DEFAULT_MAX_COLUMN_WIDTH = 30;

  /**
   * Strategy interface for resampling series with different native types with a common
   * strategy.
   */
  public interface ResamplingStrategy {
    /**
     * Resamples the series {@code s} according to {@code grouping}.
     *
     * @param grouping grouping that determines the resampling intervals
     * @param s series to resample
     * @return DataFrame holding the resampled result
     */
    DataFrame apply(Series.SeriesGrouping grouping, Series s);
  }

  /**
   * Resampling strategy that keeps the last value of each grouped interval.
   * Dispatches on the native type of the series to pick the matching "last" aggregation.
   */
  public static final class ResampleLast implements ResamplingStrategy {
    /**
     * Applies {@code grouping} to {@code s} and aggregates each group with the
     * type-specific "last" function.
     *
     * @param grouping grouping that determines the resampling intervals
     * @param s series to resample
     * @throws IllegalArgumentException if the series type is not DOUBLE, LONG, STRING or BOOLEAN
     * @return DataFrame with the aggregation result
     */
    @Override
    public DataFrame apply(Series.SeriesGrouping grouping, Series s) {
      switch (s.type()) {
        case BOOLEAN:
          return grouping.applyTo(s).aggregate(new BooleanSeries.BooleanLast());
        case STRING:
          return grouping.applyTo(s).aggregate(new StringSeries.StringLast());
        case LONG:
          return grouping.applyTo(s).aggregate(new LongSeries.LongLast());
        case DOUBLE:
          return grouping.applyTo(s).aggregate(new DoubleSeries.DoubleLast());
        default: {
          final String message = String.format("Cannot resample series type '%s'", s.type());
          throw new IllegalArgumentException(message);
        }
      }
    }
  }

  /**
   * Grouping of the rows of a source DataFrame by a common key. Holds the key series,
   * the buckets of grouped rows, and the DataFrame the grouping was derived from.
   */
  public static final class DataFrameGrouping {
    final String keyName;
    final Series keys;
    final List<Series.Bucket> buckets;
    final DataFrame source;

    /**
     * Creates a grouping whose key column is named {@code Series.GROUP_KEY}.
     */
    DataFrameGrouping(Series keys, DataFrame source, List<Series.Bucket> buckets) {
      this(Series.GROUP_KEY, keys, source, buckets);
    }

    /**
     * Creates a grouping with an explicit name for the key column.
     */
    DataFrameGrouping(String keyName, Series keys, DataFrame source, List<Series.Bucket> buckets) {
      this.source = source;
      this.buckets = buckets;
      this.keys = keys;
      this.keyName = keyName;
    }

    /**
     * Returns the number of keys in the grouping.
     *
     * @return key count
     */
    public int size() {
      final Series groupKeys = this.keys;
      return groupKeys.size();
    }

    /**
     * Returns the DataFrame this grouping refers to.
     *
     * @return source DataFrame
     */
    public DataFrame source() {
      return this.source;
    }

    /**
     * Returns {@code true} if the key series is empty.
     *
     * @return {@code true} if there are no keys, {@code false} otherwise
     */
    public boolean isEmpty() {
      final Series groupKeys = this.keys;
      return groupKeys.isEmpty();
    }

    /**
     * Returns the grouping applied to a single series of the source DataFrame.
     *
     * @param seriesName name of the series in the source DataFrame
     * @return series grouping built from the keys, the named series, and the buckets
     */
    public Series.SeriesGrouping get(String seriesName) {
      final Series values = this.source.get(seriesName);
      return new Series.SeriesGrouping(this.keys, values, this.buckets);
    }

    /**
     * Aggregates the named series group by group. In the result, the key column is renamed
     * from {@code Series.GROUP_KEY} to the grouping's key name, the value column from
     * {@code Series.GROUP_VALUE} to {@code seriesName}, and the key column is set as index.
     *
     * @param seriesName name of the series to aggregate
     * @param function aggregation function
     * @return DataFrame with key and aggregated value columns
     */
    public DataFrame aggregate(String seriesName, Series.Function function) {
      DataFrame result = this.get(seriesName).aggregate(function);
      result = result.renameSeries(Series.GROUP_KEY, this.keyName);
      result = result.renameSeries(Series.GROUP_VALUE, seriesName);
      return result.setIndex(this.keyName);
    }
  }

  /**
   * Builder for DataFrame in row-by-row sequence. A series name of the form
   * {@code name:type} (e.g. {@code "count:long"}) fixes the native type of that column;
   * the type is matched case-insensitively against {@code Series.SeriesType}.
   * Columns without a type suffix are built as StringSeries and converted to the type
   * inferred from their values on completion.
   *
   * <br/><b>NOTE:</b> appended row arrays are stored by reference, not copied.
   */
  public static final class Builder {
    final List<String> seriesNames;
    final List<Object[]> rows = new ArrayList<>();

    Builder(List<String> seriesNames) {
      this.seriesNames = seriesNames;
    }

    /**
     * Appends multiple rows.
     *
     * @param rows rows to append, each holding one value per series
     * @throws IllegalArgumentException if a row's length differs from the number of series
     * @return reference to the builder (this)
     */
    public Builder append(Collection<Object[]> rows) {
      for(Object[] row : rows) {
        if (row.length != this.seriesNames.size())
          throw new IllegalArgumentException(String.format("Expected %d values, but got %d", seriesNames.size(), row.length));
        this.rows.add(row);
      }
      return this;
    }

    /**
     * Appends multiple rows.
     *
     * @see Builder#append(Collection)
     */
    public Builder append(Object[]... rows) {
      return this.append(Arrays.asList(rows));
    }

    /**
     * Appends a single row given as individual values.
     *
     * @see Builder#append(Collection)
     */
    public Builder append(Object... row) {
      return this.append(Collections.singleton(row));
    }

    /**
     * Builds the DataFrame from the rows appended so far, one series per series name.
     *
     * @throws IllegalArgumentException if a type suffix does not name a {@code Series.SeriesType}
     * @return DataFrame holding one series per column
     */
    public DataFrame build() {
      DataFrame df = new DataFrame();

      // infer column types
      for(int i=0; i<seriesNames.size(); i++) {
        String rawName = seriesNames.get(i);

        boolean isDynamicType = true;
        String name = rawName;
        Series.SeriesType type = Series.SeriesType.STRING;

        String[] parts = rawName.split(":", 2);
        if(parts.length == 2) {
          name = parts[0];
          // Locale.ROOT keeps the enum lookup independent of the JVM default locale
          // (e.g. "string" upper-cases to "STRİNG" under a Turkish locale)
          type = Series.SeriesType.valueOf(parts[1].toUpperCase(java.util.Locale.ROOT));
          isDynamicType = false;
        }

        Series series = buildSeries(type, i);

        // infer type if not provided; series is a StringSeries here since type defaulted to STRING
        if(isDynamicType) {
          series = series.get(((StringSeries) series).inferType());
        }

        df.addSeries(name, series);
      }

      return df;
    }

    // dispatches to the typed column builder for the given series type
    private Series buildSeries(Series.SeriesType type, int columnIndex) {
      switch (type) {
        case DOUBLE:
          return buildDoubleSeries(columnIndex);
        case LONG:
          return buildLongSeries(columnIndex);
        case STRING:
          return buildStringSeries(columnIndex);
        case BOOLEAN:
          return buildBooleanSeries(columnIndex);
        default:
          throw new IllegalArgumentException(String.format("Unknown series type '%s'", type));
      }
    }

    // TODO implement ObjectSeries
    private DoubleSeries buildDoubleSeries(int columnIndex) {
      double[] values = new double[this.rows.size()];
      int i = 0;
      for(Object[] r : this.rows) {
        values[i++] = toDouble(r[columnIndex]);
      }
      return DoubleSeries.buildFrom(values);
    }

    // null maps to DoubleSeries.NULL, numbers are narrowed directly, anything else is parsed from its string form
    private static double toDouble(Object o) {
      if(o == null)
        return DoubleSeries.NULL;
      if(o instanceof Number)
        return ((Number)o).doubleValue();
      return StringSeries.getDouble(o.toString());
    }

    private LongSeries buildLongSeries(int columnIndex) {
      long[] values = new long[this.rows.size()];
      int i = 0;
      for(Object[] r : this.rows) {
        values[i++] = toLong(r[columnIndex]);
      }
      return LongSeries.buildFrom(values);
    }

    // null maps to LongSeries.NULL, numbers are narrowed directly, anything else is parsed from its string form
    private static long toLong(Object o) {
      if(o == null)
        return LongSeries.NULL;
      if(o instanceof Number)
        return ((Number)o).longValue();
      return StringSeries.getLong(o.toString());
    }

    private StringSeries buildStringSeries(int columnIndex) {
      String[] values = new String[this.rows.size()];
      int i = 0;
      for(Object[] r : this.rows) {
        values[i++] = toString(r[columnIndex]);
      }
      return StringSeries.buildFrom(values);
    }

    // null maps to StringSeries.NULL, anything else goes through its string form
    private static String toString(Object o) {
      if(o == null)
        return StringSeries.NULL;
      return StringSeries.getString(o.toString());
    }

    private BooleanSeries buildBooleanSeries(int columnIndex) {
      byte[] values = new byte[this.rows.size()];
      int i = 0;
      for(Object[] r : this.rows) {
        values[i++] = toBoolean(r[columnIndex]);
      }
      return BooleanSeries.buildFrom(values);
    }

    // null maps to BooleanSeries.NULL, numbers are true iff non-zero, anything else is parsed from its string form
    private static byte toBoolean(Object o) {
      if(o == null)
        return BooleanSeries.NULL;
      if(o instanceof Number)
        return BooleanSeries.valueOf(((Number)o).doubleValue() != 0.0d);
      return StringSeries.getBoolean(o.toString());
    }
  }

  // name of the series acting as index; null while no index is set
  String indexName;
  // series by name; addSeries() enforces a common row count
  Map<String, Series> series = new HashMap<>();

  /**
   * Returns a DoubleSeries wrapping the values array
   *
   * @param values base array
   * @return DoubleSeries wrapping the array
   */
  public static DoubleSeries toSeries(double... values) {
    final DoubleSeries wrapped = DoubleSeries.buildFrom(values);
    return wrapped;
  }

  /**
   * Builds a LongSeries on top of the given values.
   *
   * @param values base array
   * @return LongSeries wrapping the array
   */
  public static LongSeries toSeries(long... values) {
    final LongSeries wrapped = LongSeries.buildFrom(values);
    return wrapped;
  }

  /**
   * Builds a StringSeries on top of the given values.
   *
   * @param values base array
   * @return StringSeries wrapping the array
   */
  public static StringSeries toSeries(String... values) {
    final StringSeries wrapped = StringSeries.buildFrom(values);
    return wrapped;
  }

  /**
   * Builds a BooleanSeries on top of the given byte-encoded values.
   *
   * @param values base array
   * @return BooleanSeries wrapping the array
   */
  public static BooleanSeries toSeries(byte... values) {
    final BooleanSeries wrapped = BooleanSeries.buildFrom(values);
    return wrapped;
  }

  /**
   * Builds a BooleanSeries from primitive booleans by feeding them through a
   * BooleanSeries builder.
   *
   * @param values base array
   * @return BooleanSeries holding the values
   */
  public static BooleanSeries toSeries(boolean... values) {
    final BooleanSeries.Builder seriesBuilder = BooleanSeries.builder();
    return seriesBuilder.addBooleanValues(values).build();
  }

  /**
   * Creates a row-by-row builder for a DataFrame with the given series names.
   *
   * @param seriesNames series names of the DataFrame
   * @return DataFrame builder
   */
  public static Builder builder(String... seriesNames) {
    final List<String> names = Arrays.asList(seriesNames);
    return new Builder(names);
  }

  /**
   * Creates a row-by-row builder for a DataFrame with the given series names.
   * The list is handed to the builder as-is.
   *
   * @param seriesNames series names of the DataFrame
   * @return DataFrame builder
   */
  public static Builder builder(List<String> seriesNames) {
    final Builder rowBuilder = new Builder(seriesNames);
    return rowBuilder;
  }

  /**
   * Convenience accessor for a new DoubleSeries builder.
   *
   * @return DoubleSeries builder
   */
  public static DoubleSeries.Builder buildDoubles() {
    final DoubleSeries.Builder seriesBuilder = DoubleSeries.builder();
    return seriesBuilder;
  }

  /**
   * Convenience accessor for a new LongSeries builder.
   *
   * @return LongSeries builder
   */
  public static LongSeries.Builder buildLongs() {
    final LongSeries.Builder seriesBuilder = LongSeries.builder();
    return seriesBuilder;
  }

  /**
   * Convenience accessor for a new StringSeries builder.
   *
   * @return StringSeries builder
   */
  public static StringSeries.Builder buildStrings() {
    final StringSeries.Builder seriesBuilder = StringSeries.builder();
    return seriesBuilder;
  }

  /**
   * Convenience accessor for a new BooleanSeries builder.
   *
   * @return BooleanSeries builder
   */
  public static BooleanSeries.Builder buildBooleans() {
    final BooleanSeries.Builder seriesBuilder = BooleanSeries.builder();
    return seriesBuilder;
  }

  /**
   * Creates a new DataFrame with a column "index" (as determined by {@code COLUMN_INDEX_DEFAULT}) with
   * length {@code defaultIndexSize}, ranging from 0 to {@code defaultIndexSize - 1}.
   *
   * @param defaultIndexSize index column size
   */
  public DataFrame(int defaultIndexSize) {
    long[] indexValues = new long[defaultIndexSize];
    for(int i=0; i<defaultIndexSize; i++) {
      indexValues[i] = i;
    }
    this.addSeries(COLUMN_INDEX_DEFAULT, LongSeries.buildFrom(indexValues));
    this.indexName = COLUMN_INDEX_DEFAULT;
  }

  /**
   * Creates a new DataFrame with a column "index" (as determined by {@code COLUMN_INDEX_DEFAULT}) that
   * wraps the array {@code indexValues}.
   *
   * @param indexValues index values
   */
  public DataFrame(long... indexValues) {
    this.addSeries(COLUMN_INDEX_DEFAULT, LongSeries.buildFrom(indexValues));
    this.indexName = COLUMN_INDEX_DEFAULT;
  }

  /**
   * Creates a new DataFrame with a column "index" (as determined by {@code COLUMN_INDEX_DEFAULT}) referencing
   * the Series {@code index}.
   *
   * @param index index series
   */
  public DataFrame(Series index) {
    this.addSeries(COLUMN_INDEX_DEFAULT, index);
    this.indexName = COLUMN_INDEX_DEFAULT;
  }

  /**
   * Creates a new DataFrame that copies the properties of {@code df}.
   *
   * <br/><b>NOTE:</b> the copy is shallow, i.e. the contained series are not copied but referenced.
   *
   * @param df DataFrame to copy properties from
   */
  public DataFrame(DataFrame df) {
    this.indexName = df.indexName;
    this.series = new HashMap<>(df.series);
  }

  /**
   * Creates a new DataFrame without any columns and without an index. The row count of the
   * DataFrame is determined by the first series added.
   */
  public DataFrame() {
    // left blank: fields keep their declared initial values
  }

  /**
   * Marks an existing series as the index of this DataFrame in-place.
   *
   * @param seriesName index series name
   * @throws IllegalArgumentException if the series does not exist
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame setIndex(String seriesName) {
    // validate first so that a missing series leaves the current index untouched
    this.assertSeriesExists(seriesName);
    indexName = seriesName;
    return this;
  }

  /**
   * Clears the index in-place by setting the index name to {@code null}. The series that
   * served as index stays in the DataFrame.
   *
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame resetIndex() {
    indexName = null;
    return this;
  }

  /**
   * Looks up the series the index name refers to.
   *
   * @throws IllegalArgumentException if the series does not exist
   * @return index series
   */
  public Series getIndex() {
    final Series index = assertSeriesExists(this.indexName);
    return index;
  }

  /**
   * Tells whether an index name is set, i.e. whether the index name is non-null.
   *
   * @return {@code true} if an index name is set, {@code false} otherwise
   */
  public boolean hasIndex() {
    return null != this.indexName;
  }

  /**
   * Gives the name of the series used as index.
   *
   * @return index series name, or {@code null} if no index name is set
   */
  public String getIndexName() {
    final String name = this.indexName;
    return name;
  }

  /**
   * Returns the row count of the DataFrame, taken from an arbitrary contained series.
   * A DataFrame without any series has a row count of {@code 0}.
   *
   * @return row count
   */
  public int size() {
    final Iterator<Series> it = this.series.values().iterator();
    return it.hasNext() ? it.next().size() : 0;
  }

  /**
   * Returns a copy of the DataFrame in which every series is sliced from index {@code from}
   * (inclusive) to index {@code to} (exclusive). The index name is carried over.
   *
   * @param from start index (inclusive), must be >= 0
   * @param to end index (exclusive), must be <= size
   * @return sliced DataFrame copy
   */
  public DataFrame slice(int from, int to) {
    final DataFrame sliced = new DataFrame(this);
    // keep the copied index name, but replace every series with its slice
    sliced.series.clear();
    for (String name : this.series.keySet()) {
      sliced.addSeries(name, this.series.get(name).slice(from, to));
    }
    return sliced;
  }

  /**
   * Returns a copy of the DataFrame omitting any elements before index {@code from}.
   * If {@code from} is {@code 0}, the entire DataFrame is returned. If {@code from} is greater
   * than the DataFrame size, an empty DataFrame is returned.
   *
   * @param from start index of copy (inclusive)
   * @return DataFrame copy with elements from index {@code from}.
   */
  public DataFrame sliceFrom(int from) {
    final int end = this.size();
    return this.slice(from, end);
  }

  /**
   * Returns a copy of the DataFrame omitting any elements equal to or after index {@code to}.
   * If {@code to} is equal or greater than the DataFrame size, the entire DataFrame is returned.
   * If {@code to} is {@code 0}, an empty DataFrame is returned.
   *
   * @param to end index of copy (exclusive)
   * @return DataFrame copy with elements before index {@code to}.
   */
  public DataFrame sliceTo(int to) {
    final int start = 0;
    return this.slice(start, to);
  }

  /**
   * Returns a copy of the DataFrame holding (up to) the {@code n} first rows, i.e. the
   * slice from index {@code 0} to index {@code n}.
   *
   * @param n number of rows to include
   * @return DataFrame copy with first {@code n} rows
   */
  public DataFrame head(int n) {
    final int start = 0;
    return this.slice(start, n);
  }

  /**
   * Returns a copy of the DataFrame containing (up to) {@code n} last rows. If {@code n}
   * exceeds the DataFrame size, all rows are returned.
   *
   * @param n number of rows to include
   * @return DataFrame copy with last {@code n} rows
   */
  public DataFrame tail(int n) {
    final int size = this.size();
    // clamp the start index: for n > size the raw difference is negative, which is
    // outside the range slice() documents for its start index
    final int from = Math.max(size - n, 0);
    return this.slice(from, size);
  }

  /**
   * Returns {@code true} if the DataFrame does not hold any rows. Otherwise, returns {@code false}.
   *
   * @return {@code true} if empty, {@code false} otherwise.
   */
  public boolean isEmpty() {
    final int rows = this.size();
    return !(rows > 0);
  }

  /**
   * Returns a deep copy of the DataFrame: the DataFrame itself is duplicated and each
   * series is replaced by its own copy. The index name is carried over.
   * <br/><b>NOTE:</b> use caution when applying this to large DataFrames.
   *
   * @return deep copy of DataFrame
   */
  public DataFrame copy() {
    final DataFrame duplicate = new DataFrame(this);
    // the shallow copy references the original series; overwrite each entry with a copy
    for (String name : this.series.keySet()) {
      duplicate.addSeries(name, this.series.get(name).copy());
    }
    return duplicate;
  }

  /**
   * Adds a new series to the DataFrame in-place. The new series must have the same row count
   * as the DataFrame. If this is the first series added to an empty DataFrame, it determines
   * the DataFrame size. Further, {@code seriesName} must match the pattern {@code SERIES_NAME_PATTERN}.
   * If a series with {@code seriesName} already exists in the DataFrame it is replaced by
   * {@code series}.
   *
   * @param seriesName series name
   * @param series series, must not be {@code null}
   * @throws IllegalArgumentException if the series is {@code null}, does not have the same size,
   * or the series name does not match the pattern
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame addSeries(String seriesName, Series series) {
    if(seriesName == null || !SERIES_NAME_PATTERN.matcher(seriesName).matches())
      throw new IllegalArgumentException(String.format("Series name must match pattern '%s'", SERIES_NAME_PATTERN));
    // reject null up front: an empty DataFrame would otherwise store it silently and only
    // fail later, e.g. when size() dereferences the stored series
    if(series == null)
      throw new IllegalArgumentException(String.format("Series '%s' must not be null", seriesName));
    if(!this.series.isEmpty() && series.size() != this.size())
      throw new IllegalArgumentException("DataFrame index and series must be of same length");
    this.series.put(seriesName, series);
    return this;
  }

  /**
   * Adds a new series of doubles to the DataFrame in-place. {@code values} is wrapped in a
   * DoubleSeries and then added with the semantics of
   * {@code addSeries(String seriesName, Series series)}.
   *
   * @param seriesName series name
   * @param values series values
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame addSeries(String seriesName, double... values) {
    final DoubleSeries wrapped = DataFrame.toSeries(values);
    return this.addSeries(seriesName, wrapped);
  }

  /**
   * Adds a new series of longs to the DataFrame in-place. {@code values} is wrapped in a
   * LongSeries and then added with the semantics of
   * {@code addSeries(String seriesName, Series series)}.
   *
   * @param seriesName series name
   * @param values series values
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame addSeries(String seriesName, long... values) {
    final LongSeries wrapped = DataFrame.toSeries(values);
    return this.addSeries(seriesName, wrapped);
  }

  /**
   * Adds a new series of strings to the DataFrame in-place. {@code values} is wrapped in a
   * StringSeries and then added with the semantics of
   * {@code addSeries(String seriesName, Series series)}.
   *
   * @param seriesName series name
   * @param values series values
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame addSeries(String seriesName, String... values) {
    final StringSeries wrapped = DataFrame.toSeries(values);
    return this.addSeries(seriesName, wrapped);
  }

  /**
   * Adds a new series of byte-encoded booleans to the DataFrame in-place. {@code values} is
   * wrapped in a BooleanSeries and then added with the semantics of
   * {@code addSeries(String seriesName, Series series)}.
   *
   * @param seriesName series name
   * @param values series values
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame addSeries(String seriesName, byte... values) {
    final BooleanSeries wrapped = DataFrame.toSeries(values);
    return this.addSeries(seriesName, wrapped);
  }

  /**
   * Adds a new series of booleans to the DataFrame in-place. {@code values} is converted
   * to a BooleanSeries and then added with the semantics of
   * {@code addSeries(String seriesName, Series series)}.
   *
   * @param seriesName series name
   * @param values series values
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame addSeries(String seriesName, boolean... values) {
    final BooleanSeries wrapped = DataFrame.toSeries(values);
    return this.addSeries(seriesName, wrapped);
  }

  /**
   * Removes a series from the DataFrame in-place. If the removed series was the index,
   * the index name is reset to {@code null}.
   *
   * @param seriesName name of the series to remove
   * @throws IllegalArgumentException if the series does not exist
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame dropSeries(String seriesName) {
    assertSeriesExists(seriesName);
    this.series.remove(seriesName);
    final boolean droppedIndex = seriesName.equals(this.indexName);
    if (droppedIndex) {
      this.indexName = null;
    }
    return this;
  }

  /**
   * Renames a series in the DataFrame in-place. If a series with name {@code newName} already
   * exists it is replaced by the series referenced by {@code oldName}. If the renamed series
   * is the index, the index name follows the rename. The DataFrame is left unmodified if
   * {@code newName} is rejected.
   *
   * @param oldName name of existing series
   * @param newName new name of series
   * @throws IllegalArgumentException if the series referenced by {@code oldName} does not exist
   * or {@code newName} does not match the pattern {@code SERIES_NAME_PATTERN}
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame renameSeries(String oldName, String newName) {
    Series s = assertSeriesExists(oldName);

    // renaming a series to itself changes nothing
    if(oldName.equals(newName))
      return this;

    // remember index membership before dropSeries() resets the index name
    final boolean wasIndex = oldName.equals(this.indexName);

    // add before drop: addSeries() validates newName, so a rejected name can no longer
    // cost the DataFrame the series (and its index) that was being renamed
    this.addSeries(newName, s);
    this.dropSeries(oldName);

    if(wasIndex)
      this.indexName = newName;
    return this;
  }

  /**
   * Converts a series in the DataFrame to a new native type. The DataFrame is modified
   * in-place: the entry for {@code seriesName} is replaced by the converted series.
   *
   * @param seriesName name of existing series
   * @param type new native type of series
   * @throws IllegalArgumentException if the series does not exist
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame convertSeries(String seriesName, Series.SeriesType type) {
    final Series original = assertSeriesExists(seriesName);
    final Series converted = original.get(type);
    this.series.put(seriesName, converted);
    return this;
  }

  /**
   * Returns the names of the series contained in the DataFrame as an unmodifiable view.
   *
   * @return series names
   */
  public Set<String> getSeriesNames() {
    final Set<String> names = this.series.keySet();
    return Collections.unmodifiableSet(names);
  }

  /**
   * Returns the mapping of series names to series encapsulated by this DataFrame as an
   * unmodifiable view (not a copy).
   *
   * @return series mapping
   */
  public Map<String, Series> getSeries() {
    final Map<String, Series> view = Collections.unmodifiableMap(this.series);
    return view;
  }

  /**
   * Looks up a single series by name.
   *
   * @param seriesName series name
   * @throws IllegalArgumentException if the series does not exist
   * @return series
   */
  public Series get(String seriesName) {
    final Series found = assertSeriesExists(seriesName);
    return found;
  }

  /**
   * Looks up multiple series by name. The result holds the series in the same order as
   * {@code seriesNames}.
   *
   * @param seriesNames series names
   * @throws IllegalArgumentException if any one series does not exist
   * @return series array
   */
  public Series[] get(String... seriesNames) {
    final Series[] found = new Series[seriesNames.length];
    for (int pos = 0; pos < seriesNames.length; pos++) {
      found[pos] = assertSeriesExists(seriesNames[pos]);
    }
    return found;
  }

  /**
   * Tells whether the DataFrame holds a series named {@code seriesName}.
   *
   * @param seriesName series name
   * @return {@code true} if series exists, {@code false} otherwise.
   */
  public boolean contains(String seriesName) {
    final boolean present = this.series.containsKey(seriesName);
    return present;
  }

  /**
   * Returns the series referenced by {@code seriesName} as DoubleSeries. If the series'
   * native type is not {@code DoubleSeries} it is converted transparently.
   *
   * @param seriesName series name
   * @throws IllegalArgumentException if the series does not exist
   * @return DoubleSeries
   */
  public DoubleSeries getDoubles(String seriesName) {
    final Series source = assertSeriesExists(seriesName);
    return source.getDoubles();
  }

  /**
   * Returns the series referenced by {@code seriesName} as LongSeries. If the series'
   * native type is not {@code LongSeries} it is converted transparently.
   *
   * @param seriesName series name
   * @throws IllegalArgumentException if the series does not exist
   * @return LongSeries
   */
  public LongSeries getLongs(String seriesName) {
    final Series source = assertSeriesExists(seriesName);
    return source.getLongs();
  }

  /**
   * Returns the series referenced by {@code seriesName} as StringSeries. If the series'
   * native type is not {@code StringSeries} it is converted transparently.
   *
   * @param seriesName series name
   * @throws IllegalArgumentException if the series does not exist
   * @return StringSeries
   */
  public StringSeries getStrings(String seriesName) {
    final Series source = assertSeriesExists(seriesName);
    return source.getStrings();
  }

  /**
   * Returns the series referenced by {@code seriesName} as BooleanSeries. If the series'
   * native type is not {@code BooleanSeries} it is converted transparently.
   *
   * @param seriesName series name
   * @throws IllegalArgumentException if the series does not exist
   * @return BooleanSeries
   */
  public BooleanSeries getBooleans(String seriesName) {
    final Series source = assertSeriesExists(seriesName);
    return source.getBooleans();
  }

  /**
   * Returns the value at row {@code index} of the series {@code seriesName} as double.
   *
   * @param seriesName series name
   * @param index row index
   * @return value as double
   */
  public double getDouble(String seriesName, int index) {
    final Series source = assertSeriesExists(seriesName);
    return source.getDouble(index);
  }

  /**
   * Returns the value at row {@code index} of the series {@code seriesName} as long.
   *
   * @param seriesName series name
   * @param index row index
   * @return value as long
   */
  public long getLong(String seriesName, int index) {
    final Series source = assertSeriesExists(seriesName);
    return source.getLong(index);
  }

  /**
   * Returns the value at row {@code index} of the series {@code seriesName} as String.
   *
   * @param seriesName series name
   * @param index row index
   * @return value as String
   */
  public String getString(String seriesName, int index) {
    final Series source = assertSeriesExists(seriesName);
    return source.getString(index);
  }

  /**
   * Returns the value at row {@code index} of the series {@code seriesName} as
   * byte-encoded boolean.
   *
   * @param seriesName series name
   * @param index row index
   * @return value as byte-encoded boolean
   */
  public byte getBoolean(String seriesName, int index) {
    final Series source = assertSeriesExists(seriesName);
    return source.getBoolean(index);
  }

  /**
   * Evaluates {@code function} row by row over the series referenced by {@code seriesNames}
   * and returns the results as a new series. The series' values are passed to
   * {@code function} in the order in which they appear in {@code seriesNames}.
   * If the series' native types do not match the required input types of {@code function},
   * the series are converted transparently. The native type of the returned series is
   * determined by {@code function}'s output type.
   *
   * @param function function to apply to each row
   * @param seriesNames names of input series
   * @throws IllegalArgumentException if the series does not exist
   * @return series with evaluation results
   */
  public Series map(Series.Function function, String... seriesNames) {
    // resolve names to series, then delegate to the static series-based variant
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Name-based variant for functions producing doubles.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public DoubleSeries map(Series.DoubleFunction function, String... seriesNames) {
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Name-based variant for functions producing longs.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public LongSeries map(Series.LongFunction function, String... seriesNames) {
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Name-based variant for functions producing strings.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public StringSeries map(Series.StringFunction function, String... seriesNames) {
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Name-based variant for {@code Series.BooleanFunction}.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public BooleanSeries map(Series.BooleanFunction function, String... seriesNames) {
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Name-based variant for {@code Series.BooleanFunctionEx}.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public BooleanSeries map(Series.BooleanFunctionEx function, String... seriesNames) {
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Name-based variant for {@code Series.DoubleConditional}.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public BooleanSeries map(Series.DoubleConditional function, String... seriesNames) {
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Name-based variant for {@code Series.LongConditional}.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public BooleanSeries map(Series.LongConditional function, String... seriesNames) {
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Name-based variant for {@code Series.StringConditional}.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public BooleanSeries map(Series.StringConditional function, String... seriesNames) {
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Name-based variant for {@code Series.BooleanConditional}.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public BooleanSeries map(Series.BooleanConditional function, String... seriesNames) {
    return DataFrame.map(function, names2series(seriesNames));
  }

  /**
   * Applies {@code function} to the series referenced by {@code inputNames} row by row
   * and adds the result to the DataFrame as a new series with name {@code outputName}.
   * The series' values are mapped to arguments of {@code function} in the same order
   * as they appear in {@code inputNames}. If {@code inputNames} is empty, the series
   * {@code outputName} itself is used as the only input.
   * If the series' native types do not match the required input types of {@code function},
   * the series are converted transparently. The native type of the returned series is
   * determined by {@code function}'s output type.
   *
   * @param function function to apply to each row
   * @param outputName name of output series
   * @param inputNames names of input series, or none to use output series name as only input
   * @throws IllegalArgumentException if the series does not exist
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame mapInPlace(Series.Function function, String outputName, String... inputNames) {
    // honor the documented contract: without explicit inputs the output series is the input.
    // An explicitly passed empty array would otherwise map over zero series.
    if(inputNames.length == 0)
      return this.addSeries(outputName, map(function, this.get(outputName)));
    return this.addSeries(outputName, map(function, names2series(inputNames)));
  }

  /**
   * Applies {@code function} to the series referenced by {@code seriesName} row by row
   * and stores the result in the DataFrame under the same name, replacing the input series.
   * If the series' native type does not match the required input type of {@code function},
   * the series is converted transparently. The native type of the stored series is
   * determined by {@code function}'s output type.
   *
   * @param function function to apply to each row
   * @param seriesName name of series
   * @throws IllegalArgumentException if the series does not exist
   * @return reference to the modified DataFrame (this)
   */
  public DataFrame mapInPlace(Series.Function function, String seriesName) {
    final Series input = this.get(seriesName);
    final Series mapped = DataFrame.map(function, input);
    return this.addSeries(seriesName, mapped);
  }

  /**
   * Evaluates {@code function} row by row over {@code series} and returns the results as a
   * new series. The series' values are passed to {@code function} in the order in which they
   * appear in {@code series}.
   * If the series' native types do not match the required input types of {@code function},
   * the series are converted transparently. The native type of the returned series is
   * determined by {@code function}'s output type.
   *
   * @param function function to apply to each row
   * @param series input series for function
   * @return series with evaluation results
   */
  public static Series map(Series.Function function, Series... series) {
    final Series mapped = Series.map(function, series);
    return mapped;
  }

  /**
   * Typed variant for functions producing doubles; the generic result is cast to DoubleSeries.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public static DoubleSeries map(Series.DoubleFunction function, Series... series) {
    final Series.Function generic = (Series.Function) function;
    return (DoubleSeries) DataFrame.map(generic, series);
  }

  /**
   * Typed variant for functions producing longs; the generic result is cast to LongSeries.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public static LongSeries map(Series.LongFunction function, Series... series) {
    final Series.Function generic = (Series.Function) function;
    return (LongSeries) DataFrame.map(generic, series);
  }

  /**
   * Typed variant for functions producing strings; the generic result is cast to StringSeries.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public static StringSeries map(Series.StringFunction function, Series... series) {
    final Series.Function generic = (Series.Function) function;
    return (StringSeries) DataFrame.map(generic, series);
  }

  /**
   * Typed variant for {@code Series.BooleanFunction}; the generic result is cast to BooleanSeries.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public static BooleanSeries map(Series.BooleanFunction function, Series... series) {
    final Series.Function generic = (Series.Function) function;
    return (BooleanSeries) DataFrame.map(generic, series);
  }

  /**
   * Typed variant for {@code Series.BooleanFunctionEx}; the generic result is cast to BooleanSeries.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public static BooleanSeries map(Series.BooleanFunctionEx function, Series... series) {
    final Series.Function generic = (Series.Function) function;
    return (BooleanSeries) DataFrame.map(generic, series);
  }

  /**
   * Typed variant for {@code Series.DoubleConditional}; the generic result is cast to BooleanSeries.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public static BooleanSeries map(Series.DoubleConditional function, Series... series) {
    final Series.Function generic = (Series.Function) function;
    return (BooleanSeries) DataFrame.map(generic, series);
  }

  /**
   * Typed variant for {@code Series.LongConditional}; the generic result is cast to BooleanSeries.
   *
   * @see DataFrame#map(Series.Function, Series...)
   */
  public static BooleanSeries map(Series.LongConditional function, Series... series) {
    final Series.Function generic = (Series.Function) function;
    return (BooleanSeries) DataFrame.map(generic, series);
  }

  /**
   * Convenience overload of the generic {@code map} for {@code Series.StringConditional}.
   * Delegates to the generic variant and casts its result to {@code BooleanSeries}.
   *
   * @param function conditional to evaluate for each row
   * @param series input series for function
   * @return series with evaluation results
   * @see DataFrame#map(Series.Function, Series...)
   */
  public static BooleanSeries map(Series.StringConditional function, Series... series) {
    final Series mapped = map((Series.Function)function, series);
    return (BooleanSeries)mapped;
  }

  /**
   * Convenience overload of the generic {@code map} for {@code Series.BooleanConditional}.
   * Delegates to the generic variant and casts its result to {@code BooleanSeries}.
   *
   * @param function conditional to evaluate for each row
   * @param series input series for function
   * @return series with evaluation results
   * @see DataFrame#map(Series.Function, Series...)
   */
  public static BooleanSeries map(Series.BooleanConditional function, Series... series) {
    final Series mapped = map((Series.Function)function, series);
    return (BooleanSeries)mapped;
  }

  /**
   * Evaluates {@code doubleExpression} row by row against the series referenced by
   * {@code seriesNames} and returns the results as a new series. For every row, the value of
   * the i-th series is bound to a variable named {@code seriesNames[i]} before the expression
   * is evaluated, so only series listed in {@code seriesNames} can be referenced.
   * The function passed on receives {@code double} values and the result is returned as a
   * {@code DoubleSeries}.
   *
   * <br/><b>NOTE:</b> doubleExpression is compiled to an {@code EvalEx} expression.
   * <br/><b>NOTE(review):</b> values are bound via {@code new BigDecimal(double)}, which
   * rejects NaN and infinite values with a {@code NumberFormatException} - confirm how such
   * values are expected to be handled.
   *
   * @param doubleExpression expression to be compiled and applied using EvalEx
   * @param seriesNames names of the series providing the expression variables
   * @throws IllegalArgumentException if the series does not exist
   * @return series with evaluation results
   */
  public DoubleSeries map(String doubleExpression, final String... seriesNames) {
    final Expression expression = new Expression(doubleExpression);

    final Series.DoubleFunction evaluator = new Series.DoubleFunction() {
      @Override
      public double apply(double[] values) {
        // bind each value to the variable named after the series at the same position
        int position = 0;
        for(double value : values) {
          expression.with(seriesNames[position], new BigDecimal(value));
          position++;
        }
        return expression.eval().doubleValue();
      }
    };

    return (DoubleSeries)this.map(evaluator, seriesNames);
  }

  /**
   * Evaluates {@code doubleExpression} row by row and returns the results as a new series.
   * The variables of the expression are determined automatically: every identifier in
   * {@code doubleExpression} that equals the name of a series contained in this DataFrame is
   * bound to that series. Evaluation is then delegated to the variant taking explicit
   * series names.
   *
   * <br/><b>NOTE:</b> doubleExpression is compiled to an {@code EvalEx} expression.
   *
   * @param doubleExpression expression to be compiled and applied using EvalEx
   * @return series with evaluation results
   */
  public DoubleSeries map(String doubleExpression) {
    final Set<String> referenced = extractSeriesNames(doubleExpression);
    final String[] seriesNames = referenced.toArray(new String[referenced.size()]);
    return this.map(doubleExpression, seriesNames);
  }

  /**
   * Returns a projection of the DataFrame, i.e. a new DataFrame in which every series has
   * been projected with the same index array.
   *
   * <br/><b>NOTE:</b> fromIndex <= -1 is filled with {@code null}.
   * <br/><b>NOTE:</b> array with length 0 produces empty series.
   *
   * @param fromIndex array with indices to project from (must be <= series size)
   * @return DataFrame projection
   */
  public DataFrame project(int[] fromIndex) {
    // start from a copy of this frame, then replace all of its series with their projections
    final DataFrame projected = new DataFrame(this);
    projected.series.clear();

    for(Map.Entry<String, Series> entry : this.series.entrySet()) {
      final Series source = entry.getValue();
      projected.addSeries(entry.getKey(), source.project(fromIndex));
    }

    return projected;
  }

  /**
   * Returns a copy of the DataFrame sorted by series values referenced by {@code seriesNames}.
   * The resulting sorted order is the equivalent of applying a stable sort to the nth series
   * first, and then sorting iteratively by series until the 1st series.
   *
   * <br/><b>NOTE:</b> if {@code seriesNames} is empty, this DataFrame itself is returned.
   *
   * @param seriesNames 1st series, 2nd series, ..., nth series
   * @throws IllegalArgumentException if the series does not exist
   * @return sorted DataFrame copy
   */
  public DataFrame sortedBy(String... seriesNames) {
    DataFrame df = this;
    for(int i=seriesNames.length-1; i>=0; i--) {
      // The permutation has to be derived from the frame produced by the previous pass.
      // Taking it from the original (unsorted) frame would apply indices that no longer
      // match the current row order as soon as more than one series is sorted.
      df = df.project(df.assertSeriesExists(seriesNames[i]).sortedIndex());
    }
    return df;
  }

  /**
   * Returns a copy of the DataFrame in which each series holds its values in reversed order.
   *
   * @return reversed DataFrame copy
   */
  public DataFrame reverse() {
    final DataFrame reversed = new DataFrame(this);
    for(Map.Entry<String, Series> entry : this.series.entrySet()) {
      final Series flipped = entry.getValue().reverse();
      reversed.addSeries(entry.getKey(), flipped);
    }
    return reversed;
  }

  /**
   * Returns a copy of the DataFrame with values resampled by {@code interval} using
   * {@code strategy} on the series referenced by {@code seriesName}. The DataFrame is first
   * sorted by {@code seriesName}, the sorted target series (obtained via {@code getLongs})
   * is grouped by interval, and every other series is aggregated over that grouping with
   * {@code strategy}. The target series itself is replaced by the grouping keys.
   *
   * @param seriesName target series for resampling
   * @param interval resampling interval
   * @param strategy resampling strategy
   * @throws IllegalArgumentException if the series does not exist
   * @return resampled DataFrame copy
   */
  public DataFrame resampledBy(String seriesName, long interval, ResamplingStrategy strategy) {
    final DataFrame sorted = this.sortedBy(seriesName);
    final Series.SeriesGrouping grouping = sorted.getLongs(seriesName).groupByInterval(interval);

    // result frame starts as a copy of this frame without any series
    final DataFrame resampled = new DataFrame(this);
    resampled.series.clear();

    // aggregate all series except the one the grouping is based on
    for(Map.Entry<String, Series> entry : sorted.getSeries().entrySet()) {
      final String name = entry.getKey();
      if(!name.equals(seriesName)) {
        final DataFrame aggregated = strategy.apply(grouping, entry.getValue());
        resampled.addSeries(name, aggregated.get(Series.GROUP_VALUE));
      }
    }

    // the grouping keys become the new values of the target series
    resampled.addSeries(seriesName, grouping.keys());
    return resampled;
  }

  /**
   * Returns a copy of the DataFrame with rows filtered by {@code series}. Rows whose
   * associated value in {@code series} is {@code true} are copied, all other rows are
   * projected from index {@code -1} and therefore set to {@code null}.
   *
   * @param series filter series
   * @throws IllegalArgumentException if the size of {@code series} differs from the DataFrame size
   * @return filtered DataFrame copy
   */
  public DataFrame filter(BooleanSeries series) {
    final int rowCount = series.size();
    if(rowCount != this.size()) {
      throw new IllegalArgumentException("Series size must be equal to index size");
    }

    final int[] fromIndex = new int[rowCount];
    for(int row=0; row<rowCount; row++) {
      // -1 marks a row that is replaced by null in the projection
      fromIndex[row] = BooleanSeries.isTrue(series.values[row]) ? row : -1;
    }

    return this.project(fromIndex);
  }

  /**
   * Returns a copy of the DataFrame with rows filtered by the series referenced by
   * {@code seriesName}, obtained via {@code getBooleans}.
   *
   * @param seriesName name of the filter series
   * @return filtered DataFrame copy
   * @see DataFrame#filter(BooleanSeries)
   */
  public DataFrame filter(String seriesName) {
    final BooleanSeries mask = this.getBooleans(seriesName);
    return this.filter(mask);
  }

  /**
   * Returns a copy of the DataFrame with rows filtered by the result of applying
   * {@code conditional} to the series referenced by {@code seriesNames}.
   *
   * @param conditional condition evaluated for each row
   * @param seriesNames names of the input series for the condition
   * @throws IllegalArgumentException if a series does not exist
   * @return filtered DataFrame copy
   */
  public DataFrame filter(Series.Conditional conditional, String... seriesNames) {
    final Series[] inputs = names2series(seriesNames);
    return filter(conditional, inputs);
  }

  /**
   * Returns a copy of the DataFrame with rows filtered by the result of applying
   * {@code conditional} to {@code series}. The result of {@code Series.map} is cast to
   * {@code BooleanSeries} and used as the filter series.
   *
   * @param conditional condition evaluated for each row
   * @param series input series for the condition
   * @return filtered DataFrame copy
   * @see DataFrame#filter(BooleanSeries)
   */
  public DataFrame filter(Series.Conditional conditional, Series... series) {
    final BooleanSeries mask = (BooleanSeries)Series.map(conditional, series);
    return filter(mask);
  }

  /**
   * Returns a copy of the DataFrame filtered to rows for which the series referenced by
   * {@code seriesName} compares equal ({@code ==}) to {@code value}.
   *
   * @param seriesName name of the series to compare
   * @param value value to compare against
   * @return filtered DataFrame copy
   */
  public DataFrame filterEquals(String seriesName, final double value) {
    final Series.DoubleConditional matchesValue = new Series.DoubleConditional() {
      @Override
      public boolean apply(double... rowValues) {
        return value == rowValues[0];
      }
    };
    return this.filter(matchesValue, seriesName);
  }

  /**
   * Returns a copy of the DataFrame filtered to rows for which the series referenced by
   * {@code seriesName} compares equal ({@code ==}) to {@code value}.
   *
   * @param seriesName name of the series to compare
   * @param value value to compare against
   * @return filtered DataFrame copy
   */
  public DataFrame filterEquals(String seriesName, final long value) {
    final Series.LongConditional matchesValue = new Series.LongConditional() {
      @Override
      public boolean apply(long... rowValues) {
        return value == rowValues[0];
      }
    };
    return this.filter(matchesValue, seriesName);
  }

  /**
   * Returns a copy of the DataFrame filtered to rows for which {@code value.equals(...)}
   * holds for the series referenced by {@code seriesName}.
   *
   * @param seriesName name of the series to compare
   * @param value value to compare against; dereferenced for the comparison
   * @return filtered DataFrame copy
   */
  public DataFrame filterEquals(String seriesName, final String value) {
    final Series.StringConditional matchesValue = new Series.StringConditional() {
      @Override
      public boolean apply(String... rowValues) {
        return value.equals(rowValues[0]);
      }
    };
    return this.filter(matchesValue, seriesName);
  }

  /**
   * Returns a copy of the DataFrame filtered to rows for which the series referenced by
   * {@code seriesName} compares equal ({@code ==}) to {@code value}.
   *
   * @param seriesName name of the series to compare
   * @param value value to compare against
   * @return filtered DataFrame copy
   */
  public DataFrame filterEquals(String seriesName, final boolean value) {
    final Series.BooleanConditional matchesValue = new Series.BooleanConditional() {
      @Override
      public boolean apply(boolean... rowValues) {
        return value == rowValues[0];
      }
    };
    return this.filter(matchesValue, seriesName);
  }

  /**
   * Returns a DataFrameGrouping based on the labels provided by {@code labels} row by row.
   * The labels are grouped by value; the grouping keys and buckets are handed to the
   * DataFrameGrouping together with this DataFrame.
   * The size of {@code labels} must match the size of the DataFrame (this method itself
   * performs no size check).
   *
   * @param labels grouping labels
   * @return DataFrameGrouping
   */
  public DataFrameGrouping groupBy(Series labels) {
    final Series.SeriesGrouping byValue = labels.groupByValue();
    final DataFrameGrouping result = new DataFrameGrouping(byValue.keys(), this, byValue.buckets);
    return result;
  }

  /**
   * Returns a DataFrameGrouping based on the labels provided by the series referenced by
   * {@code seriesName} row by row. The series name is passed on to the DataFrameGrouping
   * alongside the grouping keys and buckets.
   *
   * @param seriesName series containing grouping labels
   * @return DataFrameGrouping
   */
  public DataFrameGrouping groupBy(String seriesName) {
    final Series labels = this.get(seriesName);
    final Series.SeriesGrouping byValue = labels.groupByValue();
    return new DataFrameGrouping(seriesName, byValue.keys(), this, byValue.buckets);
  }

  /**
   * Returns a copy of the DataFrame omitting rows that contain a {@code null} value in any
   * series.
   *
   * @return DataFrame copy without null rows
   */
  public DataFrame dropNull() {
    // per-row flag: true if at least one series is null in that row
    BooleanSeries anyNull = BooleanSeries.fillValues(this.size(), false);
    for(Series s : this.series.values()) {
      anyNull = anyNull.or(s.isNull());
    }

    // collect the indices of all rows that are not flagged
    final int[] keep = new int[anyNull.count(false)];
    int kept = 0;
    for(int row=0; row<this.size(); row++) {
      if(!BooleanSeries.isFalse(anyNull.getBoolean(row))) {
        continue;
      }
      keep[kept] = row;
      kept++;
    }

    return this.project(Arrays.copyOf(keep, kept));
  }

  /**
   * Returns a copy of the DataFrame that only contains the series without any {@code null}
   * value. Series for which {@code hasNull()} is true are left out.
   *
   * @return DataFrame copy without null series
   */
  public DataFrame dropNullColumns() {
    final DataFrame result = new DataFrame(this);
    result.series.clear();

    for(Map.Entry<String, Series> entry : this.getSeries().entrySet()) {
      final Series candidate = entry.getValue();
      if(candidate.hasNull()) {
        continue;
      }
      result.addSeries(entry.getKey(), candidate);
    }

    return result;
  }

  /**
   * Returns a copy of the DataFrame with series {@code seriesName} replacing {@code null}
   * values with its native default value.
   *
   * @param seriesName name of the series whose null values are filled
   * @throws IllegalArgumentException if the series does not exist
   * @return DataFrame copy with the filled series
   */
  public DataFrame fillNull(String seriesName) {
    final DataFrame copy = new DataFrame(this);
    final Series filled = assertSeriesExists(seriesName).fillNull();
    return copy.addSeries(seriesName, filled);
  }

  /* **************************************************************************
   * Joins across data frames
   ***************************************************************************/

  /**
   * Performs an inner join of this DataFrame (left) with {@code other} (right) on the index
   * series of both DataFrames.
   *
   * @param other right DataFrame
   * @throws IllegalArgumentException if either DataFrame does not have a valid index
   * @return joined DataFrame
   */
  public DataFrame joinInner(DataFrame other) {
    assertIndex(this, other);
    final String leftIndex = this.getIndexName();
    final String rightIndex = other.getIndexName();
    return this.joinInner(other, leftIndex, rightIndex);
  }

  /**
   * Performs an inner join of this DataFrame (left) with {@code other} (right) on a series
   * that carries the same name in both DataFrames.
   *
   * @param other right DataFrame
   * @param onSeries name of the join series in both DataFrames
   * @return joined DataFrame
   */
  public DataFrame joinInner(DataFrame other, String onSeries) {
    // identical series name on both sides
    final String onSeriesLeft = onSeries;
    final String onSeriesRight = onSeries;
    return this.joinInner(other, onSeriesLeft, onSeriesRight);
  }

  /**
   * Performs an inner join of this DataFrame (left) with {@code other} (right). The join
   * pairs are computed between series {@code onSeriesLeft} of this DataFrame and series
   * {@code onSeriesRight} of {@code other} using {@code Series.JoinType.INNER}.
   *
   * @param other right DataFrame
   * @param onSeriesLeft name of the join series in this DataFrame
   * @param onSeriesRight name of the join series in {@code other}
   * @return joined DataFrame
   */
  public DataFrame joinInner(DataFrame other, String onSeriesLeft, String onSeriesRight) {
    final Series leftKeys = this.get(onSeriesLeft);
    final Series rightKeys = other.get(onSeriesRight);
    final List<Series.JoinPair> pairs = leftKeys.join(rightKeys, Series.JoinType.INNER);
    return DataFrame.join(this, other, pairs, onSeriesLeft, onSeriesRight);
  }

  /**
   * Performs a left join of this DataFrame (left) with {@code other} (right) on the index
   * series of both DataFrames.
   *
   * @param other right DataFrame
   * @throws IllegalArgumentException if either DataFrame does not have a valid index
   * @return joined DataFrame
   */
  public DataFrame joinLeft(DataFrame other) {
    assertIndex(this, other);
    final String leftIndex = this.getIndexName();
    final String rightIndex = other.getIndexName();
    return this.joinLeft(other, leftIndex, rightIndex);
  }

  /**
   * Performs a left join of this DataFrame (left) with {@code other} (right) on a series
   * that carries the same name in both DataFrames.
   *
   * @param other right DataFrame
   * @param onSeries name of the join series in both DataFrames
   * @return joined DataFrame
   */
  public DataFrame joinLeft(DataFrame other, String onSeries) {
    // identical series name on both sides
    final String onSeriesLeft = onSeries;
    final String onSeriesRight = onSeries;
    return this.joinLeft(other, onSeriesLeft, onSeriesRight);
  }

  /**
   * Performs a left join of this DataFrame (left) with {@code other} (right). The join
   * pairs are computed between series {@code onSeriesLeft} of this DataFrame and series
   * {@code onSeriesRight} of {@code other} using {@code Series.JoinType.LEFT}.
   *
   * @param other right DataFrame
   * @param onSeriesLeft name of the join series in this DataFrame
   * @param onSeriesRight name of the join series in {@code other}
   * @return joined DataFrame
   */
  public DataFrame joinLeft(DataFrame other, String onSeriesLeft, String onSeriesRight) {
    final Series leftKeys = this.get(onSeriesLeft);
    final Series rightKeys = other.get(onSeriesRight);
    final List<Series.JoinPair> pairs = leftKeys.join(rightKeys, Series.JoinType.LEFT);
    return DataFrame.join(this, other, pairs, onSeriesLeft, onSeriesRight);
  }

  /**
   * Performs a right join of this DataFrame (left) with {@code other} (right) on the index
   * series of both DataFrames.
   *
   * @param other right DataFrame
   * @throws IllegalArgumentException if either DataFrame does not have a valid index
   * @return joined DataFrame
   */
  public DataFrame joinRight(DataFrame other) {
    assertIndex(this, other);
    final String leftIndex = this.getIndexName();
    final String rightIndex = other.getIndexName();
    return this.joinRight(other, leftIndex, rightIndex);
  }

  /**
   * Performs a right join of this DataFrame (left) with {@code other} (right) on a series
   * that carries the same name in both DataFrames.
   *
   * @param other right DataFrame
   * @param onSeries name of the join series in both DataFrames
   * @return joined DataFrame
   */
  public DataFrame joinRight(DataFrame other, String onSeries) {
    // identical series name on both sides
    final String onSeriesLeft = onSeries;
    final String onSeriesRight = onSeries;
    return this.joinRight(other, onSeriesLeft, onSeriesRight);
  }

  /**
   * Performs a right join of this DataFrame (left) with {@code other} (right). The join
   * pairs are computed between series {@code onSeriesLeft} of this DataFrame and series
   * {@code onSeriesRight} of {@code other} using {@code Series.JoinType.RIGHT}.
   *
   * @param other right DataFrame
   * @param onSeriesLeft name of the join series in this DataFrame
   * @param onSeriesRight name of the join series in {@code other}
   * @return joined DataFrame
   */
  public DataFrame joinRight(DataFrame other, String onSeriesLeft, String onSeriesRight) {
    final Series leftKeys = this.get(onSeriesLeft);
    final Series rightKeys = other.get(onSeriesRight);
    final List<Series.JoinPair> pairs = leftKeys.join(rightKeys, Series.JoinType.RIGHT);
    return DataFrame.join(this, other, pairs, onSeriesLeft, onSeriesRight);
  }

  /**
   * Performs an outer join of this DataFrame (left) with {@code other} (right) on the index
   * series of both DataFrames.
   *
   * @param other right DataFrame
   * @throws IllegalArgumentException if either DataFrame does not have a valid index
   * @return joined DataFrame
   */
  public DataFrame joinOuter(DataFrame other) {
    assertIndex(this, other);
    final String leftIndex = this.getIndexName();
    final String rightIndex = other.getIndexName();
    return this.joinOuter(other, leftIndex, rightIndex);
  }

  /**
   * Performs an outer join of this DataFrame (left) with {@code other} (right) on a series
   * that carries the same name in both DataFrames.
   *
   * @param other right DataFrame
   * @param onSeries name of the join series in both DataFrames
   * @return joined DataFrame
   */
  public DataFrame joinOuter(DataFrame other, String onSeries) {
    // identical series name on both sides
    final String onSeriesLeft = onSeries;
    final String onSeriesRight = onSeries;
    return this.joinOuter(other, onSeriesLeft, onSeriesRight);
  }

  /**
   * Performs an outer join of this DataFrame (left) with {@code other} (right). The join
   * pairs are computed between series {@code onSeriesLeft} of this DataFrame and series
   * {@code onSeriesRight} of {@code other} using {@code Series.JoinType.OUTER}.
   *
   * @param other right DataFrame
   * @param onSeriesLeft name of the join series in this DataFrame
   * @param onSeriesRight name of the join series in {@code other}
   * @return joined DataFrame
   */
  public DataFrame joinOuter(DataFrame other, String onSeriesLeft, String onSeriesRight) {
    final Series leftKeys = this.get(onSeriesLeft);
    final Series rightKeys = other.get(onSeriesRight);
    final List<Series.JoinPair> pairs = leftKeys.join(rightKeys, Series.JoinType.OUTER);
    return DataFrame.join(this, other, pairs, onSeriesLeft, onSeriesRight);
  }

  /**
   * Assembles the joined DataFrame from pre-computed join pairs. Both inputs are projected
   * with the row indices given by {@code pairs}, then the series of the right and the left
   * projection are added to a fresh DataFrame (right first, then left). A series keeps its
   * name if the name does not occur on the other side or if it is the join series of its own
   * side; otherwise {@code COLUMN_JOIN_RIGHT} or {@code COLUMN_JOIN_LEFT} is appended to the
   * name. Finally the index is set to {@code onSeriesLeft}.
   *
   * <br/><b>NOTE(review):</b> if both join series share the same name, that name is added
   * twice (right projection first, left projection last) - the outcome depends on how
   * {@code addSeries} treats an existing name.
   *
   * @param left left DataFrame
   * @param right right DataFrame
   * @param pairs join pairs holding the row indices into left and right
   * @param onSeriesLeft name of the join series in {@code left}
   * @param onSeriesRight name of the join series in {@code right}
   * @return joined DataFrame
   */
  private static DataFrame join(DataFrame left, DataFrame right, List<Series.JoinPair> pairs, String onSeriesLeft, String onSeriesRight) {
    // split the pairs into one projection index per side
    final int pairCount = pairs.size();
    final int[] fromIndexLeft = new int[pairCount];
    final int[] fromIndexRight = new int[pairCount];
    int row = 0;
    for(Series.JoinPair pair : pairs) {
      fromIndexLeft[row] = pair.left;
      fromIndexRight[row] = pair.right;
      row++;
    }

    final DataFrame leftData = left.project(fromIndexLeft);
    final DataFrame rightData = right.project(fromIndexRight);

    final Set<String> seriesLeft = left.getSeriesNames();
    final Set<String> seriesRight = right.getSeriesNames();

    final DataFrame joined = new DataFrame();

    for(String name : seriesRight) {
      final boolean keepName = !seriesLeft.contains(name) || name.equals(onSeriesRight);
      final String targetName = keepName ? name : name + COLUMN_JOIN_RIGHT;
      joined.addSeries(targetName, rightData.get(name));
    }

    for(String name : seriesLeft) {
      final boolean keepName = !seriesRight.contains(name) || name.equals(onSeriesLeft);
      final String targetName = keepName ? name : name + COLUMN_JOIN_LEFT;
      joined.addSeries(targetName, leftData.get(name));
    }

    joined.setIndex(onSeriesLeft);

    return joined;
  }

  /**
   * Returns a copy of the DataFrame with data from {@code others} appended at the end. Series
   * are matched by name and built with the builder of the original (this) DataFrame's series.
   * If one of {@code others} does not contain a series with a matching name, a sequence of
   * {@code nulls} of that DataFrame's size is appended instead. Series in {@code others} that
   * are not matched by name are discarded.
   *
   * @param others DataFrames to append in sequence
   * @return copy of the DataFrame with appended data
   */
  public DataFrame append(DataFrame... others) {
    final DataFrame result = new DataFrame(this);
    result.series.clear();

    for(String name : this.getSeriesNames()) {
      final Series own = this.get(name);
      final Series.Builder builder = own.getBuilder();
      builder.addSeries(own);

      for(DataFrame other : others) {
        if(!other.contains(name)) {
          // no matching series: pad with nulls to keep all series aligned
          builder.addSeries(BooleanSeries.nulls(other.size()));
        } else {
          builder.addSeries(other.get(name));
        }
      }

      result.addSeries(name, builder.build());
    }

    return result;
  }

  /**
   * Returns a tabular text representation of all series, wrapped in {@code DataFrame{...}}
   * and using {@code DEFAULT_MAX_COLUMN_WIDTH} as maximum column width.
   *
   * @return string representation of the DataFrame
   */
  @Override
  public String toString() {
    final String[] names = this.getSeriesNames().toArray(new String[0]);
    final String table = this.toString(DEFAULT_MAX_COLUMN_WIDTH, names);
    return "DataFrame{\n" + table + "}";
  }

  /**
   * Returns a tabular text representation of the series referenced by {@code seriesNames}.
   * The first line holds the (truncated) series names, followed by one line per row. Each
   * column is as wide as its widest truncated entry; columns are separated by two spaces.
   * {@code STRING} series are left-aligned, {@code DOUBLE}, {@code LONG} and {@code BOOLEAN}
   * series are right-aligned.
   *
   * @param maxColumnWidth maximum width of a column; longer entries are truncated
   * @param seriesNames names of the series to print, in column order
   * @throws IllegalArgumentException if a series does not exist or has an unknown type
   * @return table as string
   */
  public String toString(int maxColumnWidth, String... seriesNames) {
    final int rowCount = this.size();
    final int columnCount = seriesNames.length;

    // first pass: render all cells and determine the width of each column
    final String[][] cells = new String[rowCount][columnCount];
    final String[] headers = new String[columnCount];
    final int[] width = new int[columnCount];
    for(int col=0; col<columnCount; col++) {
      final Series s = assertSeriesExists(seriesNames[col]);

      headers[col] = truncateToString(seriesNames[col], maxColumnWidth);
      int widest = headers[col].length();
      for(int row=0; row<rowCount; row++) {
        final String cell = truncateToString(s.toString(row), maxColumnWidth);
        cells[row][col] = cell;
        widest = Math.max(cell.length(), widest);
      }
      width[col] = widest;
    }

    final StringBuilder sb = new StringBuilder();

    // header
    for(int col=0; col<columnCount; col++) {
      sb.append(String.format("%" + width[col] + "s", headers[col])).append("  ");
    }
    sb.append("\n");

    // values
    for(int row=0; row<rowCount; row++) {
      for(int col=0; col<columnCount; col++) {
        final Series s = this.get(seriesNames[col]);
        final String pattern;
        switch(s.type()) {
          case DOUBLE:
          case LONG:
          case BOOLEAN:
            // right-aligned
            pattern = "%" + width[col] + "s";
            break;
          case STRING:
            // left-aligned
            pattern = "%-" + width[col] + "s";
            break;
          default:
            throw new IllegalArgumentException(String.format("Unknown series type '%s'", s.type()));
        }
        sb.append(String.format(pattern, cells[row][col])).append("  ");
      }
      sb.append("\n");
    }

    return sb.toString();
  }

  /**
   * Shortens {@code value} to at most {@code maxWidth} characters. Values that already fit
   * are returned unchanged. Longer values are cut and terminated with {@code "..."} so that
   * the result is exactly {@code maxWidth} characters long. If {@code maxWidth} is smaller
   * than the length of the marker (3), the value is cut hard without a marker; a
   * non-positive {@code maxWidth} yields the empty string.
   *
   * @param value string to truncate
   * @param maxWidth maximum length of the returned string
   * @return {@code value}, truncated if necessary
   */
  static String truncateToString(String value, int maxWidth) {
    if(value.length() <= maxWidth) {
      return value;
    }

    // too narrow for the "..." marker: the original computed a negative end index here
    // and failed with a StringIndexOutOfBoundsException
    if(maxWidth < 3) {
      return value.substring(0, Math.max(maxWidth, 0));
    }

    return value.substring(0, maxWidth - 3) + "...";
  }

  /**
   * Compares this DataFrame with {@code o}. Two DataFrames are equal if they are of the same
   * class and their series maps are equal.
   *
   * @param o object to compare with
   * @return {@code true} if both DataFrames hold equal series, {@code false} otherwise
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    final DataFrame that = (DataFrame) o;
    return Objects.equals(this.series, that.series);
  }

  /**
   * Returns the hash code of the series map, or {@code 0} if there is none.
   *
   * @return hash code consistent with {@code equals}
   */
  @Override
  public int hashCode() {
    return Objects.hashCode(series);
  }

  /**
   * Resolves series names to the series of this DataFrame, preserving the order of
   * {@code names}.
   *
   * @param names names of the series to look up
   * @throws IllegalArgumentException if a series does not exist
   * @return series in the order of {@code names}
   */
  Series[] names2series(String... names) {
    final Series[] resolved = new Series[names.length];
    int position = 0;
    for(String name : names) {
      resolved[position] = assertSeriesExists(name);
      position++;
    }
    return resolved;
  }

  /**
   * Returns the series registered under {@code name} or fails if there is none.
   *
   * @param name series name
   * @throws IllegalArgumentException if the DataFrame does not contain a series with that name
   * @return series referenced by {@code name}
   */
  Series assertSeriesExists(String name) {
    if(series.containsKey(name)) {
      return series.get(name);
    }
    throw new IllegalArgumentException(String.format("Unknown series '%s'", name));
  }

  /**
   * Verifies that {@code s} has as many entries as this DataFrame has rows.
   *
   * @param s series to check
   * @throws IllegalArgumentException if the sizes differ
   */
  void assertSameLength(Series s) {
    final boolean sameSize = this.size() == s.size();
    if(!sameSize) {
      throw new IllegalArgumentException("Series size must be equals to DataFrame size");
    }
  }

  /**
   * Verifies that all given series have the same size by comparing each series with its
   * predecessor. Zero or one series always pass.
   *
   * @param series series to check
   * @throws IllegalArgumentException if two neighboring series differ in size
   */
  static void assertSameLength(Series... series) {
    for(int i=1; i<series.length; i++) {
      final Series previous = series[i-1];
      final Series current = series[i];
      if(previous.size() != current.size()) {
        throw new IllegalArgumentException("Series size must be equals to DataFrame size");
      }
    }
  }

  /**
   * Verifies that every given DataFrame reports an index via {@code hasIndex()}.
   *
   * @param dataframes DataFrames to check
   * @throws IllegalArgumentException if any DataFrame does not have an index
   */
  static void assertIndex(DataFrame... dataframes) {
    for(DataFrame candidate : dataframes) {
      if(candidate.hasIndex()) {
        continue;
      }
      throw new IllegalArgumentException("DataFrames must have a valid index");
    }
  }

  /**
   * Collects the names of all series of this DataFrame that occur in {@code doubleExpression}.
   * The expression is scanned with {@code SERIES_NAME_PATTERN}; matches that are not the name
   * of a contained series are ignored.
   *
   * @param doubleExpression expression to scan
   * @return names of the series referenced by the expression
   */
  Set<String> extractSeriesNames(String doubleExpression) {
    final Set<String> referenced = new HashSet<>();

    final Matcher matcher = SERIES_NAME_PATTERN.matcher(doubleExpression);
    while(matcher.find()) {
      final String candidate = matcher.group();
      if(this.series.containsKey(candidate)) {
        referenced.add(candidate);
      }
    }

    return referenced;
  }

  /* **************************************************************************
   * DataFrame parsers
   ***************************************************************************/

  /**
   * Reads in a CSV structured stream and returns it as a DataFrame. All values are first
   * collected as strings; the native type of each series is then inferred from its values
   * via {@code inferType()} and the series is converted accordingly.
   * <br/><b>NOTE:</b> Expects the first line to contain
   * column headers. The column headers are transformed into series names by replacing non-word
   * character sequences with underscores ({@code "_"}). Leading digits in series names are also
   * escaped with a leading underscore.
   * <br/><b>NOTE:</b> If the parser does not yield any record, an empty DataFrame without
   * series is returned.
   * <br/><b>NOTE:</b> The reader is not closed by this method.
   *
   * @param in input reader
   * @return CSV as DataFrame
   * @throws IOException if a read error is encountered
   * @throws IllegalArgumentException if the column headers cannot be transformed into valid
   * series names, or if two headers are transformed into the same series name
   */
  public static DataFrame fromCsv(Reader in) throws IOException {
    Iterator<CSVRecord> it = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(in).iterator();
    if(!it.hasNext())
      return new DataFrame();

    CSVRecord first = it.next();
    Set<String> headers = first.toMap().keySet();

    // compiled once up front instead of once per header
    final Pattern nonWordSequence = Pattern.compile("\\W+");
    final Pattern leadingDigit = Pattern.compile("\\A[0-9]");

    // transform column headers into series names
    Map<String, String> header2name = new HashMap<>();
    Set<String> usedNames = new HashSet<>();
    for(String h : headers) {
      // replace non-word character sequences
      String name = nonWordSequence.matcher(h).replaceAll("_");

      // underscore escape leading number
      if(leadingDigit.matcher(name).find())
        name = "_" + name;

      if(!SERIES_NAME_PATTERN.matcher(name).matches()) {
        throw new IllegalArgumentException(String.format("Series name must match pattern '%s'", SERIES_NAME_PATTERN));
      }

      // distinct headers may collapse into the same name (e.g. "a b" and "a-b"); without
      // this check one column would silently replace the other
      if(!usedNames.add(name)) {
        throw new IllegalArgumentException(String.format("Header '%s' maps to series name '%s' which is already in use", h, name));
      }
      header2name.put(h, name);
    }

    // read first line and initialize builders
    Map<String, StringSeries.Builder> builders = new HashMap<>();
    for(String h : headers) {
      StringSeries.Builder builder = StringSeries.builder();
      builder.addValues(first.get(h));
      builders.put(h, builder);
    }

    // read remaining lines
    while(it.hasNext()) {
      CSVRecord record = it.next();
      for(String h : headers) {
        String value = record.get(h);
        builders.get(h).addValues(value);
      }
    }

    // construct dataframe and detect native data types
    DataFrame df = new DataFrame();
    for(Map.Entry<String, StringSeries.Builder> e : builders.entrySet()) {
      StringSeries s = e.getValue().build();
      Series conv = s.get(s.inferType());
      String name = header2name.get(e.getKey());
      df.addSeries(name, conv);
    }

    return df;
  }

  /**
   * Reads in a Pinot ResultSetGroup and returns it as a DataFrame. The group must contain
   * exactly one result set. Its shape decides how it is converted:
   * an empty result yields a DataFrame without series, an aggregation result yields a single
   * series with one value, a group-by result yields the value series plus one series per
   * group key, and a selection result yields one series per column.
   *
   * <br/><b>NOTE:</b> cannot parse a query result with multiple group aggregations
   *
   * @param resultSetGroup pinot query result
   * @return Pinot query result as DataFrame
   * @throws IllegalArgumentException if the result cannot be parsed
   * @throws IllegalStateException if the shape of the result cannot be determined
   */
  public static DataFrame fromPinotResult(ResultSetGroup resultSetGroup) {
    final int resultSetCount = resultSetGroup.getResultSetCount();
    if (resultSetCount <= 0) {
      throw new IllegalArgumentException("Query did not return any results");
    }
    if (resultSetCount > 1) {
      throw new IllegalArgumentException("Query returned multiple results");
    }

    final ResultSet resultSet = resultSetGroup.getResultSet(0);
    final DataFrame df = new DataFrame();

    final int columnCount = resultSet.getColumnCount();
    final boolean singleColumn = columnCount == 1;

    // TODO conditions not necessarily safe
    if(singleColumn && resultSet.getRowCount() == 0) {
      // empty result
      return df;
    }

    if(singleColumn && resultSet.getRowCount() == 1 && resultSet.getGroupKeyLength() == 0) {
      // aggregation result
      final String function = resultSet.getColumnName(0);
      final String[] single = { resultSet.getString(0, 0) };
      df.addSeries(function, DataFrame.toSeries(single));
      return df;
    }

    if(singleColumn && resultSet.getGroupKeyLength() > 0) {
      // groupby result: value series first, then one series per group key
      df.addSeries(resultSet.getColumnName(0), makeGroupByValueSeries(resultSet));
      for(int key=0; key<resultSet.getGroupKeyLength(); key++) {
        df.addSeries(resultSet.getGroupKeyColumnName(key), makeGroupByGroupSeries(resultSet, key));
      }
      return df;
    }

    if(columnCount >= 1 && resultSet.getGroupKeyLength() == 0) {
      // selection result
      for (int col = 0; col < columnCount; col++) {
        df.addSeries(resultSet.getColumnName(col), makeSelectionSeries(resultSet, col));
      }
      return df;
    }

    // defensive
    throw new IllegalStateException("Could not determine DataFrame shape from output");
  }

  /**
   * Builds a series from column {@code colIndex} of a selection result. The values are read
   * as strings row by row and converted with {@code DataFrame.toSeries}.
   *
   * @param resultSet pinot result set
   * @param colIndex index of the column to read
   * @return series with the column values, or an empty StringSeries if there are no rows
   */
  private static Series makeSelectionSeries(ResultSet resultSet, int colIndex) {
    final int rowCount = resultSet.getRowCount();
    if(rowCount <= 0) {
      return StringSeries.empty();
    }

    final String[] column = new String[rowCount];
    int row = 0;
    while(row < rowCount) {
      column[row] = resultSet.getString(row, colIndex);
      row++;
    }

    return DataFrame.toSeries(column);
  }

  /**
   * Builds the value series of a group-by result. The values of column {@code 0} are read
   * as strings row by row and converted with {@code DataFrame.toSeries}.
   *
   * @param resultSet pinot result set
   * @return series with the values, or an empty StringSeries if there are no rows
   */
  private static Series makeGroupByValueSeries(ResultSet resultSet) {
    final int rowCount = resultSet.getRowCount();
    if(rowCount <= 0) {
      return StringSeries.empty();
    }

    final String[] aggregates = new String[rowCount];
    int row = 0;
    while(row < rowCount) {
      aggregates[row] = resultSet.getString(row, 0);
      row++;
    }

    return DataFrame.toSeries(aggregates);
  }

  /**
   * Builds the series of group key {@code keyIndex} of a group-by result. The group key
   * strings are read row by row and converted with {@code DataFrame.toSeries}.
   *
   * @param resultSet pinot result set
   * @param keyIndex index of the group key to read
   * @return series with the group keys, or an empty StringSeries if there are no rows
   */
  private static Series makeGroupByGroupSeries(ResultSet resultSet, int keyIndex) {
    final int rowCount = resultSet.getRowCount();
    if(rowCount <= 0) {
      return StringSeries.empty();
    }

    final String[] groupKeys = new String[rowCount];
    int row = 0;
    while(row < rowCount) {
      groupKeys[row] = resultSet.getGroupKeyString(row, keyIndex);
      row++;
    }

    return DataFrame.toSeries(groupKeys);
  }
}