package com.github.lwhite1.tablesaw.integration;
import com.github.lwhite1.tablesaw.api.Table;
import com.github.lwhite1.tablesaw.api.ColumnType;
import com.github.lwhite1.tablesaw.io.csv.CsvReader;
import com.github.lwhite1.tablesaw.store.StorageManager;
import com.google.common.base.Stopwatch;
import java.util.concurrent.TimeUnit;
/**
*
*/
public class AirlineDelays {
private static Table flights2008;
public static void main(String[] args) throws Exception {
new AirlineDelays();
Stopwatch stopwatch = Stopwatch.createStarted();
flights2008.sortAscendingOn("Origin", "UniqueCarrier");
System.out.println("Sorting " + stopwatch.elapsed(TimeUnit.SECONDS));
}
private AirlineDelays() throws Exception {
Stopwatch stopwatch = Stopwatch.createStarted();
System.out.println("loading");
flights2008 = CsvReader.read(reduced_set, "bigdata/2015.csv");
System.out.println(String.format("loaded %d records in %d seconds",
flights2008.rowCount(),
stopwatch.elapsed(TimeUnit.SECONDS)));
out(flights2008.shape());
out(flights2008.columnNames().toString());
flights2008.first(10).print();
StorageManager.saveTable("bigdata", flights2008);
stopwatch.reset().start();
}
private static void out(Object obj) {
System.out.println(String.valueOf(obj));
}
// The full set of all available columns in tbe dataset
static ColumnType[] heading = {
ColumnType.INTEGER, // year
ColumnType.INTEGER, // month
ColumnType.INTEGER, // day
ColumnType.CATEGORY, // dow
ColumnType.LOCAL_TIME, // DepTime
ColumnType.LOCAL_TIME, // CRSDepTime
ColumnType.LOCAL_TIME, // ArrTime
ColumnType.LOCAL_TIME, // CRSArrTime
ColumnType.CATEGORY, // Carrier
ColumnType.CATEGORY, // FlightNum
ColumnType.CATEGORY, // TailNum
ColumnType.INTEGER, // ActualElapsedTime
ColumnType.INTEGER, // CRSElapsedTime
ColumnType.INTEGER, // AirTime
ColumnType.INTEGER, // ArrDelay
ColumnType.INTEGER, // DepDelay
ColumnType.CATEGORY, // Origin
ColumnType.CATEGORY, // Dest
ColumnType.INTEGER, // Distance
ColumnType.INTEGER, // TaxiIn
ColumnType.INTEGER, // TaxiOut
ColumnType.BOOLEAN, // Cancelled
ColumnType.CATEGORY, // CancellationCode
ColumnType.BOOLEAN, // Diverted
ColumnType.FLOAT, // CarrierDelay
ColumnType.FLOAT, // WeatherDelay
ColumnType.FLOAT, // NASDelay
ColumnType.FLOAT, // SecurityDelay
ColumnType.FLOAT // LateAircraftDelay
};
// A filtered set of columns
private static ColumnType[] reduced_set = {
ColumnType.SKIP, // year
ColumnType.INTEGER, // month
ColumnType.INTEGER, // day
ColumnType.CATEGORY, // dow
ColumnType.SKIP, // DepTime
ColumnType.LOCAL_TIME, // CRSDepTime
ColumnType.SKIP, // ArrTime
ColumnType.SKIP, // CRSArrTime
ColumnType.CATEGORY, // Carrier
ColumnType.SKIP, // FlightNum
ColumnType.SKIP, // TailNum
ColumnType.SKIP, // ActualElapsedTime
ColumnType.SKIP, // CRSElapsedTime
ColumnType.SKIP, // AirTime
ColumnType.SKIP, // ArrDelay
ColumnType.INTEGER, // DepDelay
ColumnType.CATEGORY, // Origin
ColumnType.CATEGORY, // Dest
ColumnType.INTEGER, // Distance
ColumnType.SKIP, // TaxiIn
ColumnType.SKIP, // TaxiOut
ColumnType.BOOLEAN, // Cancelled
ColumnType.SKIP, // CancellationCode
ColumnType.SKIP, // Diverted
ColumnType.SKIP, // CarrierDelay
ColumnType.SKIP, // WeatherDelay
ColumnType.SKIP, // NASDelay
ColumnType.SKIP, // SecurityDelay
ColumnType.SKIP // LateAircraftDelay
};
}