/*************************************************************************** * Copyright (C) 2011 by H-Store Project * * Brown University * * Massachusetts Institute of Technology * * Yale University * * * * http://hstore.cs.brown.edu/ * * * * Permission is hereby granted, free of charge, to any person obtaining * * a copy of this software and associated documentation files (the * * "Software"), to deal in the Software without restriction, including * * without limitation the rights to use, copy, modify, merge, publish, * * distribute, sublicense, and/or sell copies of the Software, and to * * permit persons to whom the Software is furnished to do so, subject to * * the following conditions: * * * * The above copyright notice and this permission notice shall be * * included in all copies or substantial portions of the Software. * * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR * * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * * OTHER DEALINGS IN THE SOFTWARE. 
* ***************************************************************************/
package edu.brown.benchmark.seats.util;

import java.io.File;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.apache.commons.collections15.map.ListOrderedMap;
import org.apache.log4j.Logger;

import au.com.bytecode.opencsv.CSVReader;
import edu.brown.benchmark.seats.SEATSConstants;
import edu.brown.statistics.Histogram;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.utils.ArgumentsParser;
import edu.brown.utils.FileUtil;
import edu.brown.utils.StringUtil;

/**
 * Builds flight histograms from a CSV file of flight records.
 * <p>
 * The first row of the CSV is treated as the header. The columns
 * <tt>UniqueCarrier</tt>, <tt>Origin</tt>, <tt>Dest</tt> and
 * <tt>CRSDepTime</tt> (matched case-insensitively) are read from every data
 * row and folded into three histograms: flights per airline, flights per
 * 15-minute departure bucket, and flights per (departure airport, arrival
 * airport) pair.
 */
public class GenerateHistograms {
    private static final Logger LOG = Logger.getLogger(GenerateHistograms.class);

    /** Upper-cased header names of the columns that {@link #generate(File)} reads. */
    private static final String REQUIRED_COLUMNS[] = {
        "UNIQUECARRIER",
        "ORIGIN",
        "DEST",
        "CRSDEPTIME",
    };
    // Positions in REQUIRED_COLUMNS (and in the array built by resolveColumns)
    private static final int COL_AIRLINE = 0;
    private static final int COL_DEPART_AIRPORT = 1;
    private static final int COL_ARRIVAL_AIRPORT = 2;
    private static final int COL_DEPART_TIME = 3;

    /** Number of flights for each airline code */
    final ObjectHistogram<String> flights_per_airline = new ObjectHistogram<String>();
    /** Number of flights for each "HH:MM" departure bucket */
    final ObjectHistogram<String> flights_per_time = new ObjectHistogram<String>();
    /** DepartAirport -> Histogram&lt;ArrivalAirport&gt;, sorted by departure airport code */
    final Map<String, ObjectHistogram<String>> flights_per_airport = new TreeMap<String, ObjectHistogram<String>>();

    public GenerateHistograms() {
        // Nothing...
    }

    /**
     * Read the given CSV file and populate a new set of histograms from it.
     *
     * @param input the CSV file to read; its first row must be the header
     * @return the populated histograms (empty if the file has no data rows)
     * @throws IllegalArgumentException if a data row is reached and the header
     *         does not contain one of the required columns
     * @throws NumberFormatException if a departure time is not a valid hhmm value
     * @throws Exception if the file cannot be opened or read
     */
    public static GenerateHistograms generate(File input) throws Exception {
        GenerateHistograms gh = new GenerateHistograms();
        final ListOrderedMap<String, Integer> columns_xref = new ListOrderedMap<String, Integer>();

        CSVReader reader = new CSVReader(FileUtil.getReader(input));
        try {
            // Offsets of REQUIRED_COLUMNS, resolved once when the first data row is seen
            int cols[] = null;
            String row[] = null;
            boolean first = true;
            while ((row = reader.readNext()) != null) {
                if (first) {
                    // Locale.ROOT keeps the keys stable regardless of the JVM's
                    // default locale (e.g. Turkish maps 'i' to a dotted capital)
                    for (int i = 0; i < row.length; i++) {
                        columns_xref.put(row[i].toUpperCase(Locale.ROOT), i);
                    } // FOR
                    first = false;
                    continue;
                }
                // Skip blank lines instead of failing on the column lookups below
                if (row.length == 0 || (row.length == 1 && row[0].length() == 0)) {
                    continue;
                }
                // Skip any header row that is repeated inside the data
                if (row[0].equalsIgnoreCase("Year")) {
                    continue;
                }
                if (cols == null) {
                    cols = resolveColumns(columns_xref, input);
                }

                String airline_code = row[cols[COL_AIRLINE]];
                String depart_airport_code = row[cols[COL_DEPART_AIRPORT]];
                String arrival_airport_code = row[cols[COL_ARRIVAL_AIRPORT]];
                String depart_time = row[cols[COL_DEPART_TIME]];

                // Flights Per Airline
                gh.flights_per_airline.put(airline_code);

                // Flights Per Time
                gh.flights_per_time.put(toTimeBucket(depart_time));

                // Flights Per Airport
                // DepartAirport -> Histogram<ArrivalAirport>
                ObjectHistogram<String> h = gh.flights_per_airport.get(depart_airport_code);
                if (h == null) {
                    h = new ObjectHistogram<String>();
                    gh.flights_per_airport.put(depart_airport_code, h);
                }
                h.put(arrival_airport_code);
            } // WHILE
        } finally {
            // Always release the file handle, even if a row fails to parse
            reader.close();
        }
        return (gh);
    }

    /**
     * Look up the offset of every required column in the parsed header.
     *
     * @param columns_xref upper-cased header name -> column offset
     * @param input the file being read (only used for the error message)
     * @return the column offsets, in the same order as REQUIRED_COLUMNS
     * @throws IllegalArgumentException if a required column is not in the header
     */
    private static int[] resolveColumns(Map<String, Integer> columns_xref, File input) {
        int offsets[] = new int[REQUIRED_COLUMNS.length];
        for (int i = 0; i < REQUIRED_COLUMNS.length; i++) {
            Integer offset = columns_xref.get(REQUIRED_COLUMNS[i]);
            if (offset == null) {
                throw new IllegalArgumentException(String.format("Missing required column '%s' in the header of '%s'",
                                                                 REQUIRED_COLUMNS[i], input));
            }
            offsets[i] = offset.intValue();
        } // FOR
        return (offsets);
    }

    /**
     * Convert an hhmm departure time into its "HH:MM" bucket, with the minutes
     * rounded down to a multiple of 15 (e.g. "0744" -> "07:30").
     * <p>
     * Values shorter than four characters are zero-padded first, so a time
     * written without leading zeros ("730", "5") is accepted as well.
     *
     * @param depart_time the departure time in hhmm form
     * @return the "HH:MM" bucket label
     * @throws NumberFormatException if the value is empty, negative or not numeric
     */
    private static String toTimeBucket(String depart_time) {
        String hhmm = depart_time.trim();
        if (hhmm.length() < 4) {
            int value = Integer.parseInt(hhmm);
            if (value < 0) {
                throw new NumberFormatException("Invalid departure time '" + depart_time + "'");
            }
            hhmm = String.format(Locale.ROOT, "%04d", value);
        }
        int hour = Integer.parseInt(hhmm.substring(0, 2));
        int minute = Integer.parseInt(hhmm.substring(2, 4));
        minute = (minute / 15) * 15;
        return (String.format(Locale.ROOT, "%02d:%02d", hour, minute));
    }

    /**
     * Command-line entry point.
     * The first optional parameter is the path of the input CSV file and the
     * second is the directory that the <tt>.histogram</tt> files are written to.
     *
     * @param vargs the command-line arguments, parsed by ArgumentsParser
     * @throws Exception if the input cannot be read or an output cannot be saved
     */
    public static void main(String[] vargs) throws Exception {
        ArgumentsParser args = ArgumentsParser.load(vargs);

        File csv_path = new File(args.getOptParam(0));
        File output_path = new File(args.getOptParam(1));

        GenerateHistograms gh = GenerateHistograms.generate(csv_path);

        Map<String, Object> m = new ListOrderedMap<String, Object>();
        m.put("Airport Codes", gh.flights_per_airport.size());
        m.put("Airlines", gh.flights_per_airline.getValueCount());
        m.put("Departure Times", gh.flights_per_time.getValueCount());
        LOG.info(StringUtil.formatMaps(m));

        System.err.println(StringUtil.join("\n", gh.flights_per_airport.keySet()));

        // NOTE: the per-airline histogram is collected above but is not written out
        Map<String, Histogram<?>> histograms = new HashMap<String, Histogram<?>>();
        histograms.put(SEATSConstants.HISTOGRAM_FLIGHTS_PER_DEPART_TIMES, gh.flights_per_time);
        // histograms.put(SEATSConstants.HISTOGRAM_FLIGHTS_PER_AIRLINE, gh.flights_per_airline);
        histograms.put(SEATSConstants.HISTOGRAM_FLIGHTS_PER_AIRPORT, SEATSHistogramUtil.collapseAirportFlights(gh.flights_per_airport));

        File output_dir = output_path.getAbsoluteFile();
        for (Entry<String, Histogram<?>> e : histograms.entrySet()) {
            // Let File join the path so that we don't hard-code the separator
            File output_file = new File(output_dir, e.getKey() + ".histogram");
            LOG.info(String.format("Writing out %s data to '%s' [samples=%d, values=%d]",
                                   e.getKey(), output_file, e.getValue().getSampleCount(), e.getValue().getValueCount()));
            e.getValue().save(output_file);
        } // FOR
    }
}