/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinotdruidbenchmark;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
* Separate data set into multiple chunks according to <code>l_shipdate</code>.
*/
public final class DataSeparator {
private DataSeparator() {
}
public static void main(String[] args)
throws Exception {
if (args.length != 2) {
System.err.println("2 arguments required: INPUT_FILE_PATH, OUTPUT_DIR.");
return;
}
File inputFile = new File(args[0]);
File outputDir = new File(args[1]);
if (!outputDir.exists()) {
if (!outputDir.mkdirs()) {
throw new RuntimeException("Failed to create output directory: " + outputDir);
}
}
BufferedReader reader = new BufferedReader(new FileReader(inputFile));
Map<String, BufferedWriter> writerMap = new HashMap<>();
String line;
while ((line = reader.readLine()) != null) {
String shipDate = line.split("\\|")[10];
BufferedWriter writer = writerMap.get(shipDate);
if (writer == null) {
writer = new BufferedWriter(new FileWriter(new File(outputDir, shipDate + ".csv")));
writerMap.put(shipDate, writer);
}
writer.write(line);
writer.newLine();
}
for (BufferedWriter writer : writerMap.values()) {
writer.close();
}
}
}