package com.github.projectflink.avro;
import com.github.projectflink.avro.generated.AvroLineitem;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.AvroOutputFormat;
import org.apache.flink.configuration.Configuration;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
/**
 * Prepares the input data for the Avro tests.
 *
 * Reads TPC-H lineitem data as pipe-delimited text and writes it back out as Avro records.
 */
public class Prepare {

    /**
     * Runs the conversion job: reads text lines from {@code args[0]}, maps each line to an
     * {@link AvroLineitem} and writes the result to {@code args[1]} using {@link AvroOutputFormat}.
     *
     * @param args {@code args[0]} is the input text path, {@code args[1]} is the Avro output path
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws Exception if setting up or executing the job fails
     */
    public static void main(final String[] args) throws Exception {
        // Fail with a usage hint instead of a bare ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: Prepare <lineitem text input path> <avro output path>");
        }

        // set up the execution environment
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<String> text = env.readTextFile(args[0]);
        DataSet<AvroLineitem> avro = text.map(new AvroLineItemMapper());
        avro.write(new AvroOutputFormat<AvroLineitem>(AvroLineitem.class), args[1]);

        env.execute("Lineitem Text 2 Avro converter");
    }

    /**
     * Maps one pipe-delimited lineitem text line to an {@link AvroLineitem}.
     *
     * Every mapped line also increments the long counter named {@code "elements"}.
     */
    public static class AvroLineItemMapper extends RichMapFunction<String, AvroLineitem> {

        /** Number of leading fields of a line that {@link #map(String)} reads. */
        private static final int FIELD_COUNT = 16;

        /** Parser for the three date columns, which are formatted as {@code yyyy-MM-dd}. */
        DateFormat fs = new SimpleDateFormat("yyyy-MM-dd");

        /**
         * Delegates to the superclass; no additional setup is performed.
         *
         * @param parameters configuration handed to the function
         * @throws Exception if the superclass initialization fails
         */
        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
        }

        /**
         * Converts a single text line into an {@link AvroLineitem}.
         *
         * The line is split on {@code '|'}; fields 0-4 are parsed as longs, fields 5-7 as
         * doubles, fields 10-12 as {@code yyyy-MM-dd} dates, and the rest are passed as strings.
         *
         * @param s one pipe-delimited input line
         * @return the record built from the line's first sixteen fields
         * @throws IllegalArgumentException if the line has fewer than sixteen fields
         * @throws Exception if a numeric or date field cannot be parsed
         */
        @Override
        public AvroLineitem map(String s) throws Exception {
            getRuntimeContext().getLongCounter("elements").add(1L);

            String[] parts = s.split("\\|");
            // Report the offending line rather than failing on an array index further down.
            if (parts.length < FIELD_COUNT) {
                throw new IllegalArgumentException("Expected at least " + FIELD_COUNT
                        + " '|'-separated fields but found " + parts.length + " in line: " + s);
            }

            // NOTE(review): java.util.Date.getDate() is deprecated and returns only the day of
            // the month (1-31), so month and year are dropped from the three date columns.
            // Left unchanged because the AvroLineitem field types are not visible here --
            // confirm the intended date encoding against the Avro schema.
            return new AvroLineitem(Long.parseLong(parts[0]), // order key
                    Long.parseLong(parts[1]), // part key
                    Long.parseLong(parts[2]), // supplierKey
                    Long.parseLong(parts[3]), // lineNumber
                    Long.parseLong(parts[4]), // qty
                    Double.parseDouble(parts[5]), // ext
                    Double.parseDouble(parts[6]), // dis
                    Double.parseDouble(parts[7]), // tax
                    parts[8], // ret flag
                    parts[9], // status
                    fs.parse(parts[10]).getDate(), // ship date
                    fs.parse(parts[11]).getDate(), // commit date
                    fs.parse(parts[12]).getDate(), // receiptdate
                    parts[13], // ship instr
                    parts[14], // ship mode
                    parts[15] // comment
            );
        }
    }
}