/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.api.avro.testjar; // ================================================================================================ // This file defines the classes for the AvroExternalJarProgramITCase. // The program is exported into src/test/resources/AvroTestProgram.jar. // // THIS FILE MUST STAY FULLY COMMENTED SUCH THAT THE HERE DEFINED CLASSES ARE NOT COMPILED // AND ADDED TO THE test-classes DIRECTORY. OTHERWISE, THE EXTERNAL CLASS LOADING WILL // NOT BE COVERED BY THIS TEST. // ================================================================================================ import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Random; import org.apache.avro.file.DataFileWriter; import org.apache.avro.io.DatumWriter; import org.apache.avro.reflect.ReflectData; import org.apache.avro.reflect.ReflectDatumWriter; import org.apache.flink.api.common.functions.RichMapFunction; import org.apache.flink.api.common.functions.RichReduceFunction; import org.apache.flink.api.java.io.DiscardingOutputFormat; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; import org.apache.flink.api.java.io.AvroInputFormat; import org.apache.flink.core.fs.Path; public class AvroExternalJarProgram { public static final class Color { private String name; private double saturation; public Color() { name = ""; saturation = 1.0; } public Color(String name, double saturation) { this.name = name; this.saturation = saturation; } public String getName() { return name; } public void setName(String name) { this.name = name; } public double getSaturation() { return saturation; } public void setSaturation(double saturation) { this.saturation = saturation; } @Override public String toString() { return name + '(' + saturation + ')'; } } public static final class MyUser { private String name; private List<Color> colors; public MyUser() { name = "unknown"; colors = new ArrayList<Color>(); } public MyUser(String name, List<Color> colors) { this.name = name; this.colors = colors; } public String getName() { return name; } public List<Color> getColors() { return colors; } public void setName(String name) { this.name = name; } public void setColors(List<Color> colors) { this.colors = colors; } @Override public String toString() { return name + " : " + colors; } } // -------------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------------- public static final class NameExtractor extends RichMapFunction<MyUser, Tuple2<String, MyUser>> { private static final long serialVersionUID = 1L; @Override public Tuple2<String, MyUser> map(MyUser u) { String namePrefix = u.getName().substring(0, 1); return new Tuple2<String, MyUser>(namePrefix, u); } } public static final class NameGrouper extends RichReduceFunction<Tuple2<String, MyUser>> { private static final long serialVersionUID = 1L; @Override public Tuple2<String, MyUser> reduce(Tuple2<String, MyUser> val1, Tuple2<String, MyUser> val2) { return val1; } } // -------------------------------------------------------------------------------------------- // Test Data // -------------------------------------------------------------------------------------------- public static final class Generator { private final Random rnd = new Random(2389756789345689276L); public MyUser nextUser() { return randomUser(); } private MyUser randomUser() { int numColors = rnd.nextInt(5); ArrayList<Color> colors = new ArrayList<Color>(numColors); for (int i = 0; i < numColors; i++) { colors.add(new Color(randomString(), rnd.nextDouble())); } return new MyUser(randomString(), colors); } private String randomString() { char[] c = new char[this.rnd.nextInt(20) + 5]; for (int i = 0; i < c.length; i++) { c[i] = (char) (this.rnd.nextInt(150) + 40); } return new String(c); } } public static void writeTestData(File testFile, int numRecords) throws IOException { DatumWriter<MyUser> userDatumWriter = new ReflectDatumWriter<MyUser>(MyUser.class); DataFileWriter<MyUser> dataFileWriter = new DataFileWriter<MyUser>(userDatumWriter); dataFileWriter.create(ReflectData.get().getSchema(MyUser.class), testFile); Generator generator = new Generator(); for (int i = 0; i < numRecords; i++) { MyUser user = generator.nextUser(); dataFileWriter.append(user); } dataFileWriter.close(); } // public static void main(String[] args) throws Exception { // String testDataFile = new File("src/test/resources/testdata.avro").getAbsolutePath(); // writeTestData(new File(testDataFile), 50); // } public static void main(String[] args) throws Exception { String inputPath = args[0]; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<MyUser> input = env.createInput(new AvroInputFormat<MyUser>(new Path(inputPath), MyUser.class)); DataSet<Tuple2<String, MyUser>> result = input.map(new NameExtractor()).groupBy(0).reduce(new NameGrouper()); result.output(new DiscardingOutputFormat<Tuple2<String,MyUser>>()); env.execute(); } }