/* * Licensed to the Apache Software Foundation (ASF) under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional information regarding * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. You may obtain a * copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package org.apache.beam.sdk.io.hadoop.inputformat; import com.google.common.base.Function; import com.google.common.collect.Lists; import java.util.ArrayList; import java.util.List; import org.apache.beam.sdk.values.KV; import org.apache.hadoop.io.Text; /** * Test Utils used in {@link EmployeeInputFormat} and {@link ReuseObjectsEmployeeInputFormat} for * computing splits. */ public class TestEmployeeDataSet { public static final long NUMBER_OF_RECORDS_IN_EACH_SPLIT = 5L; public static final long NUMBER_OF_SPLITS = 3L; private static final List<KV<String, String>> data = new ArrayList<KV<String, String>>(); /** * Returns List of employee details. Employee details are available in the form of {@link KV} in * which, key indicates employee id and value indicates employee details such as name and address * separated by '_'. This is data input to {@link EmployeeInputFormat} and * {@link ReuseObjectsEmployeeInputFormat}. */ public static List<KV<String, String>> populateEmployeeData() { if (!data.isEmpty()) { return data; } data.add(KV.of("0", "Alex_US")); data.add(KV.of("1", "John_UK")); data.add(KV.of("2", "Tom_UK")); data.add(KV.of("3", "Nick_UAE")); data.add(KV.of("4", "Smith_IND")); data.add(KV.of("5", "Taylor_US")); data.add(KV.of("6", "Gray_UK")); data.add(KV.of("7", "James_UAE")); data.add(KV.of("8", "Jordan_IND")); data.add(KV.of("9", "Leena_UK")); data.add(KV.of("10", "Zara_UAE")); data.add(KV.of("11", "Talia_IND")); data.add(KV.of("12", "Rose_UK")); data.add(KV.of("13", "Kelvin_UAE")); data.add(KV.of("14", "Goerge_IND")); return data; } /** * This is a helper function used in unit tests for validating data against data read using * {@link EmployeeInputFormat} and {@link ReuseObjectsEmployeeInputFormat}. */ public static List<KV<Text, Employee>> getEmployeeData() { return Lists.transform((data.isEmpty() ? populateEmployeeData() : data), new Function<KV<String, String>, KV<Text, Employee>>() { @Override public KV<Text, Employee> apply(KV<String, String> input) { String[] empData = input.getValue().split("_"); return KV.of(new Text(input.getKey()), new Employee(empData[0], empData[1])); } }); } }