/* * Copyright © 2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.internal.app.runtime.batch.dataset.output; import co.cask.cdap.api.dataset.lib.FileSet; import co.cask.cdap.common.io.Locations; import co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms; import co.cask.cdap.internal.app.runtime.BasicArguments; import co.cask.cdap.internal.app.runtime.batch.MapReduceRunnerTestBase; import co.cask.cdap.internal.app.runtime.batch.dataset.input.AppWithMapReduceUsingMultipleInputs; import com.google.common.base.Charsets; import com.google.common.collect.ImmutableList; import com.google.common.io.CharStreams; import org.apache.twill.filesystem.Location; import org.junit.Assert; import org.junit.Test; import java.io.IOException; import java.io.PrintWriter; import java.util.List; /** * Test case that tests ability to write to multiple outputs of a MapReduce job. */ public class MapReduceWithMultipleOutputsTest extends MapReduceRunnerTestBase { @Test public void testMultipleOutputs() throws Exception { ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleOutputs.class); final FileSet fileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.PURCHASES); Location inputFile = fileSet.getBaseLocation().append("inputFile"); inputFile.createNew(); PrintWriter writer = new PrintWriter(inputFile.getOutputStream()); // the PURCHASES dataset consists of purchase records in the format: <customerId> <spend> writer.println("1 20"); writer.println("1 65"); writer.println("1 30"); writer.println("2 5"); writer.println("2 53"); writer.println("2 45"); writer.println("3 101"); writer.close(); // Using multiple outputs, this MapReduce send the records to a different path of the same dataset, depending // on the value in the data (large spend amounts will go to one file, while small will go to another file. runProgram(app, AppWithMapReduceUsingMultipleOutputs.SeparatePurchases.class, new BasicArguments()); FileSet outputFileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.SEPARATED_PURCHASES); Assert.assertEquals(ImmutableList.of("1 20", "1 30", "2 5", "2 45"), readFromOutput(outputFileSet, "small_purchases")); Assert.assertEquals(ImmutableList.of("1 65", "2 53", "3 101"), readFromOutput(outputFileSet, "large_purchases")); } private List<String> readFromOutput(FileSet fileSet, String relativePath) throws IOException { // small amount of data, so expect all data from just 1 file Location location = fileSet.getLocation(relativePath).append("part-m-00000"); return CharStreams.readLines(CharStreams.newReaderSupplier(Locations.newInputSupplier(location), Charsets.UTF_8)); } @Test public void testAddingMultipleOutputsWithSameAlias() throws Exception { final ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleOutputs.class); // will fail because it configured two outputs with the same alias Assert.assertFalse(runProgram(app, AppWithMapReduceUsingMultipleOutputs.InvalidMapReduce.class, new BasicArguments())); } }