/* * Copyright © 2015-2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.partitioned; import co.cask.cdap.api.dataset.lib.PartitionDetail; import co.cask.cdap.api.dataset.lib.PartitionFilter; import co.cask.cdap.api.dataset.lib.PartitionedFileSet; import co.cask.cdap.common.conf.Constants; import co.cask.cdap.test.ApplicationManager; import co.cask.cdap.test.DataSetManager; import co.cask.cdap.test.ProgramManager; import co.cask.cdap.test.ServiceManager; import co.cask.cdap.test.base.TestFrameworkTestBase; import co.cask.common.http.HttpRequest; import co.cask.common.http.HttpRequests; import co.cask.common.http.HttpResponse; import com.google.common.base.Function; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import org.junit.Assert; import org.junit.Test; import java.io.IOException; import java.net.URL; import java.sql.Connection; import java.sql.ResultSet; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; /** * Test that MapReduce and Worker can incrementally process partitions. */ public class PartitionConsumingTestRun extends TestFrameworkTestBase { private static final String LINE1 = "a b a"; private static final String LINE2 = "b a b"; private static final String LINE3 = "c c c"; @Test public void testMapReduceConsumer() throws Exception { testWordCountOnFileSet(new Function<ApplicationManager, ProgramManager>() { @Override public ProgramManager apply(ApplicationManager input) { return input.getMapReduceManager(AppWithPartitionConsumers.WordCountMapReduce.NAME).start(); } }); Map<String, String> tags = ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, "default", Constants.Metrics.Tag.APP, "AppWithPartitionConsumers", Constants.Metrics.Tag.MAPREDUCE, "WordCountMapReduce", Constants.Metrics.Tag.MR_TASK_TYPE, "r"); long totalIn = getMetricsManager().getTotalMetric(tags, "system.process.entries.in"); long totalOut = getMetricsManager().getTotalMetric(tags, "system.process.entries.out"); Assert.assertEquals(9, totalIn); Assert.assertEquals(10, totalOut); } @Test public void testWorkerConsumer() throws Exception { testWordCountOnFileSet(new Function<ApplicationManager, ProgramManager>() { @Override public ProgramManager apply(ApplicationManager input) { return input.getWorkerManager(AppWithPartitionConsumers.WordCountWorker.NAME).start(); } }); } private void testWordCountOnFileSet(Function<ApplicationManager, ProgramManager> runProgram) throws Exception { ApplicationManager applicationManager = deployApplication(AppWithPartitionConsumers.class); ServiceManager serviceManager = applicationManager.getServiceManager("DatasetService").start(); serviceManager.waitForStatus(true); URL serviceURL = serviceManager.getServiceURL(); // write a file to the file set using the service and run the WordCount MapReduce job on that one partition createPartition(serviceURL, LINE1, "1"); ProgramManager programManager = runProgram.apply(applicationManager); programManager.waitForFinish(5, TimeUnit.MINUTES); Assert.assertEquals(new Long(2), getCount(serviceURL, "a")); Assert.assertEquals(new Long(1), getCount(serviceURL, "b")); Assert.assertEquals(new Long(0), getCount(serviceURL, "c")); // create two additional partitions createPartition(serviceURL, LINE2, "2"); createPartition(serviceURL, LINE3, "3"); // running the program job now processes these two new partitions (LINE2 and LINE3) and updates the counts // dataset accordingly programManager = runProgram.apply(applicationManager); programManager.waitForFinish(5, TimeUnit.MINUTES); Assert.assertEquals(new Long(3), getCount(serviceURL, "a")); Assert.assertEquals(new Long(3), getCount(serviceURL, "b")); Assert.assertEquals(new Long(3), getCount(serviceURL, "c")); // running the program without adding new partitions does not affect the counts dataset programManager = runProgram.apply(applicationManager); programManager.waitForFinish(5, TimeUnit.MINUTES); Assert.assertEquals(new Long(3), getCount(serviceURL, "a")); Assert.assertEquals(new Long(3), getCount(serviceURL, "b")); Assert.assertEquals(new Long(3), getCount(serviceURL, "c")); DataSetManager<PartitionedFileSet> outputLines = getDataset("outputLines"); Set<PartitionDetail> partitions = outputLines.get().getPartitions(PartitionFilter.ALWAYS_MATCH); Assert.assertEquals(2, partitions.size()); // we only store the counts to the "outputLines" dataset List<String> expectedCounts = Lists.newArrayList("1", "1", "2", "2", "3"); List<String> outputRecords = getDataFromExplore("outputLines"); Collections.sort(outputRecords); Assert.assertEquals(expectedCounts, outputRecords); } private List<String> getDataFromExplore(String dsName) throws Exception { try (Connection connection = getQueryClient()) { ResultSet results = connection.prepareStatement("SELECT * FROM dataset_" + dsName).executeQuery(); List<String> cleanRecords = new ArrayList<>(); while (results.next()) { cleanRecords.add(results.getString(1)); } return cleanRecords; } } private void createPartition(URL serviceUrl, String body, String time) throws IOException { HttpResponse response = HttpRequests.execute(HttpRequest.put(new URL(serviceUrl, "lines?time=" + time)).withBody(body).build()); Assert.assertEquals(200, response.getResponseCode()); } private Long getCount(URL serviceUrl, String word) throws IOException { HttpResponse response = HttpRequests.execute(HttpRequest.get(new URL(serviceUrl, "counts?word=" + word)).build()); Assert.assertEquals(200, response.getResponseCode()); return Long.valueOf(response.getResponseBodyAsString()); } }