/* * Copyright © 2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.examples.clicksandviews; import co.cask.cdap.api.dataset.lib.PartitionDetail; import co.cask.cdap.api.dataset.lib.PartitionKey; import co.cask.cdap.api.dataset.lib.PartitionedFileSet; import co.cask.cdap.test.ApplicationManager; import co.cask.cdap.test.DataSetManager; import co.cask.cdap.test.MapReduceManager; import co.cask.cdap.test.StreamManager; import co.cask.cdap.test.TestBase; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.twill.filesystem.Location; import org.junit.Assert; import org.junit.Test; import java.io.BufferedReader; import java.io.InputStreamReader; import java.sql.Connection; import java.sql.ResultSet; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; /** * Tests that a MapReduce job can process data from two different Streams and perform a reduce-side join across * the data in the two Streams. */ public class ClicksAndViewsMapReduceTest extends TestBase { private static final Joiner TAB_JOINER = Joiner.on("\t"); // have views with id [0,5], all for the same adId private static final ImmutableList<String> VIEWS = ImmutableList.of(createView(0, 1461219010, 2157, "http://www.google.com", "lu=fQ9qHjLjFg3qi3bZiuz", "62.128.93.36"), createView(1, 1461265001, 2157, "http://www.google.co.uk", "lu=8fsdggknea@ASJHlz", "21.612.39.63"), createView(2, 1461281958, 2157, "http://www.yahoo.com", "name=Mike", "212.193.252.52"), createView(3, 1461331879, 2157, "http://www.amazon.com", "name=Matt", "1.116.135.146"), createView(4, 1461348738, 2157, "http://www.t.co", "name=Nicholas; Httponly", "89.141.94.158"), createView(5, 1461349158, 2157, "http://www.linkedin.com", "lo=Npa0jbIHGloMnx75", "69.75.87.114")); private static final ImmutableList<Integer> CLICKS = ImmutableList.of(1, 2, 5); private static final int OUTPUT_PARTITION_RUNTIME = 1461280019; @Test public void testClicksAndViews() throws Exception { ApplicationManager applicationManager = deployApplication(ClicksAndViews.class); // write each of the views to the VIEWS stream StreamManager viewsStream = getStreamManager(ClicksAndViews.VIEWS); for (String view : VIEWS) { viewsStream.send(view); } // send clicks for viewIds 1,2,5 StreamManager clicksStream = getStreamManager(ClicksAndViews.CLICKS); for (Integer click : CLICKS) { clicksStream.send(createClick(click)); } MapReduceManager mapReduceManager = applicationManager.getMapReduceManager(ClicksAndViewsMapReduce.NAME) // configure this run of the MapReduce to write to the partition keyed by OUTPUT_PARTITION_RUNTIME .start(ImmutableMap.of("output.partition.key.runtime", Integer.toString(OUTPUT_PARTITION_RUNTIME))); mapReduceManager.waitForFinish(5, TimeUnit.MINUTES); List<String> joinedViews = new ArrayList<>(); for (int i = 0; i < VIEWS.size(); i++) { joinedViews.add(createJoinedView(VIEWS.get(i), Collections.frequency(CLICKS, i))); } Set<String> expectedJoinedViews = ImmutableSet.copyOf(joinedViews); Assert.assertEquals(expectedJoinedViews, getDataFromFile()); Assert.assertEquals(expectedJoinedViews, getDataFromExplore()); } private static String createView(int viewId, long requestBeginTime, int adId, String referrer, String userCookie, String ip) { // View: // viewId, requestBeginTime, adId, referrer, userCookie, ip return TAB_JOINER.join(viewId, requestBeginTime, adId, referrer, userCookie, ip); } private static String createClick(int viewId) { // Click: // viewId return Integer.toString(viewId); } private static String createJoinedView(String view, int numClicks) { // Joined View: // viewId, requestBeginTime, adId, referrer, userCookie, ip, numClicks return TAB_JOINER.join(view, numClicks); } private Set<String> getDataFromFile() throws Exception { DataSetManager<PartitionedFileSet> cleanRecords = getDataset(ClicksAndViews.JOINED); Set<String> cleanData = new HashSet<>(); // we configured the MapReduce to write to this partition when starting it PartitionDetail partition = cleanRecords.get().getPartition(PartitionKey.builder().addLongField("runtime", OUTPUT_PARTITION_RUNTIME).build()); Assert.assertNotNull(partition); for (Location location : partition.getLocation().list()) { if (location.getName().startsWith("part-")) { try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(location.getInputStream()))) { String line; while ((line = bufferedReader.readLine()) != null) { cleanData.add(line); } } } } return cleanData; } private Set<String> getDataFromExplore() throws Exception { try (Connection connection = getQueryClient()) { ResultSet results = connection .prepareStatement("SELECT * FROM dataset_" + ClicksAndViews.JOINED) .executeQuery(); Set<String> cleanRecords = new HashSet<>(); while (results.next()) { cleanRecords.add(TAB_JOINER.join(results.getString(1), results.getString(2), results.getString(3), results.getString(4), results.getString(5), results.getString(6), results.getString(7))); } return cleanRecords; } } }