/* (c) 2014 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */
package com.linkedin.cubert.operator;

import com.linkedin.cubert.block.Block;
import com.linkedin.cubert.block.BlockProperties;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.plan.physical.TestContext;
import com.linkedin.cubert.utils.JsonUtils;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.ReduceContext;
import org.apache.pig.data.Tuple;
import org.codehaus.jackson.JsonGenerationException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

/**
 * Tests for the additive OLAP CUBE operator.
 *
 * Exercises the SUM aggregate both over the full cube (all dimension combinations)
 * and over explicitly specified grouping sets.
 *
 * @author Krishna Puttaswamy
 */
public class TestOLAPCube
{
    @BeforeClass
    public void setUp() throws JsonGenerationException, JsonMappingException, IOException
    {
        // No shared fixture is needed; each test builds its own input block and
        // operator. Kept as an explicit (empty) lifecycle hook.
    }

    /**
     * Validates full-cube aggregation (no grouping sets restriction).
     *
     * @param rows     input rows; column 0 is the measure (clickCount), the remaining
     *                 columns are dimensions Dim0..DimN
     * @param expected string forms of the tuples the CUBE operator must emit
     */
    void validate(Object[][] rows, String[] expected) throws JsonGenerationException,
            JsonMappingException,
            IOException,
            InterruptedException
    {
        validateGroupingSets(rows, expected, null);
    }

    /**
     * Runs the CUBE operator over {@code rows} and asserts that the emitted tuples
     * are exactly {@code expected} (compared as strings, order-insensitive).
     *
     * @param rows         input rows; column 0 is the measure (clickCount), the rest
     *                     are dimensions named Dim0..DimN
     * @param expected     expected tuple strings produced by the operator
     * @param groupingSets grouping set specs (e.g. "Dim0,Dim1"), or null to compute
     *                     the full cube
     */
    void validateGroupingSets(Object[][] rows, String[] expected, String[] groupingSets)
            throws JsonGenerationException,
            JsonMappingException,
            IOException,
            InterruptedException
    {
        /* Step 1: Create input block schema. Dimensions are synthesized as
         * "Dim0".."DimN"; the schema string looks like "int Dim0,int Dim1,...". */
        int ndims = rows[0].length - 1;
        String[] dimensions = new String[ndims];
        String[] columnNames = new String[ndims + 1];
        columnNames[0] = "clickCount";

        // StringBuilder: local, single-threaded string assembly.
        StringBuilder typeName = new StringBuilder();
        for (int i = 0; i < ndims; i++)
        {
            if (i > 0)
                typeName.append(",");
            typeName.append("int ");
            String name = "Dim" + i;
            typeName.append(name);
            columnNames[i + 1] = name;
            dimensions[i] = name;
        }
        BlockSchema inputSchema = new BlockSchema(typeName.toString());

        /* Step 2: Build the operator's JSON config and set up the phase context. */
        ObjectMapper mapper = new ObjectMapper();
        ObjectNode node = mapper.createObjectNode();
        Configuration conf = new JobConf();
        // TestContext doubles as both map and reduce context for unit testing.
        PhaseContext.create((MapContext) new TestContext(), conf);
        PhaseContext.create((ReduceContext) new TestContext(), conf);

        // Aggregates: a single SUM over clickCount, emitted as sum_clicks.
        ArrayNode measures = mapper.createArrayNode();
        measures.add(JsonUtils.createObjectNode("type",
                                                "SUM",
                                                "input",
                                                "clickCount",
                                                "output",
                                                "sum_clicks"));
        node.put("aggregates", measures);

        // Dimensions.
        ArrayNode dimensionNode = mapper.createArrayNode();
        for (String dimension : dimensions)
            dimensionNode.add(dimension);
        node.put("dimensions", dimensionNode);

        // Grouping sets (an empty array means "full cube").
        ArrayNode groupingSetNode = mapper.createArrayNode();
        if (groupingSets != null)
            for (String str : groupingSets)
                groupingSetNode.add(str);
        node.put("groupingSets", groupingSetNode);

        /* Step 3: Create the input block. */
        HashMap<String, Block> map = new HashMap<String, Block>();
        Block block = new ArrayBlock(Arrays.asList(rows), columnNames, 1);
        map.put("block", block);

        /* Step 4: Create the CUBE operator and initialize it. */
        CubeOperator cd = new CubeOperator();

        BlockSchema outputSchema = inputSchema.append(new BlockSchema("INT sum_clicks"));
        BlockProperties props =
                new BlockProperties(null, outputSchema, (BlockProperties) null);
        cd.setInput(map, node, props);

        /* Step 5: Drain the operator and collect tuple strings in a set. */
        Set<String> computed = new HashSet<String>();
        Tuple tuple;
        while ((tuple = cd.next()) != null)
        {
            computed.add(tuple.toString());
        }

        /* Step 6: Validate that the computed and expected results agree. Note:
         * duplicates collapse in the set, so the size check also guards against
         * the operator emitting repeated tuples. */
        Assert.assertEquals(computed.size(), expected.length);
        for (String entry : expected)
        {
            Assert.assertTrue(computed.contains(entry));
        }
    }

    /**
     * SUM over a single dimension where no two distinct dimension values share a
     * measure row; includes the grand-total (null-dimension) tuple.
     */
    @Test
    void testNoOverlap() throws JsonGenerationException,
            JsonMappingException,
            IOException,
            InterruptedException
    {
        Object[][] rows = { { 1, 10 }, { 1, 20 }, { 2, 30 }, { 2, 40 }, { 3, 50 } };
        String[] expected =
                new String[] { "(10,1)", "(20,1)", "(30,2)", "(40,2)", "(50,3)", "(,9)" };

        validate(rows, expected);
    }

    /**
     * Full cube over three dimensions: expects every rollup combination, from
     * single-dimension subtotals down to the grand total.
     */
    @Test
    void testTotalClickCount() throws JsonGenerationException,
            JsonMappingException,
            IOException,
            InterruptedException
    {
        // Column 0: clickCount; dimensions: country code, number of monitors, vegetarian.
        Object[][] rows =
                { { 1, 1, 1, 1 }, { 1, 1, 1, 2 }, { 2, 1, 2, 1 }, { 3, 1, 2, 2 },
                        { 2, 2, 2, 2 } };

        String[] expected =
                new String[] { "(1,,,7)", "(2,,,2)", "(,1,,2)", "(,2,,7)", "(,,1,3)",
                        "(,,2,6)", "(1,1,,2)", "(1,2,,5)", "(2,2,,2)", "(,1,1,1)",
                        "(,1,2,1)", "(,2,1,2)", "(,2,2,5)", "(1,,1,3)", "(1,,2,4)",
                        "(2,,2,2)", "(1,1,1,1)", "(1,1,2,1)", "(1,2,1,2)", "(1,2,2,3)",
                        "(2,2,2,2)", "(,,,9)" };

        validate(rows, expected);
    }

    /**
     * SUM restricted to explicit grouping sets {Dim0,Dim1} and {Dim0}: only those
     * rollups may appear, not the full cube.
     */
    @Test
    void testGroupingSetsSum() throws JsonGenerationException,
            JsonMappingException,
            IOException,
            InterruptedException
    {
        // Column 0: clickCount; dimensions: country code, number of monitors, vegetarian.
        Object[][] rows =
                { { 1, 1, 1, 1 }, { 1, 1, 1, 2 }, { 2, 1, 2, 1 }, { 3, 1, 2, 2 },
                        { 2, 2, 2, 2 } };

        String[] expected =
                new String[] { "(1,,,7)", "(2,,,2)", "(1,1,,2)", "(1,2,,5)", "(2,2,,2)" };

        validateGroupingSets(rows, expected, new String[] { "Dim0,Dim1", "Dim0" });
    }
}