/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.operator;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import org.apache.pig.data.Tuple;
import org.codehaus.jackson.JsonGenerationException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.linkedin.cubert.block.Block;
import com.linkedin.cubert.block.BlockProperties;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.operator.CubeOperator;
import com.linkedin.cubert.utils.JsonUtils;
/***
* Tests for OLAP cube count distinct. main cases covered: olap cube with 1 dimension,
* multiple dimension, full cube, grouping sets, with and without overlap in aggregate,
* single and multiple measures, unit tests to test the dimension fields of type long.
*
* @author Krishna Puttaswamy
*
*/
public class TestOLAPCubeCountDistinct
{
@BeforeClass
public void setUp() throws JsonGenerationException,
JsonMappingException,
IOException
{
}
void validate(Object[][] rows, String[] expected) throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
validateGroupingSets(rows, expected, null);
}
void validateGroupingSets(Object[][] rows, String[] expected, String[] groupingSets) throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
/* Step 1: create input block schema */
int ndims = rows[0].length - 1;
String[] dimensions = new String[ndims];
String[] columnNames = new String[ndims + 1];
columnNames[0] = "member";
StringBuffer typeName = new StringBuffer();
for (int i = 0; i < ndims; i++)
{
if (i > 0)
typeName.append(",");
if (rows[0][i + 1] instanceof Integer)
typeName.append("int ");
else
typeName.append("long ");
String name = "Dim" + i;
typeName.append(name);
columnNames[i + 1] = name;
dimensions[i] = name;
}
BlockSchema inputSchema = new BlockSchema(typeName.toString());
/* Step 2: Create input block */
Block block = new ArrayBlock(Arrays.asList(rows), columnNames, 1);
HashMap<String, Block> map = new HashMap<String, Block>();
map.put("block", block);
// System.out.println("SCHEMA " + block.getSchema());
/* Step 3: create json */
ObjectMapper mapper = new ObjectMapper();
ObjectNode node = mapper.createObjectNode();
// add aggregates
ArrayNode measures = mapper.createArrayNode();
ObjectNode measureNode =
JsonUtils.createObjectNode("input",
"member",
"output",
"count_distinct_members",
"type",
"COUNT_DISTINCT");
measures.add(measureNode);
node.put("aggregates", measures);
// add dimensions
ArrayNode dimensionNode = mapper.createArrayNode();
for (int i = 0; i < dimensions.length; i++)
{
dimensionNode.add(dimensions[i]);
}
node.put("dimensions", dimensionNode);
// add innerDimensions
node.put("innerDimensions", "member");
// add grouping sets
ArrayNode groupingSetNode = mapper.createArrayNode();
if (groupingSets != null)
for (String str : groupingSets)
groupingSetNode.add(str);
node.put("groupingSets", groupingSetNode);
/* Step 4: create and initialize CUBE operator */
CubeOperator cd = new CubeOperator();
BlockSchema outputSchema =
inputSchema.append(new BlockSchema("LONG count_distinct_members"));
BlockProperties props =
new BlockProperties(null, outputSchema, (BlockProperties) null);
cd.setInput(map, node, props);
// aggregate the output
// Map<DimensionKey, int[]> aggregated = new HashMap<DimensionKey, int[]>();
/* Step 5: store the output of CUBE operator in a set */
Set<String> computed = new HashSet<String>();
Tuple tuple;
while ((tuple = cd.next()) != null)
{
computed.add(tuple.toString());
}
/* Step 6: validate the computed results against expected */
if (expected.length > computed.size())
{
System.out.println("EXPECTED: " + java.util.Arrays.toString(expected));
System.out.println("COMPUTED: " + computed);
Set<String> a = new HashSet<String>();
for (String s : expected)
a.add(s);
for (String s : computed)
a.remove(s);
System.out.println("Remaining: " + a);
}
Assert.assertEquals(expected.length, computed.size());
for (String entry : expected)
{
if (!computed.contains(entry))
Assert.assertFalse(true, entry);
Assert.assertTrue(computed.contains(entry));
}
}
@Test
void testOneMember() throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
Object[][] rows = { { 1, (int) 10, (int) 100 }, { 1, (int) 10, (int) 200 } };
String[] expected =
new String[] { "(10,,1)", "(,100,1)", "(,200,1)", "(,,1)", "(10,100,1)",
"(10,200,1)" };
validate(rows, expected);
}
@Test
void testOneDimension() throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
Object[][] rows = { { 1, (int) 10 }, { 2, (int) 10 }, { 2, (int) 10 } };
String[] expected = new String[] { "(10,2)", "(,2)" };
validate(rows, expected);
}
@Test
void testNoOverlap() throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
Object[][] rows =
{ { 1, (int) 10 }, { 1, (int) 20 }, { 2, (int) 30 }, { 2, (int) 40 },
{ 3, (int) 50 } };
String[] expected =
new String[] { "(10,1)", "(20,1)", "(30,1)", "(40,1)", "(50,1)", "(,3)" };
validate(rows, expected);
}
@Test
void testThreeDimsTeam() throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
Object[][] rows =
{ { 1, (int) 1, (int) 2, (int) 1 }, { 2, (int) 1, (int) 2, (int) 2 },
{ 3, (int) 1, (int) 1, (int) 1 },
{ 4, (int) 1, (int) 1, (int) 2 },
{ 5, (int) 2, (int) 2, (int) 2 } };
String[] expected =
new String[] { "(1,2,1,1)", "(1,2,2,1)", "(1,1,1,1)", "(1,1,2,1)",
"(2,2,2,1)", "(1,,,4)", "(2,,,1)", "(,1,,2)", "(,2,,3)",
"(,,1,2)", "(,,2,3)", "(1,1,,2)", "(1,2,,2)", "(2,2,,1)",
"(,1,1,1)", "(,1,2,1)", "(,2,1,1)", "(,2,2,2)", "(1,,1,2)",
"(1,,2,2)", "(2,,2,1)", "(,,,5)" };
validate(rows, expected);
}
@Test
void testLongType() throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
// members: srinivas, maneesh, krishna, saurabh, rui
// dimensions: country code, number of monitors, vegetarian
Object[][] rows =
{ { 1, (long) 1, (long) 2, (long) 1 },
{ 2, (long) 1, (long) 2, (long) 2 },
{ 3, (long) 1, (long) 1, (long) 1 },
{ 4, (long) 1, (long) 1, (long) 2 },
{ 5, (long) 2, (long) 2, (long) 2 } };
String[] expected =
new String[] { "(1,2,1,1)", "(1,2,2,1)", "(1,1,1,1)", "(1,1,2,1)",
"(2,2,2,1)", "(1,,,4)", "(2,,,1)", "(,1,,2)", "(,2,,3)",
"(,,1,2)", "(,,2,3)", "(1,1,,2)", "(1,2,,2)", "(2,2,,1)",
"(,1,1,1)", "(,1,2,1)", "(,2,1,1)", "(,2,2,2)", "(1,,1,2)",
"(1,,2,2)", "(2,,2,1)", "(,,,5)" };
validate(rows, expected);
}
@Test
void testMixedTypes() throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
// members: srinivas, maneesh, krishna, saurabh, rui
// dimensions: country code, number of monitors, vegetarian
Object[][] rows =
{ { 1, 1, (long) 22222222222222L, 1 },
{ 2, 1, (long) 22222222222222L, 2 },
{ 3, 1, (long) 11111111111111L, 1 },
{ 4, 1, (long) 11111111111111L, 2 },
{ 5, 2, (long) 22222222222222L, 2 } };
String[] expected =
new String[] { "(1,22222222222222,1,1)", "(1,22222222222222,2,1)",
"(1,11111111111111,1,1)", "(1,11111111111111,2,1)",
"(2,22222222222222,2,1)", "(1,,,4)", "(2,,,1)",
"(,11111111111111,,2)", "(,22222222222222,,3)", "(,,1,2)",
"(,,2,3)", "(1,11111111111111,,2)", "(1,22222222222222,,2)",
"(2,22222222222222,,1)", "(,11111111111111,1,1)",
"(,11111111111111,2,1)", "(,22222222222222,1,1)",
"(,22222222222222,2,2)", "(1,,1,2)", "(1,,2,2)", "(2,,2,1)",
"(,,,5)" };
validate(rows, expected);
}
@Test
void testThreeDimsTeamGroupingSets() throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
// members: srinivas, maneesh, krishna, saurabh, rui
// dimensions: country code, number of monitors, vegetarian
Object[][] rows =
{ { 1, (int) 1, (int) 2, (int) 1 }, { 2, (int) 1, (int) 2, (int) 2 },
{ 3, (int) 1, (int) 1, (int) 1 },
{ 4, (int) 1, (int) 1, (int) 2 },
{ 5, (int) 2, (int) 2, (int) 2 } };
String[] expected =
new String[] { "(1,,,4)", "(2,,,1)", "(1,1,,2)", "(1,2,,2)", "(2,2,,1)" };
validateGroupingSets(rows, expected, new String[] { "Dim0,Dim1", "Dim0" });
}
// There was not Assert statement in the original code.
// Commenting it out for now
// @SuppressWarnings("unused")
// @Test
void testThreeDimsTeamGroupingSetsMultiAggregate() throws JsonGenerationException,
JsonMappingException,
IOException,
InterruptedException
{
// // members: srinivas, maneesh, krishna, saurabh, rui
// // dimensions: country code, number of monitors, vegetarian
// Object[][] rows =
// { { 1, (int) 1, (int) 2, (int) 1, 1 },
// { 2, (int) 1, (int) 2, (int) 2, 2 },
// { 3, (int) 1, (int) 1, (int) 1, null },
// { 4, (int) 1, (int) 1, (int) 2, null },
// { 5, (int) 2, (int) 2, (int) 2, null } };
//
// String valueColumns = "member,condition";
// int ndims = 3;
// String[] groupingSets = new String[] { "Dim0", "Dim0,Dim1" };
//
// String[] dimensions = new String[ndims];
// String[] columnNames = new String[ndims + valueColumns.split(",").length];
// columnNames[0] = "member";
// StringBuffer typeName = new StringBuffer();
// for (int i = 0; i < ndims; i++)
// {
// if (i > 0)
// typeName.append(",");
// typeName.append("int ");
// String name = "Dim" + i;
// typeName.append(name);
// columnNames[i + 1] = name;
// dimensions[i] = name;
// }
//
// BlockSchema blockSchema = new BlockSchema(typeName.toString());
//
// String[] fields = valueColumns.split(",");
// for (int i = 1; i < fields.length; i++)
// columnNames[ndims + i] = fields[i];
//
// Block block = new ArrayBlock(Arrays.asList(rows), columnNames, 1);
//
// HashMap<String, Block> map = new HashMap<String, Block>();
// map.put("block", block);
//
// OLAPCubeCountDistinct cd = new OLAPCubeCountDistinct();
//
// ObjectMapper mapper = new ObjectMapper();
// ObjectNode node = mapper.createObjectNode();
//
// ArrayNode measures = mapper.createArrayNode();
// ObjectNode m1 = mapper.createObjectNode();
// m1.put("input", "member");
// m1.put("output", "LONG CDMember0");
// measures.add(m1);
// ObjectNode m2 = mapper.createObjectNode();
// m2.put("input", "condition");
// m2.put("output", "LONG CDMember1");
// measures.add(m2);
// node.put("aggregates", measures);
//
// ArrayNode dimensionNode = mapper.createArrayNode();
// for (int i = 0; i < dimensions.length; i++)
// {
// dimensionNode.add(dimensions[i]);
// }
// node.put("dimensions", dimensionNode);
//
// ArrayNode groupingSetNode = mapper.createArrayNode();
// if (groupingSets != null)
// for (String str : groupingSets)
// groupingSetNode.add(str);
//
// node.put("groupingSets", groupingSetNode);
//
// cd.setInput(map, node);
//
// // aggregate the output
// Map<DimensionKey, int[]> aggregated = new HashMap<DimensionKey, int[]>();
//
// Tuple tuple;
//
// int sizeInInts =
// OLAPCubeUtils.getDimensionsSizeInInteger(blockSchema, dimensions);
//
// while ((tuple = cd.next()) != null)
// {
// DimensionKey key = new DimensionKey(sizeInInts);
// for (int i = 0; i < ndims; i++)
// {
// if (tuple.get(i) != null)
// {
// int code = ((Integer) tuple.get(i)).intValue();
// key.set(i, code);
// }
// else
// key.set(i, 0);
// }
//
// int count_distinct = ((Long) tuple.get(ndims)).intValue();
// int conditional_cd = ((Long) tuple.get(ndims + 1)).intValue();
//
// int[] old = aggregated.get(key);
// if (old == null)
// {
// old = new int[2];
// aggregated.put(key, old);
// }
// old[0] += count_distinct;
// old[1] += conditional_cd;
// }
// for (DimensionKey k : aggregated.keySet())
// {
// print.f("key %s ==> value %s",
// k.toString(),
// Arrays.toString(aggregated.get(k)));
// }
}
}