/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.dq.test;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.dq.DataQualityWritable;
import co.cask.cdap.dq.functions.DiscreteValuesHistogram;
import co.cask.cdap.dq.functions.HistogramWithBucketing;
import co.cask.cdap.dq.functions.Mean;
import co.cask.cdap.dq.functions.StandardDeviation;
import co.cask.cdap.dq.functions.UniqueValues;
import com.google.common.collect.Maps;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.junit.Assert;
import org.junit.Test;
import java.lang.reflect.Type;
import java.util.AbstractMap;
import java.util.HashMap;
import java.util.Map;
/**
* Test cases for aggregation function logic. Disjoint from actual DataQuality map reduce
*/
public class AggregationFunctionsTest {
private static final Type TOKEN_TYPE_MAP_STRING_INTEGER = new TypeToken<Map<String, Integer>>() { }.getType();
private static final Gson GSON = new Gson();
@Test
public void averageReturnAggregationTest() throws Exception {
byte[] val1 = Bytes.toBytes(10.0);
Mean mean = new Mean();
Double averageVal = mean.deserialize(val1);
Assert.assertEquals(10.0, averageVal, 0);
}
@Test
public void standardDeviationReturnAggregationTest() throws Exception {
byte[] val1 = Bytes.toBytes(10.0);
StandardDeviation standardDeviation = new StandardDeviation();
Double stdevVal = standardDeviation.deserialize(val1);
Assert.assertEquals(10.0, stdevVal, 0);
}
@Test
public void discreteValuesHistogramReturnAggregationTest() throws Exception {
DiscreteValuesHistogram discreteValuesHistogram = new DiscreteValuesHistogram();
Map<String, Integer> map1 = Maps.newHashMap();
map1.put("a", 1);
map1.put("b", 2);
Map<String, Integer> map2 = Maps.newHashMap();
map2.put("a", 2);
map2.put("b", 3);
Map<String, Integer> combinedMap = Maps.newHashMap();
combinedMap.put("a", 3);
combinedMap.put("b", 5);
byte[] bytesMap1 = Bytes.toBytes(GSON.toJson(map1));
byte[] bytesMap2 = Bytes.toBytes(GSON.toJson(map2));
discreteValuesHistogram.combine(bytesMap1);
discreteValuesHistogram.combine(bytesMap2);
Map<String, Integer> histogramVal = discreteValuesHistogram.retrieveAggregation();
Assert.assertEquals(combinedMap, histogramVal);
}
@Test
public void averageGenerateAggregationTest() throws Exception {
DataQualityWritable val1 = new DataQualityWritable();
val1.set(new DoubleWritable(2.0));
DataQualityWritable val2 = new DataQualityWritable();
val2.set(new DoubleWritable(2.0));
Mean mean = new Mean();
mean.add(val1);
mean.add(val2);
byte[] output = mean.aggregate();
Assert.assertEquals(2.0, Bytes.toDouble(output), 0);
}
@Test
public void standardDeviationGenerateAggregationTest() throws Exception {
DataQualityWritable val1 = new DataQualityWritable();
val1.set(new DoubleWritable(2.0));
DataQualityWritable val2 = new DataQualityWritable();
val2.set(new DoubleWritable(5.0));
DataQualityWritable val3 = new DataQualityWritable();
val3.set(new DoubleWritable(10.0));
DataQualityWritable val4 = new DataQualityWritable();
val4.set(new DoubleWritable(52.0));
StandardDeviation standardDeviation = new StandardDeviation();
standardDeviation.add(val1);
standardDeviation.add(val2);
standardDeviation.add(val3);
standardDeviation.add(val4);
byte[] output = standardDeviation.aggregate();
Assert.assertEquals(20.265426, Bytes.toDouble(output), 0.001);
}
@Test
public void uniqueValuesGenerateAggregationTest() throws Exception {
DataQualityWritable val1 = new DataQualityWritable();
DataQualityWritable val2 = new DataQualityWritable();
DataQualityWritable val3 = new DataQualityWritable();
val1.set(new Text("a"));
val2.set(new Text("a"));
val3.set(new Text("a"));
UniqueValues uniqueValues = new UniqueValues();
uniqueValues.add(val1);
uniqueValues.add(val2);
uniqueValues.add(val3);
byte[] output = uniqueValues.aggregate();
Assert.assertEquals("[a]", Bytes.toString(output));
}
@Test
public void discreteValuesGenerateAggregationTest() throws Exception {
DataQualityWritable val1 = new DataQualityWritable();
DataQualityWritable val2 = new DataQualityWritable();
DataQualityWritable val3 = new DataQualityWritable();
val1.set(new Text("a"));
val2.set(new Text("a"));
val3.set(new Text("b"));
DiscreteValuesHistogram discreteValuesHistogram = new DiscreteValuesHistogram();
discreteValuesHistogram.add(val1);
discreteValuesHistogram.add(val2);
discreteValuesHistogram.add(val3);
Map<String, Integer> expectedMap = Maps.newHashMap();
expectedMap.put("a", 2);
expectedMap.put("b", 1);
byte[] outputVal = discreteValuesHistogram.aggregate();
Map<String, Integer> outputMap =
GSON.fromJson(Bytes.toString(outputVal), TOKEN_TYPE_MAP_STRING_INTEGER);
Assert.assertEquals(expectedMap, outputMap);
}
@Test
public void histogramWithBucketingTest() throws Exception {
DataQualityWritable val1 = new DataQualityWritable();
DataQualityWritable val2 = new DataQualityWritable();
DataQualityWritable val3 = new DataQualityWritable();
DataQualityWritable val4 = new DataQualityWritable();
DataQualityWritable val5 = new DataQualityWritable();
DataQualityWritable val6 = new DataQualityWritable();
DataQualityWritable val7 = new DataQualityWritable();
DataQualityWritable val8 = new DataQualityWritable();
val1.set(new IntWritable(2));
val2.set(new IntWritable(3));
val3.set(new IntWritable(4));
val4.set(new IntWritable(16));
val5.set(new IntWritable(16));
val6.set(new IntWritable(26));
val7.set(new IntWritable(46));
val8.set(new IntWritable(56));
HistogramWithBucketing histogramWithBucketing = new HistogramWithBucketing();
histogramWithBucketing.add(val1);
histogramWithBucketing.add(val2);
histogramWithBucketing.add(val3);
histogramWithBucketing.add(val4);
histogramWithBucketing.add(val5);
histogramWithBucketing.add(val6);
histogramWithBucketing.add(val7);
histogramWithBucketing.add(val8);
histogramWithBucketing.aggregate();
Map<Map.Entry<Double, Double>, Long> expectedMap = new HashMap<>();
Map.Entry<Double, Double> expectedMapEntry = new AbstractMap.SimpleEntry<>(2.0, 86.0);
expectedMap.put(expectedMapEntry, new Long(8));
Assert.assertEquals(histogramWithBucketing.histogram, expectedMap);
}
}