/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package hivemall.anomaly; import hivemall.anomaly.ChangeFinderUDF.LossFunction; import hivemall.anomaly.ChangeFinderUDF.Parameters; import hivemall.utils.lang.StringUtils; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; import java.util.zip.GZIPInputStream; import javax.annotation.Nonnull; import org.apache.commons.math3.distribution.NormalDistribution; import org.apache.commons.math3.distribution.PoissonDistribution; import org.apache.commons.math3.distribution.UniformIntegerDistribution; import org.apache.commons.math3.random.RandomGenerator; import org.apache.commons.math3.random.Well19937c; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.junit.Assert; import org.junit.Test; public class ChangeFinder2DTest { private static final boolean DEBUG = false; @Test public void testCf1d() throws IOException, HiveException { Parameters params = new Parameters(); params.set(LossFunction.logloss); PrimitiveObjectInspector oi = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector; ListObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(oi); ChangeFinder2D cf = new ChangeFinder2D(params, listOI); double[] outScores = new double[2]; List<Double> x = new ArrayList<Double>(1); BufferedReader reader = readFile("cf1d.csv"); println("x outlier change"); String line; int i = 1, numOutliers = 0, numChangepoints = 0; while ((line = reader.readLine()) != null) { double d = Double.parseDouble(line); x.add(Double.valueOf(d)); cf.update(x, outScores); printf("%d %f %f %f%n", i, d, outScores[0], outScores[1]); if (outScores[0] > 10.d) { numOutliers++; } if (outScores[1] > 10.d) { numChangepoints++; } x.clear(); i++; } Assert.assertTrue("#outliers SHOULD be greater than 10: " + numOutliers, numOutliers > 10); Assert.assertTrue("#outliers SHOULD be less than 20: " + numOutliers, numOutliers < 20); Assert.assertTrue("#changepoints SHOULD be greater than 0: " + numChangepoints, numChangepoints > 0); Assert.assertTrue("#changepoints SHOULD be less than 5: " + numChangepoints, numChangepoints < 5); } @Test public void testTwitterData() throws IOException, HiveException { Parameters params = new Parameters(); params.set(LossFunction.logloss); params.r1 = 0.01d; params.k = 6; params.T1 = 10; params.T2 = 5; PrimitiveObjectInspector oi = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector; ListObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(oi); ChangeFinder2D cf = new ChangeFinder2D(params, listOI); double[] outScores = new double[2]; List<Double> x = new ArrayList<Double>(1); BufferedReader reader = readFile("twitter.csv.gz"); println("time x outlier change"); String line; int i = 1, numOutliers = 0, numChangepoints = 0; while ((line = reader.readLine()) != null) { double d = Double.parseDouble(line); x.add(Double.valueOf(d)); cf.update(x, outScores); printf("%d %f %f %f%n", i, d, outScores[0], outScores[1]); if (outScores[0] > 30.d) { numOutliers++; } if (outScores[1] > 8.d) { numChangepoints++; } x.clear(); i++; } Assert.assertTrue("#outliers SHOULD be greater than 5: " + numOutliers, numOutliers > 5); Assert.assertTrue("#outliers SHOULD be less than 10: " + numOutliers, numOutliers < 10); Assert.assertTrue("#changepoints SHOULD be greater than 0: " + numChangepoints, numChangepoints > 0); Assert.assertTrue("#changepoints SHOULD be less than 5: " + numChangepoints, numChangepoints < 5); } @Test public void testPoissenDist() throws HiveException { final int examples = 10000; final int dims = 3; final PoissonDistribution[] poisson = new PoissonDistribution[] { new PoissonDistribution(10.d), new PoissonDistribution(5.d), new PoissonDistribution(20.d)}; final Random rand = new Random(42); final Double[] x = new Double[dims]; final List<Double> xList = Arrays.asList(x); Parameters params = new Parameters(); params.set(LossFunction.logloss); params.r1 = 0.01d; params.k = 6; params.T1 = 10; params.T2 = 5; PrimitiveObjectInspector oi = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector; ListObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(oi); final ChangeFinder2D cf = new ChangeFinder2D(params, listOI); final double[] outScores = new double[2]; println("# time x0 x1 x2 outlier change"); for (int i = 0; i < examples; i++) { double r = rand.nextDouble(); x[0] = r * poisson[0].sample(); x[1] = r * poisson[1].sample(); x[2] = r * poisson[2].sample(); cf.update(xList, outScores); printf("%d %f %f %f %f %f%n", i, x[0], x[1], x[2], outScores[0], outScores[1]); } } //@Test public void testSota5D() throws HiveException { final int DIM = 5; final int EXAMPLES = 20001; final Double[] x = new Double[DIM]; final List<Double> xList = Arrays.asList(x); Parameters params = new Parameters(); params.set(LossFunction.logloss); params.r1 = 0.01d; params.k = 10; params.T1 = 10; params.T2 = 10; PrimitiveObjectInspector oi = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector; ListObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(oi); final ChangeFinder2D cf = new ChangeFinder2D(params, listOI); final double[] outScores = new double[2]; RandomGenerator rng1 = new Well19937c(31L); final UniformIntegerDistribution uniform = new UniformIntegerDistribution(rng1, 0, 10); RandomGenerator rng2 = new Well19937c(41L); final PoissonDistribution poissonEvent = new PoissonDistribution(rng2, 1000.d, PoissonDistribution.DEFAULT_EPSILON, PoissonDistribution.DEFAULT_MAX_ITERATIONS); final StringBuilder buf = new StringBuilder(256); println("# time x0 x1 x2 x3 x4 mean0 mean1 mean2 mean3 mean4 outlier change"); FIN: for (int i = 0; i < EXAMPLES;) { int len = poissonEvent.sample(); double data[][] = new double[DIM][len]; double mean[] = new double[DIM]; double sd[] = new double[DIM]; for (int j = 0; j < DIM; j++) { mean[j] = uniform.sample() * 5.d; sd[j] = uniform.sample() / 10.d * 5.d + 1.d; if (i % 5 == 0) { mean[j] += 50.d; } NormalDistribution normDist = new NormalDistribution(new Well19937c(i + j), mean[j], sd[j]); data[j] = normDist.sample(len); data[j][len / (j + 2) + DIM % (j + 1)] = mean[j] + (j + 4) * sd[j]; } for (int j = 0; j < len; j++) { if (i >= EXAMPLES) { break FIN; } x[0] = data[0][j]; x[1] = data[1][j]; x[2] = data[2][j]; x[3] = data[3][j]; x[4] = data[4][j]; cf.update(xList, outScores); buf.append(i) .append(' ') .append(x[0].doubleValue()) .append(' ') .append(x[1].doubleValue()) .append(' ') .append(x[2].doubleValue()) .append(' ') .append(x[3].doubleValue()) .append(' ') .append(x[4].doubleValue()) .append(' ') .append(mean[0]) .append(' ') .append(mean[1]) .append(' ') .append(mean[2]) .append(' ') .append(mean[3]) .append(' ') .append(mean[4]) .append(' ') .append(outScores[0]) .append(' ') .append(outScores[1]); println(buf.toString()); StringUtils.clear(buf); i++; } } } private static void println(String msg) { if (DEBUG) { System.out.println(msg); } } private static void printf(String format, Object... args) { if (DEBUG) { System.out.printf(format, args); } } @Nonnull private static BufferedReader readFile(@Nonnull String fileName) throws IOException { InputStream is = ChangeFinder1DTest.class.getResourceAsStream(fileName); if (fileName.endsWith(".gz")) { is = new GZIPInputStream(is); } return new BufferedReader(new InputStreamReader(is)); } }