/* # Licensed Materials - Property of IBM # Copyright IBM Corp. 2015 */ package state; import java.util.Random; import org.apache.commons.math.stat.descriptive.moment.Mean; import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; import com.ibm.streamsx.topology.TStream; import com.ibm.streamsx.topology.Topology; import com.ibm.streamsx.topology.context.StreamsContextFactory; import com.ibm.streamsx.topology.function.Predicate; import com.ibm.streamsx.topology.function.Supplier; /** * Finds outliers from a sequence of doubles (e.g. simulating a sensor reading). * * Demonstrates function logic that maintains state across tuples. * */ public class FindOutliers { public static void main(String[] args) throws Exception { final double threshold = args.length == 0 ? 2.0 : Double .parseDouble(args[0]); Topology t = new Topology("StandardDeviationFilter"); final Random rand = new Random(); // Produce a stream of random double values with a normal // distribution, mean 0.0 and standard deviation 1. TStream<Double> values = t.limitedSource(new Supplier<Double>() { private static final long serialVersionUID = 1L; @Override public Double get() { return rand.nextGaussian(); } }, 100000); /* * Filters the values based on calculating the mean and standard * deviation from the incoming data. In this case only outliers are * present in the output stream outliers. A outlier is defined as one * more than (threshold*standard deviation) from the mean. * * This demonstrates an anonymous functional logic class that is * stateful. The two fields mean and sd maintain their values across * multiple invocations of the test method, that is for multiple tuples. * * Note both Mean & StandardDeviation classes are serializable. */ TStream<Double> outliers = values.filter(new Predicate<Double>() { private static final long serialVersionUID = 1L; private final Mean mean = new Mean(); private final StandardDeviation sd = new StandardDeviation(); @Override public boolean test(Double tuple) { mean.increment(tuple); sd.increment(tuple); double multpleSd = threshold * sd.getResult(); double absMean = Math.abs(mean.getResult()); double absTuple = Math.abs(tuple); return absTuple > absMean + multpleSd; } }); outliers.print(); StreamsContextFactory.getEmbedded().submit(t).get(); } }