/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.data.util; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import com.addthis.codec.codables.Codable; import org.apache.commons.lang3.ArrayUtils; /** * Tools for finding change points in an integer array. */ public class FindChangePoints implements Codable { /** * Finds high points in an array of integers. * * @param data The array of integers in which to search * @param max_width The maximum width to group high-points together * @param min_height The minimum height to consider * @return A list of pairs of integers of the form (index, size) */ public static List<ChangePoint> findHighPoints(Long[] data, int max_width, int min_height) { List<ChangePoint> rv = new ArrayList<>(); int currIndex = 0; long currHt = data[0]; int currWidth = 0; boolean started = false; if (data.length <= 1) return rv; for (int i = 0; i < data.length; i++) { if (data[i] > currHt && data[i] > min_height) { started = true; currIndex = i; currHt = data[i]; currWidth = 0; } else { if (started) { currWidth++; if (currWidth >= max_width) { started = false; rv.add(new ChangePoint(currHt, currIndex, ChangePoint.ChangePointType.PEAK)); currWidth = 0; currIndex = -1; currHt = 0; } } } } if (started) { rv.add(new ChangePoint(currHt, currIndex, ChangePoint.ChangePointType.PEAK)); } return rv; } /** * Finds places where the data changed dramatically, either sustained or "instantaneously" * * @param data The array of integers in which to search * @return A list of pairs of integers of the form (index, size) */ public static List<ChangePoint> findSignificantPoints(Long[] data, int minChange, double minRatio, double minZScore, int inactiveThreshold, int windowSize) { List<ChangePoint> rv = new ArrayList<>(); rv.addAll(findAndSmoothOverPeaks(data, minChange, minZScore, windowSize)); rv.addAll(findChangePoints(data, minChange, minRatio, minZScore, inactiveThreshold, windowSize)); return rv; } private static List<ChangePoint> findChangePoints(Long[] data, int minChange, double minRatio, double minZScore, int inactiveThreshold, int windowSize) { List<Long> dataList = Arrays.asList(data); ArrayList<ChangePoint> rvList = new ArrayList<>(); for (int i = 2; i < data.length; i++) { int startIndex = Math.max(i - windowSize + 1, 0); Long[] currSlice = dataList.subList(startIndex, i).toArray(new Long[]{}); long nextValue = data[i]; double predicted = linearPredictNext(currSlice); double diff = nextValue - predicted; double zScoreDiff = diff / sd(currSlice); double changeRatio = -1 + (double) (nextValue) / Math.max(predicted, 1.); if (Math.abs(zScoreDiff) > minZScore && Math.abs(diff) > minChange && Math.abs(changeRatio) > minRatio) { ChangePoint.ChangePointType type = chooseTypeForChange((long) mean(currSlice), nextValue, inactiveThreshold); rvList.add(new ChangePoint((int) diff, i, type)); } } return rvList; } private static ChangePoint.ChangePointType chooseTypeForChange(long before, long after, int inactiveThreshold) { if (before > after) { return after > inactiveThreshold ? ChangePoint.ChangePointType.FALL : ChangePoint.ChangePointType.STOP; } else { return before < inactiveThreshold ? ChangePoint.ChangePointType.START : ChangePoint.ChangePointType.RISE; } } private static List<ChangePoint> findAndSmoothOverPeaks(Long[] data, int minChange, double minZscore, int width) { ArrayList<ChangePoint> rvList = new ArrayList<>(); for (int i = 0; i < data.length; i++) { int leftEndpoint = Math.max(0, i - width); int rightEndpoint = Math.min(i + width, data.length); Long[] neighborhood = Arrays.copyOfRange(data, leftEndpoint, rightEndpoint); Long[] neighborhoodWithout = ArrayUtils.addAll(Arrays.copyOfRange(data, leftEndpoint, i), Arrays.copyOfRange(data, i + 1, rightEndpoint)); if (sd(neighborhood) > minZscore * sd(neighborhoodWithout)) { double change = data[i] - mean(neighborhoodWithout); if (Math.abs(change) > minChange) { rvList.add(new ChangePoint((int) change, i, ChangePoint.ChangePointType.PEAK)); data[i] = (long) mean(neighborhoodWithout); } } } return rvList; } private static int sum(Long[] longs) { int rv = 0; for (Long z : longs) { rv += z; } return rv; } public static double mean(Long[] longs) { return (double) (sum(longs)) / longs.length; } private static double sd(Long[] longs) { double mean = mean(longs); double sumSquareResiduals = 0; for (long z : longs) { sumSquareResiduals += Math.pow(mean - z, 2); } return Math.max(Math.sqrt(sumSquareResiduals), .0001); } private static double linearPredictNext(Long[] ints) { double slope; double intercept; int len = ints.length; Long[] xx = new Long[len]; Long[] xy = new Long[len]; for (int i = 0; i < len; i++) { xx[i] = (long) (i * i); xy[i] = i * ints[i]; } double meanx = .5 * (len - 1.); slope = (mean(xy) - meanx * mean(ints)) / (mean(xx) - Math.pow(meanx, 2)); intercept = mean(ints) - slope * meanx; return slope * ints.length + intercept; } }