package net.seninp.jmotif.sax.discord; import java.util.Date; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import net.seninp.jmotif.distance.EuclideanDistance; import net.seninp.jmotif.sax.SAXProcessor; import net.seninp.jmotif.sax.TSProcessor; import net.seninp.jmotif.sax.registry.SlidingWindowMarkerAlgorithm; import net.seninp.jmotif.sax.registry.VisitRegistry; /** * Implements SAX-based discord finder, i.e. HOT-SAX. * * @author psenin */ public class BruteForceDiscordImplementation { private static final Logger LOGGER = LoggerFactory .getLogger(BruteForceDiscordImplementation.class); private static TSProcessor tsProcessor = new TSProcessor(); private static EuclideanDistance ed = new EuclideanDistance(); /** * Brute force discord search implementation. BRUTE FORCE algorithm. * * @param series the data we work with. * @param windowSize the sliding window size. * @param discordCollectionSize the number of discords we look for. * @param marker the marker window algorithm implementation. * @param nThreshold the z-Normalization threshold value. * @return discords. * @throws Exception if error occurs. */ public static DiscordRecords series2BruteForceDiscords(double[] series, Integer windowSize, int discordCollectionSize, SlidingWindowMarkerAlgorithm marker, double nThreshold) throws Exception { DiscordRecords discords = new DiscordRecords(); // init new registry to the full length, but mark the end of it // VisitRegistry globalTrackVisitRegistry = new VisitRegistry(series.length); globalTrackVisitRegistry.markVisited(series.length - windowSize, series.length); int discordCounter = 0; while (discords.getSize() < discordCollectionSize) { LOGGER.debug("currently known discords: {} out of {}", discords.getSize(), discordCollectionSize); // mark start and number of iterations Date start = new Date(); DiscordRecord bestDiscord = findBestDiscordBruteForce(series, windowSize, globalTrackVisitRegistry, nThreshold); bestDiscord.setPayload("#" + discordCounter); Date end = new Date(); // if the discord is null we getting out of the search if (bestDiscord.getNNDistance() == 0.0D || bestDiscord.getPosition() == -1) { LOGGER.debug("breaking the outer search loop, discords found: {} last seen discord: {}" + discords.getSize(), bestDiscord); break; } bestDiscord.setInfo( "position " + bestDiscord.getPosition() + ", NN distance " + bestDiscord.getNNDistance() + ", elapsed time: " + SAXProcessor.timeToString(start.getTime(), end.getTime()) + ", " + bestDiscord.getInfo()); LOGGER.debug("{}", bestDiscord.getInfo()); // collect the result // discords.add(bestDiscord); // and maintain data structures // marker.markVisited(globalTrackVisitRegistry, bestDiscord.getPosition(), windowSize); discordCounter++; } // done deal // return discords; } /** * Finds the best discord. BRUTE FORCE algorithm. * * @param series the data. * @param windowSize the SAX sliding window size. * @param globalRegistry the visit registry to use. * @param nThreshold the z-Normalization threshold value. * @return the best discord with respect to registry. * @throws Exception if error occurs. */ public static DiscordRecord findBestDiscordBruteForce(double[] series, Integer windowSize, VisitRegistry globalRegistry, double nThreshold) throws Exception { Date start = new Date(); long distanceCallsCounter = 0; double bestSoFarDistance = -1.0; int bestSoFarPosition = -1; VisitRegistry outerRegistry = globalRegistry.clone(); int outerIdx = -1; while (-1 != (outerIdx = outerRegistry.getNextRandomUnvisitedPosition())) { // outer loop outerRegistry.markVisited(outerIdx); // check the global visits registry if (globalRegistry.isVisited(outerIdx)) { continue; } double[] candidateSeq = tsProcessor .znorm(tsProcessor.subseriesByCopy(series, outerIdx, outerIdx + windowSize), nThreshold); double nearestNeighborDistance = Double.MAX_VALUE; VisitRegistry innerRegistry = new VisitRegistry(series.length - windowSize); int innerIdx; while (-1 != (innerIdx = innerRegistry.getNextRandomUnvisitedPosition())) { // inner loop innerRegistry.markVisited(innerIdx); if (Math.abs(outerIdx - innerIdx) > windowSize) { // > means they shall not overlap even // over a single point double[] currentSubsequence = tsProcessor.znorm( tsProcessor.subseriesByCopy(series, innerIdx, innerIdx + windowSize), nThreshold); double dist = ed.earlyAbandonedDistance(candidateSeq, currentSubsequence, nearestNeighborDistance); distanceCallsCounter++; if ((!Double.isNaN(dist)) && dist < nearestNeighborDistance) { nearestNeighborDistance = dist; } } } if (!(Double.isInfinite(nearestNeighborDistance)) && nearestNeighborDistance > bestSoFarDistance) { bestSoFarDistance = nearestNeighborDistance; bestSoFarPosition = outerIdx; LOGGER.trace("discord updated: pos {}, dist {}", bestSoFarPosition, bestSoFarDistance); } } Date firstDiscord = new Date(); LOGGER.debug("best discord found at {}, best distance: {}, in {} distance calls: {}", bestSoFarPosition, bestSoFarDistance, SAXProcessor.timeToString(start.getTime(), firstDiscord.getTime()), distanceCallsCounter); DiscordRecord res = new DiscordRecord(bestSoFarPosition, bestSoFarDistance); res.setLength(windowSize); res.setInfo("distance calls: " + distanceCallsCounter); return res; } }