/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program. If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.associations.gsp;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.ListIterator;

/**
 * A {@link Sequence} that additionally keeps, for every item index, an array with the times of
 * the transactions containing that item. The arrays allow looking up occurrences of an item after
 * a given point in time, which is what the containment test under time constraints needs.
 * <p>
 * The per-item arrays are only allocated and filled by {@link #buildAccessStructure()}. It has to
 * be called after the transactions have been added and before
 * {@link #containsSequence(DataSequence, Sequence, CountingInformations)} is used on this
 * sequence.
 *
 * @author Sebastian Land
 */
public class DataSequence extends Sequence {

    private static final long serialVersionUID = -7914523040469057204L;

    /**
     * times[itemIndex] holds one entry per occurrence of that item: the time of the transaction
     * the occurrence belongs to. Rows are null until {@link #buildAccessStructure()} has run.
     */
    private final double[][] times;

    /**
     * Creates an empty data sequence.
     *
     * @param numberOfItems
     *            number of distinct item indices; every item index used in this sequence must be
     *            in the range {@code [0, numberOfItems)}
     */
    public DataSequence(int numberOfItems) {
        super();
        times = new double[numberOfItems][];
    }

    /**
     * Builds the per-item time arrays from the transactions currently stored in this sequence.
     * Any previously built arrays are replaced.
     * <p>
     * The times are stored in the order in which the transactions are iterated.
     * NOTE(review): the lookups return the first stored entry after a given time, so they yield
     * the earliest occurrence only if the transactions are iterated in ascending time order --
     * confirm against the code that fills the sequence.
     */
    public void buildAccessStructure() {
        // first pass: count the occurrences of every item to size the arrays exactly
        int[] itemCount = new int[times.length];
        Iterator<Item> itemIterator = itemIterator();
        while (itemIterator.hasNext()) {
            int itemIndex = itemIterator.next().getIndex();
            itemCount[itemIndex]++;
        }

        for (int i = 0; i < times.length; i++) {
            times[i] = new double[itemCount[i]];
            itemCount[i] = 0; // reset: reused below as the next write position per item
        }

        // second pass: record the transaction time for every item occurrence
        for (Transaction transaction : this) {
            double time = transaction.getTime();
            for (Item item : transaction) {
                int itemIndex = item.getIndex();
                times[itemIndex][itemCount[itemIndex]++] = time;
            }
        }
    }

    /**
     * Searches for occurrences of all items of the given transaction that lie after time
     * {@code t} and whose time span does not exceed the window size.
     *
     * @param findWhat
     *            the candidate transaction whose items have to be found
     * @param t
     *            only occurrences strictly after this time are considered
     * @param countingInformations
     *            supplies the {@code windowSize}
     * @return the set of found times, or {@code null} if at least one item has no occurrence
     *         after the (possibly advanced) search time
     */
    private TransactionSet findTransaction(Transaction findWhat, double t, CountingInformations countingInformations) {
        TransactionSet result = new TransactionSet();
        while (true) {
            for (Item item : findWhat) {
                double foundTime = firstOccurenceAfter(item, t);
                if (Double.isNaN(foundTime)) {
                    return null; // this item does not occur after t anymore
                }
                result.addTimeOfTransaction(foundTime);
            }
            if (result.getEndTime() - result.getStartTime() > countingInformations.windowSize) {
                // The found occurrences do not fit into one window. The item found at the end
                // time has no earlier occurrence after t, so it must be part of any match;
                // hence the search can restart one window size before that end time.
                t = result.getEndTime() - countingInformations.windowSize;
                result.reset();
            } else {
                return result;
            }
        }
    }

    /**
     * Returns the first stored time of the given item that is strictly greater than
     * {@code currentTime}.
     *
     * @param item
     *            the item to look up
     * @param currentTime
     *            exclusive lower bound for the returned time
     * @return the found time, or {@link Double#NaN} if there is none
     */
    private double firstOccurenceAfter(Item item, double currentTime) {
        for (double time : times[item.getIndex()]) {
            if (time > currentTime) {
                return time;
            }
        }
        return Double.NaN;
    }

    /**
     * Tests whether the candidate sequence is contained in the data sequence under the time
     * constraints given by the counting informations. The test alternates between a forward step,
     * which matches candidate transactions one after another, and a backward step, which moves
     * earlier matches to later points in time once a gap constraint is violated.
     *
     * @param data
     *            the data sequence; its access structure must have been built with
     *            {@link #buildAccessStructure()}
     * @param candidate
     *            the candidate sequence to search for
     * @param countingInformations
     *            supplies {@code windowSize}, {@code maxGap} and {@code minGap}
     * @return {@code true} if a match of the complete candidate was found, {@code false}
     *         otherwise
     */
    public static boolean containsSequence(DataSequence data, Sequence candidate, CountingInformations countingInformations) {
        ListIterator<Transaction> candidateIterator = candidate.listIterator();
        LinkedList<TransactionSet> matches = new LinkedList<TransactionSet>();
        ListIterator<TransactionSet> matchesIterator = matches.listIterator();

        // loop until true or false
        double t = Double.NEGATIVE_INFINITY;
        while (true) {
            // forward step
            boolean gapViolated = false;
            while (candidateIterator.hasNext()) {
                Transaction currentTransaction = candidateIterator.next();
                TransactionSet currentSet = data.findTransaction(currentTransaction, t, countingInformations);
                if (currentSet == null) {
                    return false; // candidate not contained
                }
                double difference = currentSet.getEndTime() - t;
                // An empty match list marks the very first transaction: there is no previous
                // match to check the gap against.
                // NOTE(review): unlike the backward step, minGap is not checked here -- confirm
                // that this is intended.
                boolean fits = matches.isEmpty() || difference < countingInformations.maxGap && difference > 0;
                t = currentSet.getEndTime();
                if (!fits) {
                    gapViolated = true;
                    break;
                }
                matchesIterator.add(currentSet);
            }

            // Only a forward pass that ran to the end without a gap violation proves containment.
            // Testing candidateIterator.hasNext() alone is not sufficient: if the violation
            // happens on the last candidate transaction the iterator is exhausted as well.
            if (!gapViolated) {
                return true;
            }

            // backward step
            candidateIterator.previous(); // step back over the transaction that violated the gap
            if (matchesIterator.hasPrevious()) {
                matchesIterator.previous();
            } else {
                return false;
            }
            while (true) {
                Transaction currentTransaction = candidateIterator.previous();
                // search the preceding transaction again, now late enough to be within maxGap
                TransactionSet currentSet = data.findTransaction(currentTransaction, t - countingInformations.maxGap, countingInformations);
                if (currentSet != null) {
                    if (matchesIterator.hasPrevious()) {
                        TransactionSet last = matchesIterator.previous();
                        // difference can be negative because of the window!
                        double difference = currentSet.getStartTime() - last.getEndTime();
                        if (difference < countingInformations.maxGap && difference > countingInformations.minGap && difference > 0d || !candidateIterator.hasPrevious()) {
                            t = currentSet.getEndTime();
                            matchesIterator.add(currentSet);
                            break;
                        } else {
                            // the moved match breaks the gap to its own predecessor: pull that
                            // one up as well in the next iteration
                            t = currentSet.getStartTime();
                        }
                    } else {
                        matchesIterator.add(currentSet);
                        t = currentSet.getEndTime();
                        break;
                    }
                } else {
                    return false; // not contained
                }
            }

            // remove every match stored behind the current cursor position
            while (matchesIterator.hasNext()) {
                matchesIterator.next();
                matchesIterator.remove();
            }
            matchesIterator.previous();
            // this transaction has been matched in the backward step: the forward step has to
            // continue with the following one, not with the transaction itself
            candidateIterator.next();
        }
    }
}