package edu.hawaii.jmotif.sax.datastructures;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

/**
 * The SAX data structure. Keeps one {@link SAXFrequencyEntry} per distinct substring (SAX word),
 * keyed by the substring itself, and can render all recorded words as a single string ordered by
 * their positions.
 *
 * @author Pavel Senin.
 *
 */
public class SAXFrequencyData implements Iterable<SAXFrequencyEntry> {

  /** The storage: substring -> its frequency entry. */
  private final HashMap<String, SAXFrequencyEntry> data;

  /** Position -> word index, rebuilt by every {@link #getSAXString(String)} call. */
  private HashMap<Integer, String> positionsAndWords;

  /** All recorded positions in ascending order, rebuilt by every getSAXString call. */
  private ArrayList<Integer> allIndices;

  /**
   * Constructor.
   */
  public SAXFrequencyData() {
    super();
    this.data = new HashMap<String, SAXFrequencyEntry>();
    // start with empty (not null) indexes, so the getters are safe to use before the first
    // getSAXString call
    this.positionsAndWords = new HashMap<Integer, String>();
    this.allIndices = new ArrayList<Integer>();
  }

  /**
   * Put the substring with its index into the storage. A new entry is created for a substring
   * seen for the first time, otherwise the index is added to the existing entry.
   *
   * @param substring The substring value.
   * @param idx The substring entry index.
   */
  public void put(String substring, int idx) {
    SAXFrequencyEntry sfe = this.data.get(substring);
    if (null == sfe) {
      this.data.put(substring, new SAXFrequencyEntry(substring, idx));
    }
    else {
      sfe.put(idx);
    }
  }

  /**
   * Get the internal hash size.
   *
   * @return The number of distinct substrings in the data structure.
   */
  public Integer size() {
    return this.data.size();
  }

  /**
   * Check if the data includes the substring.
   *
   * @param substring The query substring.
   * @return TRUE if the substring is contained, FALSE if not.
   */
  public boolean contains(String substring) {
    return this.data.containsKey(substring);
  }

  /**
   * Get the entry information.
   *
   * @param substring The key to get the entry for.
   * @return The entry containing the substring occurrence frequency information, or null if the
   * substring has never been put into this structure.
   */
  public SAXFrequencyEntry get(String substring) {
    return this.data.get(substring);
  }

  /**
   * Get all entries as a sorted list. The order is the natural ordering of
   * {@link SAXFrequencyEntry} (presumably by the occurrence frequency; see that class'
   * compareTo).
   *
   * @return A new list of all entries, sorted; changes to the list do not affect this structure.
   */
  public List<SAXFrequencyEntry> getSortedFrequencies() {
    List<SAXFrequencyEntry> l = new ArrayList<SAXFrequencyEntry>(this.data.values());
    Collections.sort(l);
    return l;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public Iterator<SAXFrequencyEntry> iterator() {
    return this.data.values().iterator();
  }

  /**
   * Get all SAX subsequences as one string separated by a specified string. The words are ordered
   * by their positions, and the separator is appended after every word, including the last one.
   *
   * As a side effect this call rebuilds the indexes returned by {@link #getPositionsAndWords()}
   * and {@link #getAllIndices()}.
   *
   * @param separator The separator.
   * @return All SAX words as a string, an empty string if nothing is stored.
   */
  public String getSAXString(String separator) {

    // build into locals and publish only when complete, so a failure half-way through never
    // leaves partially filled indexes behind
    //
    // hash mapping, position -> word
    HashMap<Integer, String> wordsByPosition = new HashMap<Integer, String>();
    // all timeseries indexes where a word is mapped to
    ArrayList<Integer> positions = new ArrayList<Integer>();

    // iterate over all the frequency entries filling up above data structures
    //
    for (SAXFrequencyEntry freqEntry : this) {
      List<Integer> entryOccurrences = freqEntry.getEntries();
      String word = freqEntry.getSubstring();
      // save words
      for (int index : entryOccurrences) {
        wordsByPosition.put(index, word);
      }
      // save indexes
      positions.addAll(entryOccurrences);
    }

    // sort by the position
    //
    Collections.sort(positions, new Comparator<Integer>() {
      @Override
      public int compare(Integer int1, Integer int2) {
        return int1.compareTo(int2);
      }
    });

    this.positionsAndWords = wordsByPosition;
    this.allIndices = positions;

    // make a string
    //
    StringBuilder sb = new StringBuilder();
    for (int index : positions) {
      sb.append(wordsByPosition.get(index));
      sb.append(separator);
    }
    return sb.toString();
  }

  /**
   * Get the position -> word mapping built by the last {@link #getSAXString(String)} call. Data
   * put after that call is not reflected until getSAXString is called again.
   *
   * @return The internal (live, not copied) map; empty if getSAXString was never called.
   */
  public HashMap<Integer, String> getPositionsAndWords() {
    return positionsAndWords;
  }

  /**
   * Get the ascending list of all positions built by the last {@link #getSAXString(String)}
   * call. Data put after that call is not reflected until getSAXString is called again.
   *
   * @return The internal (live, not copied) list; empty if getSAXString was never called.
   */
  public ArrayList<Integer> getAllIndices() {
    return allIndices;
  }

}