/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.ewh.data_structures;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;
import org.ujmp.core.Matrix;
import org.ujmp.core.enums.FileFormat;
import org.ujmp.core.matrix.AbstractMatrix;

import ch.epfl.data.squall.ewh.visualize.VisualizerInterface;
import ch.epfl.data.squall.predicates.ComparisonPredicate;
import ch.epfl.data.squall.types.NumericType;
import ch.epfl.data.squall.utilities.MyUtilities;

/**
 * Base class for join matrices backed by a UJMP matrix. Besides the cell
 * values it keeps, for each dimension, the list of join-attribute keys by
 * position, and (once {@link #precomputeFrequencies()} has been called) the
 * per-key frequency and first position.
 *
 * @param <JAT>
 *            Join Attribute Type
 */
public abstract class JoinMatrix<JAT extends Comparable<JAT>> implements
        SimpleMatrix {
    private static final Logger LOG = Logger.getLogger(JoinMatrix.class);

    // matrix could be Boolean, but we will later on add region boundaries in
    // other colors
    protected AbstractMatrix _ujmpMatrix;
    protected List<Region> _regions;

    // join-attribute key stored at each X / Y position
    protected List<JAT> _joinAttributeX = new ArrayList<JAT>();
    protected List<JAT> _joinAttributeY = new ArrayList<JAT>();

    protected ComparisonPredicate _cp; // for finding candidate regions

    // We did not want to add overhead to the setElement method; that is why a
    // user needs to set it (EWHSampleMatrixBolt.scaleOutput).
    // Stays -1 until setTotalNumOutput is called.
    private int _totalNumOutputs = -1;

    // Both pairs of maps are null until precomputeFrequencies() is called.
    protected Map<JAT, Integer> _freqX, _freqY; // key, frequency
    protected Map<JAT, Integer> _keyXFirstPos, _keyYFirstPos; // key, firstPosition

    protected NumericType _wrapper;
    protected String _matrixName, _matrixPath;

    /** @return the comparison predicate stored in this matrix */
    public ComparisonPredicate getComparisonPredicate() {
        return _cp;
    }

    /** @return the numeric type wrapper stored in this matrix */
    public NumericType getWrapper() {
        return _wrapper;
    }

    /**
     * @return an iterator over the coordinates the underlying UJMP matrix
     *         reports as available
     */
    public Iterator<long[]> getNonEmptyCoordinatesIterator() {
        /*
         * Alternatives: _ujmpMatrix.availableCoordinates
         * _ujmpMatrix.nonZeroCoordinates _ujmpMatrix.allValues
         */
        return _ujmpMatrix.availableCoordinates().iterator();
    }

    /**
     * Appends {@code key} as the join attribute of the next X position.
     *
     * @param key
     *            join-attribute value to append
     */
    public void setJoinAttributeX(JAT key) {
        _joinAttributeX.add(key);
    }

    /**
     * Appends {@code key} as the join attribute of the next Y position.
     *
     * @param key
     *            join-attribute value to append
     */
    public void setJoinAttributeY(JAT key) {
        _joinAttributeY.add(key);
    }

    /**
     * @param position
     *            index into the X join-attribute list
     * @return the join attribute stored at that X position
     */
    public JAT getJoinAttributeX(int position) {
        return _joinAttributeX.get(position);
    }

    /**
     * @param position
     *            index into the Y join-attribute list
     * @return the join attribute stored at that Y position
     */
    public JAT getJoinAttributeY(int position) {
        return _joinAttributeY.get(position);
    }

    /**
     * Rebuilds, for both dimensions, the key-to-frequency and
     * key-to-first-position maps by scanning positions
     * {@code 0 .. getXSize() - 1} and {@code 0 .. getYSize() - 1}. Must be
     * called before any of the frequency / first-position getters.
     */
    public void precomputeFrequencies() {
        // for x
        _freqX = new HashMap<JAT, Integer>();
        _keyXFirstPos = new HashMap<JAT, Integer>();
        final int xSize = getXSize();
        for (int i = 0; i < xSize; i++) {
            addElement(_freqX, _keyXFirstPos, getJoinAttributeX(i), i);
        }

        // for y
        _freqY = new HashMap<JAT, Integer>();
        _keyYFirstPos = new HashMap<JAT, Integer>();
        final int ySize = getYSize();
        for (int j = 0; j < ySize; j++) {
            addElement(_freqY, _keyYFirstPos, getJoinAttributeY(j), j);
        }
    }

    /**
     * Records one more occurrence of {@code joinAttribute}. The first time a
     * key is seen its position is also remembered in {@code keyFirstPos}.
     */
    private void addElement(Map<JAT, Integer> freqList,
            Map<JAT, Integer> keyFirstPos, JAT joinAttribute, int position) {
        final Integer currentFreq = freqList.get(joinAttribute);
        if (currentFreq == null) {
            freqList.put(joinAttribute, 1);
            keyFirstPos.put(joinAttribute, position);
        } else {
            freqList.put(joinAttribute, currentFreq + 1);
        }
    }

    /**
     * @param key
     *            join-attribute value to look up
     * @return how many X positions carry {@code key}; 0 if the key was not
     *         seen by {@link #precomputeFrequencies()}
     * @throws RuntimeException
     *             if {@link #precomputeFrequencies()} has not been called
     */
    public int getNumXElements(JAT key) {
        if (_freqX == null) {
            throw new RuntimeException(
                    "Method precomputeFrequencies() must be called before this method (getNumXElements)!");
        }
        final Integer freq = _freqX.get(key);
        // an absent key simply occurs zero times (avoids an unboxing NPE)
        return (freq == null) ? 0 : freq.intValue();
    }

    /**
     * @param key
     *            join-attribute value to look up
     * @return how many Y positions carry {@code key}; 0 if the key was not
     *         seen by {@link #precomputeFrequencies()}
     * @throws RuntimeException
     *             if {@link #precomputeFrequencies()} has not been called
     */
    public int getNumYElements(JAT key) {
        if (_freqY == null) {
            throw new RuntimeException(
                    "Method precomputeFrequencies() must be called before this method (getNumYElements)!");
        }
        final Integer freq = _freqY.get(key);
        // an absent key simply occurs zero times (avoids an unboxing NPE)
        return (freq == null) ? 0 : freq.intValue();
    }

    /**
     * @param key
     *            join-attribute value to look up
     * @return the first X position at which {@code key} was seen by
     *         {@link #precomputeFrequencies()}
     * @throws RuntimeException
     *             if {@link #precomputeFrequencies()} has not been called
     * @throws NullPointerException
     *             if {@code key} was not seen (the lookup result is unboxed)
     */
    public int getXFirstKeyPosition(JAT key) {
        if (_keyXFirstPos == null) {
            throw new RuntimeException(
                    "Method precomputeFrequencies() must be called before this method (getXFirstKeyPosition)!");
        }
        return _keyXFirstPos.get(key);
    }

    /**
     * @param key
     *            join-attribute value to look up
     * @return the first Y position at which {@code key} was seen by
     *         {@link #precomputeFrequencies()}
     * @throws RuntimeException
     *             if {@link #precomputeFrequencies()} has not been called
     * @throws NullPointerException
     *             if {@code key} was not seen (the lookup result is unboxed)
     */
    public int getYFirstKeyPosition(JAT key) {
        if (_keyYFirstPos == null) {
            throw new RuntimeException(
                    "Method precomputeFrequencies() must be called before this method (getYFirstKeyPosition)!");
        }
        return _keyYFirstPos.get(key);
    }

    /**
     * Counts the occurrences of {@code key} that are adjacent to the X borders
     * of {@code region}: first the run starting at {@code x1} going up, then
     * (over the positions not yet consumed) the run starting at {@code x2}
     * going down. We are always asking for the frequency on one of the
     * boundaries of the region.
     *
     * @param key
     *            join-attribute value to count
     * @param region
     *            region whose X range {@code [x1, x2]} is inspected
     * @return the count, together with {@code x1} if no match was found from
     *         the upper border, or otherwise the lowest position of the run
     *         found from the upper border
     */
    public FrequencyPosition getXFreqPos(JAT key, Region region) {
        int lowerPos = region.get_x1();
        int upperPos = region.get_x2();
        int freq = 0;
        // correct as long as the key (if present) starts at the lower border
        int smallestKeyPosition = lowerPos;

        // run of matches growing from the lower border
        while ((lowerPos <= upperPos)
                && getJoinAttributeX(lowerPos).equals(key)) {
            freq++;
            lowerPos++;
        }

        // run of matches growing from the upper border
        while ((lowerPos <= upperPos)
                && getJoinAttributeX(upperPos).equals(key)) {
            freq++;
            smallestKeyPosition = upperPos;
            upperPos--;
        }

        return new FrequencyPosition(freq, smallestKeyPosition);
    }

    /**
     * Counts the occurrences of {@code key} that are adjacent to the Y borders
     * of {@code region}: first the run starting at {@code y1} going up, then
     * (over the positions not yet consumed) the run starting at {@code y2}
     * going down. We are always asking for the frequency on one of the
     * boundaries of the region.
     *
     * @param key
     *            join-attribute value to count
     * @param region
     *            region whose Y range {@code [y1, y2]} is inspected
     * @return the count, together with {@code y1} if no match was found from
     *         the upper border, or otherwise the lowest position of the run
     *         found from the upper border
     */
    public FrequencyPosition getYFreqPos(JAT key, Region region) {
        int lowerPos = region.get_y1();
        int upperPos = region.get_y2();
        int freq = 0;
        // correct as long as the key (if present) starts at the lower border
        int smallestKeyPosition = lowerPos;

        // run of matches growing from the lower border
        while ((lowerPos <= upperPos)
                && getJoinAttributeY(lowerPos).equals(key)) {
            freq++;
            lowerPos++;
        }

        // run of matches growing from the upper border
        while ((lowerPos <= upperPos)
                && getJoinAttributeY(upperPos).equals(key)) {
            freq++;
            smallestKeyPosition = upperPos;
            upperPos--;
        }

        return new FrequencyPosition(freq, smallestKeyPosition);
    }

    /** @return the regions currently attached, or {@code null} if none */
    public List<Region> getRegions() {
        return _regions;
    }

    /**
     * @param regions
     *            regions to attach to this matrix
     */
    public void setRegions(List<Region> regions) {
        _regions = regions;
    }

    /** Detaches the regions by resetting the reference to {@code null}. */
    public void clearRegions() {
        _regions = null;
    }

    /**
     * Exports the underlying matrix in sparse CSV format to
     * {@code _matrixPath + "/" + _matrixName}. This is best-effort: a failure
     * is logged at ERROR level and not propagated to the caller.
     */
    public void writeMatrixToFile() {
        // PLT format is the only thing we could use for saving graphs for the
        // papers
        final String path = _matrixPath + "/" + _matrixName;
        try {
            _ujmpMatrix.exportToFile(FileFormat.SPARSECSV, new File(path));
        } catch (Exception exc) {
            // a failed export is an error, not an informational event
            LOG.error("Could not write matrix to file " + path, exc);
        }
    }

    /** @return the underlying UJMP matrix */
    public Matrix getUJMPMatrix() {
        return _ujmpMatrix;
    }

    /**
     * Sums the cell values of all cells inside {@code region} (borders
     * inclusive).
     *
     * @param region
     *            region to sum over
     * @return the sum of {@link #getElement(int, int)} over the region
     */
    // The cost of WeightPrecomputation is O(n^2)
    // This is cheaper as regions do not cover the entire matrix (large portions
    // of zero-cells are not covered)
    // Cost of this is O(C * m), where C is due to the fact that not all
    // elements within regions are candidates
    // Alternatively (and more efficiently), we could go over all output cells
    // and assign them to the appropriate region (O(m))
    // This is not measured in the algorithm execution time, and thus not
    // important
    public int getRegionNumOutputs(Region region) {
        final int xFrom = region.get_x1();
        final int xTo = region.get_x2();
        final int yFrom = region.get_y1();
        final int yTo = region.get_y2();

        int numOutputs = 0;
        for (int i = xFrom; i <= xTo; i++) {
            for (int j = yFrom; j <= yTo; j++) {
                numOutputs += getElement(i, j);
            }
        }
        return numOutputs;
    }

    /**
     * @return the value last passed to {@link #setTotalNumOutput(int)}, or -1
     *         if it was never set
     */
    public int getTotalNumOutputs() {
        // this will work when reading from a file;
        // it won't work when regions are set, but that does not happen for the
        // invocation of this method
        // old slow version: return (int) _ujmpMatrix.getValueSum();
        // I could alternatively change the setElement method, but I wanted to
        // avoid the overheads
        return _totalNumOutputs;
    }

    /**
     * @param totalNumOutputs
     *            total number of outputs to remember for this matrix
     */
    public void setTotalNumOutput(int totalNumOutputs) {
        _totalNumOutputs = totalNumOutputs;
    }

    /**
     * Counts the single-cell regions that
     * {@code MyUtilities.isCandidateRegion} accepts (used for the sample
     * matrix). Each row is scanned starting at the column of the first
     * candidate recorded so far, and scanning of a row stops at the first
     * non-candidate cell that follows a candidate.
     *
     * @param conf
     *            configuration forwarded to
     *            {@code MyUtilities.isCandidateRegion}
     * @return the number of candidate cells found
     */
    public int getNumCandidatesIterate(Map conf) {
        int result = 0;
        int firstCandInLastLine = 0;
        final int xSize = getXSize();
        final int ySize = getYSize();

        for (int i = 0; i < xSize; i++) {
            boolean isFirstInLine = true;
            for (int j = firstCandInLastLine; j < ySize; j++) {
                // a 1x1 region covering exactly cell (i, j)
                final Region region = new Region(i, j, i, j);
                final boolean isCandidate = MyUtilities.isCandidateRegion(
                        this, region, _cp, conf);
                if (isCandidate) {
                    result++;
                    if (isFirstInLine) {
                        firstCandInLastLine = j;
                        isFirstInLine = false;
                    }
                } else if (!isFirstInLine) {
                    // I am right from the candidate area; the first
                    // non-candidate guy means I should switch to the next row
                    break;
                }
            }
        }
        return result;
    }

    // ***********************************************************************
    // from Matrix interface

    @Override
    public abstract long getCapacity();

    /** @return the value count reported by the underlying UJMP matrix */
    @Override
    public long getNumElements() {
        // this will work when reading from a file;
        // it won't work when regions are set, but that does not happen for the
        // invocation of this method
        return _ujmpMatrix.getValueCount();
    }

    /** @return the row count of the underlying matrix, narrowed to int */
    @Override
    public int getXSize() {
        return (int) _ujmpMatrix.getRowCount();
    }

    /** @return the column count of the underlying matrix, narrowed to int */
    @Override
    public int getYSize() {
        return (int) _ujmpMatrix.getColumnCount();
    }

    @Override
    public abstract int getElement(int x, int y);

    @Override
    public abstract void setElement(int value, int x, int y);

    @Override
    public abstract void increment(int x, int y);

    @Override
    public abstract void increase(int delta, int x, int y);

    // ***********************************************************************
    // abstract methods

    public abstract JoinMatrix<JAT> getDeepCopy();

    public abstract void setMinPositiveValue(int x, int y);

    public abstract int getMinPositiveValue();

    public abstract boolean isEmpty(int x, int y);

    public abstract void visualize(VisualizerInterface visualizer);

    public abstract Map getConfiguration();
}