/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.ewh.algorithms;

import org.apache.log4j.Logger;

import ch.epfl.data.squall.ewh.data_structures.JoinMatrix;

/* If we receive input tuples:
 *
 * Postponed algorithm:
 * ** Sort them
 * ** Come up with p delimiters on rows and columns
 * ** Go over the candidate cells, and put each output tuple in the right bucket
 * *** O(n^2) complexity
 * *** can be made cheaper, but there is no need: it is not on the critical path
 *
 * Keeping everything sorted (not on the critical path):
 * ** Advantage:
 * *** No need to sort
 * ** Disadvantages:
 * *** The data structure is complicated and replicated:
 * **** It is more complicated than two B-trees, as we need to keep the outputs as well
 * **** Rows: a sorted list (rows) of sorted lists (outputs)
 * **** Columns: a sorted list (columns) of sorted lists (outputs)
 * *** Complexity: O(n log n) to insert, and O(n log n) to join with
 * ** O(n + m) time to count the elements in all buckets; m is the number of output tuples
 *
 * If we receive output tuples:
 *
 * Postponed algorithm:
 * ** Take the input tuples of the output tuple, and do the same as in "If we receive input tuples:"
 *
 * Keeping everything sorted (not on the critical path):
 * ** The same as in "If we receive input tuples:", except that we do not compute the output tuple
 */
public class InputShallowCoarsener extends ShallowCoarsener {
    private static Logger LOG = Logger.getLogger(InputShallowCoarsener.class);

    private int _originalXSize;
    private int _originalYSize;
    private int _numXBuckets, _numYBuckets;
    private int _bucketXSize, _bucketYSize; // the last bucket is slightly bigger

    public InputShallowCoarsener(int numXBuckets, int numYBuckets) {
        _numXBuckets = numXBuckets;
        _numYBuckets = numYBuckets;
    }

    // has to be invoked before all other methods
    @Override
    public void setOriginalMatrix(JoinMatrix originalMatrix, StringBuilder sb) {
        _originalMatrix = originalMatrix;
        _originalXSize = originalMatrix.getXSize();
        _originalYSize = originalMatrix.getYSize();

        // compute the bucket sizes: the last bucket is slightly bigger
        _bucketXSize = _originalXSize / _numXBuckets;
        _bucketYSize = _originalYSize / _numYBuckets;

        // corner case: more buckets than elements in the original matrix
        if (_bucketXSize == 0) {
            _numXBuckets = _originalXSize;
            _bucketXSize = 1;
            sb.append("\nWARNING: The number of X buckets is reduced to the number of rows ")
                    .append(_originalXSize).append("\n");
        }
        if (_bucketYSize == 0) {
            _numYBuckets = _originalYSize;
            _bucketYSize = 1;
            sb.append("\nWARNING: The number of Y buckets is reduced to the number of columns ")
                    .append(_originalYSize).append("\n");
        }

        sb.append("\nFor InputCoarsener, building the rounded matrix is instantaneous!\n");
    }
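    // Worked example (the concrete numbers are illustrative, not taken from the
    // code base): with _originalXSize = 10 and _numXBuckets = 3, we get
    // _bucketXSize = 10 / 3 = 3, so the coarsened rows cover the original rows
    // [0, 2], [3, 5] and [6, 9]; the last bucket absorbs the remainder, which is
    // why it can be slightly bigger.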
    @Override
    public WeightPrecomputation getPrecomputation() {
        // this coarsener does not build a precomputation
        return null;
    }

    @Override
    public int getNumXCoarsenedPoints() {
        return _numXBuckets;
    }

    @Override
    public int getNumYCoarsenedPoints() {
        return _numYBuckets;
    }

    @Override
    public int getOriginalXCoordinate(int cx, boolean isHigher) {
        // the coarsened point cx is included; for isHigher we return the end of its bucket
        // the last bucket falls outside the formula below, as it can be larger
        if (cx == _numXBuckets - 1) {
            if (isHigher) {
                return _originalXSize - 1;
            }
        }
        // non-last bucket
        if (isHigher) {
            // the end of this bucket is the boundary
            cx++;
        }
        int x = cx * _bucketXSize;
        if (isHigher) {
            // the end of this bucket is the boundary
            x--;
        }
        return x;
    }

    @Override
    public int getOriginalYCoordinate(int cy, boolean isHigher) {
        // the coarsened point cy is included; for isHigher we return the end of its bucket
        // the last bucket falls outside the formula below, as it can be larger
        if (cy == _numYBuckets - 1) {
            if (isHigher) {
                return _originalYSize - 1;
            }
        }
        // non-last bucket
        if (isHigher) {
            // the end of this bucket is the boundary
            cy++;
        }
        int y = cy * _bucketYSize;
        if (isHigher) {
            // the end of this bucket is the boundary
            y--;
        }
        return y;
    }

    @Override
    public int getCoarsenedXCoordinate(int x) {
        // the last bucket can be larger, so any x beyond the start of the last
        // bucket maps to the last bucket
        if (x > (_numXBuckets - 1) * _bucketXSize) {
            return _numXBuckets - 1;
        } else {
            return x / _bucketXSize;
        }
    }

    @Override
    public int getCoarsenedYCoordinate(int y) {
        // the last bucket can be larger, so any y beyond the start of the last
        // bucket maps to the last bucket
        if (y > (_numYBuckets - 1) * _bucketYSize) {
            return _numYBuckets - 1;
        } else {
            return y / _bucketYSize;
        }
    }

    @Override
    public String toString() {
        return "InputShallowCoarsener [numXBuckets = " + _numXBuckets
                + ", numYBuckets = " + _numYBuckets + "]";
    }
}
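/*
 * Usage sketch (hypothetical; the bucket counts, matrix sizes and the variable
 * "joinMatrix" are assumptions chosen for illustration, not taken from the code
 * base). Assuming a JoinMatrix with 1000 rows (X) and 800 columns (Y):
 *
 *   StringBuilder sb = new StringBuilder();
 *   InputShallowCoarsener coarsener = new InputShallowCoarsener(50, 40);
 *   coarsener.setOriginalMatrix(joinMatrix, sb);             // must be called first
 *   int cx = coarsener.getCoarsenedXCoordinate(137);         // 137 / 20 = bucket 6
 *   int xLow = coarsener.getOriginalXCoordinate(cx, false);  // 6 * 20 = 120
 *   int xHigh = coarsener.getOriginalXCoordinate(cx, true);  // 7 * 20 - 1 = 139
 */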