AggregateSumOperator.java example

Explorer
squall-master
/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.operators;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import ch.epfl.data.squall.expressions.Subtraction;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;

import ch.epfl.data.squall.expressions.Addition;
import ch.epfl.data.squall.expressions.ValueExpression;
import ch.epfl.data.squall.expressions.ValueSpecification;
import ch.epfl.data.squall.storage.AggregationStore;
import ch.epfl.data.squall.storage.BasicStore;
import ch.epfl.data.squall.storage.WindowAggregationStore;
import ch.epfl.data.squall.types.NumericType;
import ch.epfl.data.squall.types.Type;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.visitors.OperatorVisitor;
import ch.epfl.data.squall.window_semantics.WindowSemanticsManager;

public class AggregateSumOperator<T extends Number & Comparable<T>> extends OneToOneOperator implements
	AggregateOperator<T>, AggregateStream {
    private static final long serialVersionUID = 1L;
    private static Logger LOG = Logger.getLogger(AggregateSumOperator.class);

    // the GroupBy type
    private static final int GB_UNSET = -1;
    private static final int GB_COLUMNS = 0;
    private static final int GB_PROJECTION = 1;

    private DistinctOperator _distinct;
    private int _groupByType = GB_UNSET;
    private List<Integer> _groupByColumns = new ArrayList<Integer>();
    private ProjectOperator _groupByProjection;
    private int _numTuplesProcessed = 0;

    private final NumericType _wrapper;
    private final ValueExpression<T> _ve;
    private BasicStore<T> _storage;

    private final Map _map;

    private int _windowRangeSecs = -1;
    private int _slideRangeSecs = -1;

    public AggregateSumOperator(ValueExpression<T> ve, Map map) {
	_wrapper = (NumericType) ve.getType();
	_ve = ve;
	_map = map;
	_storage = new AggregationStore<T>(this, _wrapper, _map, true);
    }

    @Override
    public void accept(OperatorVisitor ov) {
	ov.visit(this);
    }

    private boolean alreadySetOther(int GB_COLUMNS) {
	return (_groupByType != GB_COLUMNS && _groupByType != GB_UNSET);
    }

    @Override
    public void clearStorage() {
	_storage.reset();
    }

    // for this method it is essential that HASH_DELIMITER, which is used in
    // tupleToString method,
    // is the same as DIP_GLOBAL_ADD_DELIMITER
    @Override
    public List<String> getContent() {
	final String str = _storage.getContent();
	return str == null ? null : Arrays.asList(str.split("\\r?\\n"));
    }

    @Override
    public DistinctOperator getDistinct() {
	return _distinct;
    }

    @Override
    public List<ValueExpression> getExpressions() {
	final List<ValueExpression> result = new ArrayList<ValueExpression>();
	result.add(_ve);
	return result;
    }

    @Override
    public List<Integer> getGroupByColumns() {
	return _groupByColumns;
    }

    @Override
    public ProjectOperator getGroupByProjection() {
	return _groupByProjection;
    }

    private String getGroupByStr() {
	final StringBuilder sb = new StringBuilder();
	sb.append("(");
	for (int i = 0; i < _groupByColumns.size(); i++) {
	    sb.append(_groupByColumns.get(i));
	    if (i == _groupByColumns.size() - 1)
		sb.append(")");
	    else
		sb.append(", ");
	}
	return sb.toString();
    }

    @Override
    public int getNumTuplesProcessed() {
	return _numTuplesProcessed;
    }

    @Override
    public BasicStore getStorage() {
	return _storage;
    }

    @Override
    public Type getType() {
	return _wrapper;
    }

    @Override
    public boolean hasGroupBy() {
	return _groupByType != GB_UNSET;
    }

    @Override
    public boolean isBlocking() {
	return true;
    }

    @Override
    public String printContent() {
	return _storage.getContent();
    }

    // from Operator
    @Override
    public List<String> processOne(List<String> tuple, long lineageTimestamp) {
	_numTuplesProcessed++;
	if (_distinct != null) {
	    tuple = _distinct.processOne(tuple, lineageTimestamp);
	    if (tuple == null)
		return null;
	}
	String tupleHash;
	if (_groupByType == GB_PROJECTION)
	    tupleHash = MyUtilities.createHashString(tuple, _groupByColumns,
		    _groupByProjection.getExpressions(), _map);
	else
	    tupleHash = MyUtilities.createHashString(tuple, _groupByColumns,
		    _map);
	final T value = _storage.update(tuple, tupleHash, lineageTimestamp);
	final String strValue = _wrapper.toString(value);

	// propagate further the affected tupleHash-tupleValue pair
	final List<String> affectedTuple = new ArrayList<String>();
	affectedTuple.add(tupleHash);
	affectedTuple.add(strValue);

	return affectedTuple;
    }

    @Override
    public List<List<String>> updateStream(List<String> tuple, boolean withMultiplicity) {
        _numTuplesProcessed++;
        if (_distinct != null) {
            tuple = _distinct.processOne(tuple, 0);
            if (tuple == null)
                return null;
        }
        String tupleHash;
        if (_groupByType == GB_PROJECTION)
            tupleHash = MyUtilities.createHashString(tuple, _groupByColumns,
                    _groupByProjection.getExpressions(), _map);
        else
            tupleHash = MyUtilities.createHashString(tuple, _groupByColumns,
                    _map);

        // might consider to split the tupleHash based on delimiter instead as this does not take into account GB_PROJECTION
        final List<String> tupleKey = new ArrayList<>(_groupByColumns.size());
        for (int i = 0; i < _groupByColumns.size(); i++)
            tupleKey.add(tuple.get(_groupByColumns.get(i)));

        // get old value
        T oldValue = null;
        List<T> currentValues = _storage.access(tupleHash);
        if (currentValues != null && currentValues.size() > 0) {
            oldValue = currentValues.get(0);
        }

        // new value after process
        final T newValue = _storage.update(tuple, tupleHash, 0);

        if (withMultiplicity) {
            List<List<String>> output = new ArrayList<List<String>>();
            if (oldValue != null) {
                List<String> oldTuple = createUpdateTuple("-1", tupleKey, _wrapper.toString(oldValue));
                output.add(oldTuple);
            }
            List<String> newTuple = createUpdateTuple("1", tupleKey, _wrapper.toString(newValue));
            output.add(newTuple);
            return output;

        } else {
            Subtraction<T> s = new Subtraction<T>(new ValueSpecification<T>(_wrapper, newValue),
                                                    new ValueSpecification<T>(_wrapper, oldValue));
            T delta = s.eval(null);
            return Arrays.asList(createUpdateTuple(null, tupleKey, _wrapper.toString(delta)));
        }

    }

    private List<String> createUpdateTuple(String multiplicity, List<String> tupleKey, String tupleValue) {
        List<String> updateTuple = new ArrayList<String>();

        updateTuple.addAll(tupleKey);
        updateTuple.add(tupleValue);

        if (multiplicity != null)
            updateTuple.add(multiplicity);
        return updateTuple;
    }

    // actual operator implementation
    @Override
    public T runAggregateFunction(T value, List<String> tuple) {
	final ValueExpression<T> base = new ValueSpecification<T>(_wrapper,
		value);
	final Addition<T> result = new Addition<T>(base, _ve);
	return result.eval(tuple);
    }

    @Override
    public T runAggregateFunction(T value1, T value2) {
	final ValueExpression<T> ve1 = new ValueSpecification<T>(_wrapper,
		value1);
	final ValueExpression<T> ve2 = new ValueSpecification<T>(_wrapper,
		value2);
	final Addition<T> result = new Addition<T>(ve1, ve2);
	return result.eval(null);
    }

    @Override
    public AggregateSumOperator setDistinct(DistinctOperator distinct) {
	_distinct = distinct;
	return this;
    }

    @Override
    public AggregateSumOperator<T> setGroupByColumns(int... hashIndexes) {
	return setGroupByColumns(Arrays
		.asList(ArrayUtils.toObject(hashIndexes)));
    }

    // from AgregateOperator
    @Override
    public AggregateSumOperator<T> setGroupByColumns(
	    List<Integer> groupByColumns) {
	if (!alreadySetOther(GB_COLUMNS)) {
	    _groupByType = GB_COLUMNS;
	    _groupByColumns = groupByColumns;
	    _storage.setSingleEntry(false);
	    return this;
	} else
	    throw new RuntimeException("Aggragation already has groupBy set!");
    }

    @Override
    public AggregateSumOperator setGroupByProjection(
	    ProjectOperator groupByProjection) {
	if (!alreadySetOther(GB_PROJECTION)) {
	    _groupByType = GB_PROJECTION;
	    _groupByProjection = groupByProjection;
	    _storage.setSingleEntry(false);
	    return this;
	} else
	    throw new RuntimeException("Aggragation already has groupBy set!");
    }

    @Override
    public String toString() {
	final StringBuilder sb = new StringBuilder();
	sb.append("AggregateSumOperator with VE: ");
	sb.append(_ve.toString());
	if (_groupByColumns.isEmpty() && _groupByProjection == null)
	    sb.append("\n  No groupBy!");
	else if (!_groupByColumns.isEmpty())
	    sb.append("\n  GroupByColumns are ").append(getGroupByStr())
		    .append(".");
	else if (_groupByProjection != null)
	    sb.append("\n  GroupByProjection is ")
		    .append(_groupByProjection.toString()).append(".");
	if (_distinct != null)
	    sb.append("\n  It also has distinct ").append(_distinct.toString());
	return sb.toString();
    }

    @Override
    public AggregateOperator<T> SetWindowSemantics(int windowRangeInSeconds,
	    int windowSlideInSeconds) {
	WindowSemanticsManager._IS_WINDOW_SEMANTICS = true;
	_windowRangeSecs = windowRangeInSeconds;
	_slideRangeSecs = windowSlideInSeconds;
	_storage = new WindowAggregationStore<>(this, _wrapper, _map, true,
		_windowRangeSecs, _slideRangeSecs);
	if (_groupByColumns != null || _groupByProjection != null)
	    _storage.setSingleEntry(false);
	return this;
    }

    @Override
    public AggregateOperator<T> SetWindowSemantics(int windowRangeInSeconds) {
	return SetWindowSemantics(windowRangeInSeconds, windowRangeInSeconds);
    }

    @Override
    public int[] getWindowSemanticsInfo() {
	int[] res = new int[2];
	res[0] = _windowRangeSecs;
	res[1] = _slideRangeSecs;
	return res;
    }

}