/*
* Copyright (c) 2011-2015 EPFL DATA Laboratory
* Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
*
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.epfl.data.squall.operators;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import ch.epfl.data.squall.expressions.Subtraction;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import ch.epfl.data.squall.expressions.Addition;
import ch.epfl.data.squall.expressions.ValueExpression;
import ch.epfl.data.squall.expressions.ValueSpecification;
import ch.epfl.data.squall.storage.AggregationStore;
import ch.epfl.data.squall.storage.BasicStore;
import ch.epfl.data.squall.storage.WindowAggregationStore;
import ch.epfl.data.squall.types.NumericType;
import ch.epfl.data.squall.types.Type;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.visitors.OperatorVisitor;
import ch.epfl.data.squall.window_semantics.WindowSemanticsManager;
public class AggregateSumOperator<T extends Number & Comparable<T>> extends OneToOneOperator implements
AggregateOperator<T>, AggregateStream {
private static final long serialVersionUID = 1L;
private static Logger LOG = Logger.getLogger(AggregateSumOperator.class);
// the GroupBy type
private static final int GB_UNSET = -1;
private static final int GB_COLUMNS = 0;
private static final int GB_PROJECTION = 1;
private DistinctOperator _distinct;
private int _groupByType = GB_UNSET;
private List<Integer> _groupByColumns = new ArrayList<Integer>();
private ProjectOperator _groupByProjection;
private int _numTuplesProcessed = 0;
private final NumericType _wrapper;
private final ValueExpression<T> _ve;
private BasicStore<T> _storage;
private final Map _map;
private int _windowRangeSecs = -1;
private int _slideRangeSecs = -1;
public AggregateSumOperator(ValueExpression<T> ve, Map map) {
_wrapper = (NumericType) ve.getType();
_ve = ve;
_map = map;
_storage = new AggregationStore<T>(this, _wrapper, _map, true);
}
@Override
public void accept(OperatorVisitor ov) {
ov.visit(this);
}
private boolean alreadySetOther(int GB_COLUMNS) {
return (_groupByType != GB_COLUMNS && _groupByType != GB_UNSET);
}
@Override
public void clearStorage() {
_storage.reset();
}
// for this method it is essential that HASH_DELIMITER, which is used in
// tupleToString method,
// is the same as DIP_GLOBAL_ADD_DELIMITER
@Override
public List<String> getContent() {
final String str = _storage.getContent();
return str == null ? null : Arrays.asList(str.split("\\r?\\n"));
}
@Override
public DistinctOperator getDistinct() {
return _distinct;
}
@Override
public List<ValueExpression> getExpressions() {
final List<ValueExpression> result = new ArrayList<ValueExpression>();
result.add(_ve);
return result;
}
@Override
public List<Integer> getGroupByColumns() {
return _groupByColumns;
}
@Override
public ProjectOperator getGroupByProjection() {
return _groupByProjection;
}
private String getGroupByStr() {
final StringBuilder sb = new StringBuilder();
sb.append("(");
for (int i = 0; i < _groupByColumns.size(); i++) {
sb.append(_groupByColumns.get(i));
if (i == _groupByColumns.size() - 1)
sb.append(")");
else
sb.append(", ");
}
return sb.toString();
}
@Override
public int getNumTuplesProcessed() {
return _numTuplesProcessed;
}
@Override
public BasicStore getStorage() {
return _storage;
}
@Override
public Type getType() {
return _wrapper;
}
@Override
public boolean hasGroupBy() {
return _groupByType != GB_UNSET;
}
@Override
public boolean isBlocking() {
return true;
}
@Override
public String printContent() {
return _storage.getContent();
}
// from Operator
@Override
public List<String> processOne(List<String> tuple, long lineageTimestamp) {
_numTuplesProcessed++;
if (_distinct != null) {
tuple = _distinct.processOne(tuple, lineageTimestamp);
if (tuple == null)
return null;
}
String tupleHash;
if (_groupByType == GB_PROJECTION)
tupleHash = MyUtilities.createHashString(tuple, _groupByColumns,
_groupByProjection.getExpressions(), _map);
else
tupleHash = MyUtilities.createHashString(tuple, _groupByColumns,
_map);
final T value = _storage.update(tuple, tupleHash, lineageTimestamp);
final String strValue = _wrapper.toString(value);
// propagate further the affected tupleHash-tupleValue pair
final List<String> affectedTuple = new ArrayList<String>();
affectedTuple.add(tupleHash);
affectedTuple.add(strValue);
return affectedTuple;
}
@Override
public List<List<String>> updateStream(List<String> tuple, boolean withMultiplicity) {
_numTuplesProcessed++;
if (_distinct != null) {
tuple = _distinct.processOne(tuple, 0);
if (tuple == null)
return null;
}
String tupleHash;
if (_groupByType == GB_PROJECTION)
tupleHash = MyUtilities.createHashString(tuple, _groupByColumns,
_groupByProjection.getExpressions(), _map);
else
tupleHash = MyUtilities.createHashString(tuple, _groupByColumns,
_map);
// might consider to split the tupleHash based on delimiter instead as this does not take into account GB_PROJECTION
final List<String> tupleKey = new ArrayList<>(_groupByColumns.size());
for (int i = 0; i < _groupByColumns.size(); i++)
tupleKey.add(tuple.get(_groupByColumns.get(i)));
// get old value
T oldValue = null;
List<T> currentValues = _storage.access(tupleHash);
if (currentValues != null && currentValues.size() > 0) {
oldValue = currentValues.get(0);
}
// new value after process
final T newValue = _storage.update(tuple, tupleHash, 0);
if (withMultiplicity) {
List<List<String>> output = new ArrayList<List<String>>();
if (oldValue != null) {
List<String> oldTuple = createUpdateTuple("-1", tupleKey, _wrapper.toString(oldValue));
output.add(oldTuple);
}
List<String> newTuple = createUpdateTuple("1", tupleKey, _wrapper.toString(newValue));
output.add(newTuple);
return output;
} else {
Subtraction<T> s = new Subtraction<T>(new ValueSpecification<T>(_wrapper, newValue),
new ValueSpecification<T>(_wrapper, oldValue));
T delta = s.eval(null);
return Arrays.asList(createUpdateTuple(null, tupleKey, _wrapper.toString(delta)));
}
}
private List<String> createUpdateTuple(String multiplicity, List<String> tupleKey, String tupleValue) {
List<String> updateTuple = new ArrayList<String>();
updateTuple.addAll(tupleKey);
updateTuple.add(tupleValue);
if (multiplicity != null)
updateTuple.add(multiplicity);
return updateTuple;
}
// actual operator implementation
@Override
public T runAggregateFunction(T value, List<String> tuple) {
final ValueExpression<T> base = new ValueSpecification<T>(_wrapper,
value);
final Addition<T> result = new Addition<T>(base, _ve);
return result.eval(tuple);
}
@Override
public T runAggregateFunction(T value1, T value2) {
final ValueExpression<T> ve1 = new ValueSpecification<T>(_wrapper,
value1);
final ValueExpression<T> ve2 = new ValueSpecification<T>(_wrapper,
value2);
final Addition<T> result = new Addition<T>(ve1, ve2);
return result.eval(null);
}
@Override
public AggregateSumOperator setDistinct(DistinctOperator distinct) {
_distinct = distinct;
return this;
}
@Override
public AggregateSumOperator<T> setGroupByColumns(int... hashIndexes) {
return setGroupByColumns(Arrays
.asList(ArrayUtils.toObject(hashIndexes)));
}
// from AgregateOperator
@Override
public AggregateSumOperator<T> setGroupByColumns(
List<Integer> groupByColumns) {
if (!alreadySetOther(GB_COLUMNS)) {
_groupByType = GB_COLUMNS;
_groupByColumns = groupByColumns;
_storage.setSingleEntry(false);
return this;
} else
throw new RuntimeException("Aggragation already has groupBy set!");
}
@Override
public AggregateSumOperator setGroupByProjection(
ProjectOperator groupByProjection) {
if (!alreadySetOther(GB_PROJECTION)) {
_groupByType = GB_PROJECTION;
_groupByProjection = groupByProjection;
_storage.setSingleEntry(false);
return this;
} else
throw new RuntimeException("Aggragation already has groupBy set!");
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append("AggregateSumOperator with VE: ");
sb.append(_ve.toString());
if (_groupByColumns.isEmpty() && _groupByProjection == null)
sb.append("\n No groupBy!");
else if (!_groupByColumns.isEmpty())
sb.append("\n GroupByColumns are ").append(getGroupByStr())
.append(".");
else if (_groupByProjection != null)
sb.append("\n GroupByProjection is ")
.append(_groupByProjection.toString()).append(".");
if (_distinct != null)
sb.append("\n It also has distinct ").append(_distinct.toString());
return sb.toString();
}
@Override
public AggregateOperator<T> SetWindowSemantics(int windowRangeInSeconds,
int windowSlideInSeconds) {
WindowSemanticsManager._IS_WINDOW_SEMANTICS = true;
_windowRangeSecs = windowRangeInSeconds;
_slideRangeSecs = windowSlideInSeconds;
_storage = new WindowAggregationStore<>(this, _wrapper, _map, true,
_windowRangeSecs, _slideRangeSecs);
if (_groupByColumns != null || _groupByProjection != null)
_storage.setSingleEntry(false);
return this;
}
@Override
public AggregateOperator<T> SetWindowSemantics(int windowRangeInSeconds) {
return SetWindowSemantics(windowRangeInSeconds, windowRangeInSeconds);
}
@Override
public int[] getWindowSemanticsInfo() {
int[] res = new int[2];
res[0] = _windowRangeSecs;
res[1] = _slideRangeSecs;
return res;
}
}