/*
* Copyright (c) 2011-2015 EPFL DATA Laboratory
* Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
*
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.epfl.data.squall.utilities;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Semaphore;
import org.apache.log4j.Logger;
//import frontend.functional.scala.operators.ScalaAggregateOperator;
import ch.epfl.data.squall.expressions.ColumnReference;
import ch.epfl.data.squall.operators.AggregateAvgOperator;
import ch.epfl.data.squall.operators.AggregateOperator;
import ch.epfl.data.squall.operators.AggregateSumOperator;
import ch.epfl.data.squall.operators.Operator;
import ch.epfl.data.squall.storage.AggregationStore;
import ch.epfl.data.squall.storage.BasicStore;
import ch.epfl.data.squall.storm_components.StormComponent;
import ch.epfl.data.squall.types.Type;
/**
 * Collects, merges, prints and validates the full final query result when
 * Squall runs in Local Mode.
 *
 * <p>In Local Mode all the tasks of the last component (the one with
 * {@code hierarchyPosition == StormComponent.FINAL_COMPONENT}) live in a
 * single JVM, so they can share the static state of this class: each task
 * registers its partial aggregation through
 * {@link #localCollectFinalResult}, and just before the topology is killed
 * {@link #localPrintAndCompare} prints the merged result and compares it
 * against the expected-result file. In cluster mode the tasks are separate
 * processes and cannot use this class (they can communicate only through
 * the conf file).
 */
public class LocalMergeResults {
    private static Logger LOG = Logger.getLogger(LocalMergeResults.class);

    // for writing the full final result in Local Mode
    private static int _collectedLastComponents = 0;
    // the number of tuples the componentTask is responsible for (!! not how
    // many tuples are in storage !!)
    private static int _numTuplesProcessed = 0;

    // aggregation merged from all the tasks of the last component
    private static AggregateOperator _computedAgg;
    // aggregation filled from the expected-result file; null when the file
    // could not be read (see fillAggFromResultFile)
    private static AggregateOperator _fileAgg;
    // fair binary semaphore guarding all the mutable static state above
    private static Semaphore _semFullResult = new Semaphore(1, true);
    // one permit released per last-component task that reported its result;
    // waitForResults() blocks on it
    private static Semaphore _semNumResults = new Semaphore(0, true);

    /** Resets all static state so the class can be reused between runs. */
    public static void reset() {
        _collectedLastComponents = 0;
        _numTuplesProcessed = 0;
        _computedAgg = null;
        _fileAgg = null;
        _semFullResult = new Semaphore(1, true);
        _semNumResults = new Semaphore(0, true);
    }

    /**
     * Merges the content of one task's aggregation into the overall computed
     * aggregation. Must be called while holding {@code _semFullResult}.
     *
     * @param lastAgg the aggregation of a single last-component task
     * @param map     the configuration, used to locate the result file
     */
    private static void addMoreResults(AggregateOperator lastAgg, Map map) {
        if (_computedAgg == null) {
            // first task of the last component asked to be added:
            // we create empty aggregations, which we later fill, one from
            // tasks, the other from a file
            _computedAgg = createOverallAgg(lastAgg, map);
            _fileAgg = (AggregateOperator) DeepCopy.copy(_computedAgg);
            fillAggFromResultFile(map);
        }
        if (_computedAgg.getStorage() instanceof AggregationStore) {
            final AggregationStore stor = (AggregationStore) _computedAgg
                    .getStorage();
            stor.addContent((AggregationStore) (lastAgg.getStorage()));
        }
    }

    /**
     * Builds an empty aggregation of the same kind (AVG or SUM) as
     * {@code lastAgg}, re-aggregating over the task outputs: column 0 is the
     * group-by key (if any) and the following column holds the partial value.
     *
     * @param lastAgg a task-level aggregation used as the template
     * @param map     the configuration passed to the new operator
     * @return a fresh operator that merges task-level partial results
     */
    private static AggregateOperator createOverallAgg(
            AggregateOperator lastAgg, Map map) {
        final Type wrapper = lastAgg.getType();
        final AggregateOperator overallAgg;

        // with a group-by, the aggregated value sits after the key column
        final ColumnReference cr;
        if (lastAgg.hasGroupBy())
            cr = new ColumnReference(wrapper, 1);
        else
            cr = new ColumnReference(wrapper, 0);

        if (lastAgg instanceof AggregateAvgOperator)
            overallAgg = new AggregateAvgOperator(cr, map);
        else
            overallAgg = new AggregateSumOperator(cr, map);

        // propagate window semantics, if the template operator has any
        // (assumes getWindowSemanticsInfo() never returns null — as the
        // original code did)
        final int[] wsMetaData = lastAgg.getWindowSemanticsInfo();
        if (wsMetaData[0] > 0)
            overallAgg.SetWindowSemantics(wsMetaData[0], wsMetaData[1]);

        if (lastAgg.hasGroupBy())
            overallAgg.setGroupByColumns(Arrays.asList(0));
        return overallAgg;
    }

    /**
     * Fills {@code _fileAgg} from the expected-result file. On any I/O
     * problem (typically: the file does not exist) {@code _fileAgg} is set to
     * null, which localCompare later reports as "cannot validate".
     */
    private static void fillAggFromResultFile(Map map) {
        try {
            final String path = getResultFilePath(map);
            final List<String> lines = MyUtilities.readFileLinesSkipEmpty(path);
            for (final String line : lines) {
                // we want to catch exactly one space before and after '=';
                // the tuple itself might contain spaces as well
                final List<String> tuple = Arrays.asList(line.split(" = "));
                _fileAgg.process(tuple, -1);
            }
        } catch (final IOException ex) {
            // problem with finding the result file; deliberate best-effort
            _fileAgg = null;
        }
    }

    // getting size information - from path "../test/data/tpch/0.01G",
    // it extracts dataSize = 0.01G
    // For Squall (not in Squall Plan Runner) there is DIP_DB_SIZE,
    // but this method has to be used for PlanRunner as well.
    private static String getDataSizeInfo(Map map) {
        final String path = SystemParameters.getString(map, "DIP_DATA_PATH");
        return MyUtilities.getPartFromEnd(path, 0);
    }

    // this has to be a separate method, because we don't want an Exception if
    // DIP_RESULT_ROOT is not set
    private static String getResultDir(Map map) {
        String resultRoot = "";
        if (SystemParameters.isExisting(map, "DIP_RESULT_ROOT"))
            resultRoot = SystemParameters.getString(map, "DIP_RESULT_ROOT");
        return resultRoot;
    }

    /**
     * @return the expected-result file path, built as
     *         {@code <DIP_RESULT_ROOT>/<schema>/<dataSize>/<DIP_QUERY_NAME>.result}
     */
    public static String getResultFilePath(Map map) {
        final String rootDir = getResultDir(map);
        final String schemaName = getSchemaName(map);
        final String dataSize = getDataSizeInfo(map);
        final String queryName = SystemParameters.getString(map,
                "DIP_QUERY_NAME");
        return rootDir + "/" + schemaName + "/" + dataSize + "/" + queryName
                + ".result";
    }

    /*
     * from "../test/data/tpch/0.01G" as dataPath, return tpch
     */
    private static String getSchemaName(Map map) {
        final String path = SystemParameters.getString(map, "DIP_DATA_PATH");
        return MyUtilities.getPartFromEnd(path, 1);
    }

    // The following 2 methods are crucial for collecting, printing and
    // comparing the results in Local Mode

    /**
     * Registers the final result of one last-component task. Called on the
     * component task level, when all Spouts fully propagated their tuples.
     * No-op in distributed mode or for non-final components.
     *
     * @param lastOperator      the last operator in the task's chain
     * @param hierarchyPosition the task's position in the component hierarchy
     * @param map               the configuration
     * @param log               unused; kept for interface compatibility
     */
    public static void localCollectFinalResult(Operator lastOperator,
            int hierarchyPosition, Map map, Logger log) {
        if ((!SystemParameters.getBoolean(map, "DIP_DISTRIBUTED"))
                && hierarchyPosition == StormComponent.FINAL_COMPONENT) {
            try {
                // prepare it for printing at the end of the execution
                _semFullResult.acquire();
                try {
                    _collectedLastComponents++;
                    _numTuplesProcessed += lastOperator.getNumTuplesProcessed();
                    if (lastOperator instanceof AggregateOperator) {
                        addMoreResults((AggregateOperator) lastOperator, map);
                    }
                } finally {
                    // always release, even if merging throws — otherwise the
                    // remaining last-component tasks would deadlock here
                    _semFullResult.release();
                }
                // signal one more collected result only on success
                _semNumResults.release();
            } catch (final InterruptedException ex) {
                // restore the interrupt flag and preserve the cause
                Thread.currentThread().interrupt();
                throw new RuntimeException(
                        "InterruptedException unexpectedly occured!", ex);
            }
        }
    }

    /**
     * Compares the computed aggregation with the one read from the result
     * file and logs the outcome.
     *
     * @return 0 on match, 1 on mismatch or when the result file is missing
     */
    private static int localCompare(Map map) {
        if (_fileAgg == null) {
            LOG.info("\nCannot validate the result, result file "
                    + getResultFilePath(map) + " does not exist."
                    + "\n Make sure you specified correct DIP_RESULT_ROOT and"
                    + "\n created result file with correct name.");
            return 1;
        }
        if (_computedAgg.getStorage().equals(_fileAgg.getStorage())) {
            LOG.info("\nOK: Expected result achieved for "
                    + SystemParameters.getString(map, "DIP_TOPOLOGY_NAME"));
            return 0;
        } else {
            final StringBuilder sb = new StringBuilder();
            sb.append("\nPROBLEM: Not expected result achieved for ").append(
                    SystemParameters.getString(map, "DIP_TOPOLOGY_NAME"));
            sb.append("\nCOMPUTED: \n").append(_computedAgg.printContent());
            sb.append("\nFROM THE RESULT FILE: \n").append(
                    _fileAgg.printContent());
            LOG.info(sb.toString());
            return 1;
        }
    }

    /** Logs the merged final result together with collection statistics. */
    private static void localPrint(String finalResult, Map map) {
        final StringBuilder sb = new StringBuilder();
        sb.append("\nThe full result for topology ");
        sb.append(SystemParameters.getString(map, "DIP_TOPOLOGY_NAME")).append(
                ".");
        sb.append("\nCollected from ").append(_collectedLastComponents)
                .append(" component tasks of the last component.");
        sb.append("\nAll the tasks of the last component in total received ")
                .append(_numTuplesProcessed).append(" tuples.");
        sb.append("\n").append(finalResult);
        LOG.info(sb.toString());
    }

    /**
     * Prints the merged result and validates it against the result file.
     * Called just before killExecution; only meaningful in local mode, since
     * the tasks are executed in a single process sharing all the classes.
     * We need it due to collectedLastComponents and the lines of the result;
     * in cluster mode, tasks can communicate only through the conf file.
     *
     * @return -1 when nothing was collected, otherwise localCompare's result
     */
    public static int localPrintAndCompare(Map map) {
        if (_computedAgg == null)
            return -1;
        localPrint(_computedAgg.printContent(), map);
        return localCompare(map);
    }

    /**
     * @return the storage of the merged aggregation, or null if no
     *         last-component task has reported a result yet
     */
    public static BasicStore getResults() {
        return (_computedAgg != null) ? _computedAgg.getStorage() : null;
    }

    /**
     * Blocks until {@code howMany} last-component tasks have reported their
     * results via {@link #localCollectFinalResult}.
     *
     * @throws InterruptedException if the waiting thread is interrupted
     */
    public static void waitForResults(int howMany) throws InterruptedException {
        _semNumResults.acquire(howMany);
    }
}