package eu.europeana.cloud.service.dps.storm.io;
import com.rits.cloning.Cloner;
import eu.europeana.cloud.common.model.dps.TaskState;
import eu.europeana.cloud.service.dps.PluginParameterKeys;
import eu.europeana.cloud.service.dps.storm.AbstractDpsBolt;
import eu.europeana.cloud.service.dps.storm.StormTaskTuple;
import org.apache.storm.task.OutputCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
/**
 * Reads the comma-separated list of datasets stored under
 * {@link PluginParameterKeys#DPS_TASK_INPUT_DATA} and emits every dataset as a separate
 * {@link StormTaskTuple}.
 * <p>
 * When the parameter is missing or contains no non-empty entry, a drop notification is emitted
 * and the task is ended with {@link TaskState#DROPPED}.
 */
public class ReadDatasetsBolt extends AbstractDpsBolt {
    private static final Logger LOGGER = LoggerFactory.getLogger(ReadDatasetsBolt.class);

    /** Separator between datasets: a comma with optional surrounding whitespace. */
    private static final String DATASET_SEPARATOR_REGEX = "\\s*,\\s*";

    /**
     * Constructor of ReadDatasetsBolt.
     */
    public ReadDatasetsBolt() {
    }

    /**
     * Creates an instance with the given collector already assigned. Should be used only in tests.
     *
     * @param outputCollector collector the returned instance will emit to
     * @return a new bolt instance using {@code outputCollector}
     */
    public static ReadDatasetsBolt getTestInstance(OutputCollector outputCollector) {
        ReadDatasetsBolt instance = new ReadDatasetsBolt();
        instance.outputCollector = outputCollector;
        return instance;
    }

    /**
     * No initialisation is required by this bolt.
     */
    @Override
    public void prepare() {
    }

    /**
     * Splits the task input data into single datasets and emits one tuple per dataset.
     * If there is no usable dataset entry, the task is dropped instead.
     *
     * @param t incoming tuple whose parameters carry the dataset list
     */
    @Override
    public void execute(StormTaskTuple t) {
        Map<String, String> parameters = t.getParameters();
        List<String> datasets = parseDataSets(parameters.get(PluginParameterKeys.DPS_TASK_INPUT_DATA));

        if (datasets.isEmpty()) {
            String message = "No URL to retrieve dataset.";
            LOGGER.warn(message);
            emitDropNotification(t.getTaskId(), "", message, t.getParameters().toString());
            endTask(t.getTaskId(), message, TaskState.DROPPED, new Date());
            return;
        }

        // The full list is removed before cloning so that emitted tuples carry only their own dataset.
        t.getParameters().remove(PluginParameterKeys.DPS_TASK_INPUT_DATA);
        emitSingleDataSetFromDataSets(t, datasets);
    }

    /**
     * Turns the raw comma-separated parameter value into a list of dataset entries.
     * A {@code null} value yields an empty list; surrounding whitespace and empty entries
     * (e.g. from {@code "a,,b"} or a blank value) are discarded.
     *
     * @param rawInput raw parameter value, may be {@code null}
     * @return non-null list of non-empty dataset entries, in input order
     */
    private static List<String> parseDataSets(String rawInput) {
        List<String> result = new ArrayList<>();
        if (rawInput == null) {
            return result;
        }
        // trim() first: the separator regex only strips whitespace adjacent to commas.
        for (String candidate : rawInput.trim().split(DATASET_SEPARATOR_REGEX)) {
            if (!candidate.isEmpty()) {
                result.add(candidate);
            }
        }
        return result;
    }

    /**
     * Emits a deep copy of {@code t} for every dataset, with
     * {@link PluginParameterKeys#DATASET_URL} set to that dataset.
     *
     * @param t        template tuple that is cloned for each emission
     * @param dataSets dataset entries to emit
     */
    private void emitSingleDataSetFromDataSets(StormTaskTuple t, List<String> dataSets) {
        // One cloner is enough for the whole loop.
        Cloner cloner = new Cloner();
        for (String dataSet : dataSets) {
            StormTaskTuple stormTaskTuple = cloner.deepClone(t);
            stormTaskTuple.getParameters().put(PluginParameterKeys.DATASET_URL, dataSet);
            outputCollector.emit(inputTuple, stormTaskTuple.toStormTuple());
        }
    }
}