/*******************************************************************************
* Copyright 2015
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package org.dkpro.lab.engine.impl;
import static org.dkpro.lab.engine.impl.ImportUtil.extractConstraints;
import static org.dkpro.lab.storage.StorageService.CONTEXT_ID_SCHEME;
import static org.dkpro.lab.storage.StorageService.LATEST_CONTEXT_SCHEME;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dkpro.lab.ProgressMeter;
import org.dkpro.lab.Util;
import org.dkpro.lab.conversion.ConversionService;
import org.dkpro.lab.engine.ExecutionException;
import org.dkpro.lab.engine.LifeCycleException;
import org.dkpro.lab.engine.LifeCycleManager;
import org.dkpro.lab.engine.TaskContext;
import org.dkpro.lab.engine.TaskContextFactory;
import org.dkpro.lab.engine.TaskExecutionEngine;
import org.dkpro.lab.engine.TaskExecutionService;
import org.dkpro.lab.logging.LoggingService;
import org.dkpro.lab.storage.StorageService;
import org.dkpro.lab.storage.TaskContextNotFoundException;
import org.dkpro.lab.storage.UnresolvedImportException;
import org.dkpro.lab.storage.impl.PropertiesAdapter;
import org.dkpro.lab.task.BatchTask;
import org.dkpro.lab.task.Dimension;
import org.dkpro.lab.task.FixedSizeDimension;
import org.dkpro.lab.task.ParameterSpace;
import org.dkpro.lab.task.Task;
import org.dkpro.lab.task.TaskContextMetadata;
import org.springframework.dao.DataAccessResourceFailureException;
public class BatchTaskEngine
implements TaskExecutionEngine
{
// Factory used to create the context in which this batch task itself runs; injected by the
// execution service via setContextFactory(TaskContextFactory).
private TaskContextFactory contextFactory;

private final Log log = LogFactory.getLog(getClass());

/**
 * The subtask context IDs produced by this batch task in the order of their production.
 */
public static final String SUBTASKS_KEY = "Subtasks";
/**
 * Execute the given {@link BatchTask}: iterate over its parameter space and run all subtasks
 * for every configuration, recording the IDs of the subtask executions in the
 * {@link #SUBTASKS_KEY} attribute.
 *
 * @param aConfiguration
 *            the task to execute; must be a {@link BatchTask}.
 * @return the ID of the batch task's own context.
 * @throws ExecutionException
 *             if the task is not a {@link BatchTask} or a subtask failed.
 * @throws LifeCycleException
 *             if a lifecycle phase (initialize/begin/complete/destroy) failed.
 */
@Override
public String run(Task aConfiguration)
throws ExecutionException, LifeCycleException
{
if (!(aConfiguration instanceof BatchTask)) {
throw new ExecutionException(
"This engine can only execute [" + BatchTask.class.getName() + "]");
}
// Create persistence service for injection into analysis components
TaskContext ctx = null;
try {
ctx = contextFactory.createContext(aConfiguration);
// Now the setup is complete
ctx.getLifeCycleManager().initialize(ctx, aConfiguration);
// Start recording
ctx.getLifeCycleManager().begin(ctx, aConfiguration);
try {
BatchTask cfg = (BatchTask) aConfiguration;
ParameterSpace parameterSpace = cfg.getParameterSpace();
// Try to calculate the parameter space size.
// NOTE(review): dimensions without a fixed size contribute a factor of 1, so this is
// only a lower-bound estimate used for progress reporting.
int estimatedSize = 1;
for (Dimension<?> d : parameterSpace.getDimensions()) {
if (d instanceof FixedSizeDimension) {
FixedSizeDimension fsd = (FixedSizeDimension) d;
if (fsd.size() > 0) {
estimatedSize *= fsd.size();
}
}
}
// A subtask execution may apply to multiple parameter space coordinates!
Set<String> executedSubtasks = new LinkedHashSet<String>();
ProgressMeter progress = new ProgressMeter(estimatedSize);
for (Map<String, Object> config : parameterSpace) {
// Values fixed on the batch task itself fill in any parameter the current
// parameter space coordinate does not provide.
if (cfg.getConfiguration() != null) {
for (Entry<String, Object> e : cfg.getConfiguration().entrySet()) {
if (!config.containsKey(e.getKey())) {
config.put(e.getKey(), e.getValue());
}
}
}
log.info("== Running new configuration [" + ctx.getId() + "] ==");
List<String> keys = new ArrayList<String>(config.keySet());
for (String key : keys) {
log.info("[" + key + "]: [" + StringUtils
.abbreviateMiddle(Util.toString(config.get(key)), "…", 150) + "]");
}
executeConfiguration(cfg, ctx, config, executedSubtasks);
progress.next();
log.info("Completed configuration " + progress);
}
// Set the subtask property and persist again, so the property is available to
// reports
cfg.setAttribute(SUBTASKS_KEY, executedSubtasks.toString());
cfg.persist(ctx);
}
catch (LifeCycleException e) {
ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
throw e;
}
catch (UnresolvedImportException e) {
// HACK - pass unresolved import exceptions up to the outer batch task
ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
throw e;
}
catch (Throwable e) {
ctx.getLifeCycleManager().fail(ctx, aConfiguration, e);
throw new ExecutionException(e);
}
// End recording (here the reports will be done)
ctx.getLifeCycleManager().complete(ctx, aConfiguration);
return ctx.getId();
}
finally {
if (ctx != null) {
ctx.getLifeCycleManager().destroy(ctx, aConfiguration);
}
}
}
/**
 * Set the factory used to create the context in which this engine runs its batch task.
 *
 * @param aContextFactory
 *            the context factory.
 */
@Override
public void setContextFactory(TaskContextFactory aContextFactory)
{
contextFactory = aContextFactory;
}
/**
 * Execute all subtasks of the given batch task for a single parameter space configuration.
 * Tasks whose imports cannot be resolved yet are deferred and retried; if no progress is
 * possible anymore, the unresolved import is passed up to an outer batch task.
 *
 * @param aConfiguration
 *            the batch task whose subtasks are executed.
 * @param aContext
 *            the context of the current batch task.
 * @param aConfig
 *            the current parameter configuration.
 * @param aExecutedSubtasks
 *            already executed subtasks; IDs of new or reused executions are added here.
 * @throws ExecutionException
 *             if a subtask execution failed.
 * @throws LifeCycleException
 *             if a subtask lifecycle operation failed.
 */
protected void executeConfiguration(BatchTask aConfiguration, TaskContext aContext,
Map<String, Object> aConfig, Set<String> aExecutedSubtasks)
throws ExecutionException, LifeCycleException
{
if (log.isTraceEnabled()) {
// Show all subtasks executed so far
for (String est : aExecutedSubtasks) {
log.trace("-- Already executed: " + est);
}
}
// Set up initial scope used by sub-batch-tasks using the inherited scope. The scope is
// extended as the subtasks of this batch are executed with the present configuration.
// FIXME: That means that sub-batch-tasks in two different configurations cannot see
// each other. Is that intended? Mind that the "executedSubtasks" set is intentionally
// maintained *across* configurations, so maybe the scope should also be maintained
// *across* configurations? - REC 2014-06-15
Set<String> scope = new HashSet<String>();
if (aConfiguration.getScope() != null) {
scope.addAll(aConfiguration.getScope());
}
// Configure subtasks
for (Task task : aConfiguration.getTasks()) {
aContext.getLifeCycleManager().configure(aContext, task, aConfig);
}
Queue<Task> queue = new LinkedList<Task>(aConfiguration.getTasks());
// Tasks deferred since the last successful execution; if a task is deferred again without
// any other task having completed in between, no progress is possible - endless loop.
Set<Task> loopDetection = new HashSet<Task>();
List<UnresolvedImportException> deferralReasons = new ArrayList<UnresolvedImportException>();
while (!queue.isEmpty()) {
Task task = queue.poll();
try {
// Check if a subtask execution compatible with the present configuration
// does already exist ...
TaskContextMetadata execution = getExistingExecution(aConfiguration, aContext, task,
aConfig, aExecutedSubtasks);
if (execution == null) {
// ... otherwise execute it with the present configuration
log.info("Executing task [" + task.getType() + "]");
// set scope here so that the inherited scopes are considered
// set scope here so that tasks added to scope in this loop are considered
if (task instanceof BatchTask) {
((BatchTask) task).setScope(scope);
}
execution = runNewExecution(aContext, task, aConfig, aExecutedSubtasks);
}
else {
log.debug("Using existing execution [" + execution.getId() + "]");
}
// Record new/existing execution
aExecutedSubtasks.add(execution.getId());
scope.add(execution.getId());
loopDetection.clear();
deferralReasons.clear();
}
catch (UnresolvedImportException e) {
// Add task back to queue
log.debug(
"Deferring execution of task [" + task.getType() + "]: " + e.getMessage());
queue.add(task);
// Detect endless loop
if (loopDetection.contains(task)) {
StringBuilder details = new StringBuilder();
for (UnresolvedImportException r : deferralReasons) {
details.append("\n -");
details.append(r.getMessage());
}
// throw an UnresolvedImportException in case there is an outer BatchTask which
// needs to be executed first
throw new UnresolvedImportException(e, details.toString());
}
// Record failed execution
loopDetection.add(task);
deferralReasons.add(e);
}
}
}
/**
 * Locate the latest task execution compatible with the given task configuration.
 *
 * @param aContext
 *            the context of the current batch task.
 * @param aType
 *            the type of the task context to find.
 * @param aDiscriminators
 *            the discriminators of the task context to find.
 * @param aConfig
 *            the current parameter configuration.
 * @throws TaskContextNotFoundException
 *             if a matching task context could not be found.
 * @see ImportUtil#matchConstraints(Map, Map, boolean)
 */
private TaskContextMetadata getLatestExecution(TaskContext aContext, String aType,
        Map<String, String> aDiscriminators, Map<String, Object> aConfig)
{
    ConversionService conversionService = aContext.getConversionService();

    // Render the current parameter configuration as strings so it can be matched against
    // stored discriminators. Values registered with the conversion service use their
    // discriminable representation instead of the plain string rendering.
    Map<String, String> config = new HashMap<String, String>();
    for (Entry<String, Object> entry : aConfig.entrySet()) {
        Object value = entry.getValue();
        if (conversionService.isRegistered(value)) {
            config.put(entry.getKey(), conversionService.getDiscriminableValue(value));
        }
        else {
            config.put(entry.getKey(), Util.toString(value));
        }
    }

    StorageService storage = aContext.getStorageService();
    for (TaskContextMetadata candidate : storage.getContexts(aType, aDiscriminators)) {
        Map<String, String> discriminators = storage
                .retrieveBinary(candidate.getId(), Task.DISCRIMINATORS_KEY,
                        new PropertiesAdapter())
                .getMap();
        // Check if the execution is compatible with the current configuration. To do this,
        // we interpret the discriminators as constraints on the current configuration.
        if (ImportUtil.matchConstraints(discriminators, config, false)) {
            return candidate;
        }
    }

    throw ImportUtil.createContextNotFoundException(aType, aDiscriminators);
}
/**
 * Execute the given task with the given task configuration.
 *
 * @param aContext
 *            the context of the current batch task.
 * @param aTask
 *            the task to be executed.
 * @param aConfig
 *            the current parameter configuration.
 * @param aScope
 *            the scope of task contexts visible to the executed task.
 * @return the context meta data.
 */
protected TaskContextMetadata runNewExecution(TaskContext aContext, Task aTask,
        Map<String, Object> aConfig, Set<String> aScope)
    throws ExecutionException, LifeCycleException
{
    // Run the task through an engine whose context factory records the current parameter
    // configuration and import scope, then look up the execution that was just created.
    TaskExecutionService executionService = aContext.getExecutionService();
    TaskExecutionEngine taskEngine = executionService.createEngine(aTask);
    ScopedTaskContextFactory scopedFactory = new ScopedTaskContextFactory(
            executionService.getContextFactory(), aConfig, aScope);
    taskEngine.setContextFactory(scopedFactory);
    String contextId = taskEngine.run(aTask);
    return aContext.getStorageService().getContext(contextId);
}
/**
 * Locate the latest task execution compatible with the given task configuration.
 *
 * @param aConfiguration
 *            the batch task determining the execution policy.
 * @param aContext
 *            the context of the current batch task.
 * @param aTask
 *            the task whose task context should be found.
 * @param aConfig
 *            the current parameter configuration.
 * @param aScope
 *            the IDs of the executions already made within this aggregate.
 * @return {@code null} if the context could not be found.
 */
protected TaskContextMetadata getExistingExecution(BatchTask aConfiguration,
        TaskContext aContext, Task aTask, Map<String, Object> aConfig, Set<String> aScope)
{
    // Batch tasks are always run again since we do not store discriminators for them
    if (aTask instanceof BatchTask) {
        return null;
    }

    TaskContextMetadata existing;
    try {
        existing = getLatestExecution(aContext, aTask.getType(), aTask.getDescriminators(),
                aConfig);
    }
    catch (TaskContextNotFoundException e) {
        // Task context not found in storage
        return null;
    }

    // If the task was already executed within the scope of this aggregate, do not execute
    // it again. Catching this here saves us from running tasks with the same configuration
    // more than once per aggregate.
    if (aScope.contains(existing.getId())) {
        return existing;
    }

    switch (aConfiguration.getExecutionPolicy()) {
    case RUN_AGAIN:
        // Always run the task again
        return null;
    case USE_EXISTING:
        // If the task was ever executed, do not run it again.
        return existing;
    case ASK_EXISTING:
        // When the user opts to execute again, act as if the context was not found;
        // otherwise reuse the existing context.
        return ask(existing) ? null : existing;
    default:
        throw new IllegalStateException(
                "Unknown executionPolicy [" + aConfiguration.getExecutionPolicy() + "]");
    }
}
/**
 * Interactively ask the user on the console whether the given already-executed task should
 * be executed again.
 *
 * @param aMeta
 *            meta data of the existing task execution.
 * @return {@code true} if the task should be executed again, {@code false} if the existing
 *         execution should be reused. Defaults to {@code true} if standard input is closed.
 * @throws RuntimeException
 *             wrapping any {@link IOException} while reading from standard input.
 */
private boolean ask(TaskContextMetadata aMeta)
{
    try {
        boolean execute = true;
        // Deliberately not closed - closing the reader would close System.in as well.
        InputStreamReader converter = new InputStreamReader(System.in);
        BufferedReader in = new BufferedReader(converter);
        String line = "";
        while (line != null) {
            System.out.println("\n\n[" + aMeta.getType() + "] has already been executed in"
                    + " this configuration. Do you wish to execute it again? (y/n)");
            line = in.readLine();
            if (line == null) {
                // BUGFIX: readLine() returns null at end of stream (e.g. stdin closed or
                // redirected input exhausted). Previously this caused a NullPointerException
                // on toLowerCase(); now we fall back to the default (execute again).
                break;
            }
            line = line.toLowerCase();
            if ("y".equals(line)) {
                execute = true;
                break;
            }
            if ("n".equals(line)) {
                execute = false;
                break;
            }
        }
        return execute;
    }
    catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Context factory which wraps the batch task's own factory and creates
 * {@link ScopedTaskContext}s that carry the current parameter configuration and the set of
 * context IDs a subtask is allowed to import from.
 * <p>
 * All services (lifecycle, logging, storage, execution, conversion) and the context
 * registry are delegated to the wrapped factory; only context creation is specialized.
 */
protected class ScopedTaskContextFactory
extends DefaultTaskContextFactory
{
// Wrapped factory providing all services and the context registry.
private final DefaultTaskContextFactory contextFactory;
// Current parameter configuration; used to resolve "latest" imports in created contexts.
private final Map<String, Object> config;
// IDs of the contexts visible to tasks running in created contexts.
private final Set<String> scope;
public ScopedTaskContextFactory(TaskContextFactory aContextFactory,
Map<String, Object> aConfig, Set<String> aScope)
{
// NOTE(review): assumes the execution service always provides a
// DefaultTaskContextFactory here - the cast fails otherwise. Verify against callers.
contextFactory = (DefaultTaskContextFactory) aContextFactory;
config = aConfig;
scope = aScope;
}
/**
 * Create a {@link ScopedTaskContext} wired to the wrapped factory's services and
 * carrying the current configuration and import scope.
 */
@Override
protected TaskContext createContext(TaskContextMetadata aMetadata)
{
ScopedTaskContext ctx = new ScopedTaskContext(contextFactory);
ctx.setExecutionService(getExecutionService());
ctx.setLifeCycleManager(getLifeCycleManager());
ctx.setStorageService(getStorageService());
ctx.setConversionService(getConversionService());
ctx.setLoggingService(getLoggingService());
ctx.setMetadata(aMetadata);
ctx.setConfig(config);
ctx.setScope(scope);
return ctx;
}
// The remaining methods simply delegate to the wrapped factory.
@Override
public void registerContext(TaskContext aContext)
{
contextFactory.registerContext(aContext);
}
@Override
public void unregisterContext(TaskContext aContext)
{
contextFactory.unregisterContext(aContext);
}
@Override
public String getId()
{
return contextFactory.getId();
}
@Override
public LifeCycleManager getLifeCycleManager()
{
return contextFactory.getLifeCycleManager();
}
@Override
public LoggingService getLoggingService()
{
return contextFactory.getLoggingService();
}
@Override
public StorageService getStorageService()
{
return contextFactory.getStorageService();
}
@Override
public TaskExecutionService getExecutionService()
{
return contextFactory.getExecutionService();
}
@Override
public ConversionService getConversionService()
{
return contextFactory.getConversionService();
}
}
/**
 * Task context which resolves imports against the current parameter configuration and
 * restricts them to contexts within the enclosing batch task's scope.
 */
private class ScopedTaskContext
extends DefaultTaskContext
{
// Current parameter configuration; used to locate the "latest" matching execution.
private Map<String, Object> config;
// IDs of the contexts this task is allowed to import from.
private Set<String> scope;
public ScopedTaskContext(TaskContextFactory aOwner)
{
super(aOwner);
}
public void setConfig(Map<String, Object> aConfig)
{
config = aConfig;
}
public void setScope(Set<String> aScope)
{
scope = aScope;
}
/**
 * Resolve an import URI to a task context. "Latest" URIs are matched against the current
 * parameter configuration; ID URIs are looked up directly. Either way, the resolved
 * context must lie within the batch task's scope.
 *
 * @throws UnresolvedImportException
 *             if no matching context exists or the resolved context is out of scope.
 * @throws DataAccessResourceFailureException
 *             if the URI uses an unknown scheme.
 */
@Override
public TaskContextMetadata resolve(URI aUri)
{
TaskContextMetadata meta;
StorageService storage = getStorageService();
if (LATEST_CONTEXT_SCHEME.equals(aUri.getScheme())) {
// Find the most recent execution of the given type compatible with both the
// constraints encoded in the URI and the current parameter configuration.
Map<String, String> constraints = extractConstraints(aUri);
try {
meta = getLatestExecution(this, aUri.getAuthority(), constraints, config);
}
catch (TaskContextNotFoundException e) {
throw new UnresolvedImportException(this, aUri.toString(), e);
}
}
else if (CONTEXT_ID_SCHEME.equals(aUri.getScheme())) {
// Direct lookup by context ID.
try {
meta = storage.getContext(aUri.getAuthority());
}
catch (TaskContextNotFoundException e) {
throw new UnresolvedImportException(this, aUri.toString(), e);
}
}
else {
throw new DataAccessResourceFailureException(
"Unknown scheme in import [" + aUri + "]");
}
// Imports must not reach outside the batch task's scope, even if the context exists.
if (!scope.contains(meta.getId())) {
throw new UnresolvedImportException(this, aUri.toString(),
"Resolved context [" + meta.getId() + "] not in scope " + scope);
}
return meta;
}
}
}