package org.gbif.occurrence.download.inject;
import org.gbif.api.model.occurrence.DownloadFormat;
import org.gbif.api.service.registry.DatasetOccurrenceDownloadUsageService;
import org.gbif.api.service.registry.DatasetService;
import org.gbif.api.service.registry.OccurrenceDownloadService;
import org.gbif.common.search.solr.SolrConfig;
import org.gbif.common.search.solr.SolrModule;
import org.gbif.occurrence.download.conf.WorkflowConfiguration;
import org.gbif.occurrence.download.file.DownloadAggregator;
import org.gbif.occurrence.download.file.DownloadJobConfiguration;
import org.gbif.occurrence.download.file.OccurrenceMapReader;
import org.gbif.occurrence.download.file.dwca.DwcaDownloadAggregator;
import org.gbif.occurrence.download.file.simplecsv.SimpleCsvDownloadAggregator;
import org.gbif.occurrence.download.oozie.DownloadPrepareAction;
import org.gbif.occurrence.download.util.RegistryClientUtil;
import org.gbif.wrangler.lock.LockFactory;
import org.gbif.wrangler.lock.zookeeper.ZooKeeperLockFactory;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.Executors;
import akka.dispatch.ExecutionContextExecutorService;
import akka.dispatch.ExecutionContexts;
import com.google.common.base.Optional;
import com.google.inject.AbstractModule;
import com.google.inject.Provides;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import com.google.inject.name.Names;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.ExponentialBackoffRetry;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
/**
 * Guice module that wires the dependencies required by the occurrence download workflow.
 * <p>
 * Besides the explicit bindings made in {@link #configure()}, the module provides, among others:
 * <ul>
 *   <li>CuratorFramework: an already started, singleton client; it is exposed so that callers can close the
 *   ZooKeeper connections properly.</li>
 *   <li>DownloadAggregator: bound to the implementation that matches the DownloadFormat of the workflow
 *   configuration, when that format is known.</li>
 * </ul>
 */
public final class DownloadWorkflowModule extends AbstractModule {

  public static final String CONF_FILE = "occurrence-download.properties";

  //Prefix for static settings
  public static final String PROPERTIES_PREFIX = "occurrence.download.";

  private static final String PROPERTIES_SOLR_PREFIX = "solr.";

  private static final String LOCKING_PATH = "/runningJobs/";

  // Absent when the module is created without a job configuration; no DownloadJobConfiguration is bound then.
  private final Optional<DownloadJobConfiguration> configuration;

  private final WorkflowConfiguration workflowConfiguration;

  /**
   * Creates a module for the given workflow settings and, optionally, the configuration of a single download job.
   *
   * @param workflowConfiguration settings of the workflow, must not be null
   * @param configuration job configuration to bind; may be null, in which case DownloadJobConfiguration is not bound
   * @throws NullPointerException if workflowConfiguration is null
   */
  public DownloadWorkflowModule(WorkflowConfiguration workflowConfiguration, DownloadJobConfiguration configuration) {
    // Fail at construction time instead of later, inside Guice, when configure() dereferences the settings.
    this.workflowConfiguration = Objects.requireNonNull(workflowConfiguration, "workflowConfiguration can't be null");
    this.configuration = Optional.fromNullable(configuration);
  }

  /**
   * Creates a module for the given workflow settings without a download job configuration.
   *
   * @param workflowConfiguration settings of the workflow, must not be null
   * @throws NullPointerException if workflowConfiguration is null
   */
  public DownloadWorkflowModule(WorkflowConfiguration workflowConfiguration) {
    this(workflowConfiguration, null);
  }

  /**
   * Binds the download settings as named constants, installs the Solr module and registers the workflow classes.
   */
  @Override
  protected void configure() {
    // Every download setting becomes injectable through @Named(<property key>).
    Names.bindProperties(binder(), workflowConfiguration.getDownloadSettings());
    install(new SolrModule(SolrConfig.fromProperties(workflowConfiguration.getDownloadSettings(),
                                                     PROPERTIES_SOLR_PREFIX)));
    bind(OccurrenceMapReader.class);
    bind(DownloadPrepareAction.class);
    bind(WorkflowConfiguration.class).toInstance(workflowConfiguration);
    if (configuration.isPresent()) {
      bind(DownloadJobConfiguration.class).toInstance(configuration.get());
    }
    bind(RegistryClientUtil.class).toInstance(new RegistryClientUtil(workflowConfiguration.getDownloadSettings()));
    bindDownloadFilesBuilding();
  }

  /**
   * Builds and starts the singleton Curator client using the ZooKeeper settings of the download properties.
   *
   * @return a CuratorFramework on which start() has already been invoked
   */
  @Provides
  @Singleton
  CuratorFramework provideCuratorFramework(
    @Named(PROPERTIES_PREFIX + "zookeeper.namespace") String zookeeperNamespace,
    @Named(PROPERTIES_PREFIX + "zookeeper.quorum") String zookeeperConnection,
    @Named(PROPERTIES_PREFIX + "zookeeper.sleep_time") Integer sleepTime,
    @Named(PROPERTIES_PREFIX + "zookeeper.max_retries") Integer maxRetries
  ) {
    CuratorFramework curator = CuratorFrameworkFactory.builder()
      .namespace(zookeeperNamespace)
      .retryPolicy(new ExponentialBackoffRetry(sleepTime, maxRetries))
      .connectString(zookeeperConnection)
      .build();
    curator.start();
    return curator;
  }

  /**
   * Provides the singleton dataset usage service obtained from the registry client utility for the registry URL.
   */
  @Provides
  @Singleton
  DatasetOccurrenceDownloadUsageService provideDatasetOccurrenceDownloadUsageService(
    @Named(DefaultSettings.REGISTRY_URL_KEY) String registryWsUri, RegistryClientUtil registryClientUtil
  ) {
    return registryClientUtil.setupDatasetUsageService(registryWsUri);
  }

  /**
   * Provides the singleton dataset service obtained from the registry client utility for the registry URL.
   */
  @Provides
  @Singleton
  DatasetService provideDatasetService(
    @Named(DefaultSettings.REGISTRY_URL_KEY) String registryWsUri, RegistryClientUtil registryClientUtil
  ) {
    return registryClientUtil.setupDatasetService(registryWsUri);
  }

  /**
   * Provides the singleton occurrence download service obtained from the registry client utility for the
   * registry URL.
   */
  @Provides
  @Singleton
  OccurrenceDownloadService provideOccurrenceDownloadService(
    @Named(DefaultSettings.REGISTRY_URL_KEY) String registryWsUri, RegistryClientUtil registryClientUtil
  ) {
    return registryClientUtil.setupOccurrenceDownloadService(registryWsUri);
  }

  /**
   * Provides an execution context backed by a fixed thread pool of maxThreads threads.
   * The provider is unscoped, hence a new pool is created for every injection.
   */
  @Provides
  ExecutionContextExecutorService provideExecutionContextExecutorService(
    @Named(DefaultSettings.MAX_THREADS_KEY) int maxThreads
  ) {
    return ExecutionContexts.fromExecutorService(Executors.newFixedThreadPool(maxThreads));
  }

  /**
   * Provides a HBase connection created from the default HBaseConfiguration.
   * The provider is unscoped, hence a new connection is created for every injection.
   * NOTE(review): presumably the consumer is responsible for closing the connection - confirm.
   *
   * @throws IOException if the connection can't be created
   */
  @Provides
  Connection provideHBaseConnection() throws IOException {
    return ConnectionFactory.createConnection(HBaseConfiguration.create());
  }

  /**
   * Provides a ZooKeeper based lock factory that uses the shared Curator client, the configured
   * 'max_global_threads' value and the locking path of this module.
   */
  @Provides
  LockFactory provideLock(
    CuratorFramework curatorFramework, @Named(PROPERTIES_PREFIX + "max_global_threads") Integer maxGlobalThreads
  ) {
    return new ZooKeeperLockFactory(curatorFramework, maxGlobalThreads, LOCKING_PATH);
  }

  /**
   * Binds a DownloadAggregator according to the DownloadFormat of the workflow configuration.
   * Nothing is bound when the format is null or is not one of the formats handled here.
   */
  private void bindDownloadFilesBuilding() {
    DownloadFormat downloadFormat = workflowConfiguration.getDownloadFormat();
    if (downloadFormat == null) {
      return;
    }
    if (DownloadFormat.DWCA == downloadFormat) {
      bind(DownloadAggregator.class).to(DwcaDownloadAggregator.class);
    } else if (DownloadFormat.SIMPLE_CSV == downloadFormat) {
      bind(DownloadAggregator.class).to(SimpleCsvDownloadAggregator.class);
    }
  }

  /**
   * Utility class that contains constants and keys set each time the workflow is executed.
   */
  public static final class DynamicSettings {

    //Prefix for dynamic settings usually set each time that workflow is executed
    public static final String WORKFLOW_PROPERTIES_PREFIX = PROPERTIES_PREFIX + "workflow.";

    public static final String DOWNLOAD_FORMAT_KEY = WORKFLOW_PROPERTIES_PREFIX + "format";

    /**
     * Hidden constructor.
     */
    private DynamicSettings() {
      //empty
    }
  }

  /**
   * Utility class that contains configuration keys of common settings.
   */
  public static final class DefaultSettings {

    //Keys that are not prefixed with PROPERTIES_PREFIX
    public static final String NAME_NODE_KEY = "hdfs.namenode";
    public static final String HIVE_DB_KEY = "hive.db";
    public static final String REGISTRY_URL_KEY = "registry.ws.url";
    public static final String API_URL_KEY = "api.url";
    public static final String OCC_HBASE_TABLE_KEY = "hbase.table";

    //Keys prefixed with PROPERTIES_PREFIX
    public static final String MAX_THREADS_KEY = PROPERTIES_PREFIX + "job.max_threads";
    public static final String JOB_MIN_RECORDS_KEY = PROPERTIES_PREFIX + "job.min_records";
    public static final String MAX_RECORDS_KEY = PROPERTIES_PREFIX + "file.max_records";
    public static final String ZK_LOCK_NAME_KEY = PROPERTIES_PREFIX + "zookeeper.lock_name";
    public static final String DOWNLOAD_USER_KEY = PROPERTIES_PREFIX + "ws.username";
    public static final String DOWNLOAD_PASSWORD_KEY = PROPERTIES_PREFIX + "ws.password";
    public static final String DOWNLOAD_LINK_KEY = PROPERTIES_PREFIX + "link";
    public static final String HDFS_OUTPUT_PATH_KEY = PROPERTIES_PREFIX + "hdfsOutputPath";
    public static final String TMP_DIR_KEY = PROPERTIES_PREFIX + "tmp.dir";
    public static final String HIVE_DB_PATH_KEY = PROPERTIES_PREFIX + "hive.hdfs.out";

    /**
     * Hidden constructor.
     */
    private DefaultSettings() {
      //empty
    }
  }
}