package mil.nga.giat.geowave.mapreduce.output;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.StringUtils;
import mil.nga.giat.geowave.core.store.DataStore;
import mil.nga.giat.geowave.core.store.GeoWaveStoreFinder;
import mil.nga.giat.geowave.core.store.IndexWriter;
import mil.nga.giat.geowave.core.store.adapter.AdapterIndexMappingStore;
import mil.nga.giat.geowave.core.store.adapter.AdapterStore;
import mil.nga.giat.geowave.core.store.adapter.DataAdapter;
import mil.nga.giat.geowave.core.store.adapter.WritableDataAdapter;
import mil.nga.giat.geowave.core.store.adapter.exceptions.MismatchedIndexToAdapterMapping;
import mil.nga.giat.geowave.core.store.index.Index;
import mil.nga.giat.geowave.core.store.index.IndexStore;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions;
import mil.nga.giat.geowave.mapreduce.GeoWaveConfiguratorBase;
import mil.nga.giat.geowave.mapreduce.JobContextAdapterStore;
import mil.nga.giat.geowave.mapreduce.JobContextIndexStore;
/**
* This output format is the preferred mechanism for writing data to GeoWave
* within a map-reduce job.
*/
public class GeoWaveOutputFormat extends
OutputFormat<GeoWaveOutputKey<Object>, Object>
{
// Class token passed as the first argument to every GeoWaveConfiguratorBase
// call made by this output format (presumably used to scope the
// configuration entries to this class -- confirm in GeoWaveConfiguratorBase).
private static final Class<?> CLASS = GeoWaveOutputFormat.class;
// Logger for this output format; also used by the nested GeoWaveRecordWriter.
protected static final Logger LOGGER = LoggerFactory.getLogger(CLASS);
/**
 * Creates the record writer a task attempt uses to write entries to GeoWave.
 * <p>
 * Before the writer is built, every data adapter and index obtained from the
 * job context that does not yet exist in the persistent adapter/index store
 * is added to it. The returned writer is backed by job-context stores that
 * wrap those persistent stores.
 *
 * @param context
 *            the task attempt context the store options, adapters and
 *            indices are read from
 * @return a {@link GeoWaveRecordWriter} bound to the configured data store
 * @throws IOException
 *             wrapping any exception raised while resolving the stores or
 *             registering adapters and indices
 * @throws InterruptedException
 *             declared by the overridden method; this implementation wraps
 *             every exception in an {@link IOException} instead
 */
@Override
public RecordWriter<GeoWaveOutputKey<Object>, Object> getRecordWriter(
        final TaskAttemptContext context )
        throws IOException,
        InterruptedException {
    try {
        final Map<String, String> configOptions = getStoreOptionsMap(context);

        // make sure every adapter configured on the job is persisted
        final AdapterStore persistentAdapterStore = GeoWaveStoreFinder.createAdapterStore(configOptions);
        final DataAdapter<?>[] adapters = JobContextAdapterStore.getDataAdapters(context);
        for (final DataAdapter<?> a : adapters) {
            if (!persistentAdapterStore.adapterExists(a.getAdapterId())) {
                persistentAdapterStore.addAdapter(a);
            }
        }

        final IndexStore persistentIndexStore = GeoWaveStoreFinder.createIndexStore(configOptions);
        final Index[] indices = JobContextIndexStore.getIndices(context);

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug(describeOutputConfiguration(
                    context,
                    configOptions,
                    indices,
                    persistentIndexStore));
        }

        // make sure every index configured on the job is persisted
        for (final Index i : indices) {
            if (!persistentIndexStore.indexExists(i.getId())) {
                persistentIndexStore.addIndex(i);
            }
        }

        final AdapterStore jobContextAdapterStore = new JobContextAdapterStore(
                context,
                persistentAdapterStore);
        final IndexStore jobContextIndexStore = new JobContextIndexStore(
                context,
                persistentIndexStore);
        final DataStore dataStore = GeoWaveStoreFinder.createDataStore(configOptions);
        return new GeoWaveRecordWriter(
                context,
                dataStore,
                jobContextIndexStore,
                jobContextAdapterStore);
    }
    catch (final Exception e) {
        throw new IOException(
                e);
    }
}

/**
 * Builds the debug description of the output configuration: the store
 * options, the number of indices, the persistent index store and the content
 * of the StoreFactoryFamilySpi service file as seen from the context's class.
 */
private static String describeOutputConfiguration(
        final TaskAttemptContext context,
        final Map<String, String> configOptions,
        final Index[] indices,
        final IndexStore persistentIndexStore )
        throws IOException {
    final StringBuilder sbDebug = new StringBuilder();
    sbDebug.append("Config Options: ");
    for (final Map.Entry<String, String> entry : configOptions.entrySet()) {
        sbDebug.append(entry.getKey()).append("/").append(entry.getValue()).append(", ");
    }
    sbDebug.append("\n\tIndices Size: ").append(indices.length);
    sbDebug.append("\n\tpersistentIndexStore: ").append(persistentIndexStore);

    final String filename = "/META-INF/services/mil.nga.giat.geowave.core.store.StoreFactoryFamilySpi";
    final InputStream is = context.getClass().getResourceAsStream(
            filename);
    if (is == null) {
        sbDebug.append("\n\tStoreFactoryFamilySpi: Unable to open file '").append(filename).append("'");
    }
    else {
        // the stream must be released even if reading it fails
        try {
            sbDebug.append("\n\tStoreFactoryFamilySpi: ").append(IOUtils.toString(
                    is,
                    "UTF-8"));
        }
        finally {
            is.close();
        }
    }
    return sbDebug.toString();
}
/**
 * Hands the options map of the given plugin options to
 * {@link GeoWaveConfiguratorBase#setStoreOptionsMap} for this output format.
 * When {@code storeOptions} is {@code null}, a {@code null} map is passed
 * instead.
 *
 * @param config
 *            the job configuration to update
 * @param storeOptions
 *            the data store plugin options, may be {@code null}
 */
public static void setStoreOptions(
        final Configuration config,
        final DataStorePluginOptions storeOptions ) {
    final Map<String, String> optionsMap = (storeOptions == null) ? null : storeOptions.getOptionsAsMap();
    GeoWaveConfiguratorBase.setStoreOptionsMap(
            CLASS,
            config,
            optionsMap);
}
/**
 * Passes the given store option map to
 * {@link GeoWaveConfiguratorBase#setStoreOptionsMap} on behalf of this
 * output format.
 *
 * @param config
 *            the job configuration to update
 * @param storeConfigOptions
 *            the store options as key/value pairs
 */
public static void setStoreOptionsMap(
        final Configuration config,
        final Map<String, String> storeConfigOptions ) {
    GeoWaveConfiguratorBase.setStoreOptionsMap(CLASS, config, storeConfigOptions);
}
/**
 * Registers an index with the job configuration by delegating to
 * {@link JobContextIndexStore#addIndex}.
 *
 * @param config
 *            the job configuration to update
 * @param index
 *            the primary index to register
 */
public static void addIndex(
        final Configuration config,
        final PrimaryIndex index ) {
    JobContextIndexStore.addIndex(config, index);
}
/**
 * Registers a data adapter with the job configuration by delegating to
 * {@link JobContextAdapterStore#addDataAdapter}.
 *
 * @param config
 *            the job configuration to update
 * @param adapter
 *            the data adapter to register
 */
public static void addDataAdapter(
        final Configuration config,
        final DataAdapter<?> adapter ) {
    JobContextAdapterStore.addDataAdapter(config, adapter);
}
/**
 * Obtains the index store for the given job context from
 * {@link GeoWaveConfiguratorBase#getJobContextIndexStore}, using this output
 * format's class.
 *
 * @param context
 *            the job context to read from
 * @return the index store returned by the configurator
 */
public static IndexStore getJobContextIndexStore(
        final JobContext context ) {
    final IndexStore indexStore = GeoWaveConfiguratorBase.getJobContextIndexStore(CLASS, context);
    return indexStore;
}
/**
 * Obtains the adapter store for the given job context from
 * {@link GeoWaveConfiguratorBase#getJobContextAdapterStore}, using this
 * output format's class.
 *
 * @param context
 *            the job context to read from
 * @return the adapter store returned by the configurator
 */
public static AdapterStore getJobContextAdapterStore(
        final JobContext context ) {
    final AdapterStore adapterStore = GeoWaveConfiguratorBase.getJobContextAdapterStore(CLASS, context);
    return adapterStore;
}
/**
 * Obtains the adapter-to-index mapping store for the given job context from
 * {@link GeoWaveConfiguratorBase#getJobContextAdapterIndexMappingStore},
 * using this output format's class.
 *
 * @param context
 *            the job context to read from
 * @return the mapping store returned by the configurator
 */
public static AdapterIndexMappingStore getJobContextAdapterIndexMappingStore(
        final JobContext context ) {
    final AdapterIndexMappingStore mappingStore = GeoWaveConfiguratorBase.getJobContextAdapterIndexMappingStore(
            CLASS,
            context);
    return mappingStore;
}
/**
 * Obtains the data store plugin options for the given job context from
 * {@link GeoWaveConfiguratorBase#getStoreOptions}, using this output
 * format's class.
 *
 * @param context
 *            the job context to read from
 * @return the plugin options returned by the configurator
 */
public static DataStorePluginOptions getStoreOptions(
        final JobContext context ) {
    final DataStorePluginOptions pluginOptions = GeoWaveConfiguratorBase.getStoreOptions(CLASS, context);
    return pluginOptions;
}
/**
 * Obtains the store options as key/value pairs for the given job context
 * from {@link GeoWaveConfiguratorBase#getStoreOptionsMap}, using this output
 * format's class.
 *
 * @param context
 *            the job context to read from
 * @return the option map returned by the configurator
 */
public static Map<String, String> getStoreOptionsMap(
        final JobContext context ) {
    final Map<String, String> optionsMap = GeoWaveConfiguratorBase.getStoreOptionsMap(CLASS, context);
    return optionsMap;
}
/**
 * Verifies that a data store, index store, adapter store and data statistics
 * store can each be created from the store options of the job context. The
 * stores are checked in that order and the first one that resolves to
 * {@code null} is reported.
 *
 * @param context
 *            the job context the store options are read from
 * @throws IOException
 *             with an "Unable to find GeoWave ..." message when a store
 *             resolves to {@code null}, or wrapping the underlying exception
 *             when the lookup itself fails
 * @throws InterruptedException
 *             declared by the overridden method; not thrown by this
 *             implementation
 */
@Override
public void checkOutputSpecs(
        final JobContext context )
        throws IOException,
        InterruptedException {
    // name of the first store that could not be found, or null if all exist
    String missingStore = null;
    try {
        final Map<String, String> configOptions = getStoreOptionsMap(context);
        if (GeoWaveStoreFinder.createDataStore(configOptions) == null) {
            missingStore = "data store";
        }
        else if (GeoWaveStoreFinder.createIndexStore(configOptions) == null) {
            missingStore = "index store";
        }
        else if (GeoWaveStoreFinder.createAdapterStore(configOptions) == null) {
            missingStore = "adapter store";
        }
        else if (GeoWaveStoreFinder.createDataStatisticsStore(configOptions) == null) {
            missingStore = "data statistics store";
        }
    }
    catch (final Exception e) {
        LOGGER.warn(
                "Error finding GeoWave stores",
                e);
        throw new IOException(
                "Error finding GeoWave stores",
                e);
    }

    // thrown outside the try block so the specific message reaches the caller
    // instead of being logged twice and re-wrapped by the generic handler
    if (missingStore != null) {
        final String msg = "Unable to find GeoWave " + missingStore;
        LOGGER.warn(msg);
        throw new IOException(
                msg);
    }
}
/**
 * Supplies the output committer for this format by borrowing the one
 * provided by a freshly created {@link NullOutputFormat}.
 *
 * @param context
 *            the task attempt context handed to the delegate
 * @return the committer returned by {@link NullOutputFormat}
 */
@Override
public OutputCommitter getOutputCommitter(
        final TaskAttemptContext context )
        throws IOException,
        InterruptedException {
    final NullOutputFormat<ByteArrayId, Object> delegate = new NullOutputFormat<ByteArrayId, Object>();
    return delegate.getOutputCommitter(context);
}
/**
* A base class to be used to create {@link RecordWriter} instances that
* write to Accumulo.
*/
protected static class GeoWaveRecordWriter extends
RecordWriter<GeoWaveOutputKey<Object>, Object>
{
private final Map<ByteArrayId, IndexWriter> adapterIdToIndexWriterCache = new HashMap<ByteArrayId, IndexWriter>();
private final AdapterStore adapterStore;
private final IndexStore indexStore;
private final DataStore dataStore;
protected GeoWaveRecordWriter(
final TaskAttemptContext context,
final DataStore dataStore,
final IndexStore indexStore,
final AdapterStore adapterStore ) {
this.dataStore = dataStore;
this.adapterStore = adapterStore;
this.indexStore = indexStore;
}
/**
* Push a mutation into a table. If table is null, the defaultTable will
* be used. If canCreateTable is set, the table will be created if it
* does not exist. The table name must only contain alphanumerics and
* underscore.
*/
@SuppressWarnings({
"unchecked",
"rawtypes"
})
@Override
public void write(
final GeoWaveOutputKey ingestKey,
final Object data )
throws IOException {
boolean success = false;
String errorMessage = null;
if (ingestKey.getIndexIds().isEmpty()) {
throw new IOException(
"Empty index ID input list");
}
final WritableDataAdapter<?> adapter = ingestKey.getAdapter(adapterStore);
if (adapter != null) {
final IndexWriter indexWriter = getIndexWriter(
adapter,
ingestKey.getIndexIds());
if (indexWriter != null) {
List writeList = indexWriter.write(data);
if (!writeList.isEmpty()) {
success = true;
}
else {
errorMessage = "Empty write list";
}
}
else {
errorMessage = "Cannot write to index '"
+ StringUtils.stringFromBinary(ingestKey.getAdapterId().getBytes()) + "'";
}
}
else {
errorMessage = "Adapter '" + StringUtils.stringFromBinary(ingestKey.getAdapterId().getBytes())
+ "' does not exist";
}
if (!success) {
throw new IOException(
errorMessage);
}
}
private synchronized IndexWriter getIndexWriter(
final DataAdapter<?> adapter,
final Collection<ByteArrayId> indexIds )
throws MismatchedIndexToAdapterMapping {
IndexWriter writer = adapterIdToIndexWriterCache.get(adapter.getAdapterId());
if (writer == null) {
final List<PrimaryIndex> indices = new ArrayList<PrimaryIndex>();
for (final ByteArrayId indexId : indexIds) {
final PrimaryIndex index = (PrimaryIndex) indexStore.getIndex(indexId);
if (index != null) {
indices.add(index);
}
else {
LOGGER.warn("Index '" + StringUtils.stringFromBinary(indexId.getBytes()) + "' does not exist");
}
}
writer = dataStore.createWriter(
adapter,
indices.toArray(new PrimaryIndex[indices.size()]));
adapterIdToIndexWriterCache.put(
adapter.getAdapterId(),
writer);
}
return writer;
}
@Override
public synchronized void close(
final TaskAttemptContext attempt )
throws IOException,
InterruptedException {
for (final IndexWriter indexWriter : adapterIdToIndexWriterCache.values()) {
indexWriter.close();
}
}
}
}