/**
* Copyright 2010 TransPac Software, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bixolabs.simpledb;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.RejectedExecutionException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Logger;

import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;

import com.bixolabs.aws.BackoffHttpHandler;
import com.bixolabs.aws.IHttpHandler;
import com.bixolabs.aws.SimpleDB;
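
/**
 * A Hadoop RecordWriter that stores Cascading tuples in Amazon SimpleDB.
 *
 * Items are spread across a set of "shard" domains based on the item name,
 * buffered per shard, and written asynchronously via batch puts so that a slow
 * SimpleDB request doesn't stall the task. Exceptions from the async writes are
 * collected and rethrown from a subsequent write() call or from close().
 */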
public class SimpleDBRecordWriter implements RecordWriter<NullWritable, Tuple> {

    private static final Logger LOGGER = Logger.getLogger(SimpleDBRecordWriter.class);

    // These (currently unused) timeouts would need to be less than the Hadoop job timeout
    // value, as otherwise the job could get killed while waiting for a request to get handled.
    // private static final long REJECTED_EXECUTION_TIMEOUT = 300 * 1000L;
    // private static final long TERMINATION_TIMEOUT = REJECTED_EXECUTION_TIMEOUT;

    private static final int BATCH_WRITE_SIZE = 25;
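
    /**
     * Buffers items destined for a single shard (one SimpleDB domain) and hands
     * them off to the shared executor in batches of up to BATCH_WRITE_SIZE items.
     */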
private class SdbShardWriter {
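        /**
         * Runnable that makes one batch put call against this shard's domain.
         * Any failure is converted to an IOException and queued in _exceptions
         * so the task thread can rethrow it later.
         */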
private class AsyncSdbWriter implements Runnable {
private Map<String, Map<String, String>> _items;
public AsyncSdbWriter(Map<String, Map<String, String>> items) {
_items = items;
}
@Override
public void run() {
try {
// FUTURE KKr - we could skip replacing the item hashvalue attribute, as that
// shouldn't ever change for a given item.
                    Map<String, Set<String>> replaceAttr = new HashMap<String, Set<String>>();
                    for (Map.Entry<String, Map<String, String>> item : _items.entrySet()) {
                        replaceAttr.put(item.getKey(), item.getValue().keySet());
                    }
long startTime = System.currentTimeMillis();
LOGGER.trace(String.format("Updating %s with %d items", _shardName, _items.size()));
_sdb.batchPutAttributes(_shardName, _items, replaceAttr);
LOGGER.trace(String.format("Updated %s with %d items in %dms", _shardName, _items.size(), System.currentTimeMillis() - startTime));
                } catch (Exception e) {
                    LOGGER.error("Error while putting attributes to SimpleDB", e);

                    // Record the failure as an IOException so the task thread can rethrow
                    // it from the next write() call or from close().
                    IOException ioe;
                    if (e instanceof IOException) {
                        ioe = (IOException) e;
                    } else {
                        ioe = new IOException("Error while putting attributes to SimpleDB", e);
                    }

                    _exceptions.add(ioe);
                }
}
}
private final String _shardName;
private final SimpleDB _sdb;
private Map<String, Map<String, String>> _queue;
public SdbShardWriter(SimpleDB sdb, String shardName) {
_shardName = shardName;
_sdb = sdb;
_queue = new LinkedHashMap<String, Map<String, String>>();
}
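
        /**
         * Queues one item's attributes, flushing the queue once it reaches
         * BATCH_WRITE_SIZE entries.
         */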
public void put(String itemName, Map<String, String> attributes) throws IOException {
_queue.put(itemName, attributes);
if (_queue.size() >= BATCH_WRITE_SIZE) {
writeQueue();
}
}
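
        /**
         * Submits a copy of the current queue to the executor for an asynchronous
         * batch write, then clears the queue. Called when the queue fills up, and
         * once more from close() to flush any remainder.
         */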
public void writeQueue() throws IOException {
try {
if (_queue.size() > 0) {
Map<String, Map<String, String>> curQueue = new LinkedHashMap<String, Map<String, String>>(_queue);
LOGGER.trace(String.format("Queuing up %d items for %s", curQueue.size(), _shardName));
_executor.execute(new AsyncSdbWriter(curQueue));
_queue.clear();
}
            } catch (RejectedExecutionException e) {
                String msg = "Async write to SimpleDB rejected";
                LOGGER.error(msg, e);
                throw new IOException(msg, e);
            }
}
}
private String _domainName;
private int _numShards;
private Fields _schemeFields;
private String _itemFieldName;
private List<IOException> _exceptions;
private long _closeTimeout;
private SdbShardWriter[] _shardWriters;
private ThreadedExecutor _executor;
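
    /**
     * Sets up one shard writer per shard, all sharing a single (multi-threaded)
     * HTTP handler and an executor sized by the configured max thread count.
     */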
public SimpleDBRecordWriter(SimpleDBConfiguration sdbConf) {
_domainName = sdbConf.getDomainName();
_numShards = sdbConf.getNumShards();
_schemeFields = sdbConf.getSchemeFields();
_itemFieldName = sdbConf.getItemFieldName();
_closeTimeout = sdbConf.getCloseTimeout();
List<String> shardNames = SimpleDBUtils.getShardNames(_domainName, _numShards);
_shardWriters = new SdbShardWriter[_numShards];
// One handler gets shared across all shards, but it's multi-threaded
IHttpHandler httpHandler = new BackoffHttpHandler(sdbConf.getMaxThreads());
_executor = new ThreadedExecutor(sdbConf.getMaxThreads(), _closeTimeout);
LOGGER.trace(String.format("Creating shard writers for %d shards of table %s", _numShards, _domainName));
for (int i = 0; i < _numShards; i++) {
SimpleDB sdb = new SimpleDB(sdbConf.getSdbHost(), sdbConf.getAccessKeyId(), sdbConf.getSecretAccessKey(), httpHandler);
_shardWriters[i] = new SdbShardWriter(sdb, shardNames.get(i));
}
// We also need to be able to record exceptions that happen during the async writes.
_exceptions = Collections.synchronizedList(new ArrayList<IOException>());
}
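
    /**
     * Converts the tuple into a SimpleDB item: the configured item field becomes
     * the item name (plus a hash attribute used for segmenting the shard), every
     * other non-null field becomes an attribute, and the item is queued on the
     * writer for its shard. Also rethrows any exception recorded by an earlier
     * async write.
     */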
@Override
public void write(NullWritable key, Tuple value) throws IOException {
throwAsyncException();
TupleEntry entry = new TupleEntry(_schemeFields, value);
String itemName = null;
Map<String, String> attributes = new HashMap<String, String>();
for (int i = 0; i < _schemeFields.size(); i++) {
String fieldName = _schemeFields.get(i).toString();
String fieldValue = entry.getString(fieldName);
            if (fieldName.equals(_itemFieldName)) {
                itemName = fieldValue;
            } else if (fieldValue != null) {
                attributes.put(fieldName, fieldValue);
            }
        }

        if (itemName == null) {
            throw new IOException("Tuple has no value for the item name field: " + _itemFieldName);
        }

        // Also add the special attribute we use for segmenting a shard (domain)
        attributes.put(SimpleDBUtils.ITEM_HASH_ATTR_NAME, SimpleDBUtils.getItemHash(itemName));

        int shardIndex = SimpleDBUtils.getShardIndex(itemName, _numShards);
        _shardWriters[shardIndex].put(itemName, attributes);
}
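
    /**
     * Flushes every shard's queue, waits for the outstanding async writes to
     * finish (up to the configured close timeout), and rethrows any exception
     * recorded along the way.
     */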
@Override
public void close(Reporter reporter) throws IOException {
for (int i = 0; i < _numShards; i++) {
_shardWriters[i].writeQueue();
}
try {
if (!_executor.terminate(_closeTimeout)) {
String msg = "Had to do a hard termination of async writes to SimpleDB";
LOGGER.warn(msg);
_exceptions.add(new IOException(msg));
}
        } catch (InterruptedException e) {
            // Restore the interrupt status so the caller can see we were interrupted.
            Thread.currentThread().interrupt();

            String msg = "Interrupted while waiting for SimpleDB async writer termination";
            LOGGER.warn(msg);
            _exceptions.add(new IOException(msg, e));
        }
throwAsyncException();
}
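
    /**
     * Rethrows the oldest exception recorded by an async writer, so that failures
     * surface on a subsequent write() call or at close().
     */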
private void throwAsyncException() throws IOException {
if (_exceptions.size() > 0) {
// We're going to pretend that a previous exception actually happened
// now, with this write.
IOException ioe = _exceptions.remove(0);
throw ioe;
}
}
}