package mil.nga.giat.geowave.datastore.accumulo.mapreduce;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeSet;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.TableDeletedException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.TableOfflineException;
import org.apache.accumulo.core.client.impl.Tables;
import org.apache.accumulo.core.client.impl.TabletLocator;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.client.security.tokens.NullToken;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.master.state.tables.TableState;
import org.apache.accumulo.core.util.UtilWaitThread;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import mil.nga.giat.geowave.core.index.NumericIndexStrategy;
import mil.nga.giat.geowave.core.index.sfc.data.MultiDimensionalNumericData;
import mil.nga.giat.geowave.core.store.DataStoreOperations;
import mil.nga.giat.geowave.core.store.adapter.AdapterStore;
import mil.nga.giat.geowave.core.store.adapter.DataAdapter;
import mil.nga.giat.geowave.core.store.adapter.statistics.DataStatisticsStore;
import mil.nga.giat.geowave.core.store.adapter.statistics.RowRangeHistogramStatistics;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import mil.nga.giat.geowave.core.store.query.DistributableQuery;
import mil.nga.giat.geowave.core.store.util.DataStoreUtils;
import mil.nga.giat.geowave.datastore.accumulo.AccumuloOperations;
import mil.nga.giat.geowave.datastore.accumulo.util.AccumuloUtils;
import mil.nga.giat.geowave.mapreduce.splits.GeoWaveInputSplit;
import mil.nga.giat.geowave.mapreduce.splits.GeoWaveRowRange;
import mil.nga.giat.geowave.mapreduce.splits.IntermediateSplitInfo;
import mil.nga.giat.geowave.mapreduce.splits.RangeLocationPair;
import mil.nga.giat.geowave.mapreduce.splits.SplitsProvider;
//@formatter:off
/*if[accumulo.api=1.6]
import org.apache.accumulo.core.security.Credentials;
import org.apache.accumulo.core.data.KeyExtent;
else[accumulo.api=1.6]*/
import org.apache.accumulo.core.client.impl.ClientContext;
import org.apache.accumulo.core.client.impl.Credentials;
import org.apache.accumulo.core.data.impl.KeyExtent;
/*end[accumulo.api=1.6]*/
//@formatter:on
public class AccumuloSplitsProvider extends
SplitsProvider
{
private final static Logger LOGGER = LoggerFactory.getLogger(AccumuloSplitsProvider.class);
/**
 * Computes the Accumulo row ranges to scan for {@code index}, bins them by
 * tablet server and key extent, and adds one {@link IntermediateSplitInfo}
 * per key extent that still holds at least one range to {@code splits}.
 *
 * <p>
 * When {@code query} is {@code null} the full data range (as reported by
 * {@code getRangeMax}) is used; otherwise the ranges are derived from the
 * query's index constraints. A single query range that is unbounded on
 * either side is clipped to the full data range.
 *
 * @param splits
 *            the set to add to; also the return value
 * @param operations
 *            must be an {@link AccumuloOperations}; otherwise an error is
 *            logged and {@code splits} is returned untouched
 * @param index
 *            the index whose table is being split
 * @param adapters
 *            passed through to the histogram statistics lookup
 * @param statsCache
 *            passed through to the histogram statistics lookup
 * @param adapterStore
 *            passed through to the range and statistics lookups
 * @param statsStore
 *            passed through to the range and statistics lookups
 * @param maxSplits
 *            when non-null and positive, forwarded as the maximum number
 *            of ranges to decompose the query into; otherwise -1 is
 *            forwarded
 * @param query
 *            optional query; if it does not support {@code index},
 *            {@code splits} is returned untouched
 * @param authorizations
 *            passed through to the range and statistics lookups
 * @return {@code splits}, with any newly computed splits added
 * @throws IOException
 *             wrapping any exception raised while locating tablets, or
 *             raised directly when a tablet server host cannot be resolved
 */
@Override
protected TreeSet<IntermediateSplitInfo> populateIntermediateSplits(
		final TreeSet<IntermediateSplitInfo> splits,
		final DataStoreOperations operations,
		final PrimaryIndex index,
		final List<DataAdapter<Object>> adapters,
		final Map<PrimaryIndex, RowRangeHistogramStatistics<?>> statsCache,
		final AdapterStore adapterStore,
		final DataStatisticsStore statsStore,
		final Integer maxSplits,
		final DistributableQuery query,
		final String[] authorizations )
		throws IOException {
	if (!(operations instanceof AccumuloOperations)) {
		LOGGER.error("AccumuloSplitsProvider requires AccumuloOperations object.");
		return splits;
	}
	final AccumuloOperations accumuloOperations = (AccumuloOperations) operations;

	if ((query != null) && !query.isSupported(index)) {
		return splits;
	}

	Range fullrange = null;
	try {
		fullrange = unwrapRange(getRangeMax(
				index,
				adapterStore,
				statsStore,
				authorizations));
	}
	catch (final Exception e) {
		LOGGER.warn(
				"Cannot ascertain the full range of the data",
				e);
	}
	if (fullrange == null) {
		// Either the lookup failed (logged above) or unwrapRange rejected
		// the range type (it logs and returns null). Fall back to an
		// unbounded range rather than failing later with a
		// NullPointerException.
		fullrange = new Range();
	}

	final String tableName = AccumuloUtils.getQualifiedTableName(
			operations.getTableNameSpace(),
			index.getId().getString());

	final NumericIndexStrategy indexStrategy = index.getIndexStrategy();

	final TreeSet<Range> ranges;
	if (query != null) {
		final List<MultiDimensionalNumericData> indexConstraints = query.getIndexConstraints(indexStrategy);
		// -1 is what the original passed when no usable split limit is
		// given
		final int maxRanges = ((maxSplits != null) && (maxSplits > 0)) ? maxSplits : -1;
		ranges = AccumuloUtils.byteArrayRangesToAccumuloRanges(DataStoreUtils.constraintsToByteArrayRanges(
				indexConstraints,
				indexStrategy,
				maxRanges));
		if (ranges.size() == 1) {
			final Range range = ranges.first();
			if (range.isInfiniteStartKey() || range.isInfiniteStopKey()) {
				ranges.remove(range);
				// returnNullIfDisjoint=true: a semi-infinite query range
				// that misses the data entirely yields no range instead of
				// an unchecked IllegalArgumentException
				final Range clippedToData = fullrange.clip(
						range,
						true);
				if (clippedToData != null) {
					ranges.add(clippedToData);
				}
				else {
					LOGGER.info("Query split outside of range");
				}
			}
		}
	}
	else {
		ranges = new TreeSet<Range>();
		ranges.add(fullrange);
		LOGGER.trace(
				"Protected range: {}",
				fullrange);
	}

	// get the metadata information for these ranges
	final Map<String, Map<KeyExtent, List<Range>>> tserverBinnedRanges = getBinnedRangesStructure();
	TabletLocator tl;
	try {
		final Instance instance = accumuloOperations.getInstance();
		final String tableId;
		Credentials credentials;
		if (instance instanceof MockInstance) {
			tableId = "";
			// in this case, we will have no password;
			credentials = new Credentials(
					accumuloOperations.getUsername(),
					new NullToken());
		}
		else {
			tableId = Tables.getTableId(
					instance,
					tableName);
			credentials = new Credentials(
					accumuloOperations.getUsername(),
					new PasswordToken(
							accumuloOperations.getPassword()));
		}
		// @formatter:off
		/*if[accumulo.api=1.6]
		tl = getTabletLocator(
				instance,
				tableId);
		Object clientContextOrCredentials = credentials;
		else[accumulo.api=1.6]*/
		final ClientContext clientContext = new ClientContext(
				instance,credentials,
				new ClientConfiguration());
		tl = getTabletLocator(
				clientContext,
				tableId);
		final Object clientContextOrCredentials = clientContext;
		/*end[accumulo.api=1.6]*/
		// @formatter:on

		// its possible that the cache could contain complete, but
		// old information about a tables tablets... so clear it
		tl.invalidateCache();
		final List<Range> rangeList = new ArrayList<Range>(
				ranges);
		// binRanges reports true once every range has been assigned to a
		// tablet; until then verify the table is still usable and retry
		while (!binRanges(
				rangeList,
				clientContextOrCredentials,
				tserverBinnedRanges,
				tl)) {
			if (!(instance instanceof MockInstance)) {
				if (!Tables.exists(
						instance,
						tableId)) {
					throw new TableDeletedException(
							tableId);
				}
				if (Tables.getTableState(
						instance,
						tableId) == TableState.OFFLINE) {
					throw new TableOfflineException(
							instance,
							tableId);
				}
			}
			tserverBinnedRanges.clear();
			LOGGER.warn("Unable to locate bins for specified ranges. Retrying.");
			UtilWaitThread.sleep(150);
			tl.invalidateCache();
		}
	}
	catch (final Exception e) {
		throw new IOException(
				e);
	}

	final HashMap<String, String> hostNameCache = getHostNameCache();
	for (final Entry<String, Map<KeyExtent, List<Range>>> tserverBin : tserverBinnedRanges.entrySet()) {
		// the key is split on the first ':' and the leading part is
		// treated as the address to resolve
		final String tabletServer = tserverBin.getKey();
		final String ipAddress = tabletServer.split(
				":",
				2)[0];

		String location = hostNameCache.get(ipAddress);
		if (location == null) {
			// HP Fortify "Often Misused: Authentication"
			// These methods are not being used for
			// authentication
			final InetAddress inetAddress = InetAddress.getByName(ipAddress);
			location = inetAddress.getHostName();
			hostNameCache.put(
					ipAddress,
					location);
		}

		for (final Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
			final Range keyExtent = extentRanges.getKey().toDataRange();
			final Map<PrimaryIndex, List<RangeLocationPair>> splitInfo = new HashMap<PrimaryIndex, List<RangeLocationPair>>();
			final List<RangeLocationPair> rangeList = new ArrayList<RangeLocationPair>();
			for (final Range range : extentRanges.getValue()) {
				final Range clippedRange = keyExtent.clip(range);
				final boolean outsideData = fullrange.beforeStartKey(clippedRange.getEndKey())
						|| fullrange.afterEndKey(clippedRange.getStartKey());
				if (outsideData) {
					LOGGER.info("Query split outside of range");
					continue;
				}
				final GeoWaveRowRange rowRange = wrapRange(clippedRange);
				final double cardinality = getCardinality(
						getHistStats(
								index,
								adapters,
								adapterStore,
								statsStore,
								statsCache,
								authorizations),
						rowRange);
				final RangeLocationPair pair = new AccumuloRangeLocationPair(
						rowRange,
						location,
						cardinality < 1 ? 1.0 : cardinality);
				rangeList.add(pair);
				// Log only the pair that was actually added, at trace
				// level. The original logged at WARN and indexed the last
				// list element even when nothing had been added.
				LOGGER.trace(
						"Clipped range: {}",
						pair.getRange());
			}
			if (!rangeList.isEmpty()) {
				splitInfo.put(
						index,
						rangeList);
				splits.add(new IntermediateSplitInfo(
						splitInfo,
						this));
			}
		}
	}
	return splits;
}
/**
 * Supplies the empty, mutable map that range binning populates. The split
 * computation in this class reads the outer key as a tablet server and the
 * inner key as a key extent.
 *
 * <p>
 * Extracted out to facilitate testing.
 *
 * @return a new empty {@link HashMap} on every call
 */
public Map<String, Map<KeyExtent, List<Range>>> getBinnedRangesStructure() {
	return new HashMap<String, Map<KeyExtent, List<Range>>>();
}
/**
 * Supplies the empty, mutable cache used to remember host names that have
 * already been resolved, keyed by the address they were resolved from.
 *
 * <p>
 * Extracted out to facilitate testing.
 *
 * @return a new empty {@link HashMap} on every call
 */
public HashMap<String, String> getHostNameCache() {
	return new HashMap<String, String>();
}
/**
 * Wraps an Accumulo {@link Range} in an {@link AccumuloRowRange}.
 *
 * @param range
 *            the Accumulo range to wrap; passed to the wrapper unchanged
 * @return a new {@link AccumuloRowRange} around {@code range}
 */
public static GeoWaveRowRange wrapRange(
		final Range range ) {
	final AccumuloRowRange wrapped = new AccumuloRowRange(
			range);
	return wrapped;
}
/**
 * Extracts the Accumulo {@link Range} held by an {@link AccumuloRowRange}.
 *
 * @param range
 *            the row range to unwrap
 * @return the wrapped Accumulo range, or {@code null} (after logging an
 *         error) when {@code range} is not an {@link AccumuloRowRange},
 *         which includes a {@code null} argument
 */
public static Range unwrapRange(
		final GeoWaveRowRange range ) {
	if (!(range instanceof AccumuloRowRange)) {
		LOGGER.error("AccumuloSplitsProvider requires use of AccumuloRowRange type.");
		return null;
	}
	final AccumuloRowRange accumuloRange = (AccumuloRowRange) range;
	return accumuloRange.getRange();
}
/**
 * Builds an {@link AccumuloRowRange} whose bounds are keys made from the
 * given row bytes.
 *
 * @param startKey
 *            row bytes of the start key
 * @param isStartKeyInclusive
 *            whether the start key is included in the range
 * @param endKey
 *            row bytes of the end key
 * @param isEndKeyInclusive
 *            whether the end key is included in the range
 * @return the new row range
 */
@Override
protected GeoWaveRowRange constructRange(
		final byte[] startKey,
		final boolean isStartKeyInclusive,
		final byte[] endKey,
		final boolean isEndKeyInclusive ) {
	final Key start = new Key(
			new Text(
					startKey));
	final Key end = new Key(
			new Text(
					endKey));
	final Range bounded = new Range(
			start,
			isStartKeyInclusive,
			end,
			isEndKeyInclusive);
	return new AccumuloRowRange(
			bounded);
}
/**
 * Builds an {@link AccumuloRowRange} around a default-constructed Accumulo
 * {@link Range}.
 *
 * @return the new row range
 */
@Override
protected GeoWaveRowRange defaultConstructRange() {
	final Range defaultRange = new Range();
	return new AccumuloRowRange(
			defaultRange);
}
/**
 * Creates an {@link AccumuloRangeLocationPair} using its no-argument
 * constructor.
 *
 * @return the new, default-constructed pair
 */
public static RangeLocationPair defaultConstructRangeLocationPair() {
	final RangeLocationPair emptyPair = new AccumuloRangeLocationPair();
	return emptyPair;
}
/**
 * Creates an {@link AccumuloRangeLocationPair} from the given values.
 *
 * @param range
 *            the row range of the pair
 * @param location
 *            the location associated with the range
 * @param cardinality
 *            the cardinality associated with the range
 * @return the new pair
 */
@Override
protected RangeLocationPair constructRangeLocationPair(
		final GeoWaveRowRange range,
		final String location,
		final double cardinality ) {
	final RangeLocationPair pair = new AccumuloRangeLocationPair(
			range,
			location,
			cardinality);
	return pair;
}
/**
 * Creates a {@link GeoWaveAccumuloInputSplit} from the given split
 * information and locations.
 *
 * @param splitInfo
 *            the range/location pairs of the split, grouped by index
 * @param locations
 *            the locations passed to the input split
 * @return the new input split
 */
@Override
public GeoWaveInputSplit constructInputSplit(
		final Map<PrimaryIndex, List<RangeLocationPair>> splitInfo,
		final String[] locations ) {
	final GeoWaveInputSplit inputSplit = new GeoWaveAccumuloInputSplit(
			splitInfo,
			locations);
	return inputSplit;
}
/**
 * Obtains the Accumulo {@link TabletLocator} for a table.
 *
 * @param clientContextOrInstance
 *            the handle used for the lookup; it is cast to
 *            {@code Instance} when built for the Accumulo 1.6 API and to
 *            {@code ClientContext} otherwise, so passing any other type
 *            fails with a {@link ClassCastException}
 * @param tableId
 *            the Accumulo table id (not the table name)
 * @return the tablet locator returned by
 *         {@code TabletLocator.getLocator}
 * @throws TableNotFoundException
 *             declared by the signature; presumably raised by the Accumulo
 *             lookup when the table cannot be found
 */
protected TabletLocator getTabletLocator(
		final Object clientContextOrInstance,
		final String tableId )
		throws TableNotFoundException {
	final Text tableIdText = new Text(
			tableId);
	// @formatter:off
	/*if[accumulo.api=1.6]
	return TabletLocator.getLocator(
			(Instance) clientContextOrInstance,
			tableIdText);
	else[accumulo.api=1.6]*/
	return TabletLocator.getLocator(
			(ClientContext) clientContextOrInstance,
			tableIdText);
	/*end[accumulo.api=1.6]*/
	// @formatter:on
}
/**
 * Asks the tablet locator to bin the given ranges and reports whether the
 * list it returned is empty.
 *
 * @param rangeList
 *            the ranges to bin
 * @param clientContextOrCredentials
 *            cast to {@code Credentials} when built for the Accumulo 1.6
 *            API and to {@code ClientContext} otherwise
 * @param tserverBinnedRanges
 *            output map handed to the locator to be filled in
 * @param tabletLocator
 *            the locator that performs the binning
 * @return {@code true} when the locator's returned list is empty
 *         (presumably meaning every range was binned; callers retry on
 *         {@code false})
 * @throws AccumuloException
 *             declared by the signature; propagated from the locator
 * @throws AccumuloSecurityException
 *             declared by the signature; propagated from the locator
 * @throws TableNotFoundException
 *             declared by the signature; propagated from the locator
 * @throws IOException
 *             declared by the signature
 */
protected static boolean binRanges(
		final List<Range> rangeList,
		final Object clientContextOrCredentials,
		final Map<String, Map<KeyExtent, List<Range>>> tserverBinnedRanges,
		final TabletLocator tabletLocator )
		throws AccumuloException,
		AccumuloSecurityException,
		TableNotFoundException,
		IOException {
	final List<Range> failures;
	// @formatter:off
	/*if[accumulo.api=1.6]
	failures = tabletLocator.binRanges(
			(Credentials) clientContextOrCredentials,
			rangeList,
			tserverBinnedRanges);
	else[accumulo.api=1.6]*/
	failures = tabletLocator.binRanges(
			(ClientContext) clientContextOrCredentials,
			rangeList,
			tserverBinnedRanges);
	/*end[accumulo.api=1.6]*/
	// @formatter:on
	return failures.isEmpty();
}
}