/* * Copyright 2014, Stratio. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.stratio.deep.cassandra.cql; import static com.google.common.collect.Iterables.concat; import static com.google.common.collect.Iterables.indexOf; import static com.google.common.collect.Iterables.transform; import static com.stratio.deep.commons.utils.Utils.quote; import java.net.InetAddress; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import javax.annotation.Nullable; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Token; import com.datastax.driver.core.Host; import com.datastax.driver.core.Metadata; import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Row; import com.datastax.driver.core.Session; import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.stratio.deep.cassandra.config.CassandraDeepJobConfig; import com.stratio.deep.cassandra.config.ICassandraDeepJobConfig; import com.stratio.deep.commons.exception.DeepGenericException; import com.stratio.deep.commons.rdd.DeepTokenRange; import com.stratio.deep.commons.utils.Pair; import com.stratio.deep.commons.utils.Utils; /** * Implementation that returns an instance of a {@link DeepRecordReader}. * * @author Luca Rosellini <luca@strat.io> */ public class RangeUtils { /** * private constructor. */ private RangeUtils() { } /** * Gets the list of token for each cluster machine.<br/> * The concrete class of the token depends on the partitioner used.<br/> * * @param query the query to execute against the given session to obtain the list of tokens. * @param sessionWithHost the pair object containing both the session and the name of the machine to which we're connected to. * @param partitioner the partitioner used in the cluster. * @return a map containing, for each cluster machine, the list of tokens. Tokens are not returned in any particular * order. */ static Map<String, Iterable<Comparable>> fetchTokens(String query, final Pair<Session, String> sessionWithHost, IPartitioner partitioner) { ResultSet rSet = sessionWithHost.left.execute(query); final AbstractType tkValidator = partitioner.getTokenValidator(); final Map<String, Iterable<Comparable>> tokens = Maps.newHashMap(); Iterable<Pair<String, Iterable<Comparable>>> pairs = transform(rSet.all(), new FetchTokensRowPairFunction(sessionWithHost, tkValidator)); for (Pair<String, Iterable<Comparable>> pair : pairs) { tokens.put(pair.left, pair.right); } return tokens; } /** * Merges the list of tokens for each cluster machine to a single list of token ranges. * * @param tokens the map of tokens for each cluster machine. * @param session the connection to the cluster. * @param p the partitioner used in the cluster. * @return the merged lists of tokens transformed to DeepTokenRange(s). The returned collection is shuffled. */ static List<DeepTokenRange> mergeTokenRanges(Map<String, Iterable<Comparable>> tokens, final Session session, final IPartitioner p) { final Iterable<Comparable> allRanges = Ordering.natural().sortedCopy(concat(tokens.values())); final Comparable maxValue = Ordering.natural().max(allRanges); final Comparable minValue = (Comparable) p.minValue(maxValue.getClass()).getToken().token; Function<Comparable, Set<DeepTokenRange>> map = new MergeTokenRangesFunction(maxValue, minValue, session, p, allRanges); Iterable<DeepTokenRange> concatenated = concat(transform(allRanges, map)); Set<DeepTokenRange> dedup = Sets.newHashSet(concatenated); return Ordering.natural().sortedCopy(dedup); } /** * Given a token, fetches the list of replica machines holding that token. * * @param token the token whose replicas we want to fetch. * @param session the connection to the cluster. * @param partitioner the partitioner used in the cluster. * @return the list of replica machines holding that token. */ private static List<String> initReplicas( final Comparable token, final Session session, final IPartitioner partitioner) { final AbstractType tkValidator = partitioner.getTokenValidator(); final Metadata metadata = session.getCluster().getMetadata(); @SuppressWarnings("unchecked") Set<Host> replicas = metadata.getReplicas(quote(session.getLoggedKeyspace()), ByteBuffer.wrap(token.toString().getBytes())); return Lists.newArrayList(Iterables.transform(replicas, new Function<Host, String>() { @Nullable @Override public String apply( @Nullable Host input) { assert input != null; return input.getAddress().getHostName(); } })); } /** * Returns the token ranges that will be mapped to Spark partitions. * * @param config the Deep configuration object. * @return the list of computed token ranges. */ public static List<DeepTokenRange> getSplits(CassandraDeepJobConfig config) { Map<String, Iterable<Comparable>> tokens = new HashMap<>(); IPartitioner p = getPartitioner(config); Pair<Session, String> sessionWithHost = CassandraClientProvider.getSession( config.getHost(), config, false); String queryLocal = "select tokens from system.local"; tokens.putAll(fetchTokens(queryLocal, sessionWithHost, p)); String queryPeers = "select peer, tokens from system.peers"; tokens.putAll(fetchTokens(queryPeers, sessionWithHost, p)); List<DeepTokenRange> merged = mergeTokenRanges(tokens, sessionWithHost.left, p); return splitRanges(merged, p, config.getBisectFactor()); } private static List<DeepTokenRange> splitRanges( final List<DeepTokenRange> ranges, final IPartitioner p, final int bisectFactor) { if (bisectFactor == 1) { return ranges; } Iterable<DeepTokenRange> bisectedRanges = concat(transform(ranges, new Function<DeepTokenRange, List<DeepTokenRange>>() { @Nullable @Override public List<DeepTokenRange> apply( @Nullable DeepTokenRange input) { final List<DeepTokenRange> splittedRanges = new ArrayList<>(); bisectTokeRange(input, p, bisectFactor, splittedRanges); return splittedRanges; } })); return Lists.newArrayList(bisectedRanges); } /** * Recursive function that splits a given token range to a given number of token ranges. * * @param range the token range to be splitted. * @param partitioner the cassandra partitioner. * @param bisectFactor the actual number of pieces the original token range will be splitted to. * @param accumulator a token range accumulator (ne */ private static void bisectTokeRange( DeepTokenRange range, final IPartitioner partitioner, final int bisectFactor, final List<DeepTokenRange> accumulator) { final AbstractType tkValidator = partitioner.getTokenValidator(); Token leftToken = partitioner.getTokenFactory().fromByteArray(tkValidator.decompose(range.getStartToken())); Token rightToken = partitioner.getTokenFactory().fromByteArray(tkValidator.decompose(range.getEndToken())); Token midToken = partitioner.midpoint(leftToken, rightToken); Comparable midpoint = (Comparable) tkValidator.compose(tkValidator.fromString(midToken.toString())); DeepTokenRange left = new DeepTokenRange(range.getStartToken(), midpoint, range.getReplicas()); DeepTokenRange right = new DeepTokenRange(midpoint, range.getEndToken(), range.getReplicas()); if (bisectFactor / 2 <= 1) { accumulator.add(left); accumulator.add(right); } else { bisectTokeRange(left, partitioner, bisectFactor / 2, accumulator); bisectTokeRange(right, partitioner, bisectFactor / 2, accumulator); } } /** * Creates a new instance of the cassandra partitioner configured in the configuration object. * * @param config the Deep configuration object. * @return an instance of the cassandra partitioner configured in the configuration object. */ public static IPartitioner getPartitioner(ICassandraDeepJobConfig config) { try { return (IPartitioner) Class.forName(config.getPartitionerClassName()).newInstance(); } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) { throw new DeepGenericException(e); } } private static class FetchTokensRowPairFunction implements Function<Row, Pair<String, Iterable<Comparable>>> { private final Pair<Session, String> sessionWithHost; private final AbstractType tkValidator; public FetchTokensRowPairFunction(Pair<Session, String> sessionWithHost, AbstractType tkValidator) { this.sessionWithHost = sessionWithHost; this.tkValidator = tkValidator; } @Nullable @Override public Pair<String, Iterable<Comparable>> apply(final @Nullable Row row) { assert row != null; InetAddress host; try { host = row.getInet("peer"); } catch (IllegalArgumentException e) { host = Utils.inetAddressFromLocation(sessionWithHost.right); } Iterable<Comparable> sortedTokens = transform(row.getSet("tokens", String.class), new Function<String, Comparable>() { @Nullable @Override public Comparable apply(final @Nullable String token) { return (Comparable) tkValidator.compose(tkValidator.fromString(token)); } } ); return Pair.create(host.getHostName(), sortedTokens); } } /** * Function that converts a partitioner hash to a token range. Takes into account the ring wrap-around range. */ private static class MergeTokenRangesFunction implements Function<Comparable, Set<DeepTokenRange>> { private final Comparable maxValue; private final Comparable minValue; private final Session session; private final IPartitioner partitioner; private final Iterable<Comparable> allRanges; public MergeTokenRangesFunction(Comparable maxValue, Comparable minValue, Session session, IPartitioner partitioner, Iterable<Comparable> allRanges) { this.maxValue = maxValue; this.minValue = minValue; this.session = session; this.partitioner = partitioner; this.allRanges = allRanges; } @Override public Set<DeepTokenRange> apply(final Comparable elem) { Comparable nextValue; Comparable currValue = elem; Set<DeepTokenRange> result = new HashSet<>(); if (currValue.equals(maxValue)) { result.add(new DeepTokenRange(currValue, minValue, initReplicas(currValue, session, partitioner))); currValue = minValue; nextValue = Iterables.find(allRanges, new Predicate<Comparable>() { @Override @SuppressWarnings("unchecked") public boolean apply( @Nullable Comparable input) { assert input != null; return input.compareTo(minValue) > 0; } }); } else { int nextIdx = 1 + indexOf(allRanges, new Predicate<Comparable>() { @Override public boolean apply( @Nullable Comparable input) { assert input != null; return input.equals(elem); } }); nextValue = Iterables.get(allRanges, nextIdx); } result.add(new DeepTokenRange(currValue, nextValue, initReplicas(currValue, session, partitioner))); return result; } } /** * Returns the token ranges that will be mapped to Spark partitions. * * @param config the Deep configuration object. * @return the list of computed token ranges. */ public static List<DeepTokenRange> getSplitsBySize( CassandraDeepJobConfig config) { IPartitioner p = getPartitioner(config); AbstractType tokenValidator = p.getTokenValidator(); Pair<Session, String> sessionWithHost = CassandraClientProvider .getSession(config.getHost(), config, false); String query = new StringBuilder("CALCULATE SPLITS FROM ") .append(config.getKeyspace()).append(".") .append(config.getTable()).append(" ESTIMATING ") .append(config.getSplitSize()).toString(); ResultSet rSet = sessionWithHost.left.execute(query); List<DeepTokenRange> tokens = new ArrayList<>(); for (Row row : rSet.all()) { Comparable startToken = (Comparable) tokenValidator.compose(row .getBytesUnsafe("start_token")); Comparable endToken = (Comparable) tokenValidator.compose(row .getBytesUnsafe("end_token")); List<String> replicas = new ArrayList<>(); for (InetAddress addres : row.getList("preferred_locations", InetAddress.class)) { replicas.add(addres.getHostName()); } tokens.add(new DeepTokenRange(startToken, endToken, replicas)); } return tokens; } }