package org.apache.cassandra.hadoop;
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */

import java.io.IOException;
import java.net.InetAddress;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.cassandra.client.RingCache;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.thrift.*;
import org.apache.cassandra.utils.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.thrift.TException;
import org.apache.thrift.transport.TSocket;
import org.apache.cassandra.utils.ByteBufferUtil;

/**
 * The <code>ColumnFamilyRecordWriter</code> maps the output &lt;key, value&gt;
 * pairs to a Cassandra column family. In particular, it applies all mutations
 * in the value, which it associates with the key, and in turn the responsible
 * endpoint.
 *
 * <p>
 * Furthermore, this writer groups the mutations by the endpoint responsible for
 * the rows being affected. This allows the mutations to be executed in parallel,
 * directly to a responsible endpoint.
 * </p>
 *
 * @author Karthick Sankarachary
 * @see ColumnFamilyOutputFormat
 * @see OutputFormat
 */
final class ColumnFamilyRecordWriter
    extends RecordWriter<ByteBuffer,List<org.apache.cassandra.avro.Mutation>>
    implements org.apache.hadoop.mapred.RecordWriter<ByteBuffer,List<org.apache.cassandra.avro.Mutation>>
{
    // The configuration this writer is associated with.
    private final Configuration conf;

    // The ring cache that describes the token ranges each node in the ring is
    // responsible for. This is what allows us to group the mutations by
    // the endpoints they should be targeted at. The targeted endpoint
    // essentially acts as the primary replica for the rows being affected
    // by the mutations.
    private final RingCache ringCache;

    // The number of mutations to buffer per endpoint
    private final int queueSize;

    // handles for clients for each range running in the threadpool
    private final Map<Range,RangeClient> clients;

    private final long batchThreshold;

    private final ConsistencyLevel consistencyLevel;
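    /*
     * For reference, a minimal sketch of how a job might be wired up to feed this
     * writer. The ConfigHelper setter names are assumed counterparts of the getters
     * used in this class and may differ; the queue size and batch threshold are
     * plain Configuration entries read through the ColumnFamilyOutputFormat
     * constants in the constructor below.
     *
     *   Job job = new Job(conf);
     *   job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
     *   ConfigHelper.setOutputColumnFamily(job.getConfiguration(), "Keyspace1", "Standard1"); // assumed setter
     *   ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); // assumed setter
     *   ConfigHelper.setInitialAddress(job.getConfiguration(), "127.0.0.1"); // assumed setter
     *   ConfigHelper.setRpcPort(job.getConfiguration(), "9160"); // assumed setter
     *   job.getConfiguration().setInt(ColumnFamilyOutputFormat.QUEUE_SIZE, 256);
     *   job.getConfiguration().setLong(ColumnFamilyOutputFormat.BATCH_THRESHOLD, 32);
     */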
    /**
     * Upon construction, obtain the map that this writer will use to collect
     * mutations, and the ring cache for the given keyspace.
     *
     * @param context the task attempt context
     * @throws IOException
     */
    ColumnFamilyRecordWriter(TaskAttemptContext context) throws IOException
    {
        this(context.getConfiguration());
    }

    ColumnFamilyRecordWriter(Configuration conf) throws IOException
    {
        this.conf = conf;
        this.ringCache = new RingCache(ConfigHelper.getOutputKeyspace(conf),
                                       ConfigHelper.getPartitioner(conf),
                                       ConfigHelper.getInitialAddress(conf),
                                       ConfigHelper.getRpcPort(conf));
        this.queueSize = conf.getInt(ColumnFamilyOutputFormat.QUEUE_SIZE, 32 * Runtime.getRuntime().availableProcessors());
        this.clients = new HashMap<Range,RangeClient>();
        batchThreshold = conf.getLong(ColumnFamilyOutputFormat.BATCH_THRESHOLD, 32);
        consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getWriteConsistencyLevel(conf));
    }

    /**
     * If the key is to be associated with a valid value, a mutation is created
     * for it with the given column family and columns. In the event the value
     * in the column is missing (i.e., null), then it is marked for
     * {@link Deletion}. Similarly, if the entire value for a key is missing
     * (i.e., null), then the entire key is marked for {@link Deletion}.
     *
     * @param keybuff
     *            the key to write.
     * @param value
     *            the value to write.
     * @throws IOException
     */
    @Override
    public void write(ByteBuffer keybuff, List<org.apache.cassandra.avro.Mutation> value) throws IOException
    {
        Range range = ringCache.getRange(keybuff);

        // get the client for the given range, or create a new one
        RangeClient client = clients.get(range);
        if (client == null)
        {
            // haven't seen keys for this range: create new client
            client = new RangeClient(ringCache.getEndpoint(range));
            client.start();
            clients.put(range, client);
        }

        for (org.apache.cassandra.avro.Mutation amut : value)
            client.put(new Pair<ByteBuffer,Mutation>(keybuff, avroToThrift(amut)));
    }
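    /*
     * A minimal sketch of the Avro value this writer expects, assuming the Avro
     * generated classes expose the same public fields that avroToThrift() below
     * reads (name, value, timestamp, column_or_supercolumn):
     *
     *   org.apache.cassandra.avro.Column col = new org.apache.cassandra.avro.Column();
     *   col.name = ByteBuffer.wrap("col-name".getBytes());
     *   col.value = ByteBuffer.wrap("col-value".getBytes());
     *   col.timestamp = System.currentTimeMillis();
     *
     *   org.apache.cassandra.avro.ColumnOrSuperColumn cosc = new org.apache.cassandra.avro.ColumnOrSuperColumn();
     *   cosc.column = col;
     *
     *   org.apache.cassandra.avro.Mutation mutation = new org.apache.cassandra.avro.Mutation();
     *   mutation.column_or_supercolumn = cosc;
     *
     *   // a reducer would then emit: context.write(key, Collections.singletonList(mutation));
     */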
    /**
     * Deep copies the given Avro mutation into a new Thrift mutation.
     */
    private Mutation avroToThrift(org.apache.cassandra.avro.Mutation amut)
    {
        Mutation mutation = new Mutation();
        org.apache.cassandra.avro.ColumnOrSuperColumn acosc = amut.column_or_supercolumn;
        if (acosc == null)
        {
            // deletion
            assert amut.deletion != null;
            Deletion deletion = new Deletion(amut.deletion.timestamp);
            mutation.setDeletion(deletion);

            org.apache.cassandra.avro.SlicePredicate apred = amut.deletion.predicate;
            if (apred == null && amut.deletion.super_column == null)
            {
                // leave Deletion alone to delete entire row
            }
            else if (amut.deletion.super_column != null)
            {
                // super column
                deletion.setSuper_column(ByteBufferUtil.getArray(amut.deletion.super_column));
            }
            else if (apred.column_names != null)
            {
                // column names
                List<ByteBuffer> names = new ArrayList<ByteBuffer>(apred.column_names.size());
                for (ByteBuffer name : apred.column_names)
                    names.add(name);
                deletion.setPredicate(new SlicePredicate().setColumn_names(names));
            }
            else
            {
                // range
                deletion.setPredicate(new SlicePredicate().setSlice_range(avroToThrift(apred.slice_range)));
            }
        }
        else
        {
            // creation
            ColumnOrSuperColumn cosc = new ColumnOrSuperColumn();
            mutation.setColumn_or_supercolumn(cosc);
            if (acosc.column != null)
            {
                // standard column
                cosc.setColumn(avroToThrift(acosc.column));
            }
            else
            {
                // super column
                ByteBuffer scolname = acosc.super_column.name;
                List<Column> scolcols = new ArrayList<Column>(acosc.super_column.columns.size());
                for (org.apache.cassandra.avro.Column acol : acosc.super_column.columns)
                    scolcols.add(avroToThrift(acol));
                cosc.setSuper_column(new SuperColumn(scolname, scolcols));
            }
        }
        return mutation;
    }

    private SliceRange avroToThrift(org.apache.cassandra.avro.SliceRange asr)
    {
        return new SliceRange(asr.start, asr.finish, asr.reversed, asr.count);
    }

    private Column avroToThrift(org.apache.cassandra.avro.Column acol)
    {
        return new Column(acol.name, acol.value, acol.timestamp);
    }

    /**
     * Close this <code>RecordWriter</code> to future operations, but not before
     * flushing out the batched mutations.
     *
     * @param context the context of the task
     * @throws IOException
     */
    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException
    {
        close((org.apache.hadoop.mapred.Reporter)null);
    }

    /** Fills the deprecated RecordWriter interface for streaming. */
    @Deprecated
    public void close(org.apache.hadoop.mapred.Reporter reporter) throws IOException
    {
        for (RangeClient client : clients.values())
            client.stopNicely();
        try
        {
            for (RangeClient client : clients.values())
            {
                client.join();
                client.close();
            }
        }
        catch (InterruptedException e)
        {
            throw new AssertionError(e);
        }
    }

    /**
     * A client that runs in a threadpool and connects to the list of endpoints for a particular
     * range. Mutations for keys in that range are sent to this client via a queue.
     */
    public class RangeClient extends Thread
    {
        // The list of endpoints for this range
        private final List<InetAddress> endpoints;
        private final String columnFamily = ConfigHelper.getOutputColumnFamily(conf);

        // A bounded queue of incoming mutations for this range
        private final BlockingQueue<Pair<ByteBuffer, Mutation>> queue = new ArrayBlockingQueue<Pair<ByteBuffer,Mutation>>(queueSize);

        private volatile boolean run = true;
        private volatile IOException lastException;

        private Cassandra.Client thriftClient;
        private TSocket thriftSocket;
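        // Connections are made lazily: thriftClient starts out null, so the first
        // batch_mutate attempt in run() fails and falls through to the connect loop,
        // which tries each endpoint in turn. An error that exhausts the endpoints is
        // stored in lastException and rethrown to the caller on the next put() or
        // stopNicely().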
        /**
         * Constructs a {@link RangeClient} for the given endpoints.
         * @param endpoints the possible endpoints to execute the mutations on
         */
        public RangeClient(List<InetAddress> endpoints)
        {
            super("client-" + endpoints);
            this.endpoints = endpoints;
        }

        /**
         * Enqueues the given value to be written to Cassandra.
         */
        public void put(Pair<ByteBuffer,Mutation> value) throws IOException
        {
            while (true)
            {
                if (lastException != null)
                    throw lastException;
                try
                {
                    if (queue.offer(value, 100, TimeUnit.MILLISECONDS))
                        break;
                }
                catch (InterruptedException e)
                {
                    throw new AssertionError(e);
                }
            }
        }

        public void stopNicely() throws IOException
        {
            if (lastException != null)
                throw lastException;
            run = false;
            interrupt();
        }

        public void close()
        {
            if (thriftSocket != null)
            {
                thriftSocket.close();
                thriftSocket = null;
                thriftClient = null;
            }
        }

        /**
         * Loops collecting mutations from the queue and sending them to Cassandra.
         */
        public void run()
        {
            outer:
            while (run || !queue.isEmpty())
            {
                Pair<ByteBuffer, Mutation> mutation;
                try
                {
                    mutation = queue.take();
                }
                catch (InterruptedException e)
                {
                    // re-check loop condition after interrupt
                    continue;
                }

                Map<ByteBuffer, Map<String, List<Mutation>>> batch = new HashMap<ByteBuffer, Map<String, List<Mutation>>>();
                while (mutation != null)
                {
                    Map<String, List<Mutation>> subBatch = batch.get(mutation.left);
                    if (subBatch == null)
                    {
                        subBatch = Collections.singletonMap(columnFamily, (List<Mutation>) new ArrayList<Mutation>());
                        batch.put(mutation.left, subBatch);
                    }

                    subBatch.get(columnFamily).add(mutation.right);
                    if (batch.size() >= batchThreshold)
                        break;
                    mutation = queue.poll();
                }

                Iterator<InetAddress> iter = endpoints.iterator();
                while (true)
                {
                    // send the mutation to the last-used endpoint. first time through, this will NPE harmlessly.
                    try
                    {
                        thriftClient.batch_mutate(batch, consistencyLevel);
                        break;
                    }
                    catch (Exception e)
                    {
                        close();
                        if (!iter.hasNext())
                        {
                            lastException = new IOException(e);
                            break outer;
                        }
                    }

                    // attempt to connect to a different endpoint
                    try
                    {
                        InetAddress address = iter.next();
                        thriftSocket = new TSocket(address.getHostName(), ConfigHelper.getRpcPort(conf));
                        thriftClient = ColumnFamilyOutputFormat.createAuthenticatedClient(thriftSocket, conf);
                    }
                    catch (Exception e)
                    {
                        close();
                        // TException means something unexpected went wrong to that endpoint, so
                        // we should try again to another. Other exceptions (auth or invalid request) are fatal.
                        if ((!(e instanceof TException)) || !iter.hasNext())
                        {
                            lastException = new IOException(e);
                            break outer;
                        }
                    }
                }
            }
        }

        @Override
        public String toString()
        {
            return "#<Client for " + endpoints.toString() + ">";
        }
    }
}