/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.hadoop.cql3;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.hadoop.AbstractBulkRecordWriter;
import org.apache.cassandra.hadoop.BulkRecordWriter;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.hadoop.HadoopCompat;
import org.apache.cassandra.io.sstable.CQLSSTableWriter;
import org.apache.cassandra.io.sstable.SSTableLoader;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.streaming.StreamState;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.Progressable;

/**
 * The <code>CqlBulkRecordWriter</code> maps the output &lt;key, value&gt;
 * pairs to a Cassandra column family. In particular, it applies the bound variables
 * in the value to the prepared insert statement, which it associates with the key,
 * and in turn the responsible endpoint.
 *
 * <p>
 * Furthermore, this writer groups the CQL queries by the endpoint responsible for
 * the rows being affected. This allows the CQL queries to be executed in parallel,
 * directly against a responsible endpoint.
 * </p>
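 *
 * <p>
 * A minimal configuration sketch is shown below. It assumes the setters on
 * {@link ConfigHelper} and {@link CqlBulkOutputFormat} that mirror the getters this
 * writer reads in <code>setConfigs()</code>; the keyspace, table, and schema are
 * illustrative only.
 * </p>
 * <pre>
 * Configuration conf = job.getConfiguration();
 * ConfigHelper.setOutputKeyspace(conf, "ks");
 * ConfigHelper.setOutputColumnFamily(conf, "cf");
 * CqlBulkOutputFormat.setColumnFamilySchema(conf, "cf",
 *     "CREATE TABLE ks.cf (k int PRIMARY KEY, v text)");
 * CqlBulkOutputFormat.setColumnFamilyInsertStatement(conf, "cf",
 *     "INSERT INTO ks.cf (k, v) VALUES (?, ?)");
 * </pre>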
 *
 * @see CqlBulkOutputFormat
 */
public class CqlBulkRecordWriter extends AbstractBulkRecordWriter<Object, List<ByteBuffer>>
{
    private String keyspace;
    private String columnFamily;
    private String schema;
    private String insertStatement;
    private File outputDir;
    private boolean deleteSrc;

    CqlBulkRecordWriter(TaskAttemptContext context) throws IOException
    {
        super(context);
        setConfigs();
    }

    CqlBulkRecordWriter(Configuration conf, Progressable progress) throws IOException
    {
        super(conf, progress);
        setConfigs();
    }

    CqlBulkRecordWriter(Configuration conf) throws IOException
    {
        super(conf);
        setConfigs();
    }

    private void setConfigs() throws IOException
    {
        // if anything is missing, exceptions will be thrown here, instead of on write()
        keyspace = ConfigHelper.getOutputKeyspace(conf);
        columnFamily = ConfigHelper.getOutputColumnFamily(conf);
        schema = CqlBulkOutputFormat.getColumnFamilySchema(conf, columnFamily);
        insertStatement = CqlBulkOutputFormat.getColumnFamilyInsertStatement(conf, columnFamily);
        outputDir = getColumnFamilyDirectory();
        deleteSrc = CqlBulkOutputFormat.getDeleteSourceOnSuccess(conf);
    }

    private void prepareWriter() throws IOException
    {
        try
        {
            if (writer == null)
            {
                // writes SSTables locally into outputDir using the configured schema and insert statement
                writer = CQLSSTableWriter.builder()
                                         .forTable(schema)
                                         .using(insertStatement)
                                         .withPartitioner(ConfigHelper.getOutputPartitioner(conf))
                                         .inDirectory(outputDir)
                                         .withBufferSizeInMB(Integer.parseInt(conf.get(BUFFER_SIZE_IN_MB, "64")))
                                         .build();
            }

            if (loader == null)
            {
                ExternalClient externalClient = new ExternalClient(conf);
                externalClient.addKnownCfs(keyspace, schema);

                // streams the generated SSTables to the cluster; optionally removes the local files on success
                this.loader = new SSTableLoader(outputDir, externalClient, new BulkRecordWriter.NullOutputHandler())
                {
                    @Override
                    public void onSuccess(StreamState finalState)
                    {
                        if (deleteSrc)
                            FileUtils.deleteRecursive(outputDir);
                    }
                };
            }
        }
        catch (Exception e)
        {
            throw new IOException(e);
        }
    }

    /**
     * The column values must correspond to the order in which
     * they appear in the insert statement.
     *
     * <p>
     * The key is not used, so it can be null or any object.
     * </p>
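     *
     * <p>
     * For example, given the insert statement
     * <code>INSERT INTO ks.cf (k, v) VALUES (?, ?)</code>, the values list must hold
     * the serialized <code>k</code> followed by the serialized <code>v</code>. A sketch
     * (the helper used to build the buffers is illustrative only):
     * </p>
     * <pre>
     * List&lt;ByteBuffer&gt; row = new ArrayList&lt;&gt;();
     * row.add(ByteBufferUtil.bytes(42));        // k
     * row.add(ByteBufferUtil.bytes("hello"));   // v
     * recordWriter.write(null, row);
     * </pre>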
     *
     * @param key
     *            any object or null.
     * @param values
     *            the values to write.
     * @throws IOException
     */
    @Override
    public void write(Object key, List<ByteBuffer> values) throws IOException
    {
        prepareWriter();
        try
        {
            ((CQLSSTableWriter) writer).rawAddRow(values);

            if (null != progress)
                progress.progress();
            if (null != context)
                HadoopCompat.progress(context);
        }
        catch (InvalidRequestException e)
        {
            throw new IOException("Error adding row with key: " + key, e);
        }
    }

    private File getColumnFamilyDirectory() throws IOException
    {
        File dir = new File(String.format("%s%s%s%s%s-%s",
                                          getOutputLocation(),
                                          File.separator,
                                          keyspace,
                                          File.separator,
                                          columnFamily,
                                          UUID.randomUUID().toString()));

        if (!dir.exists() && !dir.mkdirs())
        {
            throw new IOException("Failed to create output directory: " + dir);
        }

        return dir;
    }

    public static class ExternalClient extends AbstractBulkRecordWriter.ExternalClient
    {
        private Map<String, Map<String, CFMetaData>> knownCqlCfs = new HashMap<>();

        public ExternalClient(Configuration conf)
        {
            super(conf);
        }

        public void addKnownCfs(String keyspace, String cql)
        {
            Map<String, CFMetaData> cfs = knownCqlCfs.get(keyspace);

            if (cfs == null)
            {
                cfs = new HashMap<>();
                knownCqlCfs.put(keyspace, cfs);
            }

            CFMetaData metadata = CFMetaData.compile(cql, keyspace);
            cfs.put(metadata.cfName, metadata);
        }

        @Override
        public CFMetaData getCFMetaData(String keyspace, String cfName)
        {
            CFMetaData metadata = super.getCFMetaData(keyspace, cfName);
            if (metadata != null)
            {
                return metadata;
            }

            // fall back to table metadata registered locally via addKnownCfs()
            Map<String, CFMetaData> cfs = knownCqlCfs.get(keyspace);
            return cfs != null ? cfs.get(cfName) : null;
        }
    }
}