/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.hadoop.cql3;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.hadoop.AbstractBulkRecordWriter;
import org.apache.cassandra.hadoop.BulkRecordWriter;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.hadoop.HadoopCompat;
import org.apache.cassandra.io.sstable.CQLSSTableWriter;
import org.apache.cassandra.io.sstable.SSTableLoader;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.streaming.StreamState;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.Progressable;

/**
 * The <code>CqlBulkRecordWriter</code> maps the output &lt;key, value&gt;
 * pairs to a Cassandra column family. In particular, it applies the bound variables
 * in the value to the prepared insert statement, which it associates with the key,
 * and in turn the responsible endpoint.
 *
 * <p>
 * Furthermore, this writer groups the CQL queries by the endpoint responsible for
 * the rows being affected. This allows the CQL queries to be executed in parallel,
 * directly against a responsible endpoint.
 * </p>
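 *
 * <p>
 * A minimal configuration sketch is shown below. It assumes the setters on
 * {@link ConfigHelper} and {@link CqlBulkOutputFormat} that mirror the getters this
 * writer reads in <code>setConfigs()</code>; the keyspace, table, and schema are
 * illustrative only.
 * </p>
 * <pre>
 * Configuration conf = job.getConfiguration();
 * ConfigHelper.setOutputKeyspace(conf, "ks");
 * ConfigHelper.setOutputColumnFamily(conf, "cf");
 * CqlBulkOutputFormat.setColumnFamilySchema(conf, "cf",
 *     "CREATE TABLE ks.cf (k int PRIMARY KEY, v text)");
 * CqlBulkOutputFormat.setColumnFamilyInsertStatement(conf, "cf",
 *     "INSERT INTO ks.cf (k, v) VALUES (?, ?)");
 * </pre>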
 *
 * @see CqlBulkOutputFormat
 */
public class CqlBulkRecordWriter extends AbstractBulkRecordWriter<Object, List<ByteBuffer>>
{
    private String keyspace;
    private String columnFamily;
    private String schema;
    private String insertStatement;
    private File outputDir;
    private boolean deleteSrc;

    CqlBulkRecordWriter(TaskAttemptContext context) throws IOException
    {
        super(context);
        setConfigs();
    }

    CqlBulkRecordWriter(Configuration conf, Progressable progress) throws IOException
    {
        super(conf, progress);
        setConfigs();
    }

    CqlBulkRecordWriter(Configuration conf) throws IOException
    {
        super(conf);
        setConfigs();
    }

    private void setConfigs() throws IOException
    {
        // if anything is missing, exceptions will be thrown here, instead of on write()
        keyspace = ConfigHelper.getOutputKeyspace(conf);
        columnFamily = ConfigHelper.getOutputColumnFamily(conf);
        schema = CqlBulkOutputFormat.getColumnFamilySchema(conf, columnFamily);
        insertStatement = CqlBulkOutputFormat.getColumnFamilyInsertStatement(conf, columnFamily);
        outputDir = getColumnFamilyDirectory();
        deleteSrc = CqlBulkOutputFormat.getDeleteSourceOnSuccess(conf);
    }

    private void prepareWriter() throws IOException
    {
        try
        {
            if (writer == null)
            {
                // writes SSTables locally into outputDir using the configured schema and insert statement
                writer = CQLSSTableWriter.builder()
                                         .forTable(schema)
                                         .using(insertStatement)
                                         .withPartitioner(ConfigHelper.getOutputPartitioner(conf))
                                         .inDirectory(outputDir)
                                         .withBufferSizeInMB(Integer.parseInt(conf.get(BUFFER_SIZE_IN_MB, "64")))
                                         .build();
            }

            if (loader == null)
            {
                ExternalClient externalClient = new ExternalClient(conf);
                externalClient.addKnownCfs(keyspace, schema);

                // streams the generated SSTables to the cluster; optionally removes the local files on success
                this.loader = new SSTableLoader(outputDir, externalClient, new BulkRecordWriter.NullOutputHandler())
                {
                    @Override
                    public void onSuccess(StreamState finalState)
                    {
                        if (deleteSrc)
                            FileUtils.deleteRecursive(outputDir);
                    }
                };
            }
        }
        catch (Exception e)
        {
            throw new IOException(e);
        }
    }

    /**
     * The column values must correspond to the order in which
     * they appear in the insert statement.
     *
     * <p>
     * The key is not used, so it can be null or any object.
     * </p>
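     *
     * <p>
     * For example, given the insert statement
     * <code>INSERT INTO ks.cf (k, v) VALUES (?, ?)</code>, the values list must hold
     * the serialized <code>k</code> followed by the serialized <code>v</code>. A sketch
     * (the helper used to build the buffers is illustrative only):
     * </p>
     * <pre>
     * List&lt;ByteBuffer&gt; row = new ArrayList&lt;&gt;();
     * row.add(ByteBufferUtil.bytes(42));        // k
     * row.add(ByteBufferUtil.bytes("hello"));   // v
     * recordWriter.write(null, row);
     * </pre>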
     *
     * @param key
     *            any object or null.
     * @param values
     *            the values to write.
     * @throws IOException
     */
    @Override
    public void write(Object key, List<ByteBuffer> values) throws IOException
    {
        prepareWriter();
        try
        {
            ((CQLSSTableWriter) writer).rawAddRow(values);

            if (null != progress)
                progress.progress();
            if (null != context)
                HadoopCompat.progress(context);
        }
        catch (InvalidRequestException e)
        {
            throw new IOException("Error adding row with key: " + key, e);
        }
    }

    private File getColumnFamilyDirectory() throws IOException
    {
        File dir = new File(String.format("%s%s%s%s%s-%s",
                                          getOutputLocation(),
                                          File.separator,
                                          keyspace,
                                          File.separator,
                                          columnFamily,
                                          UUID.randomUUID().toString()));

        if (!dir.exists() && !dir.mkdirs())
        {
            throw new IOException("Failed to create output directory: " + dir);
        }

        return dir;
    }

    public static class ExternalClient extends AbstractBulkRecordWriter.ExternalClient
    {
        private Map<String, Map<String, CFMetaData>> knownCqlCfs = new HashMap<>();

        public ExternalClient(Configuration conf)
        {
            super(conf);
        }

        public void addKnownCfs(String keyspace, String cql)
        {
            Map<String, CFMetaData> cfs = knownCqlCfs.get(keyspace);

            if (cfs == null)
            {
                cfs = new HashMap<>();
                knownCqlCfs.put(keyspace, cfs);
            }

            CFMetaData metadata = CFMetaData.compile(cql, keyspace);
            cfs.put(metadata.cfName, metadata);
        }

        @Override
        public CFMetaData getCFMetaData(String keyspace, String cfName)
        {
            CFMetaData metadata = super.getCFMetaData(keyspace, cfName);
            if (metadata != null)
            {
                return metadata;
            }

            // fall back to table metadata registered locally via addKnownCfs()
            Map<String, CFMetaData> cfs = knownCqlCfs.get(keyspace);
            return cfs != null ? cfs.get(cfName) : null;
        }
    }
}