/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.cassandra.streaming; import java.io.IOError; import java.io.IOException; import java.net.InetAddress; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.db.Table; import org.apache.cassandra.dht.Range; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableReader; import org.apache.cassandra.utils.Pair; /** * This class handles streaming data from one node to another. * * The source node is in charge of the streaming session. It begins the stream by sending * a Message with the stream bit flag in the Header turned on. Part of that Message * will include a StreamHeader that includes the files that will be streamed as part * of that session, as well as the first file-to-be-streamed. (Combining session list * and first file like this is inconvenient, but not as inconvenient as the old * three-part send-file-list, wait-for-ack, start-first-file dance.) * * After each file, the target will send a StreamReply indicating success * (FILE_FINISHED) or failure (FILE_RETRY). * * When all files have been successfully transferred and integrated the source will send * SESSION_FINISHED and the session is complete. * * For Stream requests (for bootstrap), one subtlety is that we always have to * create at least one stream reply, even if the list of files is empty, otherwise the * target has no way to know that it can stop waiting for an answer. * */ public class StreamOut { private static Logger logger = LoggerFactory.getLogger(StreamOut.class); /** * Split out files for all tables on disk locally for each range and then stream them to the target endpoint. */ public static void transferRanges(InetAddress target, String tableName, Collection<Range> ranges, Runnable callback) { assert ranges.size() > 0; // this is so that this target shows up as a destination while anticompaction is happening. StreamOutSession session = StreamOutSession.create(tableName, target, callback); logger.info("Beginning transfer to {}", target); logger.debug("Ranges are {}", StringUtils.join(ranges, ",")); try { Table table = flushSSTable(tableName); // send the matching portion of every sstable in the keyspace transferSSTables(session, table.getAllSSTables(), ranges); } catch (IOException e) { throw new IOError(e); } } /** * (1) dump all the memtables to disk. * (2) determine the minimal file sections we need to send for the given ranges * (3) transfer the data. */ private static Table flushSSTable(String tableName) throws IOException { Table table = Table.open(tableName); logger.info("Flushing memtables for {}...", tableName); for (Future f : table.flush()) { try { f.get(); } catch (InterruptedException e) { throw new RuntimeException(e); } catch (ExecutionException e) { throw new RuntimeException(e); } } return table; } /** * Split out files for all tables on disk locally for each range and then stream them to the target endpoint. */ public static void transferRangesForRequest(StreamOutSession session, Collection<Range> ranges) { assert ranges.size() > 0; logger.info("Beginning transfer to {}", session.getHost()); logger.debug("Ranges are {}", StringUtils.join(ranges, ",")); try { Table table = flushSSTable(session.table); // send the matching portion of every sstable in the keyspace List<PendingFile> pending = createPendingFiles(table.getAllSSTables(), ranges); session.addFilesToStream(pending); session.begin(); } catch (IOException e) { throw new IOError(e); } } /** * Transfers matching portions of a group of sstables from a single table to the target endpoint. */ public static void transferSSTables(StreamOutSession session, Collection<SSTableReader> sstables, Collection<Range> ranges) throws IOException { List<PendingFile> pending = createPendingFiles(sstables, ranges); if (pending.size() > 0) { session.addFilesToStream(pending); session.begin(); } else { session.close(); } } // called prior to sending anything. private static List<PendingFile> createPendingFiles(Collection<SSTableReader> sstables, Collection<Range> ranges) { List<PendingFile> pending = new ArrayList<PendingFile>(); for (SSTableReader sstable : sstables) { Descriptor desc = sstable.descriptor; List<Pair<Long,Long>> sections = sstable.getPositionsForRanges(ranges); if (sections.isEmpty()) continue; pending.add(new PendingFile(sstable, desc, SSTable.COMPONENT_DATA, sections)); } logger.info("Stream context metadata {}, {} sstables.", pending, sstables.size()); return pending; } }