/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.streaming;

import java.io.*;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.Collection;
import java.util.UUID;

import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.UnmodifiableIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.ning.compress.lzf.LZFInputStream;
import org.apache.cassandra.schema.TableId;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.rows.*;
import org.apache.cassandra.io.sstable.SSTableMultiWriter;
import org.apache.cassandra.io.sstable.SSTableSimpleIterator;
import org.apache.cassandra.io.sstable.format.RangeAwareSSTableWriter;
import org.apache.cassandra.io.sstable.format.SSTableFormat;
import org.apache.cassandra.io.sstable.format.Version;
import org.apache.cassandra.io.util.DataInputPlus;
import org.apache.cassandra.streaming.messages.FileMessageHeader;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.io.util.TrackedInputStream;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.Pair;

/**
 * StreamReader reads from stream and writes to SSTable.
 */
public class StreamReader
{
    private static final Logger logger = LoggerFactory.getLogger(StreamReader.class);

    protected final TableId tableId;
    protected final long estimatedKeys;
    protected final Collection<Pair<Long, Long>> sections;
    protected final StreamSession session;
    protected final Version inputVersion;
    protected final long repairedAt;
    protected final UUID pendingRepair;
    protected final SSTableFormat.Type format;
    protected final int sstableLevel;
    protected final SerializationHeader.Component header;
    protected final int fileSeqNum;

    public StreamReader(FileMessageHeader header, StreamSession session)
    {
        if (session.getPendingRepair() != null)
        {
            // we should only ever be streaming pending repair
            // sstables if the session has a pending repair id
            assert session.getPendingRepair().equals(header.pendingRepair);
        }
        this.session = session;
        this.tableId = header.tableId;
        this.estimatedKeys = header.estimatedKeys;
        this.sections = header.sections;
        this.inputVersion = header.version;
        this.repairedAt = header.repairedAt;
        this.pendingRepair = header.pendingRepair;
        this.format = header.format;
        this.sstableLevel = header.sstableLevel;
        this.header = header.header;
        this.fileSeqNum = header.sequenceNumber;
    }

    /**
     * @param channel where this reads data from
     * @return SSTable transferred
     * @throws IOException if reading the remote sstable fails. Will throw an RTE if local write fails.
     */
    @SuppressWarnings("resource") // channel needs to remain open, streams on top of it can't be closed
    public SSTableMultiWriter read(ReadableByteChannel channel) throws IOException
    {
        long totalSize = totalSize();

        ColumnFamilyStore cfs = ColumnFamilyStore.getIfExists(tableId);
        if (cfs == null)
        {
            // schema was dropped during streaming
            throw new IOException("CF " + tableId + " was dropped during streaming");
        }

        logger.debug("[Stream #{}] Start receiving file #{} from {}, repairedAt = {}, size = {}, ks = '{}', table = '{}', pendingRepair = '{}'.",
                     session.planId(), fileSeqNum, session.peer, repairedAt, totalSize,
                     cfs.keyspace.getName(), cfs.getTableName(), pendingRepair);

        TrackedInputStream in = new TrackedInputStream(new LZFInputStream(Channels.newInputStream(channel)));
        StreamDeserializer deserializer = new StreamDeserializer(cfs.metadata(), in, inputVersion, getHeader(cfs.metadata()));
        SSTableMultiWriter writer = null;
        try
        {
            writer = createWriter(cfs, totalSize, repairedAt, pendingRepair, format);
            while (in.getBytesRead() < totalSize)
            {
                writePartition(deserializer, writer);
                // TODO move this to BytesReadTracker
                session.progress(writer.getFilename(), ProgressInfo.Direction.IN, in.getBytesRead(), totalSize);
            }
            logger.debug("[Stream #{}] Finished receiving file #{} from {} readBytes = {}, totalSize = {}",
                         session.planId(), fileSeqNum, session.peer,
                         FBUtilities.prettyPrintMemory(in.getBytesRead()),
                         FBUtilities.prettyPrintMemory(totalSize));
            return writer;
        }
        catch (Throwable e)
        {
            logger.warn("[Stream {}] Error while reading partition {} from stream on ks='{}' and table='{}'.",
                        session.planId(), deserializer.partitionKey(), cfs.keyspace.getName(), cfs.getTableName(), e);
            if (writer != null)
            {
                writer.abort(e);
            }
            throw Throwables.propagate(e);
        }
    }

    protected SerializationHeader getHeader(TableMetadata metadata)
    {
        return header != null ? header.toHeader(metadata) : null; // pre-3.0 sstables have no SerializationHeader
    }

    protected SSTableMultiWriter createWriter(ColumnFamilyStore cfs, long totalSize, long repairedAt, UUID pendingRepair, SSTableFormat.Type format) throws IOException
    {
        Directories.DataDirectory localDir = cfs.getDirectories().getWriteableLocation(totalSize);
        if (localDir == null)
            throw new IOException(String.format("Insufficient disk space to store %s", FBUtilities.prettyPrintMemory(totalSize)));

        RangeAwareSSTableWriter writer = new RangeAwareSSTableWriter(cfs, estimatedKeys, repairedAt, pendingRepair, format, sstableLevel, totalSize, session.getTransaction(tableId), getHeader(cfs.metadata()));
        StreamHook.instance.reportIncomingFile(cfs, writer, session, fileSeqNum);
        return writer;
    }

    protected long totalSize()
    {
        long size = 0;
        for (Pair<Long, Long> section : sections)
            size += section.right - section.left;
        return size;
    }

    protected void writePartition(StreamDeserializer deserializer, SSTableMultiWriter writer) throws IOException
    {
        writer.append(deserializer.newPartition());
        deserializer.checkForExceptions();
    }

    public static class StreamDeserializer extends UnmodifiableIterator<Unfiltered> implements UnfilteredRowIterator
    {
        private final TableMetadata metadata;
        private final DataInputPlus in;
        private final SerializationHeader header;
        private final SerializationHelper helper;

        private DecoratedKey key;
        private DeletionTime partitionLevelDeletion;
        private SSTableSimpleIterator iterator;
        private Row staticRow;
        private IOException exception;

        public StreamDeserializer(TableMetadata metadata, InputStream in, Version version, SerializationHeader header) throws IOException
        {
            this.metadata = metadata;
            this.in = new DataInputPlus.DataInputStreamPlus(in);
            this.helper = new SerializationHelper(metadata, version.correspondingMessagingVersion(), SerializationHelper.Flag.PRESERVE_SIZE);
            this.header = header;
        }

        public StreamDeserializer newPartition() throws IOException
        {
            key = metadata.partitioner.decorateKey(ByteBufferUtil.readWithShortLength(in));
            partitionLevelDeletion = DeletionTime.serializer.deserialize(in);
            iterator = SSTableSimpleIterator.create(metadata, in, header, helper, partitionLevelDeletion);
            staticRow = iterator.readStaticRow();
            return this;
        }

        public TableMetadata metadata()
        {
            return metadata;
        }

        public RegularAndStaticColumns columns()
        {
            // We don't know which columns we'll get so assume it can be all of them
            return metadata.regularAndStaticColumns();
        }

        public boolean isReverseOrder()
        {
            return false;
        }

        public DecoratedKey partitionKey()
        {
            return key;
        }

        public DeletionTime partitionLevelDeletion()
        {
            return partitionLevelDeletion;
        }

        public Row staticRow()
        {
            return staticRow;
        }

        public EncodingStats stats()
        {
            return header.stats();
        }

        public boolean hasNext()
        {
            try
            {
                return iterator.hasNext();
            }
            catch (IOError e)
            {
                if (e.getCause() != null && e.getCause() instanceof IOException)
                {
                    exception = (IOException) e.getCause();
                    return false;
                }
                throw e;
            }
        }

        public Unfiltered next()
        {
            // Note that in practice we know that IOException will be thrown by hasNext(), because that's
            // where the actual reading happens, so we don't bother catching RuntimeException here
            // (contrary to what we do in hasNext)
            Unfiltered unfiltered = iterator.next();
            return metadata.isCounter() && unfiltered.kind() == Unfiltered.Kind.ROW
                 ? maybeMarkLocalToBeCleared((Row) unfiltered)
                 : unfiltered;
        }

        private Row maybeMarkLocalToBeCleared(Row row)
        {
            return metadata.isCounter() ? row.markCounterLocalToBeCleared() : row;
        }

        public void checkForExceptions() throws IOException
        {
            if (exception != null)
                throw exception;
        }

        public void close()
        {
        }
    }
}