/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.cassandra.io.sstable;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Map;
import java.util.TreeMap;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.ByteBufferUtil;
/**
* An SSTable writer that doesn't assume rows are added in sorted order.
* This writer buffers rows in memory and then writes them all out in sorted order.
* To avoid loading the entire data set in memory, the amount of data buffered
* is configurable. Each time the threshold is met, one SSTable is
* created and the buffer is reset.
*
* @see AbstractSSTableSimpleWriter
*/
public class SSTableSimpleUnsortedWriter extends AbstractSSTableSimpleWriter
{
    /**
     * Multiplier applied to a column family's serialized size when estimating how much
     * a row contributes to the buffer.
     * NOTE(review): presumably pads the estimate for overhead beyond the raw serialized
     * columns -- confirm the rationale for this particular value.
     */
    private static final double SERIALIZED_SIZE_RATIO = 1.2;

    /** Rows buffered since the last flush. A TreeMap so that iteration follows the keys' ordering. */
    private final Map<DecoratedKey, ColumnFamily> keys = new TreeMap<DecoratedKey, ColumnFamily>();

    /** Flush threshold, in bytes, derived from the {@code bufferSizeInMB} constructor argument. */
    private final long bufferSize;

    /** Running estimate, in bytes, of the data currently accounted for in {@link #keys}. */
    private long currentSize;

    /**
     * Create a new buffering writer.
     *
     * @param directory the directory where to write the sstables
     * @param keyspace the keyspace name
     * @param columnFamily the column family name
     * @param comparator the column family comparator
     * @param subComparator the column family subComparator, or null if not a Super column family
     * @param bufferSizeInMB the data size in MB after which an sstable is written and the buffer is reset.
     * This corresponds roughly to the written data size (i.e. the size of the created sstable). The actual
     * size used in memory will be higher (by how much depends on the size of the columns you add). For 1GB
     * of heap, a bufferSizeInMB of 128 is probably a reasonable choice. If you experience OOM, this value
     * should be lowered.
     * @throws IOException declared by this constructor; whether the superclass constructor actually raises
     * it is not visible from this class
     */
    public SSTableSimpleUnsortedWriter(File directory,
                                       String keyspace,
                                       String columnFamily,
                                       AbstractType comparator,
                                       AbstractType subComparator,
                                       int bufferSizeInMB) throws IOException
    {
        super(directory, new CFMetaData(keyspace, columnFamily, subComparator == null ? ColumnFamilyType.Standard : ColumnFamilyType.Super, comparator, subComparator));
        // Long literals keep the multiplication in 64-bit arithmetic, so large MB values cannot overflow an int.
        this.bufferSize = bufferSizeInMB * 1024L * 1024L;
    }

    /**
     * Adds the estimated size of the given row to the running total and flushes the buffer
     * once that total exceeds the configured threshold.
     * This method only updates the size accounting; it does not insert into the buffer
     * ({@link #getColumnFamily()} does that when it creates a column family).
     *
     * @param key the key of the row
     * @param columnFamily the content of the row
     * @throws IOException if flushing the buffer fails
     */
    protected void writeRow(DecoratedKey key, ColumnFamily columnFamily) throws IOException
    {
        currentSize += estimatedSize(key, columnFamily);
        if (currentSize > bufferSize)
            sync();
    }

    /**
     * Returns the column family buffered for the current key, creating and registering
     * an empty one if the key has not been seen since the last flush.
     *
     * @return the buffered column family for the current key
     */
    protected ColumnFamily getColumnFamily()
    {
        ColumnFamily previous = keys.get(currentKey);
        // If the CF already exists in memory, we'll just continue adding to it
        if (previous == null)
        {
            previous = ColumnFamily.create(metadata);
            keys.put(currentKey, previous);
        }
        else
        {
            // We will reuse a CF that we have counted already. Because it is easier to add the full size
            // of the CF in the next writeRow call than to find out the delta, we remove its size until
            // that next call. The estimate is an integral value, so what is removed here for a given
            // CF state is exactly what writeRow would add for the same state.
            currentSize -= estimatedSize(currentKey, previous);
        }
        return previous;
    }

    /**
     * Flushes whatever is still buffered to an sstable.
     *
     * @throws IOException if writing the sstable fails
     */
    public void close() throws IOException
    {
        sync();
    }

    /**
     * Estimates, in bytes, how much a row contributes to the buffer: the remaining bytes of
     * its key plus its serialized size scaled by {@link #SERIALIZED_SIZE_RATIO}.
     * The result is truncated to a long here, once, so that callers add and subtract the
     * same integral value instead of relying on the implicit narrowing of a compound
     * assignment (which truncates toward zero and therefore rounds additions and
     * subtractions in opposite directions).
     */
    private static long estimatedSize(DecoratedKey key, ColumnFamily columnFamily)
    {
        return (long) (key.key.remaining() + columnFamily.serializedSize() * SERIALIZED_SIZE_RATIO);
    }

    /**
     * Appends every buffered row, in the buffer's iteration order, to a writer obtained
     * from {@code getWriter()}, then resets the size estimate and empties the buffer.
     * Does nothing when the buffer is empty.
     *
     * @throws IOException if obtaining the writer, appending a row or closing the writer fails
     */
    private void sync() throws IOException
    {
        if (keys.isEmpty())
            return;

        SSTableWriter writer = getWriter();
        for (Map.Entry<DecoratedKey, ColumnFamily> entry : keys.entrySet())
        {
            writer.append(entry.getKey(), entry.getValue());
        }
        // The returned reader is intentionally not kept.
        writer.closeAndOpenReader();

        currentSize = 0;
        keys.clear();
    }
}