/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.db;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.log4j.Logger;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.io.SSTableReader;
import org.apache.cassandra.io.SSTableWriter;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.ReentrantByteArrayInputStream;
import org.apache.cassandra.utils.WrappedRunnable;
import org.cliffc.high_scale_lib.NonBlockingHashMap;
/**
 * In-memory buffer that maps a decorated key to an opaque byte[] payload and
 * can write its contents, in sorted key order, to a new SSTable.
 *
 * Writes go through {@link #put(String, byte[])}. Once the accumulated size
 * reaches the configured threshold the first writer to notice marks this
 * instance frozen and asks the owning ColumnFamilyStore to flush it; all
 * writes arriving after that point are handed back to the store instead of
 * being buffered here.
 */
public class BinaryMemtable implements IFlushable
{
    private static final Logger logger = Logger.getLogger(BinaryMemtable.class);

    /* Scale factor applied to the configured threshold (presumably megabytes to bytes). */
    private static final long BYTES_PER_UNIT = 1024L * 1024L;

    /*
     * Size at which this memtable is considered full. Computed with long
     * arithmetic: with int arithmetic a configured value of 2048 or more
     * wraps around to zero or a negative number, which would make the
     * threshold appear violated from the very first write.
     */
    private final long threshold = DatabaseDescriptor.getBMTThreshold() * BYTES_PER_UNIT;

    /* Running total of payload bytes plus key lengths added through resolve(). */
    private final AtomicLong currentSize = new AtomicLong(0);

    /* Set once a flush has been requested for this memtable; read and written only while holding lock. */
    private boolean isFrozen = false;

    /* Buffered payloads, keyed by the partitioner-decorated key. */
    private final Map<DecoratedKey, byte[]> columnFamilies = new NonBlockingHashMap<DecoratedKey, byte[]>();

    /* Serializes the decision to freeze and flush this memtable. */
    private final Lock lock = new ReentrantLock();

    /* Condition created from lock in the constructor; not used anywhere else in this class. */
    Condition condition;

    private final IPartitioner partitioner = StorageService.getPartitioner();

    /* Store that owns this memtable; receives flush requests and the resulting SSTable. */
    private final ColumnFamilyStore cfs;

    /**
     * Creates an empty memtable bound to the given store.
     *
     * @param cfs store used for flush submission, flush path lookup and SSTable registration
     */
    public BinaryMemtable(ColumnFamilyStore cfs)
    {
        this.cfs = cfs;
        condition = lock.newCondition();
    }

    /**
     * @return true once the accumulated size has reached the threshold
     */
    boolean isThresholdViolated()
    {
        return currentSize.get() >= threshold;
    }

    /**
     * Entry point used by clients to add data. Respects the threshold:
     * below it the payload is buffered in this memtable; at or above it the
     * write is redirected to the owning store.
     *
     * The first thread to observe the violated threshold freezes this
     * memtable, submits it for flushing and passes the write to
     * cfs.switchBinaryMemtable(). Any later thread finds it already frozen
     * and passes the write to cfs.applyBinary().
     *
     * NOTE(review): the below-threshold path does not take the lock, so a
     * writer that passed the threshold check just before another thread
     * froze the memtable can still add an entry afterwards - confirm this
     * is acceptable to the flush path.
     *
     * @param key    undecorated row key
     * @param buffer payload to store for the key
     */
    void put(String key, byte[] buffer)
    {
        if (!isThresholdViolated())
        {
            resolve(key, buffer);
            return;
        }

        lock.lock();
        try
        {
            if (isFrozen)
            {
                // a flush was already requested; let the store route the write
                cfs.applyBinary(key, buffer);
            }
            else
            {
                isFrozen = true;
                cfs.submitFlush(this);
                cfs.switchBinaryMemtable(key, buffer);
            }
        }
        finally
        {
            lock.unlock();
        }
    }

    /**
     * @return true if nothing has been buffered in this memtable
     */
    public boolean isClean()
    {
        return columnFamilies.isEmpty();
    }

    /**
     * Stores the payload under the decorated key (replacing any previous
     * payload for that key) and adds the payload length plus the key's
     * character count to the running size.
     */
    private void resolve(String key, byte[] buffer)
    {
        columnFamilies.put(partitioner.decorateKey(key), buffer);
        currentSize.addAndGet(buffer.length + key.length());
    }

    /**
     * Takes a snapshot of the current key set and sorts it in the keys'
     * natural order. Must not be called on an empty memtable.
     */
    private List<DecoratedKey> getSortedKeys()
    {
        assert !columnFamilies.isEmpty();
        logger.info("Sorting " + this);
        List<DecoratedKey> keys = new ArrayList<DecoratedKey>(columnFamilies.keySet());
        Collections.sort(keys);
        return keys;
    }

    /**
     * Writes the payload of every given key, in the given order, to a new
     * SSTable at the store's flush path and opens a reader on the result.
     *
     * When the writer's bloom filter writer reports isBloomColumns(), each
     * payload is additionally fed through an ObservingColumnFamilyDeserializer
     * constructed around that bloom filter writer.
     *
     * @param sortedKeys keys to write, already sorted
     * @return reader for the freshly written SSTable
     * @throws IOException if writing the SSTable fails
     */
    private SSTableReader writeSortedContents(List<DecoratedKey> sortedKeys) throws IOException
    {
        logger.info("Writing " + this);
        String path = cfs.getFlushPath();
        SSTableWriter writer = new SSTableWriter(path, sortedKeys.size(), sortedKeys.size() * 10, StorageService.getPartitioner());

        boolean bloomColumns = writer.getBloomFilterWriter().isBloomColumns();
        ObservingColumnFamilyDeserializer observer = null;
        DataInputStream din = null;
        ReentrantByteArrayInputStream bin = null;
        if (bloomColumns)
        {
            observer = new ObservingColumnFamilyDeserializer(writer.getBloomFilterWriter());
            // one stream pair is reused for every row; bin.reset(bytes) re-points it at the next payload
            bin = new ReentrantByteArrayInputStream(new byte[0]);
            din = new DataInputStream(bin);
        }

        for (DecoratedKey key : sortedKeys)
        {
            byte[] bytes = columnFamilies.get(key);
            assert bytes.length > 0;
            writer.append(key, bytes);
            if (observer != null)
            {
                bin.reset(bytes);
                observer.deserialize(key, din);
            }
        }

        SSTableReader sstable = writer.closeAndOpenReader();
        logger.info("Completed flushing " + writer.getFilename());
        return sstable;
    }

    /**
     * Flushes this memtable in two stages: the keys are sorted on the
     * sorter executor, then the sorted contents are written on the writer
     * executor, the new SSTable is added to the store, and the condition is
     * signalled.
     *
     * NOTE(review): signalAll() is invoked without acquiring any lock here.
     * A Condition obtained from a java.util.concurrent Lock would throw
     * IllegalMonitorStateException in that case, so callers presumably pass
     * a Condition implementation that does not require one - confirm.
     *
     * @param condition signalled after the SSTable has been added to the store
     * @param sorter    executor that runs the key sort
     * @param writer    executor that runs the SSTable write
     */
    public void flushAndSignal(final Condition condition, ExecutorService sorter, final ExecutorService writer)
    {
        sorter.submit(new Runnable()
        {
            public void run()
            {
                final List<DecoratedKey> sortedKeys = getSortedKeys();
                writer.submit(new WrappedRunnable()
                {
                    public void runMayThrow() throws IOException
                    {
                        cfs.addSSTable(writeSortedContents(sortedKeys));
                        condition.signalAll();
                    }
                });
            }
        });
    }
}