/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.cassandra.db;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.db.columniterator.IColumnIterator;
import org.apache.cassandra.db.columniterator.SimpleAbstractColumnIterator;
import org.apache.cassandra.db.filter.AbstractColumnIterator;
import org.apache.cassandra.db.filter.NamesQueryFilter;
import org.apache.cassandra.db.filter.SliceQueryFilter;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.io.sstable.SSTableReader;
import org.apache.cassandra.io.sstable.SSTableWriter;
import org.apache.cassandra.utils.WrappedRunnable;

/**
 * An in-memory, key-ordered collection of {@link ColumnFamily} instances belonging to a
 * single {@link ColumnFamilyStore}.
 *
 * Writes are merged into a concurrent sorted map keyed by {@link DecoratedKey}; the running
 * byte and operation totals are tracked so the owner can tell when the configured thresholds
 * have been reached. On flush the entries are written, in key order, through a writer obtained
 * from the owning store.
 *
 * Memtables are ordered by creation time.
 */
public class Memtable implements Comparable<Memtable>, IFlushable
{
    private static final Logger logger = LoggerFactory.getLogger(Memtable.class);

    /** Set once by {@link #freeze()}; {@link #put} asserts that it is still false. */
    private volatile boolean isFrozen;

    /** Sum of {@code cf.size()} over every column family passed to {@link #put}. */
    private final AtomicLong currentThroughput = new AtomicLong(0);

    /** Running operation count; see {@link #resolve} for how each write is counted. */
    private final AtomicLong currentOperations = new AtomicLong(0);

    /** Wall-clock time (ms) at construction; used for ordering and expiry. */
    private final long creationTime;

    private final ConcurrentNavigableMap<DecoratedKey, ColumnFamily> columnFamilies = new ConcurrentSkipListMap<DecoratedKey, ColumnFamily>();

    public final ColumnFamilyStore cfs;

    /** Throughput limit: the store's "throughput in MB" setting multiplied by 1024 * 1024. */
    private final long THRESHOLD;

    /** Operation-count limit: the store's "operations in millions" setting multiplied by 1024 * 1024. */
    private final long THRESHOLD_COUNT;

    /**
     * Creates an empty memtable for the given store, capturing the current time and the
     * store's throughput and operation thresholds as they are at construction.
     *
     * @param cfs the owning column family store
     */
    public Memtable(ColumnFamilyStore cfs)
    {
        this.cfs = cfs;
        creationTime = System.currentTimeMillis();
        THRESHOLD = cfs.getMemtableThroughputInMB() * 1024L * 1024L;
        THRESHOLD_COUNT = (long) (cfs.getMemtableOperationsInMillions() * 1024 * 1024);
    }

    /**
     * Compares two Memtable based on creation time.
     *
     * @param rhs Memtable to compare to.
     * @return a negative integer, zero, or a positive integer as this object
     * is less than, equal to, or greater than the specified object.
     */
    @Override
    public int compareTo(Memtable rhs)
    {
        if (creationTime < rhs.creationTime)
            return -1;
        return creationTime > rhs.creationTime ? 1 : 0;
    }

    /** @return the accumulated size of all writes applied so far */
    public long getCurrentThroughput()
    {
        return currentThroughput.get();
    }

    /** @return the accumulated operation count of all writes applied so far */
    public long getCurrentOperations()
    {
        return currentOperations.get();
    }

    /** @return true once either the throughput or the operation-count limit has been reached */
    boolean isThresholdViolated()
    {
        return currentThroughput.get() >= this.THRESHOLD || currentOperations.get() >= this.THRESHOLD_COUNT;
    }

    boolean isFrozen()
    {
        return isFrozen;
    }

    /** Marks this memtable as frozen. There is no way to unfreeze it. */
    void freeze()
    {
        isFrozen = true;
    }

    /**
     * Should only be called by ColumnFamilyStore.apply. NOT a public API.
     * (CFS handles locking to avoid submitting an op
     * to a flushing memtable. Any other way is unsafe.)
     */
    void put(DecoratedKey key, ColumnFamily columnFamily)
    {
        assert !isFrozen; // not 100% foolproof but hell, it's an assert
        resolve(key, columnFamily);
    }

    /**
     * Updates the running totals and merges {@code cf} into the entry for {@code key}.
     *
     * A write counts as one operation per column; a write with no columns counts as one
     * operation if the column family is marked for delete and as zero otherwise.
     *
     * If no entry exists for the key, {@code cf} itself becomes the stored entry; otherwise
     * it is resolved into the existing one.
     */
    private void resolve(DecoratedKey key, ColumnFamily cf)
    {
        currentThroughput.addAndGet(cf.size());

        long operations = cf.getColumnCount();
        if (operations == 0 && cf.isMarkedForDelete())
            operations = 1;
        currentOperations.addAndGet(operations);

        ColumnFamily oldCf = columnFamilies.putIfAbsent(key, cf);
        if (oldCf != null)
            oldCf.resolve(cf);
    }

    // for debugging
    public String contents()
    {
        StringBuilder builder = new StringBuilder();
        builder.append("{");
        for (Map.Entry<DecoratedKey, ColumnFamily> entry : columnFamilies.entrySet())
        {
            builder.append(entry.getKey()).append(": ").append(entry.getValue()).append(", ");
        }
        builder.append("}");
        return builder.toString();
    }

    /**
     * Writes every entry, in key order, through a flush writer obtained from the store and
     * opens a reader on the result.
     *
     * @return a reader for the newly written sstable
     * @throws IOException if the writer fails
     */
    private SSTableReader writeSortedContents() throws IOException
    {
        logger.info("Writing " + this);
        SSTableWriter writer = cfs.createFlushWriter(columnFamilies.size());

        for (Map.Entry<DecoratedKey, ColumnFamily> entry : columnFamilies.entrySet())
            writer.append(entry.getKey(), entry.getValue());

        SSTableReader ssTable = writer.closeAndOpenReader();
        logger.info(String.format("Completed flushing %s (%d bytes)",
                                  ssTable.getFilename(), new File(ssTable.getFilename()).length()));
        return ssTable;
    }

    /**
     * Registers this memtable as pending flush and schedules the flush on {@code writer}.
     *
     * The scheduled task takes the store's flush lock. Unless the store has been dropped, it
     * writes the contents to an sstable, adds that sstable to the store and removes this
     * memtable from the pending-flush collection. If the store has been dropped, nothing is
     * written and this memtable is not removed from that collection.
     *
     * The latch is counted down after the lock is released. That call sits after the
     * try/finally, so it is not reached when the write throws.
     * NOTE(review): presumably intentional so that waiters never proceed after a failed
     * flush -- confirm before moving it into the finally block.
     *
     * @param latch  counted down once the task completes without throwing
     * @param sorter not used by this implementation
     * @param writer executor on which the flush task runs
     */
    public void flushAndSignal(final CountDownLatch latch, ExecutorService sorter, final ExecutorService writer)
    {
        cfs.getMemtablesPendingFlush().add(this); // it's ok for the MT to briefly be both active and pendingFlush
        writer.execute(new WrappedRunnable()
        {
            public void runMayThrow() throws IOException
            {
                cfs.flushLock.lock();
                try
                {
                    if (!cfs.isDropped())
                    {
                        cfs.addSSTable(writeSortedContents());
                        cfs.getMemtablesPendingFlush().remove(Memtable.this);
                    }
                }
                finally
                {
                    cfs.flushLock.unlock();
                }
                latch.countDown();
            }
        });
    }

    @Override
    public String toString()
    {
        return String.format("Memtable-%s@%s(%s bytes, %s operations)",
                             cfs.getColumnFamilyName(), hashCode(), currentThroughput, currentOperations);
    }

    /**
     * @param startWith Include data in the result from and including this key and to the end of the memtable
     * @return An iterator of entries with the data from the start key
     */
    public Iterator<Map.Entry<DecoratedKey, ColumnFamily>> getEntryIterator(DecoratedKey startWith)
    {
        return columnFamilies.tailMap(startWith).entrySet().iterator();
    }

    /** @return true if nothing has been written to this memtable */
    public boolean isClean()
    {
        return columnFamilies.isEmpty();
    }

    public String getTableName()
    {
        return cfs.table.name;
    }

    /**
     * obtain an iterator of columns in this memtable in the specified order starting from a given column.
     *
     * The columns of {@code cf} are taken in sorted or reverse-sorted order according to
     * {@code filter.reversed}, and leading columns that compare before {@code filter.start}
     * under the filter's comparator are skipped. The skip is bypassed for a reversed filter
     * with an empty start. The returned iterator hands back the remaining columns as-is; it
     * applies no end bound or count limit of its own.
     *
     * @param key            the row key reported by the returned iterator
     * @param cf             the column family to iterate over; must not be null
     * @param filter         supplies the direction, start column and column comparator
     * @param typeComparator passed to the filter to build the column comparator
     */
    public static IColumnIterator getSliceIterator(final DecoratedKey key, final ColumnFamily cf, SliceQueryFilter filter, AbstractType typeComparator)
    {
        assert cf != null;
        final boolean isSuper = cf.isSuper();
        final Collection<IColumn> filteredColumns = filter.reversed ? cf.getReverseSortedColumns() : cf.getSortedColumns();

        // ok to not have subcolumnComparator since we won't be adding columns to this object
        IColumn startColumn = isSuper ? new SuperColumn(filter.start, (AbstractType)null) : new Column(filter.start);
        Comparator<IColumn> comparator = filter.getColumnComparator(typeComparator);

        final PeekingIterator<IColumn> filteredIter = Iterators.peekingIterator(filteredColumns.iterator());
        if (!filter.reversed || filter.start.remaining() != 0)
        {
            while (filteredIter.hasNext() && comparator.compare(filteredIter.peek(), startColumn) < 0)
            {
                filteredIter.next();
            }
        }

        return new AbstractColumnIterator()
        {
            public ColumnFamily getColumnFamily()
            {
                return cf;
            }

            public DecoratedKey getKey()
            {
                return key;
            }

            public boolean hasNext()
            {
                return filteredIter.hasNext();
            }

            public IColumn next()
            {
                return filteredIter.next();
            }
        };
    }

    /**
     * Obtains an iterator over the columns of {@code cf} whose names are listed in
     * {@code filter.columns}, in the iteration order of that collection. Names with no
     * matching column are skipped.
     *
     * @param key    the row key reported by the returned iterator
     * @param cf     the column family to look columns up in; must not be null
     * @param filter supplies the column names to fetch
     */
    public static IColumnIterator getNamesIterator(final DecoratedKey key, final ColumnFamily cf, final NamesQueryFilter filter)
    {
        assert cf != null;
        final boolean isStandard = !cf.isSuper();

        return new SimpleAbstractColumnIterator()
        {
            private final Iterator<ByteBuffer> iter = filter.columns.iterator();

            public ColumnFamily getColumnFamily()
            {
                return cf;
            }

            public DecoratedKey getKey()
            {
                return key;
            }

            protected IColumn computeNext()
            {
                while (iter.hasNext())
                {
                    ByteBuffer current = iter.next();
                    IColumn column = cf.getColumn(current);
                    if (column != null)
                        // clone supercolumns so caller can freely removeDeleted or otherwise mutate it
                        return isStandard ? column : ((SuperColumn)column).cloneMe();
                }
                return endOfData();
            }
        };
    }

    /** @return the column family stored for {@code key}, or null if there is none */
    public ColumnFamily getColumnFamily(DecoratedKey key)
    {
        return columnFamilies.get(key);
    }

    /** Removes every entry. The throughput and operation counters are left untouched. */
    void clearUnsafe()
    {
        columnFamilies.clear();
    }

    /**
     * @return true if more than the store's "flush after minutes" setting has elapsed since
     * this memtable was created
     */
    public boolean isExpired()
    {
        // Use long literals throughout so the minutes-to-milliseconds conversion is never
        // evaluated in 32-bit arithmetic, whatever type the getter returns.
        long flushAfterMillis = cfs.getMemtableFlushAfterMins() * 60L * 1000L;
        return System.currentTimeMillis() > creationTime + flushAfterMillis;
    }
}