/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cassandra.db; import java.util.*; import java.io.IOException; import java.io.File; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.Future; import com.google.common.base.Function; import com.google.common.collect.Iterables; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.db.commitlog.CommitLogSegment; import org.apache.cassandra.dht.Range; import org.apache.cassandra.io.SSTableDeletingReference; import org.apache.cassandra.io.SSTableReader; import org.apache.cassandra.io.util.FileUtils; import java.net.InetAddress; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.*; import org.apache.cassandra.db.filter.*; import org.cliffc.high_scale_lib.NonBlockingHashMap; import org.apache.log4j.Logger; public class Table { public static final String SYSTEM_TABLE = "system"; private static final Logger logger = Logger.getLogger(Table.class); private static final String SNAPSHOT_SUBDIR_NAME = "snapshots"; /* we use this lock to drain updaters before calling a flush. */ static final ReentrantReadWriteLock flusherLock = new ReentrantReadWriteLock(true); private static Timer flushTimer = new Timer("FLUSH-TIMER"); private final boolean waitForCommitLog; // This is a result of pushing down the point in time when storage directories get created. It used to happen in // CassandraDaemon, but it is possible to call Table.open without a running daemon, so it made sense to ensure // proper directories here. static { try { DatabaseDescriptor.createAllDirectories(); } catch (IOException ex) { throw new RuntimeException(ex); } } /* * This class represents the metadata of this Table. The metadata * is basically the column family name and the ID associated with * this column family. We use this ID in the Commit Log header to * determine when a log file that has been rolled can be deleted. */ public static class TableMetadata { private static HashMap<String,TableMetadata> tableMetadataMap = new HashMap<String,TableMetadata>(); private static Map<Integer, String> idCfMap_ = new HashMap<Integer, String>(); static { try { DatabaseDescriptor.storeMetadata(); } catch (IOException e) { throw new RuntimeException(e); } } public static synchronized Table.TableMetadata instance(String tableName) throws IOException { if ( tableMetadataMap.get(tableName) == null ) { tableMetadataMap.put(tableName, new Table.TableMetadata()); } return tableMetadataMap.get(tableName); } /* The mapping between column family and the column type. */ private Map<String, String> cfTypeMap_ = new HashMap<String, String>(); private Map<String, Integer> cfIdMap_ = new HashMap<String, Integer>(); public void add(String cf, int id) { add(cf, id, "Standard"); } public void add(String cf, int id, String type) { if (logger.isDebugEnabled()) logger.debug("adding " + cf + " as " + id); assert !idCfMap_.containsKey(id); cfIdMap_.put(cf, id); idCfMap_.put(id, cf); cfTypeMap_.put(cf, type); } public boolean isEmpty() { return cfIdMap_.isEmpty(); } int getColumnFamilyId(String columnFamily) { return cfIdMap_.get(columnFamily); } public static String getColumnFamilyName(int id) { return idCfMap_.get(id); } String getColumnFamilyType(String cfName) { return cfTypeMap_.get(cfName); } Set<String> getColumnFamilies() { return cfIdMap_.keySet(); } int size() { return cfIdMap_.size(); } boolean isValidColumnFamily(String cfName) { return cfIdMap_.containsKey(cfName); } public String toString() { return "TableMetadata(" + FBUtilities.mapToString(cfIdMap_) + ")"; } public static int getColumnFamilyCount() { return idCfMap_.size(); } public static String getColumnFamilyIDString() { return FBUtilities.mapToString(tableMetadataMap); } } /** Table objects, one per keyspace. only one instance should ever exist for any given keyspace. */ private static final Map<String, Table> instances = new NonBlockingHashMap<String, Table>(); /* Table name. */ public final String name; /* Handle to the Table Metadata */ private final Table.TableMetadata tableMetadata; /* ColumnFamilyStore per column family */ private final Map<String, ColumnFamilyStore> columnFamilyStores = new HashMap<String, ColumnFamilyStore>(); // cache application CFs since Range queries ask for them a _lot_ private SortedSet<String> applicationColumnFamilies; public static Table open(String table) throws IOException { Table tableInstance = instances.get(table); if (tableInstance == null) { // instantiate the Table. we could use putIfAbsent but it's important to making sure it is only done once // per keyspace, so we synchronize and re-check before doing it. synchronized (Table.class) { tableInstance = instances.get(table); if (tableInstance == null) { tableInstance = new Table(table); instances.put(table, tableInstance); } } } return tableInstance; } public Set<String> getColumnFamilies() { return tableMetadata.getColumnFamilies(); } public Collection<ColumnFamilyStore> getColumnFamilyStores() { return Collections.unmodifiableCollection(columnFamilyStores.values()); } public ColumnFamilyStore getColumnFamilyStore(String cfName) { return columnFamilyStores.get(cfName); } /** * Do a cleanup of keys that do not belong locally. */ public void forceCleanup() { if (name.equals(SYSTEM_TABLE)) throw new RuntimeException("Cleanup of the system table is neither necessary nor wise"); Set<String> columnFamilies = tableMetadata.getColumnFamilies(); for ( String columnFamily : columnFamilies ) { ColumnFamilyStore cfStore = columnFamilyStores.get( columnFamily ); if ( cfStore != null ) cfStore.forceCleanup(); } } /** * Take a snapshot of the entire set of column families with a given timestamp. * * @param clientSuppliedName the tag associated with the name of the snapshot. This * value can be null. */ public void snapshot(String clientSuppliedName) throws IOException { String snapshotName = Long.toString(System.currentTimeMillis()); if (clientSuppliedName != null && !clientSuppliedName.equals("")) { snapshotName = snapshotName + "-" + clientSuppliedName; } for (ColumnFamilyStore cfStore : columnFamilyStores.values()) { cfStore.snapshot(snapshotName); } } /** * Clear all the snapshots for a given table. */ public void clearSnapshot() throws IOException { for (String dataDirPath : DatabaseDescriptor.getAllDataFileLocations()) { String snapshotPath = dataDirPath + File.separator + name + File.separator + SNAPSHOT_SUBDIR_NAME; File snapshotDir = new File(snapshotPath); if (snapshotDir.exists()) { if (logger.isDebugEnabled()) logger.debug("Removing snapshot directory " + snapshotPath); FileUtils.deleteDir(snapshotDir); } } } /* * This method is invoked only during a bootstrap process. We basically * do a complete compaction since we can figure out based on the ranges * whether the files need to be split. */ public List<SSTableReader> forceAntiCompaction(Collection<Range> ranges, InetAddress target) { List<SSTableReader> allResults = new ArrayList<SSTableReader>(); Set<String> columnFamilies = tableMetadata.getColumnFamilies(); for ( String columnFamily : columnFamilies ) { ColumnFamilyStore cfStore = columnFamilyStores.get( columnFamily ); try { allResults.addAll(CompactionManager.instance.submitAnticompaction(cfStore, ranges, target).get()); } catch (Exception e) { throw new RuntimeException(e); } } return allResults; } /* * This method is an ADMIN operation to force compaction * of all SSTables on disk. */ public void forceCompaction() { Set<String> columnFamilies = tableMetadata.getColumnFamilies(); for ( String columnFamily : columnFamilies ) { ColumnFamilyStore cfStore = columnFamilyStores.get( columnFamily ); if ( cfStore != null ) CompactionManager.instance.submitMajor(cfStore); } } List<SSTableReader> getAllSSTablesOnDisk() { List<SSTableReader> list = new ArrayList<SSTableReader>(); Set<String> columnFamilies = tableMetadata.getColumnFamilies(); for ( String columnFamily : columnFamilies ) { ColumnFamilyStore cfStore = columnFamilyStores.get( columnFamily ); if ( cfStore != null ) list.addAll(cfStore.getSSTables()); } return list; } private Table(String table) throws IOException { name = table; waitForCommitLog = DatabaseDescriptor.getCommitLogSync() == DatabaseDescriptor.CommitLogSync.batch; tableMetadata = Table.TableMetadata.instance(table); for (String columnFamily : tableMetadata.getColumnFamilies()) { columnFamilyStores.put(columnFamily, ColumnFamilyStore.createColumnFamilyStore(table, columnFamily)); } // check 10x as often as the lifetime, so we can exceed lifetime by 10% at most int checkMs = DatabaseDescriptor.getMemtableLifetimeMS() / 10; flushTimer.schedule(new TimerTask() { public void run() { for (ColumnFamilyStore cfs : columnFamilyStores.values()) { try { cfs.forceFlushIfExpired(); } catch (IOException e) { throw new RuntimeException(e); } } } }, checkMs, checkMs); } public int getColumnFamilyId(String columnFamily) { return tableMetadata.getColumnFamilyId(columnFamily); } /** * Selects the specified column family for the specified key. */ @Deprecated // single CFs could be larger than memory public ColumnFamily get(String key, String cfName) throws IOException { ColumnFamilyStore cfStore = columnFamilyStores.get(cfName); assert cfStore != null : "Column family " + cfName + " has not been defined"; return cfStore.getColumnFamily(new IdentityQueryFilter(key, new QueryPath(cfName))); } public Row getRow(QueryFilter filter) throws IOException { ColumnFamilyStore cfStore = columnFamilyStores.get(filter.getColumnFamilyName()); ColumnFamily columnFamily = cfStore.getColumnFamily(filter); return new Row(filter.key, columnFamily); } /** * This method adds the row to the Commit Log associated with this table. * Once this happens the data associated with the individual column families * is also written to the column family store's memtable. */ public void apply(RowMutation mutation, Object serializedMutation, boolean writeCommitLog) throws IOException { HashMap<ColumnFamilyStore,Memtable> memtablesToFlush = new HashMap<ColumnFamilyStore, Memtable>(2); // write the mutation to the commitlog and memtables flusherLock.readLock().lock(); try { if (writeCommitLog) { CommitLog.instance().add(mutation, serializedMutation); } for (ColumnFamily columnFamily : mutation.getColumnFamilies()) { Memtable memtableToFlush; ColumnFamilyStore cfs = columnFamilyStores.get(columnFamily.name()); if ((memtableToFlush=cfs.apply(mutation.key(), columnFamily)) != null) memtablesToFlush.put(cfs, memtableToFlush); ColumnFamily cachedRow = cfs.getRawCachedRow(mutation.key()); if (cachedRow != null) cachedRow.addAll(columnFamily); } } finally { flusherLock.readLock().unlock(); } // flush memtables that got filled up. usually mTF will be empty and this will be a no-op for (Map.Entry<ColumnFamilyStore, Memtable> entry : memtablesToFlush.entrySet()) entry.getKey().maybeSwitchMemtable(entry.getValue(), writeCommitLog); } public List<Future<?>> flush() throws IOException { List<Future<?>> futures = new ArrayList<Future<?>>(); for (String cfName : columnFamilyStores.keySet()) { Future<?> future = columnFamilyStores.get(cfName).forceFlush(); if (future != null) futures.add(future); } return futures; } // for binary load path. skips commitlog. void load(RowMutation rowMutation) throws IOException { String key = rowMutation.key(); for (ColumnFamily columnFamily : rowMutation.getColumnFamilies()) { Collection<IColumn> columns = columnFamily.getSortedColumns(); for (IColumn column : columns) { ColumnFamilyStore cfStore = columnFamilyStores.get(new String(column.name(), "UTF-8")); cfStore.applyBinary(key, column.value()); } } } public String getDataFileLocation(long expectedCompactedFileSize) { String path = DatabaseDescriptor.getDataFileLocationForTable(name, expectedCompactedFileSize); if (path == null) { // retry after GCing to force unmap of compacted SSTables so they can be deleted StorageService.instance.requestGC(); try { Thread.sleep(SSTableDeletingReference.RETRY_DELAY * 2); } catch (InterruptedException e) { throw new AssertionError(e); } path = DatabaseDescriptor.getDataFileLocationForTable(name, expectedCompactedFileSize); } return path; } public static String getSnapshotPath(String dataDirPath, String tableName, String snapshotName) { return dataDirPath + File.separator + tableName + File.separator + SNAPSHOT_SUBDIR_NAME + File.separator + snapshotName; } public static Iterable<Table> all() { Function<String, Table> transformer = new Function<String, Table>() { public Table apply(String tableName) { try { return Table.open(tableName); } catch (IOException e) { throw new RuntimeException(e); } } }; return Iterables.transform(DatabaseDescriptor.getTables(), transformer); } }