/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.infrastructure.db;

import com.facebook.infrastructure.config.DatabaseDescriptor;
import com.facebook.infrastructure.io.*;
import com.facebook.infrastructure.utils.FBUtilities;
import com.facebook.infrastructure.utils.LogUtil;
import org.apache.log4j.Logger;

import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

/*
 * The Commit Log tracks every write operation into the system. The aim of
 * the commit log is to be able to successfully recover data that was not
 * yet stored to disk via the Memtable. Every Commit Log maintains a header,
 * represented by the abstraction CommitLogHeader. The header contains a bit
 * array and an array of longs; both arrays have one entry per column family
 * of the Table the Commit Log represents.
 *
 * Whenever a ColumnFamily is written to for the first time, its bit flag is
 * set to one in the CommitLogHeader. When it is flushed to disk by the
 * Memtable, its corresponding bit in the header is set to zero. This helps
 * track which Commit Logs can be thrown away as a result of Memtable
 * flushes. If a ColumnFamily is flushed and then written to again, its
 * entry in the array of longs is updated with the offset in the Commit Log
 * file where the new write starts. This helps speed up recovery, since we
 * can seek to these offsets and start processing the commit log from there.
 *
 * Every Commit Log is rolled over every time it reaches its size threshold,
 * so over time a number of commit log files can accumulate. However,
 * whenever we flush a column family to disk and update its bit flag, we
 * take this bit array and bitwise AND it with the headers of the older
 * commit logs.
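 *
 * An illustrative example (values invented for exposition): with three
 * column families, a fresh header has bits [0, 0, 0] and positions
 * [0, 0, 0]. A first write to column family 1 at file offset 1024 turns its
 * bit on and records the offset: bits [0, 1, 0], positions [0, 1024, 0].
 * When that Memtable is later flushed, the bit is cleared again; once a log
 * file's header, AND-ed with the headers of newer logs in this way, is all
 * zeros, the file holds no unflushed data and can be deleted.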
 *
 * Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com )
 */
class CommitLog
{
    private static final int bufSize_ = 128*1024*1024;
    private static Map<String, CommitLog> instances_ = new HashMap<String, CommitLog>();
    private static Lock lock_ = new ReentrantLock();
    private static Logger logger_ = Logger.getLogger(CommitLog.class);
    private static Map<String, CommitLogHeader> clHeaders_ = new HashMap<String, CommitLogHeader>();

    public static final class CommitLogContext
    {
        static CommitLogContext NULL = new CommitLogContext(null, -1L);
        /* Commit Log associated with this operation */
        private String file_;
        /* Offset within the Commit Log where this row was added */
        private long position_;

        public CommitLogContext(String file, long position)
        {
            file_ = file;
            position_ = position;
        }

        boolean isValidContext()
        {
            return (position_ != -1L);
        }

        String file()
        {
            return file_;
        }

        long position()
        {
            return position_;
        }
    }

    public static class CommitLogFileComparator implements Comparator<String>
    {
        public int compare(String f, String f2)
        {
            return (int)(getCreationTime(f) - getCreationTime(f2));
        }
    }

    static long getCreationTime(String file)
    {
        String[] entries = FBUtilities.strip(file, "-.");
        return Long.parseLong(entries[entries.length - 2]);
    }

    /*
     * Write the serialized commit log header into the specified commit log.
     */
    private static void writeCommitLogHeader(String commitLogFileName, byte[] bytes) throws IOException
    {
        IFileWriter logWriter = CommitLog.createWriter(commitLogFileName);
        logWriter.seek(0L);
        /* write the commit log header */
        logWriter.writeDirect(bytes);
        logWriter.close();
    }

    private static IFileWriter createWriter(String file) throws IOException
    {
        if ( DatabaseDescriptor.isFastSync() )
        {
            /* Add this to the threshold */
            int bufSize = 4*1024*1024;
            return SequenceFile.fastWriter(file, CommitLog.bufSize_ + bufSize);
        }
        else
            return SequenceFile.concurrentWriter(file);
    }

    static CommitLog open(String table) throws IOException
    {
        CommitLog commitLog = instances_.get(table);
        if ( commitLog == null )
        {
            CommitLog.lock_.lock();
            try
            {
                /* check again under the lock */
                commitLog = instances_.get(table);
                if ( commitLog == null )
                {
                    commitLog = new CommitLog(table, false);
                    instances_.put(table, commitLog);
                }
            }
            finally
            {
                CommitLog.lock_.unlock();
            }
        }
        return commitLog;
    }

    static String getTableName(String file)
    {
        String[] values = file.split("-");
        return values[1];
    }

    private String table_;
    /* Current commit log file */
    private String logFile_;
    /* header for current commit log */
    private CommitLogHeader clHeader_;
    private IFileWriter logWriter_;
    private long commitHeaderStartPos_;

    /*
     * Generates a file name of the format CommitLog-<table>-<timestamp>.log
     * in the directory specified by the DatabaseDescriptor.
     */
    private void setNextFileName()
    {
        logFile_ = DatabaseDescriptor.getLogFileLocation() + System.getProperty("file.separator") +
                   "CommitLog-" + table_ + "-" + System.currentTimeMillis() + ".log";
    }
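    /*
     * For example (illustrative values, assuming FBUtilities.strip tokenizes
     * on the characters it is given): setNextFileName() above produces a name
     * such as "CommitLog-Table1-1234567890.log"; getCreationTime() splits it
     * on "-" and ".", and the second-to-last token, 1234567890, is the
     * creation timestamp used by CommitLogFileComparator for sorting.
     */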
    /*
     * param @ table - name of table for which we are maintaining
     * this commit log.
     * param @ recoveryMode - is the commit log being instantiated
     * in recovery mode.
     */
    CommitLog(String table, boolean recoveryMode) throws IOException
    {
        table_ = table;
        if ( !recoveryMode )
        {
            setNextFileName();
            logWriter_ = CommitLog.createWriter(logFile_);
            writeCommitLogHeader();
        }
    }

    /*
     * This ctor is currently used only for debugging. We
     * are now using it to modify the header so that recovery
     * can be tested in as many scenarios as we can imagine.
     *
     * param @ logFile - logfile which we wish to modify.
     */
    CommitLog(File logFile) throws IOException
    {
        table_ = CommitLog.getTableName(logFile.getName());
        logFile_ = logFile.getAbsolutePath();
        logWriter_ = CommitLog.createWriter(logFile_);
        commitHeaderStartPos_ = 0L;
    }

    String getLogFile()
    {
        return logFile_;
    }

    void readCommitLogHeader(String logFile, byte[] bytes) throws IOException
    {
        IFileReader logReader = SequenceFile.reader(logFile);
        try
        {
            logReader.readDirect(bytes);
        }
        finally
        {
            logReader.close();
        }
    }

    /*
     * This is invoked on startup via the ctor. It basically
     * writes a header with all bits set to zero.
     */
    private void writeCommitLogHeader() throws IOException
    {
        Table table = Table.open(table_);
        int cfSize = table.getNumberOfColumnFamilies();
        /* record the beginning of the commit header */
        commitHeaderStartPos_ = logWriter_.getCurrentPosition();
        /* write the commit log header */
        clHeader_ = new CommitLogHeader(cfSize);
        writeCommitLogHeader(clHeader_.toByteArray(), false);
    }

    private void writeCommitLogHeader(byte[] bytes, boolean reset) throws IOException
    {
        /* record the current position */
        long currentPos = logWriter_.getCurrentPosition();
        logWriter_.seek(commitHeaderStartPos_);
        /* write the commit log header */
        logWriter_.writeDirect(bytes);
        if ( reset )
        {
            /* seek back to the old position */
            logWriter_.seek(currentPos);
        }
    }

    void recover(List<File> clogs) throws IOException
    {
        Table table = Table.open(table_);
        int cfSize = table.getNumberOfColumnFamilies();
        int size = CommitLogHeader.size(cfSize);
        byte[] header = new byte[size];
        byte[] header2 = new byte[size];
        int index = clogs.size() - 1;
        File file = clogs.get(index);
        readCommitLogHeader(file.getAbsolutePath(), header);
        Stack<File> filesNeeded = new Stack<File>();
        filesNeeded.push(file);
        /*
         * Identify files that we need for processing. This can be done using
         * the information in the header of each file. Simply AND the byte[]
         * headers, working backwards from the newest file, and stop at the
         * first file where the result is zero.
         */
        for ( int i = (index - 1); i >= 0; --i )
        {
            file = clogs.get(i);
            readCommitLogHeader(file.getAbsolutePath(), header2);
            byte[] result = CommitLogHeader.and(header, header2);
            if ( !CommitLogHeader.isZero(result) )
            {
                filesNeeded.push(file);
            }
            else
            {
                break;
            }
        }
        doRecovery(filesNeeded, header);
    }

    private void printHeader(byte[] header)
    {
        StringBuilder sb = new StringBuilder("");
        for ( byte b : header )
        {
            sb.append(b);
            sb.append(" ");
        }
        logger_.debug(sb.toString());
    }
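    /*
     * An illustrative example of the header arithmetic in recover() above
     * (values invented): if the newest log's header bits are [1,0,1] and an
     * older log's are [1,1,0], their AND is [1,0,0], which is non-zero, so
     * the older file still holds unflushed data and is pushed onto the
     * stack; an AND of all zeros would have ended the scan.
     */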
    private void doRecovery(Stack<File> filesNeeded, byte[] header) throws IOException
    {
        Table table = Table.open(table_);
        DataInputBuffer bufIn = new DataInputBuffer();
        DataOutputBuffer bufOut = new DataOutputBuffer();

        while ( !filesNeeded.isEmpty() )
        {
            File file = filesNeeded.pop();
            // IFileReader reader = SequenceFile.bufferedReader(file.getAbsolutePath(), DatabaseDescriptor.getLogFileSizeThreshold());
            IFileReader reader = SequenceFile.reader(file.getAbsolutePath());
            try
            {
                Map<String, Row> rows = new HashMap<String, Row>();
                reader.readDirect(header);
                /* deserialize the commit log header */
                bufIn.reset(header, 0, header.length);
                CommitLogHeader clHeader = CommitLogHeader.serializer().deserialize(bufIn);
                /* seek to the lowest position */
                int lowPos = CommitLogHeader.getLowestPosition(clHeader);
                /*
                 * If lowPos == 0 then we need to skip the processing of this
                 * file.
                 */
                if (lowPos == 0)
                    break;
                else
                    reader.seek(lowPos);

                /* read the logs, populate RowMutation and apply */
                while ( !reader.isEOF() )
                {
                    bufOut.reset();
                    long bytesRead = reader.next(bufOut);
                    if ( bytesRead == -1 )
                        break;

                    bufIn.reset(bufOut.getData(), bufOut.getLength());
                    /* Skip over the commit log key portion */
                    bufIn.readUTF();
                    /* Skip over data size */
                    bufIn.readInt();

                    /* read the commit log entry */
                    try
                    {
                        Row row = Row.serializer().deserialize(bufIn);
                        Map<String, ColumnFamily> columnFamilies = new HashMap<String, ColumnFamily>(row.getColumnFamilyMap());
                        /* remove column families that have already been flushed */
                        Set<String> cNames = columnFamilies.keySet();

                        for ( String cName : cNames )
                        {
                            ColumnFamily columnFamily = columnFamilies.get(cName);
                            /* TODO: Remove this to not process Hints */
                            if ( !DatabaseDescriptor.isApplicationColumnFamily(cName) )
                            {
                                row.removeColumnFamily(columnFamily);
                                continue;
                            }
                            int id = table.getColumnFamilyId(columnFamily.name());
                            if ( clHeader.get(id) == 0 || reader.getCurrentPosition() < clHeader.getPosition(id) )
                                row.removeColumnFamily(columnFamily);
                        }
                        if ( !row.isEmpty() )
                        {
                            logger_.debug("recover applying " + row);
                            table.applyNow(row);
                        }
                    }
                    catch ( IOException e )
                    {
                        logger_.debug( LogUtil.throwableToString(e) );
                    }
                }
                reader.close();
                /* apply the rows read */
                table.flush(true);
            }
            catch ( Throwable th )
            {
                logger_.info( LogUtil.throwableToString(th) );
                /* close the reader and delete this commit log. */
                reader.close();
                FileUtils.delete( new File[]{file} );
            }
        }
    }

    /*
     * Update the header of the commit log if a new column family
     * is encountered for the first time.
     */
    private void updateHeader(Row row) throws IOException
    {
        Map<String, ColumnFamily> columnFamilies = row.getColumnFamilyMap();
        Table table = Table.open(table_);
        Set<String> cNames = columnFamilies.keySet();
        for ( String cName : cNames )
        {
            ColumnFamily columnFamily = columnFamilies.get(cName);
            int id = table.getColumnFamilyId(columnFamily.name());
            /* on a first write, turn the bit on and record the current offset */
            if ( clHeader_.get(id) == 0 || ( clHeader_.get(id) == 1 && clHeader_.getPosition(id) == 0 ) )
            {
                clHeader_.turnOn( id, logWriter_.getCurrentPosition() );
                writeCommitLogHeader(clHeader_.toByteArray(), true);
            }
        }
    }

    /*
     * Adds the specified row to the commit log. This method will reset the
     * file offset to what it was before the start of the operation in case
     * of any problems. This way we can assume that the subsequent commit log
     * entry will overwrite the garbage left over by the previous write.
     */
    synchronized CommitLogContext add(Row row) throws IOException
    {
        long currentPosition = -1L;
        CommitLogContext cLogCtx = null;
        DataOutputBuffer cfBuffer = new DataOutputBuffer();
        long fileSize = 0L;

        try
        {
            /* serialize the row */
            cfBuffer.reset();
            Row.serializer().serialize(row, cfBuffer);
            currentPosition = logWriter_.getCurrentPosition();
            cLogCtx = new CommitLogContext(logFile_, currentPosition);
            /* Update the header */
            updateHeader(row);
            logWriter_.append(table_, cfBuffer);
            fileSize = logWriter_.getFileSize();
            checkThresholdAndRollLog(fileSize);
        }
        catch (IOException e)
        {
            if ( currentPosition != -1 )
                logWriter_.seek(currentPosition);
            throw e;
        }
        finally
        {
            cfBuffer.close();
        }
        return cLogCtx;
    }
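    /*
     * A hypothetical caller sketch (not part of this class), showing how the
     * context returned by add() above is meant to be used:
     *
     *   CommitLog clog = CommitLog.open("Table1");
     *   CommitLog.CommitLogContext ctx = clog.add(row); // log first, for durability
     *   // ... then apply the row to the Memtable, keeping ctx so that
     *   // onMemtableFlush(cfName, ctx) can be invoked once the flush completes.
     */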
    /*
     * This is called on Memtable flush to add to the commit log a token
     * indicating that this column family has been flushed. The bit flag
     * associated with this column family is updated in the header, and this
     * is used to decide if the log file can be deleted.
     */
    synchronized void onMemtableFlush(String cf, CommitLog.CommitLogContext cLogCtx) throws IOException
    {
        Table table = Table.open(table_);
        int id = table.getColumnFamilyId(cf);
        /* try discarding old commit log files */
        discard(cLogCtx, id);
    }

    /*
     * Check if old commit logs can be deleted. However, we cannot do this
     * in Fast Sync mode, which is one reason to get rid of Fast Sync mode
     * altogether. There is also a pathological case: if a few column
     * families are rarely updated, their Memtables never get flushed, which
     * prevents their commit logs from being deleted. We need to fix this
     * with some heuristic that force-flushes such Memtables.
     *
     * param @ cLogCtx The commit log context.
     * param @ id id of the columnFamily being flushed to disk.
     */
    private void discard(CommitLog.CommitLogContext cLogCtx, int id) throws IOException
    {
        /* retrieve the commit log header associated with the file in the context */
        CommitLogHeader commitLogHeader = clHeaders_.get(cLogCtx.file());
        if ( commitLogHeader == null )
        {
            if ( logFile_.equals(cLogCtx.file()) )
            {
                /* this means we are dealing with the current commit log. */
                commitLogHeader = clHeader_;
                clHeaders_.put(cLogCtx.file(), clHeader_);
            }
            else
                return;
        }

        /*
         * We do any processing only if there is a change in the position in
         * the context. This could happen if an older Memtable's flush came in
         * after a newer Memtable's flush. Right now this cannot happen since
         * Memtables are flushed on a single thread.
         */
        if ( cLogCtx.position() < commitLogHeader.getPosition(id) )
            return;
        commitLogHeader.turnOff(id);
        /* Sort the commit logs based on creation time */
        List<String> oldFiles = new ArrayList<String>(clHeaders_.keySet());
        Collections.sort(oldFiles, new CommitLogFileComparator());
        List<String> listOfDeletedFiles = new ArrayList<String>();
        /*
         * Loop through all the commit log files in the history. Process all
         * files that are older than the one in the context: for each of these
         * files the header needs to be modified by performing a bitwise AND
         * of its header with the header of the file in the context. When we
         * encounter the file in the context itself in our list of old commit
         * log files, we update its header, write it back to the commit log,
         * and stop.
         */
        for (String oldFile : oldFiles)
        {
            if (oldFile.equals(cLogCtx.file()))
            {
                /*
                 * We need to turn the bit on again. This is because we always
                 * keep the bit turned on, with the position indicating from
                 * where the commit log needs to be read. When a flush occurs
                 * we turn off the bit, perform the AND operation, and then
                 * turn it on again with the new position.
                 */
                commitLogHeader.turnOn(id, cLogCtx.position());
                writeCommitLogHeader(cLogCtx.file(), commitLogHeader.toByteArray());
                break;
            }
            else
            {
                CommitLogHeader oldCommitLogHeader = clHeaders_.get(oldFile);
                oldCommitLogHeader.and(commitLogHeader);
                if (oldCommitLogHeader.isSafeToDelete())
                {
                    logger_.debug("Deleting commit log:" + oldFile);
                    FileUtils.deleteAsync(oldFile);
                    listOfDeletedFiles.add(oldFile);
                }
                else
                {
                    writeCommitLogHeader(oldFile, oldCommitLogHeader.toByteArray());
                }
            }
        }

        for ( String deletedFile : listOfDeletedFiles )
        {
            clHeaders_.remove(deletedFile);
        }
    }
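    /*
     * An illustrative walk-through of discard() above (names and ids
     * invented): suppose logs A, B and C exist, oldest first, and a flush of
     * column family 2 arrives with a context pointing into C, the current
     * log. Bit 2 is turned off in C's header and the headers of A and B are
     * each AND-ed with it; any header that becomes all zeros marks its file
     * for deletion, the rest are rewritten, and finally bit 2 is turned back
     * on in C's header with the flush position recorded.
     */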
    private void checkThresholdAndRollLog( long fileSize )
    {
        try
        {
            if ( fileSize >= DatabaseDescriptor.getLogFileSizeThreshold() )
            {
                if ( logWriter_.getFileSize() >= DatabaseDescriptor.getLogFileSizeThreshold() )
                {
                    /* Roll the current log file over to a new one. */
                    setNextFileName();
                    String oldLogFile = logWriter_.getFileName();
                    //history_.add(oldLogFile);
                    logWriter_.close();

                    /* point reader/writer to a new commit log file. */
                    // logWriter_ = SequenceFile.writer(logFile_);
                    logWriter_ = CommitLog.createWriter(logFile_);
                    /* squirrel away the old commit log header */
                    clHeaders_.put(oldLogFile, new CommitLogHeader( clHeader_ ));
                    /*
                     * We need to zero out positions because the positions in
                     * the old file do not make sense in the new one.
                     */
                    clHeader_.zeroPositions();
                    writeCommitLogHeader(clHeader_.toByteArray(), false);
                    // TODO: if the number of files in the commit log directory exceeds
                    // a certain number, force-flush all column families so that a
                    // slowly populated column family does not pin the commit logs.
                }
            }
        }
        catch ( IOException e )
        {
            logger_.info(LogUtil.throwableToString(e));
        }
    }
}