/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
package org.apache.bookkeeper.bookie;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.bookkeeper.bookie.LedgerDirsManager.LedgerDirsListener;
import org.apache.bookkeeper.bookie.LedgerDirsManager.NoWritableLedgerDirException;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.Gauge;
import org.apache.bookkeeper.stats.StatsLogger;
import org.apache.bookkeeper.util.SnapshotMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.VisibleForTesting;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LEDGER_CACHE_NUM_EVICTED_LEDGERS;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.NUM_OPEN_LEDGERS;
public class IndexPersistenceMgr {
/** Logger for this class. */
private final static Logger LOG = LoggerFactory.getLogger(IndexPersistenceMgr.class);
/** File-name suffix of a ledger index file. */
private static final String IDX = ".idx";
/**
* File-name suffix handled specially by the start-up scan: such a file is deleted when a
* matching ".idx" file already exists, otherwise it is renamed to ".idx".
* NOTE(review): presumably produced while an index file is being relocated -- confirm against FileInfo.
*/
static final String RLOC = ".rloc";
/**
 * Builds the relative path of the index file that belongs to a ledger.
 *
 * <p>The result has the shape {@code <grandParent>/<parent>/<ledgerId>.idx}: the parent directory is
 * the lowest byte of the ledger id, the grandparent directory is the second-lowest byte, and the file
 * name is the full ledger id. All three parts are rendered as unpadded lower-case hexadecimal.
 *
 * @param ledgerId ledger id
 * @return path of the index file, relative to a ledger directory
 */
@VisibleForTesting
public static final String getLedgerName(long ledgerId) {
    final int lowestByte = (int) (ledgerId & 0xff);
    final int secondByte = (int) ((ledgerId & 0xff00) >> 8);
    return Integer.toHexString(secondByte)
            + '/'
            + Integer.toHexString(lowestByte)
            + '/'
            + Long.toHexString(ledgerId)
            + IDX;
}
/** Cache of FileInfo handles for ledger index files, keyed by ledger id. */
final ConcurrentMap<Long, FileInfo> fileInfoCache = new ConcurrentHashMap<Long, FileInfo>();
/** Value of conf.getOpenFileLimit(); once openLedgers grows beyond it, a cached FileInfo is evicted. */
final int openFileLimit;
/** Size in bytes of one index page; used to size read buffers and to validate the number of bytes written. */
final int pageSize;
/** Number of index entries covered by one page. */
final int entriesPerPage;
// Manage all active ledgers in LedgerManager
// so LedgerManager has knowledge to garbage collect inactive/deleted ledgers
final SnapshotMap<Long, Boolean> activeLedgers;
/** Source of the ledger directories: used to locate existing index files and to pick a directory for new ones. */
private LedgerDirsManager ledgerDirsManager;
/**
* Ledger ids whose FileInfo was added to fileInfoCache, in insertion order; eviction takes from the head.
* Mutations are performed while synchronized on this list.
*/
final LinkedList<Long> openLedgers = new LinkedList<Long>();
// Stats
/** Incremented each time a ledger id is taken off openLedgers for eviction. */
private final Counter evictedLedgersCounter;
/**
 * Creates the manager, discovers the ledgers that already have index files on disk and publishes
 * the statistics of this component.
 *
 * @param pageSize size in bytes of one index page
 * @param entriesPerPage number of index entries per page
 * @param conf server configuration; supplies the open file limit
 * @param activeLedgers map that receives every ledger found on disk or created later
 * @param ledgerDirsManager manager of the ledger directories
 * @param statsLogger logger used to expose the eviction counter and the open-ledgers gauge
 * @throws IOException if scanning the ledger directories fails
 */
public IndexPersistenceMgr(int pageSize,
                           int entriesPerPage,
                           ServerConfiguration conf,
                           SnapshotMap<Long, Boolean> activeLedgers,
                           LedgerDirsManager ledgerDirsManager,
                           StatsLogger statsLogger) throws IOException {
    this.pageSize = pageSize;
    this.entriesPerPage = entriesPerPage;
    this.activeLedgers = activeLedgers;
    this.ledgerDirsManager = ledgerDirsManager;
    this.openFileLimit = conf.getOpenFileLimit();
    LOG.info("openFileLimit = {}", openFileLimit);

    // Populate activeLedgers from the index files already present on disk.
    getActiveLedgers();
    ledgerDirsManager.addLedgerDirsListener(getLedgerDirsListener());

    // Statistics: number of evictions and current number of open ledgers.
    evictedLedgersCounter = statsLogger.getCounter(LEDGER_CACHE_NUM_EVICTED_LEDGERS);
    final Gauge<Integer> openLedgersGauge = new Gauge<Integer>() {
        @Override
        public Integer getDefaultValue() {
            return 0;
        }

        @Override
        public Integer getSample() {
            return getNumOpenLedgers();
        }
    };
    statsLogger.registerGauge(NUM_OPEN_LEDGERS, openLedgersGauge);
}
/**
 * Returns the FileInfo of a ledger with its use count incremented; the caller must call
 * {@code release()} on the result when done.
 *
 * <p>On a cache miss the index file is looked up on disk. If none exists, a new index file location
 * is chosen when a master key is supplied; without a master key the ledger is reported as missing.
 *
 * @param ledger ledger id
 * @param masterKey master key for a ledger that may need to be created, or {@code null} to only
 *        open an existing ledger
 * @return the cached or newly registered FileInfo
 * @throws IOException on lookup failure; {@code Bookie.NoLedgerException} if the ledger has no
 *         index file and {@code masterKey} is {@code null}
 */
FileInfo getFileInfo(Long ledger, byte masterKey[]) throws IOException {
    FileInfo info = fileInfoCache.get(ledger);
    if (info == null) {
        final File indexFile;
        final boolean freshlyCreated;
        synchronized (this) {
            final File onDisk = findIndexFile(ledger);
            if (onDisk != null) {
                indexFile = onDisk;
                freshlyCreated = false;
            } else if (masterKey != null) {
                // No index file on disk yet: pick a location for a new one.
                indexFile = getNewLedgerIndexFile(ledger, null);
                freshlyCreated = true;
            } else {
                throw new Bookie.NoLedgerException(ledger);
            }
        }
        info = putFileInfo(ledger, masterKey, indexFile, freshlyCreated);
    }
    assert info != null;
    info.use();
    return info;
}
/**
 * Registers a FileInfo for the ledger in the cache unless another thread has already done so.
 *
 * @param ledger ledger id
 * @param masterKey master key handed to the new FileInfo
 * @param lf index file backing the FileInfo
 * @param createdNewFile whether {@code lf} was newly chosen by the caller rather than found on disk
 * @return the FileInfo that ended up in the cache (ours, or the one that was already there)
 * @throws IOException if deleting a losing file or evicting a cached FileInfo fails
 */
private FileInfo putFileInfo(Long ledger, byte masterKey[], File lf, boolean createdNewFile) throws IOException {
    final FileInfo candidate = new FileInfo(lf, masterKey);
    final FileInfo existing = fileInfoCache.putIfAbsent(ledger, candidate);

    if (existing == null) {
        // We won the race.
        if (createdNewFile) {
            // The active ledger manager has to learn about the new ledger.
            LOG.debug("New ledger index file created for ledgerId: {}", ledger);
            activeLedgers.put(ledger, true);
        }
        // Make room in the file info cache if it grew too large.
        evictFileInfoIfNecessary();
        synchronized (openLedgers) {
            openLedgers.offer(ledger);
        }
        return candidate;
    }

    // Another thread registered a FileInfo first. Drop our file only if we created it
    // and it is not the very file the winner uses.
    if (createdNewFile && !existing.isSameFile(lf)) {
        candidate.delete();
    }
    return existing;
}
/**
 * Chooses the location of a new index file for a ledger, avoiding <code>excludedDir</code>.
 *
 * @param ledger
 *          Ledger id.
 * @param excludedDir
 *          Ledger directory that must not be picked; may be <code>null</code>.
 * @return file object pointing at the ledger's index file inside a randomly picked writable directory.
 * @throws NoWritableLedgerDirException if there is no writable dir available.
 */
private File getNewLedgerIndexFile(Long ledger, File excludedDir)
        throws NoWritableLedgerDirException {
    final File targetDir = ledgerDirsManager.pickRandomWritableDir(excludedDir);
    return new File(targetDir, getLedgerName(ledger));
}
/**
 * Scans the ledger directories for ledger index files and records every ledger found in
 * {@code activeLedgers}. That comprises the set of active ledgers this particular BookieServer
 * knows about that have not yet been deleted by the BookKeeper client. This is called only once
 * during initialization.
 *
 * <p>Files ending in ".rloc" are cleaned up on the way: if an ".idx" file for the same ledger
 * exists the ".rloc" file is deleted, otherwise it is renamed to ".idx".
 *
 * @throws IOException if an ".rloc" file cannot be renamed to its ".idx" name
 */
private void getActiveLedgers() throws IOException {
    // Ledger index files are stored in a file hierarchy with a parent and
    // grandParent directory. We have to go two levels deep into these
    // directories to find the index files.
    for (File ledgerDirectory : ledgerDirsManager.getAllLedgerDirs()) {
        File[] grandParents = ledgerDirectory.listFiles();
        if (grandParents == null) {
            continue;
        }
        for (File grandParent : grandParents) {
            if (!grandParent.isDirectory()) {
                continue;
            }
            File[] parents = grandParent.listFiles();
            if (parents == null) {
                continue;
            }
            for (File parent : parents) {
                if (!parent.isDirectory()) {
                    continue;
                }
                File[] indexFiles = parent.listFiles();
                if (indexFiles == null) {
                    continue;
                }
                for (File index : indexFiles) {
                    String fileName = index.getName();
                    boolean isRloc = fileName.endsWith(RLOC);
                    if (!index.isFile() || (!fileName.endsWith(IDX) && !isRloc)) {
                        continue;
                    }
                    // We've found a ledger index file. The file name is the
                    // HexString representation of the ledgerId, so it must be
                    // parsed with radix 16 everywhere it is used.
                    String ledgerIdInHex = fileName.replace(RLOC, "").replace(IDX, "");
                    long ledgerId = Long.parseLong(ledgerIdInHex, 16);
                    if (isRloc) {
                        if (findIndexFile(ledgerId) != null) {
                            // A regular index file exists; the rloc file is a leftover.
                            if (!index.delete()) {
                                LOG.warn("Deleting the rloc file " + index + " failed");
                            }
                            continue;
                        }
                        File dest = new File(index.getParentFile(), ledgerIdInHex + IDX);
                        if (!index.renameTo(dest)) {
                            throw new IOException("Renaming rloc file " + index
                                    + " to index file has failed");
                        }
                    }
                    activeLedgers.put(ledgerId, true);
                }
            }
        }
    }
}
/**
 * Removes everything this manager keeps for a ledger; called when the BookKeeper client deletes
 * the ledger. The index file is closed and deleted, and the ledger is dropped from the active
 * ledgers, the file info cache and the open-ledger list.
 *
 * @param ledgerId ledger id
 * @throws IOException if the ledger cannot be opened, closed or deleted
 */
void removeLedger(long ledgerId) throws IOException {
    final FileInfo info = getFileInfo(ledgerId, null);
    try {
        // Close without flushing, then remove the index file itself.
        info.close(false);
        info.delete();
    } finally {
        // The use count taken by getFileInfo must be given back,
        // otherwise the file channel would not be closed.
        info.release();
    }

    // Forget the ledger in the active ledger manager ...
    activeLedgers.remove(ledgerId);
    // ... and in the remaining bookkeeping structures.
    fileInfoCache.remove(ledgerId);
    synchronized (openLedgers) {
        openLedgers.remove(ledgerId);
    }
}
/**
 * Looks for the index file of a ledger in all ledger directories.
 *
 * @param ledgerId ledger id
 * @return the first existing index file found, or {@code null} if no directory contains one
 */
private File findIndexFile(long ledgerId) throws IOException {
    final String relativePath = getLedgerName(ledgerId);
    for (File ledgerDir : ledgerDirsManager.getAllLedgerDirs()) {
        final File candidate = new File(ledgerDir, relativePath);
        if (!candidate.exists()) {
            continue;
        }
        return candidate;
    }
    return null;
}
/**
 * Tells whether the ledger is currently recorded in {@code activeLedgers}.
 *
 * @param ledgerId ledger id
 * @return {@code true} if the ledger is known as active
 */
boolean ledgerExists(long ledgerId) throws IOException {
    final boolean tracked = activeLedgers.containsKey(ledgerId);
    return tracked;
}
/**
 * Returns the number of ledgers currently on the open-ledger list.
 *
 * <p>The list is a plain {@code LinkedList} that other threads mutate while holding its monitor,
 * so the size is read under the same monitor to get a consistent, up-to-date value.
 *
 * @return current size of {@code openLedgers}
 */
int getNumOpenLedgers() {
    synchronized (openLedgers) {
        return openLedgers.size();
    }
}
/**
 * Evicts the oldest entry of the open-ledger list from the file info cache when the list has
 * grown beyond {@code openFileLimit}. At most one ledger is evicted per call.
 *
 * <p>The size check and the removal from the list happen under the same lock, because the list
 * is a non-thread-safe {@code LinkedList} that is mutated concurrently by other threads.
 *
 * @throws IOException if closing the evicted FileInfo fails
 */
private void evictFileInfoIfNecessary() throws IOException {
    final Long ledgerToRemove;
    synchronized (openLedgers) {
        if (openLedgers.size() <= openFileLimit) {
            return;
        }
        ledgerToRemove = openLedgers.poll();
    }
    if (null == ledgerToRemove) {
        // Should not reach here: the list was non-empty when checked under the lock.
        return;
    }
    evictedLedgersCounter.inc();
    FileInfo fi = fileInfoCache.remove(ledgerToRemove);
    if (null == fi) {
        // Seems like someone else already closed the file.
        return;
    }
    // Close with the flag set to true, as done for every cached FileInfo on shutdown.
    fi.close(true);
}
/**
 * Closes every cached FileInfo (passing {@code true} to {@code FileInfo.close}) and empties the
 * file info cache.
 *
 * @throws IOException if closing one of the FileInfo objects fails
 */
void close() throws IOException {
    for (FileInfo cached : fileInfoCache.values()) {
        if (cached == null) {
            continue;
        }
        cached.close(true);
    }
    fileInfoCache.clear();
}
/**
 * Reads the last-add-confirmed value stored in the ledger's FileInfo.
 *
 * @param ledgerId ledger id
 * @return value reported by {@code FileInfo.getLastAddConfirmed()}
 * @throws IOException if the ledger cannot be opened (including when it does not exist)
 */
Long getLastAddConfirmed(long ledgerId) throws IOException {
    final FileInfo info = getFileInfo(ledgerId, null);
    try {
        return info.getLastAddConfirmed();
    } finally {
        info.release();
    }
}
/**
 * Passes a new last-add-confirmed value to the ledger's FileInfo.
 *
 * @param ledgerId ledger id
 * @param lac last-add-confirmed value to set
 * @return value returned by {@code FileInfo.setLastAddConfirmed(lac)}
 * @throws IOException if the ledger cannot be opened (including when it does not exist)
 */
long updateLastAddConfirmed(long ledgerId, long lac) throws IOException {
    final FileInfo info = getFileInfo(ledgerId, null);
    try {
        return info.setLastAddConfirmed(lac);
    } finally {
        info.release();
    }
}
/**
 * Returns the master key of a ledger, registering a FileInfo for its on-disk index file in the
 * cache if none is cached yet.
 *
 * @param ledgerId ledger id
 * @return master key reported by the ledger's FileInfo
 * @throws IOException on I/O failure; {@code Bookie.NoLedgerException} if the ledger is neither
 *         cached nor present on disk
 * @throws BookieException declared for callers; see {@code FileInfo.getMasterKey()}
 */
byte[] readMasterKey(long ledgerId) throws IOException, BookieException {
    final FileInfo cached = fileInfoCache.get(ledgerId);
    if (cached != null) {
        return cached.getMasterKey();
    }
    final File indexFile = findIndexFile(ledgerId);
    if (indexFile == null) {
        throw new Bookie.NoLedgerException(ledgerId);
    }
    return putFileInfo(ledgerId, null, indexFile, false).getMasterKey();
}
/**
 * Makes sure a FileInfo exists for the ledger, supplying {@code masterKey} in case the ledger has
 * no index file yet. The FileInfo is obtained and immediately released again.
 *
 * @param ledgerId ledger id
 * @param masterKey master key to use if the ledger has to be created
 * @throws IOException if the FileInfo cannot be obtained
 */
void setMasterKey(long ledgerId, byte[] masterKey) throws IOException {
    getFileInfo(ledgerId, masterKey).release();
}
/**
 * Marks the ledger as fenced through its FileInfo.
 *
 * @param ledgerId ledger id
 * @return value returned by {@code FileInfo.setFenced()}
 * @throws IOException if the ledger cannot be opened (including when it does not exist)
 */
boolean setFenced(long ledgerId) throws IOException {
    final FileInfo info = getFileInfo(ledgerId, null);
    try {
        return info.setFenced();
    } finally {
        info.release();
    }
}
/**
 * Asks the ledger's FileInfo whether the ledger is fenced.
 *
 * @param ledgerId ledger id
 * @return value returned by {@code FileInfo.isFenced()}
 * @throws IOException if the ledger cannot be opened (including when it does not exist)
 */
boolean isFenced(long ledgerId) throws IOException {
    final FileInfo info = getFileInfo(ledgerId, null);
    try {
        return info.isFenced();
    } finally {
        info.release();
    }
}
/**
 * Hands an explicit LAC buffer to the ledger's FileInfo.
 *
 * @param ledgerId ledger id
 * @param lac buffer passed to {@code FileInfo.setExplicitLac}
 * @throws IOException if the ledger cannot be opened (including when it does not exist)
 */
void setExplicitLac(long ledgerId, ByteBuffer lac) throws IOException {
    final FileInfo info = getFileInfo(ledgerId, null);
    try {
        info.setExplicitLac(lac);
    } finally {
        info.release();
    }
}
/**
 * Returns the explicit LAC buffer held by the ledger's FileInfo.
 *
 * <p>I/O failures (including a missing ledger) are logged and reported as {@code null} instead of
 * being propagated.
 *
 * @param ledgerId ledger id
 * @return value of {@code FileInfo.getExplicitLac()}, or {@code null} if the ledger could not be opened
 */
public ByteBuffer getExplicitLac(long ledgerId) {
    FileInfo fi = null;
    try {
        fi = getFileInfo(ledgerId, null);
        return fi.getExplicitLac();
    } catch (IOException e) {
        // The trailing throwable is logged with its stack trace; the placeholder carries the ledger id.
        LOG.error("Exception during getExplicitLac for ledger {}", ledgerId, e);
        return null;
    } finally {
        if (null != fi) {
            fi.release();
        }
    }
}
/**
 * @return the open file limit read from the server configuration at construction time
 */
int getOpenFileLimit() {
    return this.openFileLimit;
}
/**
 * Creates the listener registered with the ledger dirs manager. Every callback is intentionally
 * a no-op: the corresponding events are handled in Bookie.
 *
 * @return a listener that ignores all ledger directory events
 */
private LedgerDirsListener getLedgerDirsListener() {
    final LedgerDirsListener noOpListener = new LedgerDirsListener() {
        @Override
        public void diskFailed(File disk) {
            // Intentionally empty; Bookie takes care of this event.
        }

        @Override
        public void diskFull(File disk) {
            // Intentionally empty; Bookie takes care of this event.
        }

        @Override
        public void diskAlmostFull(File disk) {
            // Intentionally empty; Bookie takes care of this event.
        }

        @Override
        public void diskWritable(File disk) {
            // Intentionally empty; Bookie takes care of this event.
        }

        @Override
        public void diskJustWritable(File disk) {
            // Intentionally empty; Bookie takes care of this event.
        }

        @Override
        public void allDisksFull() {
            // Intentionally empty; Bookie takes care of this event.
        }

        @Override
        public void fatalError() {
            // Intentionally empty; Bookie takes care of this event.
        }
    };
    return noOpListener;
}
/**
 * Moves the ledger's index file to another ledger directory if its current directory is full,
 * then flushes the index file header.
 *
 * @param ledger ledger id
 * @param fi FileInfo of the ledger
 * @throws IOException if moving the file or flushing the header fails
 */
private void relocateIndexFileAndFlushHeader(long ledger, FileInfo fi) throws IOException {
    final boolean currentDirFull = ledgerDirsManager.isDirFull(getLedgerDirForLedger(fi));
    if (currentDirFull) {
        moveLedgerIndexFile(ledger, fi);
    }
    fi.flushHeader();
}
/**
 * Determines the ledger directory an index file lives in, i.e. the directory three levels above
 * the index file (above its parent and grandparent directories).
 *
 * @param fi File info of a ledger
 * @return ledger directory that the ledger's index file belongs to.
 */
private File getLedgerDirForLedger(FileInfo fi) {
    final File parentDir = fi.getLf().getParentFile();
    final File grandParentDir = parentDir.getParentFile();
    return grandParentDir.getParentFile();
}
/**
 * Moves the index file of a ledger into a writable ledger directory other than its current one.
 *
 * @param l ledger id
 * @param fi FileInfo of the ledger
 * @throws NoWritableLedgerDirException if no other writable ledger directory is available
 * @throws IOException if moving the file fails
 */
private void moveLedgerIndexFile(Long l, FileInfo fi) throws NoWritableLedgerDirException, IOException {
    final File currentLedgerDir = getLedgerDirForLedger(fi);
    final File destination = getNewLedgerIndexFile(l, currentLedgerDir);
    final long sizeToMove = fi.getSizeSinceLastwrite();
    fi.moveToNewLocation(destination, sizeToMove);
}
/**
 * Flushes the index file header of a ledger, relocating the index file first if its directory is
 * full. A ledger that no longer exists is logged and otherwise ignored.
 *
 * @param ledger ledger id
 * @throws IOException if relocating the file or flushing the header fails
 */
void flushLedgerHeader(long ledger) throws IOException {
    FileInfo info = null;
    try {
        info = getFileInfo(ledger, null);
        relocateIndexFileAndFlushHeader(ledger, info);
    } catch (Bookie.NoLedgerException missing) {
        // The ledger was deleted in the meantime; nothing to flush.
        LOG.info("No ledger {} found when flushing header.", ledger);
    } finally {
        if (info != null) {
            info.release();
        }
    }
}
/**
 * Writes the given pages of a ledger to its index file and marks them clean.
 *
 * <p>The list is sorted in place by first entry id. Pages whose first entry ids are exactly
 * {@code entriesPerPage} apart are treated as contiguous and written together in one batch.
 * A ledger that no longer exists is logged and otherwise ignored.
 *
 * @param l ledger id
 * @param entries pages to flush; reordered by this call
 * @throws IOException if relocating the index file, flushing the header or writing pages fails
 */
void flushLedgerEntries(long l, List<LedgerEntryPage> entries) throws IOException {
    FileInfo fi = null;
    try {
        Collections.sort(entries, new Comparator<LedgerEntryPage>() {
            @Override
            public int compare(LedgerEntryPage o1, LedgerEntryPage o2) {
                // Compare explicitly: casting the long difference to int can
                // truncate and flip the sign for entry ids that are far apart.
                long first1 = o1.getFirstEntry();
                long first2 = o2.getFirstEntry();
                return first1 < first2 ? -1 : (first1 > first2 ? 1 : 0);
            }
        });
        // Versions are captured before writing so that setClean can be given
        // the version that was actually flushed.
        int[] versions = new int[entries.size()];
        try {
            fi = getFileInfo(l, null);
        } catch (Bookie.NoLedgerException nle) {
            // ledger has been deleted
            LOG.info("No ledger {} found when flushing entries.", l);
            return;
        }
        // flush the header if necessary
        relocateIndexFileAndFlushHeader(l, fi);
        int start = 0;
        long lastOffset = -1;
        for (int i = 0; i < entries.size(); i++) {
            versions[i] = entries.get(i).getVersion();
            if (lastOffset != -1 && (entries.get(i).getFirstEntry() - lastOffset) != entriesPerPage) {
                // Gap detected: send up the sequential run collected so far.
                int count = i - start;
                if (count == 0) {
                    LOG.warn("Count cannot possibly be zero!");
                }
                writeBuffers(l, entries, fi, start, count);
                start = i;
            }
            lastOffset = entries.get(i).getFirstEntry();
        }
        if (entries.size() - start == 0 && entries.size() != 0) {
            LOG.warn("Nothing to write, but there were entries!");
        }
        // Write the final run.
        writeBuffers(l, entries, fi, start, entries.size() - start);
        for (int i = 0; i < entries.size(); i++) {
            LedgerEntryPage lep = entries.get(i);
            lep.setClean(versions[i]);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Flushed ledger {} with {} pages.", l, entries.size());
        }
    } finally {
        if (fi != null) {
            fi.release();
        }
    }
}
/**
 * Writes {@code count} consecutive pages, starting at index {@code start} of {@code entries}, to
 * the ledger's index file at the file position of the first of those pages.
 *
 * @param ledger ledger id the pages must belong to
 * @param entries list of pages
 * @param fi FileInfo of the ledger
 * @param start index of the first page to write
 * @param count number of pages to write; nothing is done when zero
 * @throws IOException if a page belongs to another ledger, a write makes no progress, or the
 *         total number of bytes written differs from {@code count * pageSize}
 */
private void writeBuffers(Long ledger,
                          List<LedgerEntryPage> entries, FileInfo fi,
                          int start, int count) throws IOException {
    if (LOG.isTraceEnabled()) {
        LOG.trace("Writing {} buffers of {}", count, Long.toHexString(ledger));
    }
    if (count == 0) {
        return;
    }
    // Unbox once so the ownership check below is always a numeric comparison.
    final long ledgerId = ledger.longValue();
    ByteBuffer[] buffs = new ByteBuffer[count];
    for (int j = 0; j < count; j++) {
        LedgerEntryPage page = entries.get(start + j);
        buffs[j] = page.getPageToWrite();
        if (page.getLedger() != ledgerId) {
            throw new IOException("Writing to " + ledger + " but page belongs to "
                    + page.getLedger());
        }
    }
    final long startPosition = entries.get(start).getFirstEntryPosition();
    long totalWritten = 0;
    while (buffs[buffs.length - 1].remaining() > 0) {
        // Continue after the bytes already written; restarting at startPosition
        // after a partial write would overwrite the beginning of the batch.
        // NOTE(review): a no-op if FileInfo.write always writes everything in one call -- confirm.
        long rc = fi.write(buffs, startPosition + totalWritten);
        if (rc <= 0) {
            throw new IOException("Short write to ledger " + ledger + " rc = " + rc);
        }
        totalWritten += rc;
    }
    // Computed in long arithmetic so the check and the message agree and cannot overflow.
    final long expected = (long) count * (long) pageSize;
    if (totalWritten != expected) {
        throw new IOException("Short write to ledger " + ledger + " wrote " + totalWritten
                + " expected " + expected);
    }
}
/**
 * Fills a clean page with the content stored in the ledger's index file. If the page starts at
 * or beyond the current end of the file, the page is zeroed instead of being read.
 *
 * @param lep page to fill; must be clean
 * @throws IOException if the page is dirty, the ledger cannot be opened, or reading fails
 */
void updatePage(LedgerEntryPage lep) throws IOException {
    if (!lep.isClean()) {
        throw new IOException("Trying to update a dirty page");
    }
    final FileInfo info = getFileInfo(lep.getLedger(), null);
    try {
        if (lep.getFirstEntryPosition() < info.size()) {
            lep.readPage(info);
        } else {
            // Nothing persisted for this page yet.
            lep.zeroPage();
        }
    } finally {
        info.release();
    }
}
/**
 * Determines the last entry id of a ledger, taking into account entries that are recorded in the
 * index file but lie beyond the last entry known in memory.
 *
 * <p>If the index file extends past {@code lastEntryInMem}, the last page-sized region of the
 * file is read and scanned backwards for the last non-zero index slot.
 *
 * @param ledgerId ledger id
 * @param lastEntryInMem last entry id known in memory
 * @return the larger of {@code lastEntryInMem} and the last non-zero entry found in the file's
 *         final page; {@code lastEntryInMem} if the file holds nothing beyond it
 * @throws IOException if the ledger cannot be opened or read
 */
long getPersistEntryBeyondInMem(long ledgerId, long lastEntryInMem) throws IOException {
    final FileInfo info = getFileInfo(ledgerId, null);
    try {
        long fileSize = info.size();
        // The file size has to be a multiple of the index entry size,
        // otherwise we may read incorrect data; ignore a trailing partial entry.
        final long remainder = fileSize % LedgerEntryPage.getIndexEntrySize();
        if (remainder != 0) {
            LOG.warn("Index file of ledger {} is not aligned with index entry size.", ledgerId);
            fileSize -= remainder;
        }
        if (fileSize <= lastEntryInMem * LedgerEntryPage.getIndexEntrySize()) {
            // The file holds nothing beyond what is known in memory.
            return lastEntryInMem;
        }
        // The cache may not contain the last entry: inspect the tail of the file.
        final ByteBuffer lastPage = ByteBuffer.allocate(pageSize);
        final long readFrom = Math.max(0L, fileSize - pageSize);
        info.read(lastPage, readFrom);
        lastPage.flip();
        final long firstEntryOfPage = readFrom / LedgerEntryPage.getIndexEntrySize();
        for (int slot = entriesPerPage - 1; slot >= 0; slot--) {
            if (lastPage.getLong(slot * LedgerEntryPage.getIndexEntrySize()) == 0) {
                continue;
            }
            // Highest populated slot found; never report less than the in-memory value.
            return Math.max(lastEntryInMem, firstEntryOfPage + slot);
        }
        return lastEntryInMem;
    } finally {
        info.release();
    }
}
}