/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
package org.apache.bookkeeper.bookie;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.log4j.Logger;
/**
* This class maps a ledger entry number into a location (entrylogid, offset) in
* an entry log file. It does user level caching to more efficiently manage disk
* head scheduling.
*/
public class LedgerCache {
    // FIX: was Logger.getLogger(LedgerDescriptor.class), which mislabeled
    // every log line emitted by this class.
    private final static Logger LOG = Logger.getLogger(LedgerCache.class);

    // directories that may contain ledger index files
    final File ledgerDirectories[];

    public LedgerCache(File ledgerDirectories[]) {
        this.ledgerDirectories = ledgerDirectories;
    }

    /**
     * the list of potentially clean ledgers
     */
    LinkedList<Long> cleanLedgers = new LinkedList<Long>();

    /**
     * the list of potentially dirty ledgers
     */
    LinkedList<Long> dirtyLedgers = new LinkedList<Long>();

    // open ledger index files, keyed by ledger id
    HashMap<Long, FileInfo> fileInfoCache = new HashMap<Long, FileInfo>();
    // ledger ids present in fileInfoCache, in open order; used for FIFO eviction
    LinkedList<Long> openLedgers = new LinkedList<Long>();

    // maximum number of ledger index files kept open at once;
    // overridable with -DopenFileLimit=N
    static int OPEN_FILE_LIMIT = 900;
    static {
        if (System.getProperty("openFileLimit") != null) {
            OPEN_FILE_LIMIT = Integer.parseInt(System.getProperty("openFileLimit"));
        }
        LOG.info("openFileLimit is " + OPEN_FILE_LIMIT);
    }

    // allocate a third of the memory to the page cache
    // (FIX: comment previously said "half" while the code divides by 3);
    // overridable with -DpageLimit=N
    private static int pageLimit = (int)(Runtime.getRuntime().maxMemory() / 3) / LedgerEntryPage.PAGE_SIZE;
    static {
        LOG.info("maxMemory = " + Runtime.getRuntime().maxMemory());
        if (System.getProperty("pageLimit") != null) {
            pageLimit = Integer.parseInt(System.getProperty("pageLimit"));
        }
        LOG.info("pageLimit is " + pageLimit);
    }

    // The number of pages that have actually been used
    private int pageCount;

    // ledger id -> (first entry id of page -> cached page)
    HashMap<Long, HashMap<Long,LedgerEntryPage>> pages = new HashMap<Long, HashMap<Long,LedgerEntryPage>>();

    // Adds lep to the two-level table, creating the per-ledger map on demand.
    private void putIntoTable(HashMap<Long, HashMap<Long,LedgerEntryPage>> table, LedgerEntryPage lep) {
        HashMap<Long, LedgerEntryPage> map = table.get(lep.getLedger());
        if (map == null) {
            map = new HashMap<Long, LedgerEntryPage>();
            table.put(lep.getLedger(), map);
        }
        map.put(lep.getFirstEntry(), lep);
    }

    // Looks up a page in the two-level table; null if absent.
    private static LedgerEntryPage getFromTable(HashMap<Long, HashMap<Long,LedgerEntryPage>> table, Long ledger, Long firstEntry) {
        HashMap<Long, LedgerEntryPage> map = table.get(ledger);
        if (map != null) {
            return map.get(firstEntry);
        }
        return null;
    }

    /**
     * Returns the cached page starting at firstEntry for the given ledger,
     * with its use count already incremented (the caller must call
     * releasePage()), or null if the page is not cached, or if onlyDirty is
     * set and the cached page is clean.
     *
     * FIX: the previous version dereferenced lep before null-checking it
     * (NPE when onlyDirty was true and the page was absent — exactly what
     * flushLedger can hit after dropping the lock), and its finally block
     * bumped the use count of a clean page even when returning null, leaking
     * a use that no caller would ever release.
     */
    synchronized private LedgerEntryPage getLedgerEntryPage(Long ledger, Long firstEntry, boolean onlyDirty) {
        LedgerEntryPage lep = getFromTable(pages, ledger, firstEntry);
        if (lep == null) {
            return null;
        }
        if (onlyDirty && lep.isClean()) {
            return null;
        }
        lep.usePage();
        return lep;
    }

    /**
     * Records the entry log location of an entry in the ledger's index.
     *
     * @param ledger ledger id
     * @param entry entry id within the ledger
     * @param offset location of the entry in the entry log
     * @throws IOException if the backing index page cannot be loaded
     */
    public void putEntryOffset(long ledger, long entry, long offset) throws IOException {
        int offsetInPage = (int) (entry % LedgerEntryPage.ENTRIES_PER_PAGES);
        // find the id of the first entry of the page that has the entry
        // we are looking for
        long pageEntry = entry - offsetInPage;
        LedgerEntryPage lep = getLedgerEntryPage(ledger, pageEntry, false);
        if (lep == null) {
            // not cached: grab a free page, fill it from disk (or zero it),
            // then publish it in the page table
            lep = grabCleanPage(ledger, pageEntry);
            updatePage(lep);
            synchronized(this) {
                putIntoTable(pages, lep);
            }
        }
        // offsets are stored as 8-byte longs, hence the * 8
        lep.setOffset(offset, offsetInPage * 8);
        lep.releasePage();
    }

    /**
     * Returns the entry log location previously recorded for an entry,
     * loading the index page from disk if it is not cached.
     *
     * @param ledger ledger id
     * @param entry entry id within the ledger
     * @throws IOException if the backing index page cannot be loaded
     */
    public long getEntryOffset(long ledger, long entry) throws IOException {
        int offsetInPage = (int) (entry % LedgerEntryPage.ENTRIES_PER_PAGES);
        // find the id of the first entry of the page that has the entry
        // we are looking for
        long pageEntry = entry - offsetInPage;
        LedgerEntryPage lep = getLedgerEntryPage(ledger, pageEntry, false);
        try {
            if (lep == null) {
                lep = grabCleanPage(ledger, pageEntry);
                // NOTE(review): the page is published before updatePage()
                // here, while putEntryOffset publishes after — confirm both
                // orders are safe with respect to concurrent readers.
                synchronized(this) {
                    putIntoTable(pages, lep);
                }
                updatePage(lep);
            }
            return lep.getOffset(offsetInPage * 8);
        } finally {
            if (lep != null) {
                lep.releasePage();
            }
        }
    }

    // Maps a ledger id to its index file path, nested two hex directory
    // levels deep (low byte, then next byte) to avoid huge flat directories.
    static final private String getLedgerName(long ledgerId) {
        int parent = (int) (ledgerId & 0xff);
        int grandParent = (int) ((ledgerId & 0xff00) >> 8);
        StringBuilder sb = new StringBuilder();
        sb.append(Integer.toHexString(grandParent));
        sb.append('/');
        sb.append(Integer.toHexString(parent));
        sb.append('/');
        sb.append(Long.toHexString(ledgerId));
        sb.append(".idx");
        return sb.toString();
    }

    // Creates the parent directories of f if they do not exist yet.
    static final private void checkParents(File f) throws IOException {
        File parent = f.getParentFile();
        if (parent.exists()) {
            return;
        }
        if (parent.mkdirs() == false) {
            // FIX: corrected "Counldn't" typo in the error message
            throw new IOException("Couldn't mkdirs for " + parent);
        }
    }

    static final private Random rand = new Random();

    // Picks a random ledger directory so new index files are spread across disks.
    static final private File pickDirs(File dirs[]) {
        return dirs[rand.nextInt(dirs.length)];
    }

    /**
     * Returns the FileInfo for a ledger's index file with its use count
     * incremented (the caller must call release()). All ledger directories
     * are searched for an existing file; if none is found and create is set,
     * a location in a random directory is chosen, otherwise
     * Bookie.NoLedgerException is thrown. When more than OPEN_FILE_LIMIT
     * files are open, the oldest open file is closed and evicted.
     */
    FileInfo getFileInfo(Long ledger, boolean create) throws IOException {
        synchronized(fileInfoCache) {
            FileInfo fi = fileInfoCache.get(ledger);
            if (fi == null) {
                String ledgerName = getLedgerName(ledger);
                File lf = null;
                for(File d: ledgerDirectories) {
                    lf = new File(d, ledgerName);
                    if (lf.exists()) {
                        break;
                    }
                    lf = null;
                }
                if (lf == null) {
                    if (!create) {
                        throw new Bookie.NoLedgerException(ledger);
                    }
                    File dir = pickDirs(ledgerDirectories);
                    lf = new File(dir, ledgerName);
                    checkParents(lf);
                }
                if (openLedgers.size() > OPEN_FILE_LIMIT) {
                    // evict the least recently opened index file
                    fileInfoCache.remove(openLedgers.removeFirst()).close();
                }
                fi = new FileInfo(lf);
                fileInfoCache.put(ledger, fi);
                openLedgers.add(ledger);
            }
            // fi is always non-null here: either cached or freshly created
            fi.use();
            return fi;
        }
    }

    // Fills a freshly grabbed page from the ledger's index file, or zeroes
    // it when the page lies beyond the current end of the file.
    private void updatePage(LedgerEntryPage lep) throws IOException {
        if (!lep.isClean()) {
            throw new IOException("Trying to update a dirty page");
        }
        FileInfo fi = null;
        try {
            fi = getFileInfo(lep.getLedger(), true);
            long pos = lep.getFirstEntry() * 8;
            if (pos >= fi.size()) {
                lep.zeroPage();
            } else {
                lep.readPage(fi);
            }
        } finally {
            if (fi != null) {
                fi.release();
            }
        }
    }

    /**
     * Flushes dirty index pages to disk, one ledger at a time.
     *
     * @param doAll flush every dirty ledger rather than stopping after the
     *              first one
     * @throws IOException if an index file cannot be written
     */
    void flushLedger(boolean doAll) throws IOException {
        synchronized(dirtyLedgers) {
            if (dirtyLedgers.isEmpty()) {
                // nothing queued: treat every ledger with cached pages as
                // potentially dirty
                synchronized(this) {
                    for(Long l: pages.keySet()) {
                        if (LOG.isTraceEnabled()) {
                            LOG.trace("Adding " + Long.toHexString(l) + " to dirty pages");
                        }
                        dirtyLedgers.add(l);
                    }
                }
            }
            if (dirtyLedgers.isEmpty()) {
                return;
            }
            while(!dirtyLedgers.isEmpty()) {
                Long l = dirtyLedgers.removeFirst();
                // collect the first-entry ids of this ledger's dirty pages
                // under the lock; the pages themselves are pinned later
                LinkedList<Long> firstEntryList;
                synchronized(this) {
                    HashMap<Long, LedgerEntryPage> pageMap = pages.get(l);
                    if (pageMap == null || pageMap.isEmpty()) {
                        continue;
                    }
                    firstEntryList = new LinkedList<Long>();
                    for(Map.Entry<Long, LedgerEntryPage> entry: pageMap.entrySet()) {
                        LedgerEntryPage lep = entry.getValue();
                        if (lep.isClean()) {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Page is clean " + lep);
                            }
                            continue;
                        }
                        firstEntryList.add(lep.getFirstEntry());
                    }
                }
                // Now flush all the pages of a ledger
                List<LedgerEntryPage> entries = new ArrayList<LedgerEntryPage>(firstEntryList.size());
                FileInfo fi = null;
                try {
                    for(Long firstEntry: firstEntryList) {
                        LedgerEntryPage lep = getLedgerEntryPage(l, firstEntry, true);
                        if (lep != null) {
                            entries.add(lep);
                        }
                    }
                    // sort by first entry so runs of contiguous pages can be
                    // flushed with a single gathering write.
                    // FIX: compare explicitly instead of casting the long
                    // difference to int, which overflows for far-apart pages
                    // and breaks the Comparator contract.
                    Collections.sort(entries, new Comparator<LedgerEntryPage>() {
                        @Override
                        public int compare(LedgerEntryPage o1, LedgerEntryPage o2) {
                            long e1 = o1.getFirstEntry();
                            long e2 = o2.getFirstEntry();
                            return e1 < e2 ? -1 : (e1 > e2 ? 1 : 0);
                        }});
                    // capture each page's version before writing; setClean
                    // below is presumably version-checked so pages dirtied
                    // during the write stay dirty — TODO confirm in
                    // LedgerEntryPage
                    ArrayList<Integer> versions = new ArrayList<Integer>(entries.size());
                    fi = getFileInfo(l, true);
                    int start = 0;
                    long lastOffset = -1;
                    for(int i = 0; i < entries.size(); i++) {
                        versions.add(i, entries.get(i).getVersion());
                        if (lastOffset != -1 && (entries.get(i).getFirstEntry() - lastOffset) != LedgerEntryPage.ENTRIES_PER_PAGES) {
                            // send up a sequential list
                            int count = i - start;
                            if (count == 0) {
                                // FIX: report through the logger, not stdout
                                LOG.error("Count cannot possibly be zero!");
                            }
                            writeBuffers(l, entries, fi, start, count);
                            start = i;
                        }
                        lastOffset = entries.get(i).getFirstEntry();
                    }
                    if (entries.size()-start == 0 && entries.size() != 0) {
                        // FIX: report through the logger, not stdout
                        LOG.error("Nothing to write, but there were entries!");
                    }
                    writeBuffers(l, entries, fi, start, entries.size()-start);
                    synchronized(this) {
                        for(int i = 0; i < entries.size(); i++) {
                            LedgerEntryPage lep = entries.get(i);
                            lep.setClean(versions.get(i));
                        }
                    }
                } finally {
                    for(LedgerEntryPage lep: entries) {
                        lep.releasePage();
                    }
                    if (fi != null) {
                        fi.release();
                    }
                }
                if (!doAll) {
                    break;
                }
                // Yield. if we are doing all the ledgers we don't want to
                // block other flushes that need to happen
                try {
                    dirtyLedgers.wait(1);
                } catch (InterruptedException e) {
                    // just pass it on
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /**
     * Writes count consecutive pages of a ledger, starting at index start of
     * entries, to the ledger's index file with a gathering write, retried
     * until every buffer is drained.
     *
     * @throws IOException on a short or failed write, or if a page does not
     *                     belong to the ledger being flushed
     */
    private void writeBuffers(Long ledger,
                              List<LedgerEntryPage> entries, FileInfo fi,
                              int start, int count) throws IOException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Writing " + count + " buffers of " + Long.toHexString(ledger));
        }
        if (count == 0) {
            return;
        }
        ByteBuffer buffs[] = new ByteBuffer[count];
        for(int j = 0; j < count; j++) {
            buffs[j] = entries.get(start+j).getPageToWrite();
            if (entries.get(start+j).getLedger() != ledger) {
                throw new IOException("Writing to " + ledger + " but page belongs to " + entries.get(start+j).getLedger());
            }
        }
        long totalWritten = 0;
        // retry until the final buffer is drained; the same file position is
        // passed each time, so fi.write presumably accounts for the buffers'
        // advancing positions — NOTE(review): confirm against FileInfo.write
        while(buffs[buffs.length-1].remaining() > 0) {
            long rc = fi.write(buffs, entries.get(start+0).getFirstEntry()*8);
            if (rc <= 0) {
                throw new IOException("Short write to ledger " + ledger + " rc = " + rc);
            }
            totalWritten += rc;
        }
        if (totalWritten != count*LedgerEntryPage.PAGE_SIZE) {
            throw new IOException("Short write to ledger " + ledger + " wrote " + totalWritten + " expected " + count*LedgerEntryPage.PAGE_SIZE);
        }
    }

    /**
     * Obtains a page for (ledger, entry): a newly allocated one while under
     * pageLimit, otherwise a clean, unused page evicted from the cache
     * (flushing if necessary). The returned page is marked in-use and is NOT
     * yet in the page table; entry must be page-aligned.
     */
    private LedgerEntryPage grabCleanPage(long ledger, long entry) throws IOException {
        if (entry % LedgerEntryPage.ENTRIES_PER_PAGES != 0) {
            throw new IllegalArgumentException(entry + " is not a multiple of " + LedgerEntryPage.ENTRIES_PER_PAGES);
        }
        synchronized(this) {
            if (pageCount < pageLimit) {
                // let's see if we can allocate something
                LedgerEntryPage lep = new LedgerEntryPage();
                lep.setLedger(ledger);
                lep.setFirstEntry(entry);
                // note, this will not block since it is a new page
                lep.usePage();
                pageCount++;
                return lep;
            }
        }
        outerLoop:
        while(true) {
            synchronized(cleanLedgers) {
                if (cleanLedgers.isEmpty()) {
                    // force a flush so some pages become clean, then treat
                    // every ledger with cached pages as a candidate
                    flushLedger(false);
                    synchronized(this) {
                        for(Long l: pages.keySet()) {
                            cleanLedgers.add(l);
                        }
                    }
                }
                synchronized(this) {
                    Long cleanLedger = cleanLedgers.getFirst();
                    Map<Long, LedgerEntryPage> map = pages.get(cleanLedger);
                    if (map == null || map.isEmpty()) {
                        cleanLedgers.removeFirst();
                        continue;
                    }
                    Iterator<Map.Entry<Long, LedgerEntryPage>> it = map.entrySet().iterator();
                    LedgerEntryPage lep = it.next().getValue();
                    // scan this ledger for a page that is clean and unused
                    while((lep.inUse() || !lep.isClean())) {
                        // FIX: the hasNext() test was inverted — the old code
                        // abandoned the ledger while candidates remained and
                        // called it.next() on an exhausted iterator
                        // (NoSuchElementException). It also never removed an
                        // exhausted ledger from cleanLedgers, which could
                        // spin on the same ledger forever.
                        if (!it.hasNext()) {
                            // no evictable page in this ledger, move on
                            cleanLedgers.removeFirst();
                            continue outerLoop;
                        }
                        lep = it.next().getValue();
                    }
                    // evict: drop from the table, recycle for the new page
                    it.remove();
                    if (map.isEmpty()) {
                        pages.remove(lep.getLedger());
                    }
                    lep.usePage();
                    lep.zeroPage();
                    lep.setLedger(ledger);
                    lep.setFirstEntry(entry);
                    return lep;
                }
            }
        }
    }

    /**
     * Returns the highest entry id recorded for a ledger among the cached
     * index pages, or 0 if the ledger has no cached pages.
     */
    public long getLastEntry(long ledgerId) {
        long lastEntry = 0;
        // Find the last entry in the cache
        synchronized(this) {
            Map<Long, LedgerEntryPage> map = pages.get(ledgerId);
            if (map != null) {
                for(LedgerEntryPage lep: map.values()) {
                    // this page cannot possibly beat what we have seen
                    if (lep.getFirstEntry() + LedgerEntryPage.ENTRIES_PER_PAGES < lastEntry) {
                        continue;
                    }
                    lep.usePage();
                    long highest = lep.getLastEntry();
                    if (highest > lastEntry) {
                        lastEntry = highest;
                    }
                    lep.releasePage();
                }
            }
        }
        return lastEntry;
    }
}