/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * */ package org.apache.bookkeeper.bookie; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.stats.Gauge; import org.apache.bookkeeper.stats.StatsLogger; import org.apache.bookkeeper.util.DirectMemoryUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.atomic.AtomicInteger; import static org.apache.bookkeeper.bookie.BookKeeperServerStats.NUM_INDEX_PAGES; class IndexInMemPageMgr { private final static Logger LOG = LoggerFactory.getLogger(IndexInMemPageMgr.class); private final static ConcurrentHashMap<Long, LedgerEntryPage> EMPTY_PAGE_MAP = new ConcurrentHashMap<Long, LedgerEntryPage>(); private static class InMemPageCollection implements LEPStateChangeCallback { ConcurrentMap<Long, ConcurrentMap<Long,LedgerEntryPage>> pages; Map<EntryKey, LedgerEntryPage> lruCleanPageMap; public InMemPageCollection() { pages = new ConcurrentHashMap<Long, ConcurrentMap<Long,LedgerEntryPage>>(); lruCleanPageMap = Collections.synchronizedMap(new LinkedHashMap<EntryKey, LedgerEntryPage>(16, 0.75f, true)); } /** * Retrieve the LedgerEntryPage corresponding to the ledger and firstEntry * * @param ledgerId * Ledger id * @param firstEntry * Id of the first entry in the page * @returns LedgerEntryPage if present */ private LedgerEntryPage getPage(long ledgerId, long firstEntry) { ConcurrentMap<Long, LedgerEntryPage> map = pages.get(ledgerId); if (null != map) { return map.get(firstEntry); } return null; } /** * Add a LedgerEntryPage to the page map * * @param lep * Ledger Entry Page object */ private LedgerEntryPage putPage(LedgerEntryPage lep) { // Do a get here to avoid too many new ConcurrentHashMaps() as putIntoTable is called frequently. ConcurrentMap<Long, LedgerEntryPage> map = pages.get(lep.getLedger()); if (null == map) { ConcurrentMap<Long, LedgerEntryPage> mapToPut = new ConcurrentHashMap<Long, LedgerEntryPage>(); map = pages.putIfAbsent(lep.getLedger(), mapToPut); if (null == map) { map = mapToPut; } } LedgerEntryPage oldPage = map.putIfAbsent(lep.getFirstEntry(), lep); if (null == oldPage) { oldPage = lep; // Also include this in the clean page map if it qualifies. // Note: This is done for symmetry and correctness, however it should never // get exercised since we shouldn't attempt a put without the page being in use addToCleanPagesList(lep); } return oldPage; } /** * Traverse the pages for a given ledger in memory and find the highest * entry amongst these pages * * @param ledgerId * Ledger id * @returns last entry in the in memory pages */ private long getLastEntryInMem(long ledgerId) { long lastEntry = 0; // Find the last entry in the cache ConcurrentMap<Long, LedgerEntryPage> map = pages.get(ledgerId); if (map != null) { for(LedgerEntryPage lep: map.values()) { if (lep.getMaxPossibleEntry() < lastEntry) { continue; } lep.usePage(); long highest = lep.getLastEntry(); if (highest > lastEntry) { lastEntry = highest; } lep.releasePage(); } } return lastEntry; } /** * Removes ledger entry pages for a given ledger * * @param ledgerId * Ledger id * @returns number of pages removed */ private int removeEntriesForALedger(long ledgerId) { // remove pages first to avoid page flushed when deleting file info ConcurrentMap<Long, LedgerEntryPage> lPages = pages.remove(ledgerId); if (null != lPages) { for (long entryId: lPages.keySet()) { synchronized(lruCleanPageMap) { lruCleanPageMap.remove(new EntryKey(ledgerId, entryId)); } } return lPages.size(); } return 0; } /** * Gets the list of pages in memory that have been changed and hence need to * be written as a part of the flush operation that is being issued * * @param ledgerId * Ledger id * @returns last entry in the in memory pages. */ private LinkedList<Long> getFirstEntryListToBeFlushed(long ledgerId) { ConcurrentMap<Long, LedgerEntryPage> pageMap = pages.get(ledgerId); if (pageMap == null || pageMap.isEmpty()) { return null; } LinkedList<Long> firstEntryList = new LinkedList<Long>(); for(ConcurrentMap.Entry<Long, LedgerEntryPage> entry: pageMap.entrySet()) { LedgerEntryPage lep = entry.getValue(); if (lep.isClean()) { if (!lep.inUse()) { addToCleanPagesList(lep); } if (LOG.isTraceEnabled()) { LOG.trace("Page is clean " + lep); } } else { firstEntryList.add(lep.getFirstEntry()); } } return firstEntryList; } /** * Add the LedgerEntryPage to the clean page LRU map * * @param lep * Ledger Entry Page object */ private void addToCleanPagesList(LedgerEntryPage lep) { synchronized(lruCleanPageMap) { if (lep.isClean() && !lep.inUse()) { lruCleanPageMap.put(lep.getEntryKey(), lep); } } } /** * Remove the LedgerEntryPage from the clean page LRU map * * @param lep * Ledger Entry Page object */ private void removeFromCleanPageList(LedgerEntryPage lep) { synchronized(lruCleanPageMap) { if (!lep.isClean() || lep.inUse()) { lruCleanPageMap.remove(lep.getEntryKey()); } } } /** * Get the set of active ledgers * */ Set<Long> getActiveLedgers() { return pages.keySet(); } /** * Get a clean page and provision it for the specified ledger and firstEntry within * the ledger * * @param ledgerId * Ledger id * @param firstEntry * Id of the first entry in the page * @returns LedgerEntryPage if present */ LedgerEntryPage grabCleanPage(long ledgerId, long firstEntry) { LedgerEntryPage lep = null; while (lruCleanPageMap.size() > 0) { lep = null; synchronized(lruCleanPageMap) { Iterator<Map.Entry<EntryKey,LedgerEntryPage>> iterator = lruCleanPageMap.entrySet().iterator(); Map.Entry<EntryKey,LedgerEntryPage> entry = null; while (iterator.hasNext()) { entry = iterator.next(); iterator.remove(); if (entry.getValue().isClean() && !entry.getValue().inUse()) { lep = entry.getValue(); break; } } if (null == lep) { LOG.debug("Did not find eligible page in the first pass"); return null; } } // We found a candidate page, lets see if we can reclaim it before its re-used ConcurrentMap<Long, LedgerEntryPage> pageMap = pages.get(lep.getLedger()); // Remove from map only if nothing has changed since we checked this lep. // Its possible for the ledger to have been deleted or the page to have already // been reclaimed. The page map is the definitive source of information, if anything // has changed we should leave this page along and continue iterating to find // another suitable page. if ((null != pageMap) && (pageMap.remove(lep.getFirstEntry(), lep))) { if (!lep.isClean()) { // Someone wrote to this page while we were reclaiming it. pageMap.put(lep.getFirstEntry(), lep); lep = null; } else { // Do some bookkeeping on the page table pages.remove(lep.getLedger(), EMPTY_PAGE_MAP); // We can now safely reset this lep and return it. lep.usePage(); lep.zeroPage(); lep.setLedgerAndFirstEntry(ledgerId, firstEntry); return lep; } } else { lep = null; } } return lep; } @Override public void onSetInUse(LedgerEntryPage lep) { removeFromCleanPageList(lep); } @Override public void onResetInUse(LedgerEntryPage lep) { addToCleanPagesList(lep); } @Override public void onSetClean(LedgerEntryPage lep) { addToCleanPagesList(lep); } @Override public void onSetDirty(LedgerEntryPage lep) { removeFromCleanPageList(lep); } } final int pageSize; final int entriesPerPage; final int pageLimit; final InMemPageCollection pageMapAndList; // The number of pages that have actually been used private final AtomicInteger pageCount = new AtomicInteger(0); // The persistence manager that this page manager uses to // flush and read pages private final IndexPersistenceMgr indexPersistenceManager; /** * the list of potentially dirty ledgers */ private final ConcurrentLinkedQueue<Long> ledgersToFlush = new ConcurrentLinkedQueue<Long>(); private final ConcurrentSkipListSet<Long> ledgersFlushing = new ConcurrentSkipListSet<Long>(); public IndexInMemPageMgr(int pageSize, int entriesPerPage, ServerConfiguration conf, IndexPersistenceMgr indexPersistenceManager, StatsLogger statsLogger) { this.pageSize = pageSize; this.entriesPerPage = entriesPerPage; this.indexPersistenceManager = indexPersistenceManager; this.pageMapAndList = new InMemPageCollection(); long maxDirectMemory = DirectMemoryUtils.maxDirectMemory(); if (conf.getPageLimit() <= 0) { // By default, allocate a third of the direct memory to the page cache this.pageLimit = (int) ((maxDirectMemory / 3) / this.pageSize); } else { this.pageLimit = conf.getPageLimit(); } LOG.info("maxDirectMemory = {}, pageSize = {}, pageLimit = {}", new Object[] { maxDirectMemory, pageSize, pageLimit }); // Expose Stats statsLogger.registerGauge(NUM_INDEX_PAGES, new Gauge<Number>() { @Override public Number getDefaultValue() { return 0; } @Override public Number getSample() { return getNumUsedPages(); } }); } /** * @return page size used in ledger cache */ public int getPageSize() { return pageSize; } /** * @return entries per page used in ledger cache */ public int getEntriesPerPage() { return entriesPerPage; } /** * @return page limitation in ledger cache */ public int getPageLimit() { return pageLimit; } /** * @return number of page used in ledger cache */ public int getNumUsedPages() { return pageCount.get(); } LedgerEntryPage getLedgerEntryPage(Long ledger, Long firstEntry, boolean onlyDirty) { LedgerEntryPage lep = pageMapAndList.getPage(ledger, firstEntry); if (onlyDirty && null != lep && lep.isClean()) { return null; } if (null != lep) { lep.usePage(); } return lep; } /** * Grab ledger entry page whose first entry is <code>pageEntry</code>. * * If the page doesn't existed before, we allocate a memory page. * Otherwise, we grab a clean page and read it from disk. * * @param ledger * Ledger Id * @param pageEntry * Start entry of this entry page. */ private LedgerEntryPage grabLedgerEntryPage(long ledger, long pageEntry) throws IOException { LedgerEntryPage lep = grabCleanPage(ledger, pageEntry); try { // should get the up to date page from the persistence manager // before we put it into table otherwise we would put // an empty page in it indexPersistenceManager.updatePage(lep); LedgerEntryPage oldLep; if (lep != (oldLep = pageMapAndList.putPage(lep))) { lep.releasePage(); // Decrement the page count because we couldn't put this lep in the page cache. pageCount.decrementAndGet(); // Increment the use count of the old lep because this is unexpected oldLep.usePage(); lep = oldLep; } } catch (IOException ie) { // if we grab a clean page, but failed to update the page // we are exhausting the count of ledger entry pages. // since this page will be never used, so we need to decrement // page count of ledger cache. lep.releasePage(); pageCount.decrementAndGet(); throw ie; } return lep; } void removePagesForLedger(long ledgerId) { int removedPageCount = pageMapAndList.removeEntriesForALedger(ledgerId); if (pageCount.addAndGet(-removedPageCount) < 0) { throw new RuntimeException("Page count of ledger cache has been decremented to be less than zero."); } ledgersToFlush.remove(ledgerId); } long getLastEntryInMem(long ledgerId) { return pageMapAndList.getLastEntryInMem(ledgerId); } private LedgerEntryPage grabCleanPage(long ledger, long entry) throws IOException { if (entry % entriesPerPage != 0) { throw new IllegalArgumentException(entry + " is not a multiple of " + entriesPerPage); } while(true) { boolean canAllocate = false; if (pageCount.incrementAndGet() <= pageLimit) { canAllocate = true; } else { pageCount.decrementAndGet(); } if (canAllocate) { LedgerEntryPage lep = new LedgerEntryPage(pageSize, entriesPerPage, pageMapAndList); lep.setLedgerAndFirstEntry(ledger, entry); lep.usePage(); return lep; } LedgerEntryPage lep = pageMapAndList.grabCleanPage(ledger, entry); if (null != lep) { return lep; } LOG.info("Could not grab a clean page for ledger {}, entry {}, force flushing dirty ledgers.", ledger, entry); flushOneOrMoreLedgers(false); } } void flushOneOrMoreLedgers(boolean doAll) throws IOException { if (ledgersToFlush.isEmpty()) { ledgersToFlush.addAll(pageMapAndList.getActiveLedgers()); } Long potentiallyDirtyLedger; while (null != (potentiallyDirtyLedger = ledgersToFlush.poll())) { if (!ledgersFlushing.add(potentiallyDirtyLedger)) { continue; } try { flushSpecificLedger(potentiallyDirtyLedger); } finally { ledgersFlushing.remove(potentiallyDirtyLedger); } if (!doAll) { break; } } } /** * Flush a specified ledger * * @param ledger * Ledger Id * @throws IOException */ private void flushSpecificLedger(long ledger) throws IOException { LinkedList<Long> firstEntryList = pageMapAndList.getFirstEntryListToBeFlushed(ledger); // flush ledger index file header if necessary indexPersistenceManager.flushLedgerHeader(ledger); if (null == firstEntryList || firstEntryList.size() == 0) { LOG.debug("Nothing to flush for ledger {}.", ledger); // nothing to do return; } // Now flush all the pages of a ledger List<LedgerEntryPage> entries = new ArrayList<LedgerEntryPage>(firstEntryList.size()); try { for(Long firstEntry: firstEntryList) { LedgerEntryPage lep = getLedgerEntryPage(ledger, firstEntry, true); if (lep != null) { entries.add(lep); } } indexPersistenceManager.flushLedgerEntries(ledger, entries); } finally { for(LedgerEntryPage lep: entries) { lep.releasePage(); } } } void putEntryOffset(long ledger, long entry, long offset) throws IOException { int offsetInPage = (int) (entry % entriesPerPage); // find the id of the first entry of the page that has the entry // we are looking for long pageEntry = entry - offsetInPage; LedgerEntryPage lep = getLedgerEntryPage(ledger, pageEntry, false); if (lep == null) { lep = grabLedgerEntryPage(ledger, pageEntry); } assert lep != null; lep.setOffset(offset, offsetInPage * LedgerEntryPage.getIndexEntrySize()); lep.releasePage(); } long getEntryOffset(long ledger, long entry) throws IOException { int offsetInPage = (int) (entry % entriesPerPage); // find the id of the first entry of the page that has the entry // we are looking for long pageEntry = entry - offsetInPage; LedgerEntryPage lep = getLedgerEntryPage(ledger, pageEntry, false); try { if (lep == null) { lep = grabLedgerEntryPage(ledger, pageEntry); } return lep.getOffset(offsetInPage * LedgerEntryPage.getIndexEntrySize()); } finally { if (lep != null) { lep.releasePage(); } } } }