/**
 * Copyright 2016 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
package com.github.ambry.store;

import com.github.ambry.config.StoreConfig;
import com.github.ambry.utils.ByteBufferInputStream;
import com.github.ambry.utils.CrcInputStream;
import com.github.ambry.utils.CrcOutputStream;
import com.github.ambry.utils.FilterFactory;
import com.github.ambry.utils.IFilter;
import com.github.ambry.utils.Pair;
import com.github.ambry.utils.Time;
import com.github.ambry.utils.Utils;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Represents a segment of an index. The segment is represented by a start offset and an end offset and has its keys
 * sorted. The segment can either be read-only and memory mapped or writable and in memory. The segment uses a bloom
 * filter to optimize reads from disk. If the index is read-only, a key is searched for by doing a binary search on
 * the memory mapped file. If the index is in memory, a regular map lookup is performed to find the key.
 */
class IndexSegment {
  static final String INDEX_SEGMENT_FILE_NAME_SUFFIX = "index";
  static final String BLOOM_FILE_NAME_SUFFIX = "bloom";

  private final static int KEY_SIZE_INVALID_VALUE = -1;
  private final static int VALUE_SIZE_INVALID_VALUE = -1;

  private final int VERSION_FIELD_LENGTH = 2;
  private final int KEY_SIZE_FIELD_LENGTH = 4;
  private final int VALUE_SIZE_FIELD_LENGTH = 4;
  private final int CRC_FIELD_LENGTH = 8;
  private final int LOG_END_OFFSET_FIELD_LENGTH = 8;
  private final int LAST_MODIFIED_TIME_FIELD_LENGTH = 8;
  private final int RESET_KEY_TYPE_FIELD_LENGTH = 2;

  private int indexSizeExcludingEntries;
  private int firstKeyRelativeOffset;
  private final String indexSegmentFilenamePrefix;
  private final Offset startOffset;
  private final AtomicReference<Offset> endOffset;
  private final File indexFile;
  private final ReadWriteLock rwLock;
  private final AtomicBoolean mapped;
  private final Logger logger = LoggerFactory.getLogger(getClass());
  private final AtomicLong sizeWritten;
  private final StoreKeyFactory factory;
  private final File bloomFile;
  private final StoreMetrics metrics;
  private final AtomicInteger numberOfItems;
  private final Time time;

  // an approximation of the last modified time.
  private final AtomicLong lastModifiedTimeSec;
  private MappedByteBuffer mmap = null;
  private IFilter bloomFilter;
  private int keySize;
  private int valueSize;
  private short version;
  private Offset prevSafeEndPoint = null;
  // reset key refers to the first StoreKey that is added to the index segment
  private Pair<StoreKey, PersistentIndex.IndexEntryType> resetKey = null;
  protected ConcurrentSkipListMap<StoreKey, IndexValue> index = null;
  /**
   * Creates a new segment
   * @param dataDir The data directory to use for this segment
   * @param startOffset The start {@link Offset} in the {@link Log} that this segment represents.
   * @param factory The store key factory used to create new store keys
   * @param keySize The key size that this segment supports
   * @param valueSize The value size that this segment supports
   * @param config The store config used to initialize the index segment
   * @param metrics The store metrics used to track metrics
   * @param time the {@link Time} instance to use
   */
  IndexSegment(String dataDir, Offset startOffset, StoreKeyFactory factory, int keySize, int valueSize,
      StoreConfig config, StoreMetrics metrics, Time time) {
    this.rwLock = new ReentrantReadWriteLock();
    this.startOffset = startOffset;
    this.endOffset = new AtomicReference<>(startOffset);
    index = new ConcurrentSkipListMap<>();
    mapped = new AtomicBoolean(false);
    sizeWritten = new AtomicLong(0);
    this.factory = factory;
    this.keySize = keySize;
    this.valueSize = valueSize;
    this.version = PersistentIndex.CURRENT_VERSION;
    bloomFilter = FilterFactory.getFilter(config.storeIndexMaxNumberOfInmemElements,
        config.storeIndexBloomMaxFalsePositiveProbability);
    numberOfItems = new AtomicInteger(0);
    this.metrics = metrics;
    this.time = time;
    lastModifiedTimeSec = new AtomicLong(time.seconds());
    indexSegmentFilenamePrefix = generateIndexSegmentFilenamePrefix();
    indexFile = new File(dataDir, indexSegmentFilenamePrefix + INDEX_SEGMENT_FILE_NAME_SUFFIX);
    bloomFile = new File(dataDir, indexSegmentFilenamePrefix + BLOOM_FILE_NAME_SUFFIX);
  }
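  /*
   * Illustrative construction of the two flavors of segment. This is a rough sketch, not code from this repo: the
   * "dataDir", "logEndOffset", "storeKeyFactory", "storeConfig", "storeMetrics", "journal" and "indexFile" names are
   * hypothetical stand-ins for whatever the caller (e.g. PersistentIndex) actually holds.
   *
   *   // a fresh, writable, in-memory segment that starts at the current end of the log
   *   IndexSegment active =
   *       new IndexSegment(dataDir, logEndOffset, storeKeyFactory, keySize, valueSize, storeConfig, storeMetrics,
   *           SystemTime.getInstance());
   *
   *   // a sealed segment recovered from disk; shouldMap == true memory maps it and loads its bloom filter
   *   IndexSegment sealed =
   *       new IndexSegment(indexFile, true, storeKeyFactory, storeConfig, storeMetrics, journal,
   *           SystemTime.getInstance());
   */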
  /**
   * Initializes an existing segment. Memory maps the segment or reads the segment into memory. Also reads the
   * persisted bloom filter from disk.
   * @param indexFile The index file that the segment needs to be initialized from
   * @param shouldMap Indicates if the segment needs to be memory mapped
   * @param factory The store key factory used to create new store keys
   * @param config The store config used to initialize the index segment
   * @param metrics The store metrics used to track metrics
   * @param journal The journal to use
   * @param time the {@link Time} instance to use
   * @throws StoreException
   */
  IndexSegment(File indexFile, boolean shouldMap, StoreKeyFactory factory, StoreConfig config, StoreMetrics metrics,
      Journal journal, Time time) throws StoreException {
    try {
      startOffset = getIndexSegmentStartOffset(indexFile.getName());
      endOffset = new AtomicReference<>(startOffset);
      indexSegmentFilenamePrefix = generateIndexSegmentFilenamePrefix();
      this.indexFile = indexFile;
      this.rwLock = new ReentrantReadWriteLock();
      this.factory = factory;
      this.time = time;
      sizeWritten = new AtomicLong(0);
      numberOfItems = new AtomicInteger(0);
      mapped = new AtomicBoolean(false);
      lastModifiedTimeSec = new AtomicLong(0);
      if (shouldMap) {
        map(false);
        // Load the bloom filter for this index.
        // We need to load the bloom filter only for mapped indexes.
        bloomFile = new File(indexFile.getParent(), indexSegmentFilenamePrefix + BLOOM_FILE_NAME_SUFFIX);
        CrcInputStream crcBloom = new CrcInputStream(new FileInputStream(bloomFile));
        DataInputStream stream = new DataInputStream(crcBloom);
        bloomFilter = FilterFactory.deserialize(stream);
        long crcValue = crcBloom.getValue();
        if (crcValue != stream.readLong()) {
          // TODO metrics
          // We don't recover the filter; we just bypass it. Crc corrections will be done by the scrubber.
          bloomFilter = null;
          logger.error("IndexSegment : {} error validating crc for bloom filter for {}", indexFile.getAbsolutePath(),
              bloomFile.getAbsolutePath());
        }
        stream.close();
      } else {
        index = new ConcurrentSkipListMap<StoreKey, IndexValue>();
        bloomFilter = FilterFactory.getFilter(config.storeIndexMaxNumberOfInmemElements,
            config.storeIndexBloomMaxFalsePositiveProbability);
        bloomFile = new File(indexFile.getParent(), indexSegmentFilenamePrefix + BLOOM_FILE_NAME_SUFFIX);
        try {
          readFromFile(indexFile, journal);
        } catch (StoreException e) {
          if (e.getErrorCode() == StoreErrorCodes.Index_Creation_Failure
              || e.getErrorCode() == StoreErrorCodes.Index_Version_Error) {
            // We just log the error here and retain the index created so far. The subsequent recovery process will
            // add the missed out entries.
            logger.error("Index Segment : {} error while reading from index {}", indexFile.getAbsolutePath(),
                e.getMessage());
          } else {
            throw e;
          }
        }
      }
    } catch (Exception e) {
      throw new StoreException(
          "Index Segment : " + indexFile.getAbsolutePath() + " error while loading index from file", e,
          StoreErrorCodes.Index_Creation_Failure);
    }
    this.metrics = metrics;
  }

  /**
   * @return the name of the log segment that this index segment refers to.
   */
  String getLogSegmentName() {
    return startOffset.getName();
  }

  /**
   * The start offset that this segment represents
   * @return The start offset that this segment represents
   */
  Offset getStartOffset() {
    return startOffset;
  }

  /**
   * The end offset that this segment represents
   * @return The end offset that this segment represents
   */
  Offset getEndOffset() {
    return endOffset.get();
  }
  /**
   * Returns if this segment is mapped or not
   * @return True, if the segment is read-only and mapped. False, otherwise
   */
  boolean isMapped() {
    return mapped.get();
  }

  /**
   * The underlying file that this segment represents
   * @return The file that this segment represents
   */
  File getFile() {
    return indexFile;
  }

  /**
   * The key size in this segment
   * @return The key size in this segment
   */
  int getKeySize() {
    return keySize;
  }

  /**
   * The value size in this segment
   * @return The value size in this segment
   */
  int getValueSize() {
    return valueSize;
  }

  /**
   * The time of last modification of this segment in ms
   * @return The time in ms of the last modification of this segment.
   */
  long getLastModifiedTimeMs() {
    return TimeUnit.SECONDS.toMillis(lastModifiedTimeSec.get());
  }

  /**
   * The time of last modification of this segment in secs
   * @return The time in secs of the last modification of this segment.
   */
  long getLastModifiedTimeSecs() {
    return lastModifiedTimeSec.get();
  }

  /**
   * Sets the last modified time (secs) of this segment.
   * @param lastModifiedTimeSec the value to set to (secs).
   */
  void setLastModifiedTimeSecs(long lastModifiedTimeSec) {
    this.lastModifiedTimeSec.set(lastModifiedTimeSec);
  }

  /**
   * The version of the {@link PersistentIndex} that this {@link IndexSegment} is based on
   * @return the version of the {@link PersistentIndex} that this {@link IndexSegment} is based on
   */
  short getVersion() {
    return version;
  }

  /**
   * The resetKey for the index segment.
   * @return the reset key for the index segment which is a {@link Pair} of StoreKey and
   * {@link PersistentIndex.IndexEntryType}
   */
  Pair<StoreKey, PersistentIndex.IndexEntryType> getResetKey() {
    return resetKey;
  }
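  /*
   * What a lookup against a mapped (sealed) segment boils down to; a minimal sketch of find() below, with locking
   * and metrics stripped out ("binarySearchEntries" and "valueAt" are hypothetical helpers, not methods of this
   * class):
   *
   *   if (bloomFilter != null && !bloomFilter.isPresent(ByteBuffer.wrap(keyToFind.toBytes()))) {
   *     return null;                                 // definite miss; no binary search needed
   *   }
   *   int idx = binarySearchEntries(mmap.duplicate(), keyToFind);
   *   return idx >= 0 ? valueAt(idx) : null;         // null here means the bloom filter gave a false positive
   */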
  /**
   * Finds an entry given a key. It looks in the in-memory map or does a binary search on the mapped persistent
   * segment.
   * @param keyToFind The key to find
   * @return The blob index value that represents the key or null if not found
   * @throws StoreException
   */
  IndexValue find(StoreKey keyToFind) throws StoreException {
    IndexValue toReturn = null;
    try {
      rwLock.readLock().lock();
      if (!mapped.get()) {
        IndexValue value = index.get(keyToFind);
        if (value != null) {
          metrics.blobFoundInActiveSegmentCount.inc();
        }
        toReturn = value;
      } else {
        if (bloomFilter != null) {
          metrics.bloomAccessedCount.inc();
        }
        if (bloomFilter == null || bloomFilter.isPresent(ByteBuffer.wrap(keyToFind.toBytes()))) {
          if (bloomFilter == null) {
            logger.trace("IndexSegment {} bloom filter empty. Searching file with start offset {} and for key {}",
                indexFile.getAbsolutePath(), startOffset, keyToFind);
          } else {
            metrics.bloomPositiveCount.inc();
            logger.trace("IndexSegment {} found in bloom filter for index with start offset {} and for key {} ",
                indexFile.getAbsolutePath(), startOffset, keyToFind);
          }
          // binary search on the mapped file
          ByteBuffer duplicate = mmap.duplicate();
          int low = 0;
          int high = numberOfEntries(duplicate) - 1;
          logger.trace("binary search low : {} high : {}", low, high);
          while (low <= high) {
            int mid = (int) (Math.ceil(high / 2.0 + low / 2.0));
            StoreKey found = getKeyAt(duplicate, mid);
            logger.trace("Index Segment {} binary search - key found on iteration {}", indexFile.getAbsolutePath(),
                found);
            int result = found.compareTo(keyToFind);
            if (result == 0) {
              byte[] buf = new byte[valueSize];
              duplicate.get(buf);
              toReturn = new IndexValue(startOffset.getName(), ByteBuffer.wrap(buf), getVersion());
              break;
            } else if (result < 0) {
              low = mid + 1;
            } else {
              high = mid - 1;
            }
          }
          if (bloomFilter != null && toReturn == null) {
            metrics.bloomFalsePositiveCount.inc();
          }
        }
      }
    } catch (IOException e) {
      throw new StoreException("IndexSegment : " + indexFile.getAbsolutePath() + " IO error while searching", e,
          StoreErrorCodes.IOError);
    } finally {
      rwLock.readLock().unlock();
    }
    return toReturn;
  }

  private int numberOfEntries(ByteBuffer mmap) {
    return (mmap.capacity() - indexSizeExcludingEntries) / (keySize + valueSize);
  }

  private StoreKey getKeyAt(ByteBuffer mmap, int index) throws IOException {
    mmap.position(firstKeyRelativeOffset + (index * (keySize + valueSize)));
    return factory.getStoreKey(new DataInputStream(new ByteBufferInputStream(mmap)));
  }

  private int findIndex(StoreKey keyToFind, ByteBuffer mmap) throws IOException {
    // binary search on the mapped file
    int low = 0;
    int high = numberOfEntries(mmap) - 1;
    logger.trace("IndexSegment {} binary search low : {} high : {}", indexFile.getAbsolutePath(), low, high);
    while (low <= high) {
      int mid = (int) (Math.ceil(high / 2.0 + low / 2.0));
      StoreKey found = getKeyAt(mmap, mid);
      logger.trace("IndexSegment {} binary search - key found on iteration {}", indexFile.getAbsolutePath(), found);
      int result = found.compareTo(keyToFind);
      if (result == 0) {
        return mid;
      } else if (result < 0) {
        low = mid + 1;
      } else {
        high = mid - 1;
      }
    }
    return -1;
  }
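  /*
   * A worked example of the fixed-size layout that numberOfEntries() and getKeyAt() rely on (the numbers are
   * illustrative): for a version 1 segment with keySize = 18, valueSize = 24 and an 18 byte reset key, the header
   * occupies 2 + 4 + 4 + 8 + 8 + 18 + 2 = 46 bytes, so firstKeyRelativeOffset = 46 and, with the trailing 8 byte
   * crc, indexSizeExcludingEntries = 54. Entry i then starts at byte 46 + i * (18 + 24), and a file of capacity
   * 46 + 10 * 42 + 8 = 474 bytes holds (474 - 54) / 42 = 10 entries.
   */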
  /**
   * Adds an entry into the segment. The operation works only if the segment is read/write
   * @param entry The entry that needs to be added to the segment.
   * @param fileEndOffset The file end offset that this entry represents.
   * @throws StoreException
   */
  void addEntry(IndexEntry entry, Offset fileEndOffset) throws StoreException {
    try {
      rwLock.readLock().lock();
      if (mapped.get()) {
        throw new StoreException("IndexSegment : " + indexFile.getAbsolutePath() + " cannot add to a mapped index ",
            StoreErrorCodes.Illegal_Index_Operation);
      }
      logger.trace("IndexSegment {} inserting key - {} value - offset {} size {} ttl {} "
              + "originalMessageOffset {} fileEndOffset {}", indexFile.getAbsolutePath(), entry.getKey(),
          entry.getValue().getOffset(), entry.getValue().getSize(), entry.getValue().getExpiresAtMs(),
          entry.getValue().getOriginalMessageOffset(), fileEndOffset);
      if (index.put(entry.getKey(), entry.getValue()) == null) {
        numberOfItems.incrementAndGet();
        sizeWritten.addAndGet(entry.getKey().sizeInBytes() + entry.getValue().getBytes().capacity());
        bloomFilter.add(ByteBuffer.wrap(entry.getKey().toBytes()));
        if (resetKey == null) {
          resetKey = new Pair<>(entry.getKey(),
              entry.getValue().isFlagSet(IndexValue.Flags.Delete_Index) ? PersistentIndex.IndexEntryType.DELETE
                  : PersistentIndex.IndexEntryType.PUT);
        }
      }
      endOffset.set(fileEndOffset);
      long operationTimeInMs = entry.getValue().getOperationTimeInMs();
      if (operationTimeInMs == Utils.Infinite_Time) {
        lastModifiedTimeSec.set(time.seconds());
      } else if ((operationTimeInMs / Time.MsPerSec) > lastModifiedTimeSec.get()) {
        lastModifiedTimeSec.set(operationTimeInMs / Time.MsPerSec);
      }
      if (keySize == KEY_SIZE_INVALID_VALUE) {
        StoreKey key = entry.getKey();
        keySize = key.sizeInBytes();
        logger.info("IndexSegment : {} setting key size to {} of key {} for index with start offset {}",
            indexFile.getAbsolutePath(), key.sizeInBytes(), key.getLongForm(), startOffset);
      }
      if (valueSize == VALUE_SIZE_INVALID_VALUE) {
        valueSize = entry.getValue().getBytes().capacity();
        logger.info("IndexSegment : {} setting value size to {} for index with start offset {}",
            indexFile.getAbsolutePath(), valueSize, startOffset);
      }
    } finally {
      rwLock.readLock().unlock();
    }
  }

  /**
   * The total size in bytes written to this segment so far
   * @return The total size in bytes written to this segment so far
   */
  long getSizeWritten() {
    try {
      rwLock.readLock().lock();
      if (mapped.get()) {
        throw new UnsupportedOperationException("Operation supported only on unmapped indexes");
      }
      return sizeWritten.get();
    } finally {
      rwLock.readLock().unlock();
    }
  }

  /**
   * The number of items contained in this segment
   * @return The number of items contained in this segment
   */
  int getNumberOfItems() {
    try {
      rwLock.readLock().lock();
      if (mapped.get()) {
        throw new UnsupportedOperationException("Operation supported only on unmapped indexes");
      }
      return numberOfItems.get();
    } finally {
      rwLock.readLock().unlock();
    }
  }
  /**
   * Writes the index to a persistent file. Writes the data in the following format in version 1
   * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   * | version | keysize | valuesize | fileendpointer | last modified time(in secs) | Reset key | Reset key type |
   * |(2 bytes)|(4 bytes)| (4 bytes) |   (8 bytes)    |          (8 bytes)          | (n bytes) |   (2 bytes)    |
   * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   * |   key 1   |  value 1  | ... |   key n   |  value n  |   crc    |
   * | (n bytes) | (n bytes) |     | (n bytes) | (n bytes) | (8 bytes)|
   * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   * version            - the index format version
   * keysize            - the size of the key in this index segment
   * valuesize          - the size of the value in this index segment
   * fileendpointer     - the log end pointer that pertains to the index being persisted
   * last modified time - the last modified time of the index segment in secs
   * reset key          - the reset key (StoreKey) of the index segment
   * reset key type     - the reset key index entry type (PUT/DELETE)
   * key n / value n    - the key and value entries contained in this index segment
   * crc                - the crc of the index segment content
   *
   * Indexes that were written in version 0 have the following format
   * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   * | version | keysize | valuesize | fileendpointer |   key 1   |  value 1  | ... |   key n   |  value n  |   crc    |
   * |(2 bytes)|(4 bytes)| (4 bytes) |   (8 bytes)    | (n bytes) | (n bytes) |     | (n bytes) | (n bytes) | (8 bytes)|
   * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   * version         - the index format version
   * keysize         - the size of the key in this index segment
   * valuesize       - the size of the value in this index segment
   * fileendpointer  - the log end pointer that pertains to the index being persisted
   * key n / value n - the key and value entries contained in this index segment
   * crc             - the crc of the index segment content
   *
   * @param safeEndPoint the end point (that is relevant to this segment) until which the log has been flushed.
   * @throws IOException
   * @throws StoreException
   */
  void writeIndexSegmentToFile(Offset safeEndPoint) throws IOException, StoreException {
    if (safeEndPoint.compareTo(startOffset) <= 0) {
      return;
    }
    if (!safeEndPoint.equals(prevSafeEndPoint)) {
      if (safeEndPoint.compareTo(getEndOffset()) > 0) {
        throw new StoreException(
            "SafeEndOffSet " + safeEndPoint + " is greater than current end offset for current " + "index segment "
                + getEndOffset(), StoreErrorCodes.Illegal_Index_Operation);
      }
      File temp = new File(getFile().getAbsolutePath() + ".tmp");
      FileOutputStream fileStream = new FileOutputStream(temp);
      CrcOutputStream crc = new CrcOutputStream(fileStream);
      DataOutputStream writer = new DataOutputStream(crc);
      try {
        rwLock.readLock().lock();
        // write the current version
        writer.writeShort(getVersion());
        // write the key size, value size and file end pointer
        writer.writeInt(this.keySize);
        writer.writeInt(this.valueSize);
        writer.writeLong(safeEndPoint.getOffset());
        if (getVersion() == PersistentIndex.VERSION_1) {
          // write the last modified time and the reset key in case of version 1
          writer.writeLong(lastModifiedTimeSec.get());
          writer.write(resetKey.getFirst().toBytes());
          writer.writeShort(resetKey.getSecond().ordinal());
        }
        // NOTE: In the event of a crash, it is possible that there is a part of the log that is not covered by the
        // index. This happens due to the fact that a DELETE that occurs in the same segment as a PUT overwrites the
        // PUT entry. Consider the following case:
        // (entries are of the form ID:TYPE:START_OFFSET-END_OFFSET)
        // This is the order of operations
        // A:PUT:0-100
        // B:PUT:101-200
        // A:DELETE:201-250
        // These are the entries in the index segment
        // A:DELETE:201-250
        // B:PUT:101-200
        // If safeEndPoint < 250, then B:PUT:101-200 will be written but not A:DELETE:201-250. If the process were to
        // crash at this point, the index end offset would be 200 and the span 0-100 would not be represented in the
        // index.
        // write the entries
        for (Map.Entry<StoreKey, IndexValue> entry : index.entrySet()) {
          if (entry.getValue().getOffset().getOffset() + entry.getValue().getSize() <= safeEndPoint.getOffset()) {
            writer.write(entry.getKey().toBytes());
            writer.write(entry.getValue().getBytes().array());
            logger.trace("IndexSegment : {} writing key - {} value - offset {} size {} fileEndOffset {}",
                getFile().getAbsolutePath(), entry.getKey(), entry.getValue().getOffset(), entry.getValue().getSize(),
                safeEndPoint);
          }
        }
        prevSafeEndPoint = safeEndPoint;
        long crcValue = crc.getValue();
        writer.writeLong(crcValue);
        // flush and overwrite the old file
        fileStream.getChannel().force(true);
        // swap the temp file with the original file
        temp.renameTo(getFile());
      } catch (IOException e) {
        throw new StoreException(
            "IndexSegment : " + indexFile.getAbsolutePath() + " IO error while persisting index to disk", e,
            StoreErrorCodes.IOError);
      } finally {
        writer.close();
        rwLock.readLock().unlock();
      }
      logger.trace("IndexSegment : {} completed writing index to file", indexFile.getAbsolutePath());
    }
  }
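  /*
   * A minimal sketch of reading back the version 1 header written above, mirroring the layout in the javadoc of
   * writeIndexSegmentToFile() (error handling, entry parsing and crc validation omitted; readFromFile() below has
   * the real logic):
   *
   *   DataInputStream in = new DataInputStream(new FileInputStream(indexFile));
   *   short version = in.readShort();              // PersistentIndex.VERSION_1
   *   int keySize = in.readInt();
   *   int valueSize = in.readInt();
   *   long fileEndPointer = in.readLong();
   *   long lastModifiedTimeSec = in.readLong();
   *   StoreKey resetKey = factory.getStoreKey(in);
   *   short resetKeyType = in.readShort();         // ordinal into PersistentIndex.IndexEntryType.values()
   *   // ... (keySize + valueSize) byte entries follow, then the 8 byte crc ...
   */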
  /**
   * Memory maps the segment of index. Optionally, it also persists the bloom filter to disk.
   * @param persistBloom True, if the bloom filter needs to be persisted. False otherwise.
   * @throws IOException
   * @throws StoreException
   */
  void map(boolean persistBloom) throws IOException, StoreException {
    RandomAccessFile raf = new RandomAccessFile(indexFile, "r");
    rwLock.writeLock().lock();
    try {
      mmap = raf.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, indexFile.length());
      mmap.position(0);
      version = mmap.getShort();
      indexSizeExcludingEntries = VERSION_FIELD_LENGTH + KEY_SIZE_FIELD_LENGTH + VALUE_SIZE_FIELD_LENGTH
          + LOG_END_OFFSET_FIELD_LENGTH + CRC_FIELD_LENGTH;
      switch (version) {
        case 0:
          keySize = mmap.getInt();
          valueSize = mmap.getInt();
          endOffset.set(new Offset(startOffset.getName(), mmap.getLong()));
          lastModifiedTimeSec.set(indexFile.lastModified() / 1000);
          firstKeyRelativeOffset = indexSizeExcludingEntries - CRC_FIELD_LENGTH;
          break;
        case 1:
          keySize = mmap.getInt();
          valueSize = mmap.getInt();
          endOffset.set(new Offset(startOffset.getName(), mmap.getLong()));
          lastModifiedTimeSec.set(mmap.getLong());
          StoreKey storeKey = factory.getStoreKey(new DataInputStream(new ByteBufferInputStream(mmap)));
          short resetKeyType = mmap.getShort();
          resetKey = new Pair<>(storeKey, PersistentIndex.IndexEntryType.values()[resetKeyType]);
          indexSizeExcludingEntries +=
              (LAST_MODIFIED_TIME_FIELD_LENGTH + resetKey.getFirst().sizeInBytes() + RESET_KEY_TYPE_FIELD_LENGTH);
          firstKeyRelativeOffset = indexSizeExcludingEntries - CRC_FIELD_LENGTH;
          break;
        default:
          throw new StoreException("IndexSegment : " + indexFile.getAbsolutePath() + " unknown version in index file",
              StoreErrorCodes.Index_Version_Error);
      }
      mapped.set(true);
      index = null;
    } finally {
      raf.close();
      rwLock.writeLock().unlock();
    }
    // We should be fine reading the bloom filter here without synchronization as the index is read only.
    // We only persist the bloom filter once during its entire lifetime.
    if (persistBloom) {
      CrcOutputStream crcStream = new CrcOutputStream(new FileOutputStream(bloomFile));
      DataOutputStream stream = new DataOutputStream(crcStream);
      FilterFactory.serialize(bloomFilter, stream);
      long crcValue = crcStream.getValue();
      stream.writeLong(crcValue);
      // close (and thereby flush) the stream so the bloom file is fully on disk
      stream.close();
    }
  }
  /**
   * Reads the index segment from file into an in memory representation
   * @param fileToRead The file to read the index segment from
   * @param journal The journal to use.
   * @throws StoreException
   * @throws IOException
   */
  private void readFromFile(File fileToRead, Journal journal) throws StoreException, IOException {
    logger.info("IndexSegment : {} reading index from file", indexFile.getAbsolutePath());
    index.clear();
    CrcInputStream crcStream = new CrcInputStream(new FileInputStream(fileToRead));
    DataInputStream stream = new DataInputStream(crcStream);
    try {
      version = stream.readShort();
      switch (version) {
        case PersistentIndex.VERSION_0:
        case PersistentIndex.VERSION_1:
          keySize = stream.readInt();
          valueSize = stream.readInt();
          long logEndOffset = stream.readLong();
          indexSizeExcludingEntries = VERSION_FIELD_LENGTH + KEY_SIZE_FIELD_LENGTH + VALUE_SIZE_FIELD_LENGTH
              + LOG_END_OFFSET_FIELD_LENGTH + CRC_FIELD_LENGTH;
          if (version == PersistentIndex.VERSION_0) {
            lastModifiedTimeSec.set(indexFile.lastModified() / 1000);
          } else if (version == PersistentIndex.VERSION_1) {
            lastModifiedTimeSec.set(stream.readLong());
            StoreKey storeKey = factory.getStoreKey(stream);
            short resetKeyType = stream.readShort();
            resetKey = new Pair<>(storeKey, PersistentIndex.IndexEntryType.values()[resetKeyType]);
            indexSizeExcludingEntries +=
                (LAST_MODIFIED_TIME_FIELD_LENGTH + resetKey.getFirst().sizeInBytes() + RESET_KEY_TYPE_FIELD_LENGTH);
          }
          firstKeyRelativeOffset = indexSizeExcludingEntries - CRC_FIELD_LENGTH;
          logger.trace("IndexSegment : {} reading log end offset {} from file", indexFile.getAbsolutePath(),
              logEndOffset);
          long maxEndOffset = Long.MIN_VALUE;
          while (stream.available() > CRC_FIELD_LENGTH) {
            StoreKey key = factory.getStoreKey(stream);
            byte[] value = new byte[valueSize];
            // readFully (rather than read) guarantees the whole value is consumed even if the underlying stream
            // returns it in chunks
            stream.readFully(value);
            IndexValue blobValue = new IndexValue(startOffset.getName(), ByteBuffer.wrap(value), version);
            long offsetInLogSegment = blobValue.getOffset().getOffset();
            // ignore entries that have offsets outside the log end offset that this index represents
            if (offsetInLogSegment + blobValue.getSize() <= logEndOffset) {
              index.put(key, blobValue);
              logger.trace("IndexSegment : {} putting key {} in index offset {} size {}", indexFile.getAbsolutePath(),
                  key, blobValue.getOffset(), blobValue.getSize());
              // regenerate the bloom filter for in memory indexes
              bloomFilter.add(ByteBuffer.wrap(key.toBytes()));
              // add to the journal
              if (blobValue.getOriginalMessageOffset() != IndexValue.UNKNOWN_ORIGINAL_MESSAGE_OFFSET
                  && offsetInLogSegment != blobValue.getOriginalMessageOffset()
                  && blobValue.getOriginalMessageOffset() >= startOffset.getOffset()) {
                // we add an entry for the original message offset if it is within the same index segment
                journal.addEntry(new Offset(startOffset.getName(), blobValue.getOriginalMessageOffset()), key);
              }
              journal.addEntry(blobValue.getOffset(), key);
              sizeWritten.addAndGet(key.sizeInBytes() + valueSize);
              numberOfItems.incrementAndGet();
              if (offsetInLogSegment + blobValue.getSize() > maxEndOffset) {
                maxEndOffset = offsetInLogSegment + blobValue.getSize();
              }
            } else {
              logger.info(
                  "IndexSegment : {} ignoring index entry outside the log end offset that was not synced logEndOffset "
                      + "{} key {} entryOffset {} entrySize {} entryDeleteState {}", indexFile.getAbsolutePath(),
                  logEndOffset, key, blobValue.getOffset(), blobValue.getSize(),
                  blobValue.isFlagSet(IndexValue.Flags.Delete_Index));
            }
          }
          endOffset.set(new Offset(startOffset.getName(), maxEndOffset));
          logger.trace("IndexSegment : {} setting end offset for index {}", indexFile.getAbsolutePath(),
              maxEndOffset);
          long crc = crcStream.getValue();
          if (crc != stream.readLong()) {
            // reset all in-memory structures since the persisted file failed the crc check
            keySize = KEY_SIZE_INVALID_VALUE;
            valueSize = VALUE_SIZE_INVALID_VALUE;
            endOffset.set(startOffset);
            index.clear();
            bloomFilter.clear();
            throw new StoreException("IndexSegment : " + indexFile.getAbsolutePath() + " crc check does not match",
                StoreErrorCodes.Index_Creation_Failure);
          }
          break;
        default:
          throw new StoreException("IndexSegment : " + indexFile.getAbsolutePath() + " invalid version in index file ",
              StoreErrorCodes.Index_Version_Error);
      }
    } catch (IOException e) {
      throw new StoreException("IndexSegment : " + indexFile.getAbsolutePath() + " IO error while reading from file ",
          e, StoreErrorCodes.IOError);
    } finally {
      stream.close();
    }
  }
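  /*
   * A worked example of the journal handling in readFromFile() above (offsets are illustrative): if blob A was PUT
   * at offset 10 and then DELETEd at offset 150 within this same segment, the surviving index entry is the DELETE,
   * whose originalMessageOffset is 10. On recovery, both offset 10 (the original message) and offset 150 (the entry
   * itself) are added to the journal, so the journal still covers the span occupied by the overwritten PUT.
   */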
  /**
   * Gets all the entries up to maxEntries from the start of a given key (exclusive), or all entries if the key is
   * null, until maxTotalSizeOfEntriesInBytes is reached.
   * @param key The key from which to start retrieving entries. If the key is null, retrieval starts from the
   *            beginning of the segment.
   * @param findEntriesCondition The condition that determines when to stop fetching entries.
   * @param entries The input entries list that needs to be filled. The entries list can have existing entries.
   * @param currentTotalSizeOfEntriesInBytes The current total size in bytes of the entries
   * @return true if any entries were added.
   * @throws IOException
   */
  boolean getEntriesSince(StoreKey key, FindEntriesCondition findEntriesCondition, List<MessageInfo> entries,
      AtomicLong currentTotalSizeOfEntriesInBytes) throws IOException {
    List<IndexEntry> indexEntries = new ArrayList<>();
    boolean isNewEntriesAdded =
        getIndexEntriesSince(key, findEntriesCondition, indexEntries, currentTotalSizeOfEntriesInBytes);
    for (IndexEntry indexEntry : indexEntries) {
      IndexValue value = indexEntry.getValue();
      MessageInfo info = new MessageInfo(indexEntry.getKey(), value.getSize(),
          value.isFlagSet(IndexValue.Flags.Delete_Index), value.getExpiresAtMs());
      entries.add(info);
    }
    return isNewEntriesAdded;
  }
  /**
   * Gets all the index entries up to maxEntries from the start of a given key (exclusive), or all entries if the key
   * is null, until maxTotalSizeOfEntriesInBytes is reached.
   * @param key The key from which to start retrieving entries. If the key is null, retrieval starts from the
   *            beginning of the segment.
   * @param findEntriesCondition The condition that determines when to stop fetching entries.
   * @param entries The input entries list that needs to be filled. The entries list can have existing entries.
   * @param currentTotalSizeOfEntriesInBytes The current total size in bytes of the entries
   * @return true if any entries were added.
   * @throws IOException
   */
  boolean getIndexEntriesSince(StoreKey key, FindEntriesCondition findEntriesCondition, List<IndexEntry> entries,
      AtomicLong currentTotalSizeOfEntriesInBytes) throws IOException {
    if (!findEntriesCondition.proceed(currentTotalSizeOfEntriesInBytes.get(), getLastModifiedTimeSecs())) {
      return false;
    }
    int entriesSizeAtStart = entries.size();
    if (mapped.get()) {
      int index = 0;
      if (key != null) {
        index = findIndex(key, mmap.duplicate());
      }
      if (index != -1) {
        ByteBuffer readBuf = mmap.duplicate();
        int totalEntries = numberOfEntries(readBuf);
        while (findEntriesCondition.proceed(currentTotalSizeOfEntriesInBytes.get(), getLastModifiedTimeSecs())
            && index < totalEntries) {
          StoreKey newKey = getKeyAt(readBuf, index);
          byte[] buf = new byte[valueSize];
          readBuf.get(buf);
          // we include the key in the final list if it is not the initial key or if the initial key was null
          if (key == null || newKey.compareTo(key) != 0) {
            IndexValue newValue = new IndexValue(startOffset.getName(), ByteBuffer.wrap(buf), getVersion());
            entries.add(new IndexEntry(newKey, newValue));
            currentTotalSizeOfEntriesInBytes.addAndGet(newValue.getSize());
          }
          index++;
        }
      } else {
        logger.error("IndexSegment : " + indexFile.getAbsolutePath() + " index not found for key " + key);
      }
    } else if (key == null || index.containsKey(key)) {
      ConcurrentNavigableMap<StoreKey, IndexValue> tempMap = index;
      if (key != null) {
        tempMap = index.tailMap(key, true);
      }
      for (Map.Entry<StoreKey, IndexValue> entry : tempMap.entrySet()) {
        if (key == null || entry.getKey().compareTo(key) != 0) {
          IndexValue newValue = new IndexValue(startOffset.getName(), entry.getValue().getBytes(), getVersion());
          entries.add(new IndexEntry(entry.getKey(), newValue));
          currentTotalSizeOfEntriesInBytes.addAndGet(entry.getValue().getSize());
          if (!findEntriesCondition.proceed(currentTotalSizeOfEntriesInBytes.get(), getLastModifiedTimeSecs())) {
            break;
          }
        }
      }
    } else {
      logger.error("IndexSegment : " + indexFile.getAbsolutePath() + " key not found: " + key);
    }
    return entries.size() > entriesSizeAtStart;
  }
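  /*
   * Illustrative paging through a segment with the two methods above. This is a rough sketch: "segment",
   * "lastSeenKey" and "findEntriesCondition" are hypothetical, and it assumes a FindEntriesCondition that stops
   * once some byte budget is exhausted, which is how callers such as PersistentIndex typically use it.
   *
   *   List<MessageInfo> entries = new ArrayList<>();
   *   AtomicLong sizeSoFar = new AtomicLong(0);
   *   // fetch everything after lastSeenKey (exclusive) until the condition says stop
   *   boolean added = segment.getEntriesSince(lastSeenKey, findEntriesCondition, entries, sizeSoFar);
   */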
  /**
   * @return the prefix for the index segment file name (also used for the bloom filter file name).
   */
  private String generateIndexSegmentFilenamePrefix() {
    String logSegmentName = startOffset.getName();
    StringBuilder filenamePrefix = new StringBuilder(logSegmentName);
    if (!logSegmentName.isEmpty()) {
      filenamePrefix.append(BlobStore.SEPARATOR);
    }
    return filenamePrefix.append(startOffset.getOffset()).append(BlobStore.SEPARATOR).toString();
  }

  /**
   * Gets the start {@link Offset} in the {@link Log} that the index with file name {@code filename} represents.
   * @param filename the name of the index file.
   * @return the start {@link Offset} in the {@link Log} that the index with file name {@code filename} represents.
   */
  static Offset getIndexSegmentStartOffset(String filename) {
    // File name pattern for an index is {logSegmentName}_{offset}_index. If the log segment name is empty, then the
    // file name pattern is {offset}_index. For example, "0_1_42_index" refers to offset 42 of log segment "0_1",
    // while "42_index" refers to offset 42 of a log with a single, unnamed segment.
    String logSegmentName;
    String startOffsetValue;
    int firstSepIdx = filename.indexOf(BlobStore.SEPARATOR);
    int lastSepIdx = filename.lastIndexOf(BlobStore.SEPARATOR);
    if (firstSepIdx == lastSepIdx) {
      // pattern is offset_index.
      logSegmentName = "";
      startOffsetValue = filename.substring(0, firstSepIdx);
    } else {
      // pattern is logSegmentName_offset_index.
      int lastButOneSepIdx = filename.substring(0, lastSepIdx).lastIndexOf(BlobStore.SEPARATOR);
      logSegmentName = filename.substring(0, lastButOneSepIdx);
      startOffsetValue = filename.substring(lastButOneSepIdx + 1, lastSepIdx);
    }
    return new Offset(logSegmentName, Long.parseLong(startOffsetValue));
  }
}