SingleFileIndexDirectory.java example

Explorer
pinot-master
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.segment.store;

import com.google.common.base.Preconditions;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.memory.PinotDataBuffer;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


// There are a couple of un-addressed issues right now
//
// thread-safety : methods in this class are not thread safe. External synchronization
// is required. This will be addressed soon
//
// ACID: Various failures can lead to inconsistency. We will rely on retrieving segments
// in case of failures. Some parts of this will improve in future but there will be
// no complete ACID guarantee
//
// TODO/Missing features:
// newBuffer : opening new buffer maps a new buffer separately. User can avoid
// it by making all the write calls followed by reads.
// Remove index: Ability to remove an index (particularly inverted index)
// Abort writes: There is no way to abort discard changes
//
class SingleFileIndexDirectory extends ColumnIndexDirectory {
  private static Logger LOGGER = LoggerFactory.getLogger(SingleFileIndexDirectory.class);

  private static final String DEFAULT_INDEX_FILE_NAME = "columns.psf";
  private static final String INDEX_MAP_FILE = "index_map";
  private static final long MAGIC_MARKER = 0xdeadbeefdeafbeadL;
  private static final int MAGIC_MARKER_SIZE_BYTES = 8;
  private static final String MAP_KEY_SEPARATOR = ".";
  private static final String MAP_KEY_NAME_START_OFFSET = "startOffset";
  private static final String MAP_KEY_NAME_SIZE = "size";

  // Max size of buffer we want to allocate
  // ByteBuffer limits the size to 2GB - (some platform dependent size)
  // This breaks the abstraction with PinotDataBuffer....a workaround for
  // now till PinotDataBuffer can support large buffers again
  private static final int MAX_ALLOCATION_SIZE = 2000 * 1024 * 1024;

  private File indexFile;
  private Map<IndexKey, IndexEntry> columnEntries;
  private List<PinotDataBuffer> allocBuffers;

  public SingleFileIndexDirectory(File segmentDirectory, SegmentMetadataImpl metadata, ReadMode readMode)
      throws IOException, ConfigurationException {
    super(segmentDirectory, metadata, readMode);
    indexFile = new File(segmentDirectory, DEFAULT_INDEX_FILE_NAME);
    if (! indexFile.exists()) {
      indexFile.createNewFile();
    }
    columnEntries = new HashMap<>(metadata.getAllColumns().size());
    allocBuffers = new ArrayList<>();
    load() ;
  }

  @Override
  public PinotDataBuffer getDictionaryBufferFor(String column)
      throws IOException {
    return checkAndGetIndexBuffer(column, ColumnIndexType.DICTIONARY);
  }

  @Override
  public PinotDataBuffer getForwardIndexBufferFor(String column)
      throws IOException {
    return checkAndGetIndexBuffer(column, ColumnIndexType.FORWARD_INDEX);
  }

  @Override
  public PinotDataBuffer getInvertedIndexBufferFor(String column)
      throws IOException {
    return checkAndGetIndexBuffer(column, ColumnIndexType.INVERTED_INDEX);
  }

  @Override
  public boolean hasIndexFor(String column, ColumnIndexType type) {
    IndexKey key = new IndexKey(column, type);
    return columnEntries.containsKey(key);
  }

  @Override
  public PinotDataBuffer newDictionaryBuffer(String column, int sizeBytes)
      throws IOException {
    return allocNewBufferInternal(column, ColumnIndexType.DICTIONARY, sizeBytes, "dictionary.create");
  }

  @Override
  public PinotDataBuffer newForwardIndexBuffer(String column, int sizeBytes)
      throws IOException {
    return allocNewBufferInternal(column, ColumnIndexType.FORWARD_INDEX, sizeBytes, "forward_index.create");
  }

  @Override
  public PinotDataBuffer newInvertedIndexBuffer(String column, int sizeBytes)
      throws IOException {
    return  allocNewBufferInternal(column, ColumnIndexType.INVERTED_INDEX, sizeBytes, "inverted_index.create");
  }

  private PinotDataBuffer checkAndGetIndexBuffer(String column, ColumnIndexType type) {
    IndexKey key = new IndexKey(column, type);
    IndexEntry entry = columnEntries.get(key);
    if (entry == null || entry.buffer == null) {
      throw new RuntimeException("Could not find index for column: " + column + ", type: " + type +
          ", segment: " + segmentDirectory.toString());
    }
    return entry.buffer;
  }

  // This is using extra resources right now which can be changed.
  private PinotDataBuffer allocNewBufferInternal(String column, ColumnIndexType indexType, int size,
      String context)
      throws IOException {

    IndexKey key = new IndexKey(column, indexType);
    checkKeyNotPresent(key);

    String allocContext = allocationContext(key) + context;
    IndexEntry entry = new IndexEntry(key);
    entry.startOffset = indexFile.length();
    entry.size = size + MAGIC_MARKER_SIZE_BYTES;

    // read-mode is always mmap so that buffer changes are synced
    // to the file
    PinotDataBuffer appendBuffer = PinotDataBuffer.fromFile(indexFile,
        entry.startOffset,
        entry.size,
        ReadMode.mmap,
        FileChannel.MapMode.READ_WRITE,
        allocContext);

    LOGGER.debug("Allotted buffer for key: {}, startOffset: {}, size: {}", key, entry.startOffset, entry.size);
    appendBuffer.putLong(0, MAGIC_MARKER);
    allocBuffers.add(appendBuffer);

    entry.buffer = appendBuffer.view(0 + MAGIC_MARKER_SIZE_BYTES, entry.size);
    columnEntries.put(key, entry);

    persistIndexMap(entry);

    return entry.buffer.duplicate();
  }

  private void checkKeyNotPresent(IndexKey key) {
    if (columnEntries.containsKey(key)) {
      throw new RuntimeException("Attempt to re-create an existing index for key: " + key.toString()
          + ", for segmentDirectory: " + segmentDirectory.getAbsolutePath());
    }
  }

  private void validateMagicMarker(PinotDataBuffer buffer, int startOffset) {
    long actualMarkerValue = buffer.getLong(startOffset);
    if (actualMarkerValue != MAGIC_MARKER) {
      LOGGER.error("Missing magic marker in index file: {} at position: {}",
          indexFile, startOffset);
      throw new RuntimeException("Inconsistent data read. Index data file " +
          indexFile.toString() + " is possibly corrupted");
    }
  }

  private void load()
      throws IOException, ConfigurationException {
    loadMap();
    mapBufferEntries();
  }

  private void loadMap()
      throws ConfigurationException {
    File mapFile = new File(segmentDirectory, INDEX_MAP_FILE);

    PropertiesConfiguration mapConfig = new PropertiesConfiguration(mapFile);
    Iterator keys = mapConfig.getKeys();
    while (keys.hasNext()) {
      String key = (String) keys.next();
      // column names can have '.' in it hence scan from backwards
      // parsing names like "column.name.dictionary.startOffset"
      // or, "column.name.dictionary.endOffset" where column.name is the key
      int lastSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR);
      Preconditions.checkState(lastSeparatorPos != -1, "Key separator not found: " + key +
          ", segment: " + segmentDirectory);
      String propertyName = key.substring(lastSeparatorPos + 1);

      int indexSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR, lastSeparatorPos-1);
      Preconditions.checkState(indexSeparatorPos != -1, "Index separator not found: " + key +
          " , segment: " + segmentDirectory);
      String indexName = key.substring(indexSeparatorPos + 1, lastSeparatorPos);
      String columnName = key.substring(0, indexSeparatorPos);
      IndexKey indexKey = new IndexKey(columnName, ColumnIndexType.getValue(indexName));
      IndexEntry entry = columnEntries.get(indexKey);
      if (entry == null) {
        entry = new IndexEntry(indexKey);
        columnEntries.put(indexKey, entry);
      }

      if (propertyName.equals(MAP_KEY_NAME_START_OFFSET)) {
        entry.startOffset = mapConfig.getLong(key);
      } else if (propertyName.equals(MAP_KEY_NAME_SIZE)) {
        entry.size = mapConfig.getLong(key);
      } else {
        throw new ConfigurationException("Invalid map file key: " + key +
            ", segmentDirectory: " + segmentDirectory.toString());
      }
    }

    // validation
    for (Map.Entry<IndexKey, IndexEntry> colIndexEntry : columnEntries.entrySet()) {
      IndexEntry entry = colIndexEntry.getValue();
      if (entry.size < 0 || entry.startOffset < 0) {
        throw new ConfigurationException("Invalid map entry for key: " + colIndexEntry.getKey().toString() +
            ", segment: " + segmentDirectory.toString());
      }
    }
  }

  private void mapBufferEntries()
      throws IOException {
    SortedMap<Long, IndexEntry> indexStartMap = new TreeMap<>();

    for (Map.Entry<IndexKey, IndexEntry> columnEntry : columnEntries.entrySet()) {
      long startOffset = columnEntry.getValue().startOffset;
      indexStartMap.put(startOffset, columnEntry.getValue());
    }

    long runningSize = 0;
    List<Long> offsetAccum = new ArrayList<>();
    for (Map.Entry<Long, IndexEntry> offsetEntry : indexStartMap.entrySet()) {
      IndexEntry entry = offsetEntry.getValue();
      runningSize += entry.size;

      if ( runningSize >= MAX_ALLOCATION_SIZE) {
        mapAndSliceFile(indexStartMap, offsetAccum, offsetEntry.getKey());
        runningSize = entry.size;
        offsetAccum.clear();
      }
      offsetAccum.add(offsetEntry.getKey());
    }

    if (offsetAccum.size() > 0) {
      mapAndSliceFile(indexStartMap, offsetAccum, offsetAccum.get(0) + runningSize);
    }
  }

  private void mapAndSliceFile(SortedMap<Long, IndexEntry> startOffsets, List<Long> offsetAccum, long endOffset)
      throws IOException {
    Preconditions.checkNotNull(startOffsets);
    Preconditions.checkNotNull(offsetAccum);
    Preconditions.checkArgument(offsetAccum.size() >= 1);

    long fromFilePos = offsetAccum.get(0);
    long toFilePos = endOffset - fromFilePos;

    String context = allocationContext(indexFile, "single_file_index.rw." +
        "." + String.valueOf(fromFilePos) + "." + String.valueOf(toFilePos));

    PinotDataBuffer buffer = PinotDataBuffer.fromFile(indexFile, fromFilePos, toFilePos, readMode,
        FileChannel.MapMode.READ_WRITE, context);
    allocBuffers.add(buffer);

    int prevSlicePoint = 0;
    for (Long fileOffset : offsetAccum) {
      IndexEntry entry = startOffsets.get(fileOffset);
      int endSlicePoint = prevSlicePoint + (int) entry.size;
      validateMagicMarker(buffer, prevSlicePoint);
      PinotDataBuffer viewBuffer = buffer.view(prevSlicePoint + MAGIC_MARKER_SIZE_BYTES, endSlicePoint);
      entry.buffer = viewBuffer;
      prevSlicePoint = endSlicePoint;
    }
  }

  private void persistIndexMap(IndexEntry entry)
      throws IOException {
    File mapFile = new File(segmentDirectory, INDEX_MAP_FILE);
    try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(mapFile, true)))) {
      String startKey = getKey(entry.key.name, entry.key.type.getIndexName(), true);

      StringBuilder sb = new StringBuilder();
      sb.append(startKey).append(" = ").append(entry.startOffset);
      writer.println(sb.toString());

      String endKey = getKey(entry.key.name, entry.key.type.getIndexName(), false);
      sb = new StringBuilder();
      sb.append(endKey).append(" = ").append(entry.size);
      writer.println(sb.toString());
    }
  }

  private String getKey(String column, String indexName, boolean isStartOffset) {
    return column + MAP_KEY_SEPARATOR + indexName + MAP_KEY_SEPARATOR + (isStartOffset ? "startOffset" : "size");
  }

  private String allocationContext(IndexKey key) {
    return this.getClass().getSimpleName() + key.toString();
  }

  @Override
  public void close() {
    for (PinotDataBuffer buf : allocBuffers) {
      buf.close();
    }
    columnEntries.clear();
    allocBuffers.clear();
  }

  @Override
  public void removeIndex(String columnName, ColumnIndexType indexType) {
    throw new UnsupportedOperationException("Index removal is not supported for single file index format. Requested colum: "
        + columnName + " indexType: " + indexType);
  }

  @Override
  public boolean isIndexRemovalSupported() {
    return false;
  }

  @Override
  public String toString(){
    return segmentDirectory.toString() + "/" + indexFile.toString();
  }
}