/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.startree;

import com.linkedin.pinot.common.utils.Pairs.IntPair;
import com.linkedin.pinot.core.segment.creator.impl.V1Constants;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.Swapper;
import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.IntComparator;
import java.io.File;
import java.io.IOException;
import java.nio.ByteOrder;
import java.util.Iterator;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import xerial.larray.buffer.LBuffer;
import xerial.larray.buffer.LBufferAPI;
import xerial.larray.mmap.MMapBuffer;
import xerial.larray.mmap.MMapMode;


/**
 * The StarTreeDataTable should be able to handle a memory range greater than 2GB.
 * As a result, all fields related to memory positions are declared as long to avoid int overflow.
 */
public class StarTreeDataTable {
  private static final Logger LOGGER = LoggerFactory.getLogger(StarTreeDataTable.class);
  private static final Int2ObjectMap<IntPair> EMPTY_INT_OBJECT_MAP = new Int2ObjectLinkedOpenHashMap<>();
  private static final ByteOrder NATIVE_BYTE_ORDER = ByteOrder.nativeOrder();

  private final File file;
  private final int dimensionSizeInBytes;
  private final int metricSizeInBytes;
  private final int totalSizeInBytes;
  private final int[] sortOrder;

  // Re-usable buffers: two sized to the dimension columns (for comparisons), and two sized to a
  // whole record (for swapping records in place).
  private final LBuffer dimLbuf1;
  private final LBuffer dimLbuf2;
  private final LBufferAPI dimMetLbuf1;
  private final LBufferAPI dimMetLbuf2;

  public StarTreeDataTable(File file, int dimensionSizeInBytes, int metricSizeInBytes, int[] sortOrder) {
    this.file = file;
    this.dimensionSizeInBytes = dimensionSizeInBytes;
    this.metricSizeInBytes = metricSizeInBytes;
    this.sortOrder = sortOrder;
    this.totalSizeInBytes = dimensionSizeInBytes + metricSizeInBytes;

    dimLbuf1 = new LBuffer(dimensionSizeInBytes);
    dimLbuf2 = new LBuffer(dimensionSizeInBytes);
    dimMetLbuf1 = new LBuffer(totalSizeInBytes);
    dimMetLbuf2 = new LBuffer(totalSizeInBytes);
  }

  /**
   * Sorts the records in the given range as per the dimension sort order.
   *
   * @param startRecordId Start record id (inclusive)
   * @param endRecordId End record id (exclusive)
   */
  public void sort(int startRecordId, int endRecordId) {
    try {
      int numRecords = endRecordId - startRecordId;
      final long startOffset = startRecordId * (long) totalSizeInBytes;
      final MMapBuffer mappedByteBuffer =
          new MMapBuffer(file, startOffset, numRecords * (long) totalSizeInBytes, MMapMode.READ_WRITE);

      // Sort the docIds without actually moving the docs themselves.
      final int[] sortedDocIds = getSortedDocIds(mappedByteBuffer, totalSizeInBytes, dimensionSizeInBytes, numRecords);

      // Re-arrange the docs as per the sorted docId order.
      sortMmapBuffer(mappedByteBuffer, totalSizeInBytes, numRecords, sortedDocIds);
    } catch (IOException e) {
      LOGGER.error("Exception caught while sorting records", e);
    }
  }

  /**
   * Helper method that returns an array of docIds sorted as per the dimension sort order.
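   *
   * <p>Illustrative example, with hypothetical dictionary ids (not taken from any real data):
   * <pre>{@code
   * // sortOrder = {1, 0}: dimension 1 is the primary sort key, dimension 0 the secondary.
   * // Record A dimension ids: [7, 3]; record B dimension ids: [9, 3]
   * // Dimension 1: 3 == 3 -> tie, fall through to the next dimension in sortOrder
   * // Dimension 0: 7 <  9 -> record A sorts before record B
   * }</pre>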
   *
   * @param mappedByteBuffer Mmap buffer containing the docs to sort
   * @param recordSizeInBytes Size of one record in bytes
   * @param dimensionSizeInBytes Size of the dimension columns in bytes
   * @param numRecords Number of records
   * @return DocId array in sorted order
   */
  private int[] getSortedDocIds(final MMapBuffer mappedByteBuffer, final long recordSizeInBytes,
      final long dimensionSizeInBytes, int numRecords) {
    final int[] ids = new int[numRecords];
    for (int i = 0; i < ids.length; i++) {
      ids[i] = i;
    }

    IntComparator comparator = new IntComparator() {
      @Override
      public int compare(int i1, int i2) {
        long pos1 = ids[i1] * recordSizeInBytes;
        long pos2 = ids[i2] * recordSizeInBytes;

        // Copy only the dimension part of each record into the re-usable compare buffers.
        mappedByteBuffer.copyTo(pos1, dimLbuf1, 0, dimensionSizeInBytes);
        mappedByteBuffer.copyTo(pos2, dimLbuf2, 0, dimensionSizeInBytes);

        for (int dimIndex : sortOrder) {
          int v1 = flipEndiannessIfNeeded(dimLbuf1.getInt(dimIndex * V1Constants.Numbers.INTEGER_SIZE));
          int v2 = flipEndiannessIfNeeded(dimLbuf2.getInt(dimIndex * V1Constants.Numbers.INTEGER_SIZE));
          if (v1 != v2) {
            // Dictionary ids are non-negative, so the subtraction cannot overflow.
            return v1 - v2;
          }
        }
        return 0;
      }

      @Override
      public int compare(Integer o1, Integer o2) {
        return compare(o1.intValue(), o2.intValue());
      }
    };

    Swapper swapper = new Swapper() {
      @Override
      public void swap(int i, int j) {
        int tmp = ids[i];
        ids[i] = ids[j];
        ids[j] = tmp;
      }
    };

    Arrays.quickSort(0, numRecords, comparator, swapper);
    return ids;
  }

  /**
   * Helper method to re-arrange the given Mmap buffer as per the sorted docId order.
   *
   * @param mappedByteBuffer Mmap buffer to re-arrange
   * @param recordSizeInBytes Size of one record in bytes
   * @param numRecords Total number of records
   * @param sortedDocIds Sorted docId array
   * @throws IOException
   */
  private void sortMmapBuffer(MMapBuffer mappedByteBuffer, long recordSizeInBytes, int numRecords, int[] sortedDocIds)
      throws IOException {
    // Track, for each record id, its current position in the buffer (currentPositions) and, for
    // each position, the record currently occupying it (indexToRecordIdMapping). This permits an
    // in-place re-arrangement that places the correct record at position i with one swap per step.
    int[] currentPositions = new int[numRecords];
    int[] indexToRecordIdMapping = new int[numRecords];
    for (int i = 0; i < numRecords; i++) {
      currentPositions[i] = i;
      indexToRecordIdMapping[i] = i;
    }

    for (int i = 0; i < numRecords; i++) {
      int thisRecordId = indexToRecordIdMapping[i];
      int thisRecordIdPos = currentPositions[thisRecordId];

      int thatRecordId = sortedDocIds[i];
      int thatRecordIdPos = currentPositions[thatRecordId];

      // Swap the two records through the re-usable buffers.
      long thisOffset = thisRecordIdPos * recordSizeInBytes;
      long thatOffset = thatRecordIdPos * recordSizeInBytes;
      mappedByteBuffer.copyTo(thisOffset, dimMetLbuf1, 0, recordSizeInBytes);
      mappedByteBuffer.copyTo(thatOffset, dimMetLbuf2, 0, recordSizeInBytes);
      dimMetLbuf1.copyTo(0, mappedByteBuffer, thatOffset, recordSizeInBytes);
      dimMetLbuf2.copyTo(0, mappedByteBuffer, thisOffset, recordSizeInBytes);

      // Update both mappings to reflect the swap.
      indexToRecordIdMapping[i] = thatRecordId;
      indexToRecordIdMapping[thatRecordIdPos] = thisRecordId;

      currentPositions[thatRecordId] = i;
      currentPositions[thisRecordId] = thatRecordIdPos;
    }

    mappedByteBuffer.flush();
    mappedByteBuffer.close();
  }

  /**
   * Groups the docs in the given docId range on the value of the given int column (dictionary id).
   * The range is assumed to be already sorted on that column, so each distinct value forms one
   * contiguous run of docs.
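   *
   * <p>Illustrative example, with hypothetical dictionary ids (not taken from any real data):
   * <pre>{@code
   * // Column values for docIds 0..4: [3, 3, 7, 7, 7]
   * // groupByIntColumnCount(0, 5, colIndex) returns {3 -> (0, 2), 7 -> (2, 5)}
   * }</pre>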
   *
   * @param startDocId Start docId (inclusive)
   * @param endDocId End docId (exclusive)
   * @param colIndex Index of the int column to group on
   * @return Start and end docIds for each distinct value, with inclusive start and exclusive end
   */
  public Int2ObjectMap<IntPair> groupByIntColumnCount(int startDocId, int endDocId, Integer colIndex) {
    MMapBuffer mappedByteBuffer = null;
    try {
      int length = endDocId - startDocId;
      Int2ObjectMap<IntPair> rangeMap = new Int2ObjectLinkedOpenHashMap<>();
      final long startOffset = startDocId * (long) totalSizeInBytes;
      mappedByteBuffer = new MMapBuffer(file, startOffset, length * (long) totalSizeInBytes, MMapMode.READ_WRITE);

      int prevValue = -1;
      int prevStart = 0;
      for (int i = 0; i < length; i++) {
        int value = flipEndiannessIfNeeded(
            mappedByteBuffer.getInt((i * (long) totalSizeInBytes) + (colIndex * V1Constants.Numbers.INTEGER_SIZE)));

        // Close out the previous run when the value changes.
        if (prevValue != -1 && prevValue != value) {
          rangeMap.put(prevValue, new IntPair(startDocId + prevStart, startDocId + i));
          prevStart = i;
        }
        prevValue = value;
      }
      rangeMap.put(prevValue, new IntPair(startDocId + prevStart, endDocId));
      return rangeMap;
    } catch (IOException e) {
      LOGGER.error("Exception caught while grouping records", e);
    } finally {
      if (mappedByteBuffer != null) {
        try {
          mappedByteBuffer.close();
        } catch (IOException e) {
          LOGGER.error("Exception caught while closing Mmap buffer", e);
        }
      }
    }
    return EMPTY_INT_OBJECT_MAP;
  }

  /**
   * Returns an iterator over the (dimension bytes, metric bytes) pairs for the docs in the given
   * docId range. The underlying Mmap buffer is closed when the iterator is exhausted.
   *
   * @param startDocId Start docId (inclusive)
   * @param endDocId End docId (exclusive)
   * @throws IOException
   */
  public Iterator<Pair<byte[], byte[]>> iterator(int startDocId, int endDocId) throws IOException {
    final int length = endDocId - startDocId;
    final long startOffset = startDocId * (long) totalSizeInBytes;
    final MMapBuffer mappedByteBuffer =
        new MMapBuffer(file, startOffset, length * (long) totalSizeInBytes, MMapMode.READ_WRITE);

    return new Iterator<Pair<byte[], byte[]>>() {
      int pointer = 0;

      @Override
      public boolean hasNext() {
        return pointer < length;
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }

      @Override
      public Pair<byte[], byte[]> next() {
        byte[] dimBuff = new byte[dimensionSizeInBytes];
        byte[] metBuff = new byte[metricSizeInBytes];

        mappedByteBuffer.toDirectByteBuffer(pointer * (long) totalSizeInBytes, dimensionSizeInBytes).get(dimBuff);
        if (metricSizeInBytes > 0) {
          mappedByteBuffer.toDirectByteBuffer(pointer * (long) totalSizeInBytes + dimensionSizeInBytes,
              metricSizeInBytes).get(metBuff);
        }

        pointer++;
        // Close the Mmap buffer once the last record has been read.
        if (pointer == length) {
          try {
            mappedByteBuffer.close();
          } catch (IOException e) {
            LOGGER.error("Exception caught in record iterator", e);
          }
        }
        return Pair.of(dimBuff, metBuff);
      }
    };
  }

  /**
   * Flips the endianness of an int if needed. This is required when a file was written using
   * FileOutputStream (which is BIG_ENDIAN), but memory-mapped using MMapBuffer, which uses Java
   * Unsafe and can therefore be LITTLE_ENDIAN if the host is LITTLE_ENDIAN.
   *
   * @param value Input integer
   * @return Flipped integer
   */
  protected static int flipEndiannessIfNeeded(int value) {
    if (NATIVE_BYTE_ORDER == ByteOrder.LITTLE_ENDIAN) {
      // Reverse the four bytes of the int, e.g. 0x12345678 -> 0x78563412.
      return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | ((value >> 24) & 0xff);
    } else {
      return value;
    }
  }
}
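// ---------------------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original class; all sizes, paths, and surrounding
// variables are hypothetical). Assumes a record layout of two int dimension columns (8 bytes)
// followed by one long metric column (8 bytes), sorted on dimension 1 first, dimension 0 second:
//
//   File dataFile = new File("/tmp/star-tree-records.buf");  // hypothetical file path
//   int numRecords = 1000;                                   // hypothetical record count
//   int[] sortOrder = {1, 0};
//   StarTreeDataTable dataTable = new StarTreeDataTable(dataFile, 8, 8, sortOrder);
//   dataTable.sort(0, numRecords);  // sort records [0, numRecords) in place on disk
//   Int2ObjectMap<IntPair> ranges = dataTable.groupByIntColumnCount(0, numRecords, 1);
// ---------------------------------------------------------------------------------------------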