/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.startree;

import com.linkedin.pinot.common.utils.Pairs.IntPair;
import com.linkedin.pinot.core.segment.creator.impl.V1Constants;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.Swapper;
import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.IntComparator;
import java.io.File;
import java.io.IOException;
import java.nio.ByteOrder;
import java.util.Iterator;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import xerial.larray.buffer.LBuffer;
import xerial.larray.buffer.LBufferAPI;
import xerial.larray.mmap.MMapBuffer;
import xerial.larray.mmap.MMapMode;


/**
 * The StarTreeDataTable should be able to handle a memory range greater than 2GB.
 * As a result, all fields related to memory positions are declared as long to avoid int overflow.
 */
public class StarTreeDataTable {
  private static final Logger LOGGER = LoggerFactory.getLogger(StarTreeDataTable.class);
  private static final Int2ObjectMap<IntPair> EMPTY_INT_OBJECT_MAP = new Int2ObjectLinkedOpenHashMap<>();
  private static final ByteOrder NATIVE_BYTE_ORDER = ByteOrder.nativeOrder();

  private final File file;
  private final int dimensionSizeInBytes;
  private final int metricSizeInBytes;
  private final int totalSizeInBytes;
  private final int[] sortOrder;

  // Re-usable buffers: two sized to the dimension columns (for comparisons), and two sized to a
  // whole record (for swapping records in place).
  private final LBuffer dimLbuf1;
  private final LBuffer dimLbuf2;
  private final LBufferAPI dimMetLbuf1;
  private final LBufferAPI dimMetLbuf2;

  public StarTreeDataTable(File file, int dimensionSizeInBytes, int metricSizeInBytes, int[] sortOrder) {
    this.file = file;
    this.dimensionSizeInBytes = dimensionSizeInBytes;
    this.metricSizeInBytes = metricSizeInBytes;
    this.sortOrder = sortOrder;
    this.totalSizeInBytes = dimensionSizeInBytes + metricSizeInBytes;

    dimLbuf1 = new LBuffer(dimensionSizeInBytes);
    dimLbuf2 = new LBuffer(dimensionSizeInBytes);
    dimMetLbuf1 = new LBuffer(totalSizeInBytes);
    dimMetLbuf2 = new LBuffer(totalSizeInBytes);
  }

  /**
   * Sorts the records in the given range as per the dimension sort order.
   *
   * @param startRecordId Start record id (inclusive)
   * @param endRecordId End record id (exclusive)
   */
  public void sort(int startRecordId, int endRecordId) {
    try {
      int numRecords = endRecordId - startRecordId;
      final long startOffset = startRecordId * (long) totalSizeInBytes;
      final MMapBuffer mappedByteBuffer =
          new MMapBuffer(file, startOffset, numRecords * (long) totalSizeInBytes, MMapMode.READ_WRITE);

      // Sort the docIds without actually moving the docs themselves.
      final int[] sortedDocIds = getSortedDocIds(mappedByteBuffer, totalSizeInBytes, dimensionSizeInBytes, numRecords);

      // Re-arrange the docs as per the sorted docId order.
      sortMmapBuffer(mappedByteBuffer, totalSizeInBytes, numRecords, sortedDocIds);
    } catch (IOException e) {
      LOGGER.error("Exception caught while sorting records", e);
    }
  }

  /**
   * Helper method that returns an array of docIds sorted as per the dimension sort order.
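   *
   * <p>Illustrative example, with hypothetical dictionary ids (not taken from any real data):
   * <pre>{@code
   * // sortOrder = {1, 0}: dimension 1 is the primary sort key, dimension 0 the secondary.
   * // Record A dimension ids: [7, 3]; record B dimension ids: [9, 3]
   * // Dimension 1: 3 == 3 -> tie, fall through to the next dimension in sortOrder
   * // Dimension 0: 7 <  9 -> record A sorts before record B
   * }</pre>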
   *
   * @param mappedByteBuffer Mmap buffer containing the docs to sort
   * @param recordSizeInBytes Size of one record in bytes
   * @param dimensionSizeInBytes Size of the dimension columns in bytes
   * @param numRecords Number of records
   * @return DocId array in sorted order
   */
  private int[] getSortedDocIds(final MMapBuffer mappedByteBuffer, final long recordSizeInBytes,
      final long dimensionSizeInBytes, int numRecords) {
    final int[] ids = new int[numRecords];
    for (int i = 0; i < ids.length; i++) {
      ids[i] = i;
    }

    IntComparator comparator = new IntComparator() {
      @Override
      public int compare(int i1, int i2) {
        long pos1 = ids[i1] * recordSizeInBytes;
        long pos2 = ids[i2] * recordSizeInBytes;

        // Copy only the dimension part of each record into the re-usable compare buffers.
        mappedByteBuffer.copyTo(pos1, dimLbuf1, 0, dimensionSizeInBytes);
        mappedByteBuffer.copyTo(pos2, dimLbuf2, 0, dimensionSizeInBytes);

        for (int dimIndex : sortOrder) {
          int v1 = flipEndiannessIfNeeded(dimLbuf1.getInt(dimIndex * V1Constants.Numbers.INTEGER_SIZE));
          int v2 = flipEndiannessIfNeeded(dimLbuf2.getInt(dimIndex * V1Constants.Numbers.INTEGER_SIZE));
          if (v1 != v2) {
            // Dictionary ids are non-negative, so the subtraction cannot overflow.
            return v1 - v2;
          }
        }
        return 0;
      }

      @Override
      public int compare(Integer o1, Integer o2) {
        return compare(o1.intValue(), o2.intValue());
      }
    };

    Swapper swapper = new Swapper() {
      @Override
      public void swap(int i, int j) {
        int tmp = ids[i];
        ids[i] = ids[j];
        ids[j] = tmp;
      }
    };

    Arrays.quickSort(0, numRecords, comparator, swapper);
    return ids;
  }

  /**
   * Helper method to re-arrange the given Mmap buffer as per the sorted docId order.
   *
   * @param mappedByteBuffer Mmap buffer to re-arrange
   * @param recordSizeInBytes Size of one record in bytes
   * @param numRecords Total number of records
   * @param sortedDocIds Sorted docId array
   * @throws IOException
   */
  private void sortMmapBuffer(MMapBuffer mappedByteBuffer, long recordSizeInBytes, int numRecords, int[] sortedDocIds)
      throws IOException {
    // Track, for each record id, its current position in the buffer (currentPositions) and, for
    // each position, the record currently occupying it (indexToRecordIdMapping). This permits an
    // in-place re-arrangement that places the correct record at position i with one swap per step.
    int[] currentPositions = new int[numRecords];
    int[] indexToRecordIdMapping = new int[numRecords];
    for (int i = 0; i < numRecords; i++) {
      currentPositions[i] = i;
      indexToRecordIdMapping[i] = i;
    }

    for (int i = 0; i < numRecords; i++) {
      int thisRecordId = indexToRecordIdMapping[i];
      int thisRecordIdPos = currentPositions[thisRecordId];

      int thatRecordId = sortedDocIds[i];
      int thatRecordIdPos = currentPositions[thatRecordId];

      // Swap the two records through the re-usable buffers.
      long thisOffset = thisRecordIdPos * recordSizeInBytes;
      long thatOffset = thatRecordIdPos * recordSizeInBytes;
      mappedByteBuffer.copyTo(thisOffset, dimMetLbuf1, 0, recordSizeInBytes);
      mappedByteBuffer.copyTo(thatOffset, dimMetLbuf2, 0, recordSizeInBytes);
      dimMetLbuf1.copyTo(0, mappedByteBuffer, thatOffset, recordSizeInBytes);
      dimMetLbuf2.copyTo(0, mappedByteBuffer, thisOffset, recordSizeInBytes);

      // Update both mappings to reflect the swap.
      indexToRecordIdMapping[i] = thatRecordId;
      indexToRecordIdMapping[thatRecordIdPos] = thisRecordId;

      currentPositions[thatRecordId] = i;
      currentPositions[thisRecordId] = thatRecordIdPos;
    }

    mappedByteBuffer.flush();
    mappedByteBuffer.close();
  }

  /**
   * Groups the docs in the given docId range on the value of the given int column (dictionary id).
   * The range is assumed to be already sorted on that column, so each distinct value forms one
   * contiguous run of docs.
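   *
   * <p>Illustrative example, with hypothetical dictionary ids (not taken from any real data):
   * <pre>{@code
   * // Column values for docIds 0..4: [3, 3, 7, 7, 7]
   * // groupByIntColumnCount(0, 5, colIndex) returns {3 -> (0, 2), 7 -> (2, 5)}
   * }</pre>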
   *
   * @param startDocId Start docId (inclusive)
   * @param endDocId End docId (exclusive)
   * @param colIndex Index of the int column to group on
   * @return Start and end docIds for each distinct value, with inclusive start and exclusive end
   */
  public Int2ObjectMap<IntPair> groupByIntColumnCount(int startDocId, int endDocId, Integer colIndex) {
    MMapBuffer mappedByteBuffer = null;
    try {
      int length = endDocId - startDocId;
      Int2ObjectMap<IntPair> rangeMap = new Int2ObjectLinkedOpenHashMap<>();
      final long startOffset = startDocId * (long) totalSizeInBytes;
      mappedByteBuffer = new MMapBuffer(file, startOffset, length * (long) totalSizeInBytes, MMapMode.READ_WRITE);

      int prevValue = -1;
      int prevStart = 0;
      for (int i = 0; i < length; i++) {
        int value = flipEndiannessIfNeeded(
            mappedByteBuffer.getInt((i * (long) totalSizeInBytes) + (colIndex * V1Constants.Numbers.INTEGER_SIZE)));

        // Close out the previous run when the value changes.
        if (prevValue != -1 && prevValue != value) {
          rangeMap.put(prevValue, new IntPair(startDocId + prevStart, startDocId + i));
          prevStart = i;
        }
        prevValue = value;
      }
      rangeMap.put(prevValue, new IntPair(startDocId + prevStart, endDocId));
      return rangeMap;
    } catch (IOException e) {
      LOGGER.error("Exception caught while grouping records", e);
    } finally {
      if (mappedByteBuffer != null) {
        try {
          mappedByteBuffer.close();
        } catch (IOException e) {
          LOGGER.error("Exception caught while closing Mmap buffer", e);
        }
      }
    }
    return EMPTY_INT_OBJECT_MAP;
  }

  /**
   * Returns an iterator over the (dimension bytes, metric bytes) pairs for the docs in the given
   * docId range. The underlying Mmap buffer is closed when the iterator is exhausted.
   *
   * @param startDocId Start docId (inclusive)
   * @param endDocId End docId (exclusive)
   * @throws IOException
   */
  public Iterator<Pair<byte[], byte[]>> iterator(int startDocId, int endDocId) throws IOException {
    final int length = endDocId - startDocId;
    final long startOffset = startDocId * (long) totalSizeInBytes;
    final MMapBuffer mappedByteBuffer =
        new MMapBuffer(file, startOffset, length * (long) totalSizeInBytes, MMapMode.READ_WRITE);

    return new Iterator<Pair<byte[], byte[]>>() {
      int pointer = 0;

      @Override
      public boolean hasNext() {
        return pointer < length;
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }

      @Override
      public Pair<byte[], byte[]> next() {
        byte[] dimBuff = new byte[dimensionSizeInBytes];
        byte[] metBuff = new byte[metricSizeInBytes];

        mappedByteBuffer.toDirectByteBuffer(pointer * (long) totalSizeInBytes, dimensionSizeInBytes).get(dimBuff);
        if (metricSizeInBytes > 0) {
          mappedByteBuffer.toDirectByteBuffer(pointer * (long) totalSizeInBytes + dimensionSizeInBytes,
              metricSizeInBytes).get(metBuff);
        }

        pointer++;
        // Close the Mmap buffer once the last record has been read.
        if (pointer == length) {
          try {
            mappedByteBuffer.close();
          } catch (IOException e) {
            LOGGER.error("Exception caught in record iterator", e);
          }
        }
        return Pair.of(dimBuff, metBuff);
      }
    };
  }

  /**
   * Flips the endianness of an int if needed. This is required when a file was written using
   * FileOutputStream (which is BIG_ENDIAN), but memory-mapped using MMapBuffer, which uses Java
   * Unsafe and can therefore be LITTLE_ENDIAN if the host is LITTLE_ENDIAN.
   *
   * @param value Input integer
   * @return Flipped integer
   */
  protected static int flipEndiannessIfNeeded(int value) {
    if (NATIVE_BYTE_ORDER == ByteOrder.LITTLE_ENDIAN) {
      // Reverse the four bytes of the int, e.g. 0x12345678 -> 0x78563412.
      return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | ((value >> 24) & 0xff);
    } else {
      return value;
    }
  }
}
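// ---------------------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original class; all sizes, paths, and surrounding
// variables are hypothetical). Assumes a record layout of two int dimension columns (8 bytes)
// followed by one long metric column (8 bytes), sorted on dimension 1 first, dimension 0 second:
//
//   File dataFile = new File("/tmp/star-tree-records.buf");  // hypothetical file path
//   int numRecords = 1000;                                   // hypothetical record count
//   int[] sortOrder = {1, 0};
//   StarTreeDataTable dataTable = new StarTreeDataTable(dataFile, 8, 8, sortOrder);
//   dataTable.sort(0, numRecords);  // sort records [0, numRecords) in place on disk
//   Int2ObjectMap<IntPair> ranges = dataTable.groupByIntColumnCount(0, numRecords, 1);
// ---------------------------------------------------------------------------------------------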