/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.startree; import com.linkedin.pinot.common.utils.Pairs.IntPair; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.IntBuffer; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import xerial.larray.mmap.MMapBuffer; import xerial.larray.mmap.MMapMode; /** * A star tree data table optimized for in-place modifications/sorting * of data. This class allows for the following operations on a fixed size data table: * - Sort all or range of data. * - Group by column count * - Iterate over dimension/metric buffers. */ public class StarTreeDataSorter { Logger LOGGER = LoggerFactory.getLogger(StarTreeDataSorter.class); final MMapBuffer mappedByteBuffer; private int dimensionSizeInBytes; private int metricSizeInBytes; private int totalSizeInBytes; public StarTreeDataSorter(File file, int dimensionSizeInBytes, int metricSizeInBytes) throws IOException { this.dimensionSizeInBytes = dimensionSizeInBytes; this.metricSizeInBytes = metricSizeInBytes; this.totalSizeInBytes = dimensionSizeInBytes + metricSizeInBytes; mappedByteBuffer = new MMapBuffer(file, 0, file.length(), MMapMode.READ_WRITE); } /** * Sort from to given start (inclusive) to end (exclusive) as per the provided sort order. * @param startRecordId inclusive * @param endRecordId exclusive * @param sortOrder */ public void sort(int startRecordId, int endRecordId, final int[] sortOrder) { int length = endRecordId - startRecordId; final int startOffset = startRecordId * totalSizeInBytes; List<Integer> idList = new ArrayList<Integer>(); for (int i = startRecordId; i < endRecordId; i++) { idList.add(i - startRecordId); } Comparator<Integer> comparator = new Comparator<Integer>() { byte[] buf1 = new byte[dimensionSizeInBytes]; byte[] buf2 = new byte[dimensionSizeInBytes]; @Override public int compare(Integer o1, Integer o2) { int pos1 = startOffset + (o1) * totalSizeInBytes; int pos2 = startOffset + (o2) * totalSizeInBytes; mappedByteBuffer.copyTo(pos1, buf1, 0, dimensionSizeInBytes); mappedByteBuffer.copyTo(pos2, buf2, 0, dimensionSizeInBytes); IntBuffer bb1 = ByteBuffer.wrap(buf1).asIntBuffer(); IntBuffer bb2 = ByteBuffer.wrap(buf2).asIntBuffer(); for (int dimIndex : sortOrder) { int v1 = bb1.get(dimIndex); int v2 = bb2.get(dimIndex); if (v1 != v2) { return v1 - v2; } } return 0; } }; Collections.sort(idList, comparator); int[] currentPositions = new int[length]; int[] indexToRecordIdMapping = new int[length]; byte[] buf1 = new byte[totalSizeInBytes]; byte[] buf2 = new byte[totalSizeInBytes]; for (int i = 0; i < length; i++) { currentPositions[i] = i; indexToRecordIdMapping[i] = i; } for (int i = 0; i < length; i++) { int thisRecordId = indexToRecordIdMapping[i]; int thisRecordIdPos = currentPositions[thisRecordId]; int thatRecordId = idList.get(i); int thatRecordIdPos = currentPositions[thatRecordId]; // swap the buffers mappedByteBuffer.copyTo(startOffset + thisRecordIdPos * totalSizeInBytes, buf1, 0, totalSizeInBytes); mappedByteBuffer.copyTo(startOffset + thatRecordIdPos * totalSizeInBytes, buf2, 0, totalSizeInBytes); mappedByteBuffer.readFrom(buf2, 0, startOffset + thisRecordIdPos * totalSizeInBytes, totalSizeInBytes); mappedByteBuffer.readFrom(buf1, 0, startOffset + thatRecordIdPos * totalSizeInBytes, totalSizeInBytes); // update the positions indexToRecordIdMapping[i] = thatRecordId; indexToRecordIdMapping[thatRecordIdPos] = thisRecordId; currentPositions[thatRecordId] = i; currentPositions[thisRecordId] = thatRecordIdPos; } } /** * Close the mappedByteBuffer */ public void close() { if (mappedByteBuffer != null) { mappedByteBuffer.flush(); try { mappedByteBuffer.close(); } catch (IOException e) { LOGGER.error("Exception caught while trying to close byte-buffer", e); } } } /** * Perform group-by based on the 'count' for the given column. * @param startDocId inclusive * @param endDocId exclusive * @param colIndex * @return start, end for each value. inclusive start, exclusive end */ public Map<Integer, IntPair> groupByIntColumnCount(int startDocId, int endDocId, Integer colIndex) { int length = endDocId - startDocId; Map<Integer, IntPair> rangeMap = new LinkedHashMap<>(); final int startOffset = startDocId * totalSizeInBytes; int prevValue = -1; int prevStart = 0; byte[] dimBuff = new byte[dimensionSizeInBytes]; for (int i = 0; i < length; i++) { mappedByteBuffer.copyTo(startOffset + (i * totalSizeInBytes), dimBuff, 0, dimensionSizeInBytes); int value = ByteBuffer.wrap(dimBuff).asIntBuffer().get(colIndex); if (prevValue != -1 && prevValue != value) { rangeMap.put(prevValue, new IntPair(startDocId + prevStart, startDocId + i)); prevStart = i; } prevValue = value; } rangeMap.put(prevValue, new IntPair(startDocId + prevStart, endDocId)); return rangeMap; } /** * Returns iterator over dimension and metric byte buffers. * * @param startDocId * @param endDocId * @return * @throws IOException */ public Iterator<Pair<byte[], byte[]>> iterator(int startDocId, int endDocId) throws IOException { final int length = endDocId - startDocId; final int startOffset = startDocId * totalSizeInBytes; return new Iterator<Pair<byte[], byte[]>>() { int pointer = 0; @Override public boolean hasNext() { return pointer < length; } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Pair<byte[], byte[]> next() { byte[] dimBuff = new byte[dimensionSizeInBytes]; byte[] metBuff = new byte[metricSizeInBytes]; mappedByteBuffer.copyTo(startOffset + (pointer * totalSizeInBytes), dimBuff, 0, dimensionSizeInBytes); if (metricSizeInBytes > 0) { mappedByteBuffer .copyTo(startOffset + (pointer * totalSizeInBytes) + dimensionSizeInBytes, metBuff, 0, metricSizeInBytes); } pointer = pointer + 1; return Pair.of(dimBuff, metBuff); } }; } }