/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.startree;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashBiMap;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.core.segment.creator.impl.V1Constants;
import com.linkedin.pinot.core.segment.store.SegmentDirectoryPaths;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import xerial.larray.buffer.LBuffer;
import xerial.larray.buffer.LBufferAPI;
import xerial.larray.mmap.MMapBuffer;
import xerial.larray.mmap.MMapMode;
/**
 * Utility class for serializing/de-serializing the StarTree data structure.
 *
 * <p>Two file formats are handled: the on-heap format, which is read through Java object
 * de-serialization, and the off-heap format, whose file starts with {@link #MAGIC_MARKER}.</p>
 */
public class StarTreeSerDe {
  private static final Logger LOGGER = LoggerFactory.getLogger(StarTreeSerDe.class);

  /** Marker written at offset 0 of every off-heap star tree file. */
  public static final long MAGIC_MARKER = 0xBADDA55B00DAD00DL;
  public static final int MAGIC_MARKER_SIZE_IN_BYTES = 8;

  private static final String UTF8 = "UTF-8";

  /** Version number stored (as an int) in the off-heap header, right after the magic marker. */
  private static final int OFF_HEAP_FORMAT_VERSION = 1;

  /**
   * De-serializes a StarTree structure.
   *
   * @param inputStream Stream positioned at the beginning of a serialized star tree
   * @return De-serialized star tree
   * @throws IOException If the stream cannot be read
   * @throws ClassNotFoundException If the on-heap payload refers to an unknown class
   * @throws RuntimeException If the stream is in off-heap format (reading off-heap format from
   *         bytes is not supported) or the format is not recognized
   */
  public static StarTreeInterf fromBytes(InputStream inputStream)
      throws IOException, ClassNotFoundException {
    BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
    StarTreeFormatVersion formatVersion = getStarTreeVersion(bufferedInputStream);

    switch (formatVersion) {
      case ON_HEAP:
        return fromBytesToOnHeapFormat(bufferedInputStream);

      case OFF_HEAP:
        return fromBytesToOffHeapFormat(bufferedInputStream);

      default:
        throw new RuntimeException("StarTree version number not recognized: " + formatVersion);
    }
  }

  /**
   * Write the on-heap version of StarTree to the output file.
   *
   * @param starTree Star tree to write, must be in on-heap format
   * @param outputFile File to write to
   * @throws IOException If writing fails
   * @throws IllegalArgumentException If the given star tree is not in on-heap format
   */
  public static void writeTreeOnHeapFormat(StarTreeInterf starTree, File outputFile)
      throws IOException {
    Preconditions.checkArgument(starTree.getVersion() == StarTreeFormatVersion.ON_HEAP,
        "Cannot write on-heap version of star tree from another version");
    starTree.writeTree(outputFile);
  }

  /**
   * Write the off-heap version of StarTree to the output file. An on-heap star tree is converted
   * while writing; any other star tree writes itself.
   *
   * @param starTree Star tree to write
   * @param outputFile File to write to
   * @throws IOException If writing fails
   */
  public static void writeTreeOffHeapFormat(StarTreeInterf starTree, File outputFile)
      throws IOException {
    if (starTree.getVersion() == StarTreeFormatVersion.ON_HEAP) {
      writeTreeOffHeapFromOnHeap((StarTree) starTree, outputFile);
    } else {
      starTree.writeTree(outputFile);
    }
  }

  /**
   * Utility method to determine the StarTree format version.
   * Presence of {@link #MAGIC_MARKER} at the current position of the stream indicates off-heap
   * format, while its absence indicates on-heap format.
   *
   * <p>The stream is reset to the position it had on entry, so the caller can read the complete
   * content afterwards.</p>
   *
   * @param bufferedInputStream Stream positioned at the beginning of a serialized star tree
   * @return {@link StarTreeFormatVersion#OFF_HEAP} if the marker is present,
   *         {@link StarTreeFormatVersion#ON_HEAP} otherwise
   * @throws IOException If the stream cannot be read or reset
   */
  public static StarTreeFormatVersion getStarTreeVersion(BufferedInputStream bufferedInputStream)
      throws IOException {
    byte[] magicBytes = new byte[MAGIC_MARKER_SIZE_IN_BYTES];
    bufferedInputStream.mark(MAGIC_MARKER_SIZE_IN_BYTES);

    // A single read() call is allowed to return fewer bytes than requested, keep reading until
    // the marker is complete or the stream ends.
    int numBytesRead = 0;
    while (numBytesRead < MAGIC_MARKER_SIZE_IN_BYTES) {
      int count =
          bufferedInputStream.read(magicBytes, numBytesRead, MAGIC_MARKER_SIZE_IN_BYTES - numBytesRead);
      if (count < 0) {
        break;
      }
      numBytesRead += count;
    }
    bufferedInputStream.reset();

    // Fewer bytes than the marker size: the marker cannot be present.
    if (numBytesRead < MAGIC_MARKER_SIZE_IN_BYTES) {
      return StarTreeFormatVersion.ON_HEAP;
    }

    // NOTE(review): the bytes are decoded through an LBuffer, presumably so they are interpreted
    // the same way the writer's putLong() laid them out - confirm before switching decoders.
    LBufferAPI lBuffer = new LBuffer(MAGIC_MARKER_SIZE_IN_BYTES);
    try {
      lBuffer.readFrom(magicBytes, 0);
      long magicMarker = lBuffer.getLong(0);
      return (magicMarker == MAGIC_MARKER) ? StarTreeFormatVersion.OFF_HEAP : StarTreeFormatVersion.ON_HEAP;
    } finally {
      // Release the temporary buffer explicitly instead of leaving it allocated.
      lBuffer.release();
    }
  }

  /**
   * Given a star tree file, return its version (on-heap or off-heap).
   * Assumes that the file is a valid star tree file.
   *
   * @param starTreeFile Star tree index file
   * @return Format version of the file
   * @throws IOException If the file cannot be opened or read
   */
  public static StarTreeFormatVersion getStarTreeVersion(File starTreeFile)
      throws IOException {
    // try-with-resources closes the stream even when version detection throws.
    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(starTreeFile))) {
      return getStarTreeVersion(bufferedInputStream);
    }
  }

  /**
   * Given a StarTree in on-heap format, serialize it into OFF_HEAP format and write to the
   * given file.
   *
   * @param starTree Star tree in on-heap format
   * @param outputFile File to write to
   * @throws IOException If the file cannot be mapped, written or closed
   */
  private static void writeTreeOffHeapFromOnHeap(StarTree starTree, File outputFile)
      throws IOException {
    int headerSizeInBytes = computeOffHeapHeaderSizeInBytes(starTree);
    long totalSize = headerSizeInBytes + computeOffHeapNodesSizeInBytes(starTree);

    MMapBuffer mappedByteBuffer = new MMapBuffer(outputFile, 0, totalSize, MMapMode.READ_WRITE);
    try {
      long offset = writeHeaderOffHeap(starTree, headerSizeInBytes, mappedByteBuffer);

      // Ensure that the computed offset is the same as actual offset.
      Preconditions.checkState((offset == headerSizeInBytes), "Error writing Star Tree file, header size mis-match");

      // Write the actual star tree nodes in level order.
      writeNodesOffHeap(starTree, mappedByteBuffer, offset);
      mappedByteBuffer.flush();
    } finally {
      // Always close the mapping, also when writing the header or the nodes fails.
      mappedByteBuffer.close();
    }
  }

  /**
   * Helper method to write the star tree nodes for Star Tree off-heap format.
   * Nodes are written in level order (breadth first), children of a node sorted by dimension
   * value, so that the children of every node occupy a contiguous range of node indices.
   *
   * @param starTree Star tree whose nodes are written
   * @param mappedByteBuffer Buffer to write into
   * @param offset Offset in the buffer at which the first node is written
   */
  private static void writeNodesOffHeap(StarTree starTree, MMapBuffer mappedByteBuffer, long offset) {
    int index = 0;
    Queue<StarTreeIndexNode> queue = new LinkedList<>();
    StarTreeIndexNode root = (StarTreeIndexNode) starTree.getRoot();
    queue.add(root);

    while (!queue.isEmpty()) {
      StarTreeIndexNode node = queue.poll();
      List<StarTreeIndexNode> children = getSortedChildren(node); // Returns empty list instead of null.
      int numChildren = children.size();

      // Every node still in the queue is written before the children of this node, so the first
      // child lands at (index of this node) + (nodes still queued) + 1.
      int startChildrenIndex = StarTreeIndexNodeOffHeap.INVALID_INDEX;
      int endChildrenIndex = StarTreeIndexNodeOffHeap.INVALID_INDEX;
      if (numChildren != 0) {
        startChildrenIndex = index + queue.size() + 1;
        endChildrenIndex = startChildrenIndex + numChildren - 1;
      }

      offset = writeOneOffHeapNode(mappedByteBuffer, offset, node, startChildrenIndex, endChildrenIndex);
      queue.addAll(children);
      index++;
    }
  }

  /**
   * Helper method to write the Header information for Star Tree off-heap format
   * - MAGIC_MARKER
   * - Version
   * - Header size
   * - Number of dimensions
   * - Dimension Name to Index Map, per dimension: index, length of the UTF-8 encoded name in
   *   bytes, UTF-8 encoded name
   * - Number of nodes in the tree.
   *
   * @param starTree Star tree whose header is written
   * @param headerSizeInBytes Pre-computed size of the header, stored inside the header
   * @param mappedByteBuffer Buffer to write into
   * @return Offset right after the header
   * @throws UnsupportedEncodingException If UTF-8 is not supported
   */
  private static long writeHeaderOffHeap(StarTree starTree, int headerSizeInBytes, MMapBuffer mappedByteBuffer)
      throws UnsupportedEncodingException {
    long offset = 0;
    mappedByteBuffer.putLong(offset, MAGIC_MARKER);
    offset += V1Constants.Numbers.LONG_SIZE;

    mappedByteBuffer.putInt(offset, OFF_HEAP_FORMAT_VERSION);
    offset += V1Constants.Numbers.INTEGER_SIZE;

    mappedByteBuffer.putInt(offset, headerSizeInBytes);
    offset += V1Constants.Numbers.INTEGER_SIZE;

    HashBiMap<String, Integer> dimensionNameToIndexMap = starTree.getDimensionNameToIndexMap();

    mappedByteBuffer.putInt(offset, dimensionNameToIndexMap.size());
    offset += V1Constants.Numbers.INTEGER_SIZE;

    // Write the dimensionName to Index map
    for (Map.Entry<String, Integer> entry : dimensionNameToIndexMap.entrySet()) {
      int index = entry.getValue();
      mappedByteBuffer.putInt(offset, index);
      offset += V1Constants.Numbers.INTEGER_SIZE;

      // Length and offset must be based on the encoded bytes, not on the number of chars:
      // both differ as soon as the name contains a non-ASCII character.
      byte[] dimensionBytes = entry.getKey().getBytes(UTF8);
      mappedByteBuffer.putInt(offset, dimensionBytes.length);
      offset += V1Constants.Numbers.INTEGER_SIZE;

      mappedByteBuffer.readFrom(dimensionBytes, offset);
      offset += dimensionBytes.length;
    }

    mappedByteBuffer.putInt(offset, starTree.getNumNodes());
    offset += V1Constants.Numbers.INTEGER_SIZE;

    return offset;
  }

  /**
   * Helper method that returns a list of children for the given node, sorted based on
   * the dimension value (ascending).
   *
   * @param node Node whose children are returned
   * @return A list of sorted child nodes (empty if no children).
   */
  private static List<StarTreeIndexNode> getSortedChildren(StarTreeIndexNode node) {
    Map<Integer, StarTreeIndexNode> children = node.getChildren();
    if (children == null) {
      return Collections.<StarTreeIndexNode>emptyList();
    }

    List<StarTreeIndexNode> sortedChildren = new ArrayList<>(children.values());
    Collections.sort(sortedChildren, new Comparator<StarTreeIndexNode>() {
      @Override
      public int compare(StarTreeIndexNode node1, StarTreeIndexNode node2) {
        // Integer.compare always yields a correctly signed result, also for zero and negative
        // dimension values.
        return Integer.compare(node1.getDimensionValue(), node2.getDimensionValue());
      }
    });

    return sortedChildren;
  }

  /**
   * Helper method to write one StarTreeIndexNodeOffHeap into the mappedByteBuffer at the provided
   * offset. Seven ints are written in this order: dimension name, dimension value, start document
   * id, end document id, aggregated document id, start children index, end children index.
   *
   * @param mappedByteBuffer Buffer to write into
   * @param offset Offset at which the node is written
   * @param node Node to write
   * @param startChildrenIndex Index of the first child of the node
   * @param endChildrenIndex Index of the last child of the node
   * @return Offset right after the written node
   */
  private static long writeOneOffHeapNode(MMapBuffer mappedByteBuffer, long offset, StarTreeIndexNode node,
      int startChildrenIndex, int endChildrenIndex) {
    mappedByteBuffer.putInt(offset, node.getDimensionName());
    offset += V1Constants.Numbers.INTEGER_SIZE;

    mappedByteBuffer.putInt(offset, node.getDimensionValue());
    offset += V1Constants.Numbers.INTEGER_SIZE;

    mappedByteBuffer.putInt(offset, node.getStartDocumentId());
    offset += V1Constants.Numbers.INTEGER_SIZE;

    mappedByteBuffer.putInt(offset, node.getEndDocumentId());
    offset += V1Constants.Numbers.INTEGER_SIZE;

    mappedByteBuffer.putInt(offset, node.getAggregatedDocumentId());
    offset += V1Constants.Numbers.INTEGER_SIZE;

    mappedByteBuffer.putInt(offset, startChildrenIndex);
    offset += V1Constants.Numbers.INTEGER_SIZE;

    mappedByteBuffer.putInt(offset, endChildrenIndex);
    offset += V1Constants.Numbers.INTEGER_SIZE;

    return offset;
  }

  /**
   * Helper method to compute size of the header in bytes, required to
   * store in off-heap format. The size is computed as follows:
   * - Long for magic marker
   * - Integer for version
   * - Integer for size of the header
   * - Integer to store number of dimensions.
   * - Per dimension: Integer for the index, Integer for the length of the name, and the UTF-8
   *   encoded bytes of the name
   * - Integer to store total number of nodes in the tree.
   *
   * @param starTree Star tree for which the header size is computed
   * @return Size of the header in bytes
   * @throws UnsupportedEncodingException If UTF-8 is not supported
   */
  private static int computeOffHeapHeaderSizeInBytes(StarTreeInterf starTree)
      throws UnsupportedEncodingException {
    // Magic marker, version, size of header and number of dimensions.
    int size = V1Constants.Numbers.LONG_SIZE + (3 * V1Constants.Numbers.INTEGER_SIZE);

    HashBiMap<String, Integer> dimensionNameToIndexMap = starTree.getDimensionNameToIndexMap();
    for (String dimension : dimensionNameToIndexMap.keySet()) {
      size += V1Constants.Numbers.INTEGER_SIZE; // For dimension index
      size += V1Constants.Numbers.INTEGER_SIZE; // For length of dimension name
      size += dimension.getBytes(UTF8).length; // For dimension name, as many bytes as get written
    }

    size += V1Constants.Numbers.INTEGER_SIZE; // For number of nodes.
    return size;
  }

  /**
   * Helper method to compute size of nodes of tree in bytes.
   * The size is computed as follows:
   * - Total number of nodes * size of one node
   *
   * @param starTree Star tree for which the size of the nodes is computed
   * @return Size of all nodes in bytes
   */
  private static long computeOffHeapNodesSizeInBytes(StarTreeInterf starTree) {
    // Widen before multiplying, so that the product is computed as long.
    return ((long) starTree.getNumNodes()) * StarTreeIndexNodeOffHeap.getSerializableSize();
  }

  /**
   * Utility method that de-serializes bytes from inputStream
   * into the on-heap format of star tree, using Java object de-serialization.
   *
   * <p>NOTE(review): Java native de-serialization must only be applied to trusted input; confirm
   * that star tree files always originate from a trusted source.</p>
   *
   * @param inputStream Stream positioned at the beginning of the serialized star tree; it is not
   *        closed by this method
   * @return De-serialized star tree
   * @throws IOException If the stream cannot be read
   * @throws ClassNotFoundException If the payload refers to an unknown class
   */
  private static StarTreeInterf fromBytesToOnHeapFormat(InputStream inputStream)
      throws IOException, ClassNotFoundException {
    ObjectInputStream ois = new ObjectInputStream(inputStream);
    return (StarTree) ois.readObject();
  }

  /**
   * Utility method that de-serializes bytes from inputStream into
   * the off-heap format of star tree. Not supported, always throws.
   *
   * @param inputStream Ignored
   * @return Never returns normally
   * @throws RuntimeException Always
   */
  private static StarTreeInterf fromBytesToOffHeapFormat(InputStream inputStream) {
    throw new RuntimeException("StarTree Version off-heap does not support reading from bytes.");
  }

  /**
   * Loads a star tree from the given file, in whichever format the file is stored.
   *
   * @param starTreeFile  Star Tree index file
   * @param readMode Read mode MMAP or OFF-HEAP (direct memory), only applicable to StarTreeOffHeap.
   * @return Loaded star tree
   * @throws IOException If the file cannot be opened or read
   * @throws ClassNotFoundException If an on-heap payload refers to an unknown class
   */
  public static StarTreeInterf fromFile(File starTreeFile, ReadMode readMode)
      throws IOException, ClassNotFoundException {
    // The stream is only needed for format detection and for reading the on-heap format;
    // try-with-resources makes sure it is closed on every path.
    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(starTreeFile))) {
      StarTreeFormatVersion starTreeVersion = getStarTreeVersion(bufferedInputStream);

      if (starTreeVersion.equals(StarTreeFormatVersion.ON_HEAP)) {
        return fromBytesToOnHeapFormat(bufferedInputStream);
      } else if (starTreeVersion.equals(StarTreeFormatVersion.OFF_HEAP)) {
        return new StarTreeOffHeap(starTreeFile, readMode);
      } else {
        throw new RuntimeException("Unrecognized version for Star Tree " + starTreeVersion);
      }
    }
  }

  /**
   * Utility method to convert star tree from on-heap to off-heap format:
   * <p>- If star tree does not exist, or if actual version is the same as
   *   expected version, then no action is taken. </p>
   *
   * <p>- If actual version is on-heap and expected version is off-heap, then conversion from
   *   on-heap to off-heap is performed. Both on-heap and off-heap formats are also backed up.</p>
   *
   * <p>- If actual version is off-heap and expected version is on-heap, then on-heap is restored from
   *   backup version, if available, no-op otherwise. Note, there is no off-heap to on-heap
   *   conversion as of now.</p>
   *
   * <p>Failures while converting or copying are logged and not propagated.</p>
   *
   * @param indexDir Segment index directory
   * @param starTreeVersionToLoad Format the star tree file should have after this call
   * @throws IOException If the existing star tree file cannot be read
   * @throws ClassNotFoundException If the existing on-heap star tree refers to an unknown class
   */
  public static void convertStarTreeFormatIfNeeded(File indexDir, StarTreeFormatVersion starTreeVersionToLoad)
      throws IOException, ClassNotFoundException {
    File starTreeFile = SegmentDirectoryPaths.findStarTreeFile(indexDir);

    // If the star-tree file does not exist, this is not a star tree index, nothing to do here.
    if (starTreeFile == null || !starTreeFile.exists()) {
      LOGGER.debug("Skipping Star Tree format conversion, as star tree file {} does not exist in directory {}",
          V1Constants.STAR_TREE_INDEX_FILE, indexDir);
      return;
    }

    File parentDir = starTreeFile.getParentFile();
    StarTreeFormatVersion actualVersion = getStarTreeVersion(starTreeFile);

    if (actualVersion == StarTreeFormatVersion.ON_HEAP && starTreeVersionToLoad == StarTreeFormatVersion.OFF_HEAP) {
      LOGGER.info("Converting Star Tree from on-heap to off-heap format for {}", starTreeFile.getAbsolutePath());
      File starTreeOffHeapFile = new File(parentDir, V1Constants.STAR_TREE_OFF_HEAP_INDEX_FILE);

      if (starTreeOffHeapFile.exists()) {
        LOGGER.info("Replacing star tree on-heap format with off-heap format for {}", starTreeFile.getAbsolutePath());
        FileUtils.copyFile(starTreeOffHeapFile, starTreeFile);
      } else {
        StarTreeInterf starTreeOnHeap = fromFile(starTreeFile, ReadMode.heap); // OnHeap only supports HEAP mode.
        try {
          writeTreeOffHeapFormat(starTreeOnHeap, starTreeOffHeapFile);
          FileUtils.copyFile(starTreeFile, new File(parentDir, V1Constants.STAR_TREE_ON_HEAP_INDEX_FILE));
          FileUtils.copyFile(starTreeOffHeapFile, starTreeFile);
        } catch (Exception e) {
          LOGGER.warn("Exception caught while convert star tree on-heap to off-heap format for {}",
              starTreeFile.getAbsolutePath(), e);
          // Do not leave a possibly incomplete off-heap file behind: the next call would find it
          // and copy it over the star tree file without converting again.
          FileUtils.deleteQuietly(starTreeOffHeapFile);
        }
      }
    } else if (actualVersion == StarTreeFormatVersion.OFF_HEAP
        && starTreeVersionToLoad == StarTreeFormatVersion.ON_HEAP) {
      File starTreeOnHeapFile = new File(parentDir, V1Constants.STAR_TREE_ON_HEAP_INDEX_FILE);
      if (starTreeOnHeapFile.exists()) {
        try {
          FileUtils.copyFile(starTreeFile, new File(parentDir, V1Constants.STAR_TREE_OFF_HEAP_INDEX_FILE));
          FileUtils.copyFile(starTreeOnHeapFile, starTreeFile);
        } catch (Exception e) {
          LOGGER.warn("Exception caught while converting star tree off-heap to on-heap for {}",
              starTreeFile.getAbsolutePath(), e);
        }
      } else {
        LOGGER.info(
            "Could not replace star tree format off-heap to on-heap as {} does not exist, will load off-heap format",
            starTreeOnHeapFile.getAbsolutePath());
      }
    }
  }
}