/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.infrastructure.io;

import java.io.DataInput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.facebook.infrastructure.io.SSTable.KeyPositionInfo;

/**
 * Provides helper to serialize, deserialize and use column indexes.
 *
 * <p>The serialized form produced by {@link #serialize} is:
 * <pre>
 *   boolean  hasIndex
 *   int      indexSizeInBytes              (only if hasIndex)
 *   entries: UTF key, int position, int numColumns   (only if hasIndex)
 * </pre>
 *
 * Author : Karthik Ranganathan ( kranganathan@facebook.com )
 */
public class IndexHelper
{
    /**
     * Serializes a column index to a data output stream.
     *
     * @param indexSizeInBytes Size of index to be written
     * @param columnIndexList List of column index entries as objects
     * @param dos the output stream into which the column index is to be written
     * @throws IOException if writing to the stream fails
     */
    public static void serialize(int indexSizeInBytes, List<ColumnPositionInfo> columnIndexList, DataOutputStream dos) throws IOException
    {
        /* if we have no data to index, then write that there is no index present */
        if (indexSizeInBytes == 0 || columnIndexList == null || columnIndexList.isEmpty())
        {
            dos.writeBoolean(false);
            return;
        }

        /* write if we are storing a column index */
        dos.writeBoolean(true);
        /* write the size of the index */
        dos.writeInt(indexSizeInBytes);
        for (ColumnPositionInfo colPosInfo : columnIndexList)
        {
            /* write the column name */
            dos.writeUTF(colPosInfo.key);
            /* write the relative offset; the on-disk format stores it as a 4-byte int */
            dos.writeInt((int) colPosInfo.position);
            /* write the number of columns in this bucket */
            dos.writeInt(colPosInfo.numColumns());
        }
    }

    /**
     * Skip the index and return the number of bytes read.
     *
     * @param file the data input from which the index should be skipped
     * @return number of bytes read from the data input: 1 for the presence flag,
     *         plus 4 and the index size when an index is present
     * @throws IOException if reading fails, including an {@code EOFException} when
     *         the input ends before the whole index could be skipped
     */
    public static int skip(DataInput file) throws IOException
    {
        /* read if the file has column indexes */
        boolean hasColumnIndexes = file.readBoolean();
        int totalBytesRead = 1;

        if (hasColumnIndexes)
        {
            /* read only the column index list */
            int columnIndexSize = file.readInt();
            totalBytesRead += 4;

            /*
             * skip the column index data. skipBytes() is allowed to skip fewer
             * bytes than requested, so keep going until everything is consumed.
             */
            int remaining = columnIndexSize;
            while (remaining > 0)
            {
                int skipped = file.skipBytes(remaining);
                if (skipped <= 0)
                {
                    /*
                     * No progress: consume a single byte instead. readByte()
                     * throws EOFException if the input is really exhausted.
                     */
                    file.readByte();
                    skipped = 1;
                }
                remaining -= skipped;
            }
            totalBytesRead += columnIndexSize;
        }
        return totalBytesRead;
    }

    /**
     * Skip the index and return the number of bytes read.
     *
     * @param buffer the byte buffer from which the index should be skipped;
     *        its position is advanced past the index
     * @return number of bytes read from the buffer: 1 for the presence flag,
     *         plus 4 and the index size when an index is present
     * @throws IOException declared for symmetry with {@link #skip(DataInput)}
     */
    public static int skip(ByteBuffer buffer) throws IOException
    {
        /* read if the buffer has column indexes; any non-zero byte means true, as in DataInput.readBoolean() */
        boolean hasColumnIndexes = buffer.get() != 0;
        int totalBytesRead = 1;

        if (hasColumnIndexes)
        {
            /* read only the column index list */
            int columnIndexSize = buffer.getInt();
            totalBytesRead += 4;

            /* skip the column index data: advance relative to the current position */
            buffer.position(buffer.position() + columnIndexSize);
            totalBytesRead += columnIndexSize;
        }
        return totalBytesRead;
    }

    /**
     * Deserialize the index into a structure and return the number of bytes read.
     * The presence flag is not read here; the input must be positioned at the index size.
     *
     * @param file Input from which the serialized form of the index is read
     * @param columnIndexList the structure which is filled in with the deserialized index
     * @return number of bytes read from the input (4 for the size plus the index size)
     * @throws IOException if reading or decoding the index fails
     */
    static int deserializeIndex(RandomAccessFile file, List<ColumnPositionInfo> columnIndexList) throws IOException
    {
        /* read only the column index list */
        int columnIndexSize = file.readInt();

        /* read the indexes into a separate buffer */
        DataOutputBuffer indexOut = new DataOutputBuffer();
        indexOut.write(file, columnIndexSize);

        /* now deserialize the index list */
        readIndexEntries(indexOut, columnIndexList);
        return 4 + columnIndexSize;
    }

    /**
     * Deserialize the index into a structure and return the number of bytes read.
     * The presence flag is not read here; the buffer must be positioned at the index size.
     *
     * @param buffer Input from which the serialized form of the index is read
     * @param columnIndexList the structure which is filled in with the deserialized index
     * @return number of bytes read from the input (4 for the size plus the index size)
     * @throws IOException if decoding the index fails
     */
    static int deserializeIndex(ByteBuffer buffer, List<ColumnPositionInfo> columnIndexList) throws IOException
    {
        /* read only the column index list */
        int columnIndexSize = buffer.getInt();

        /* read the indexes into a separate buffer */
        DataOutputBuffer indexOut = new DataOutputBuffer();
        indexOut.write(buffer, columnIndexSize);

        /* now deserialize the index list */
        readIndexEntries(indexOut, columnIndexList);
        return 4 + columnIndexSize;
    }

    /**
     * Decodes every (name, position, column count) entry held in indexOut and
     * appends it to columnIndexList.
     */
    private static void readIndexEntries(DataOutputBuffer indexOut, List<ColumnPositionInfo> columnIndexList) throws IOException
    {
        DataInputBuffer indexIn = new DataInputBuffer();
        indexIn.reset(indexOut.getData(), indexOut.getLength());

        while (indexIn.available() > 0)
        {
            /* same field order as written by serialize() */
            String columnName = indexIn.readUTF();
            int position = indexIn.readInt();
            int numCols = indexIn.readInt();
            columnIndexList.add(new ColumnPositionInfo(columnName, position, numCols));
        }
    }

    /**
     * Returns the range in which a given column falls in the index.
     *
     * @param column The column whose range needs to be found
     * @param columnIndexList the in-memory representation of the column index;
     *        may be null when no index is present
     * @param dataSize the total size of the data
     * @param totalNumCols total number of columns
     * @return an object describing a subrange in which the column is serialized
     */
    static ColumnPositionInfo getColumnRangeFromIndex(String column, List<ColumnPositionInfo> columnIndexList, int dataSize, int totalNumCols)
    {
        /* if column indexes were not present for this column family, then handle accordingly */
        if (columnIndexList == null)
        {
            return new ColumnPositionInfo(0, dataSize, totalNumCols);
        }

        /* find the offset for the column */
        int size = columnIndexList.size();
        int index = Collections.binarySearch(columnIndexList, new KeyPositionInfo(column));
        if (index < 0)
        {
            /*
             * The requested column is not an index entry: binarySearch returned
             * -(insertionPoint) - 1, so recover the insertion point.
             */
            index = -(index + 1);
        }
        else
        {
            /* exact hit: the range ends at the entry following the match */
            ++index;
        }

        /* calculate the starting offset from which we have to read */
        long start = (index == 0) ? 0 : columnIndexList.get(index - 1).position;
        long end;
        int numColumns;

        if (index < size)
        {
            ColumnPositionInfo upperEntry = columnIndexList.get(index);
            end = upperEntry.position;
            numColumns = upperEntry.numColumns();
        }
        else
        {
            /* past the last index entry: the range runs to the end of the data */
            end = dataSize;
            int totalColsIndexed = 0;
            for (ColumnPositionInfo colPosInfo : columnIndexList)
            {
                totalColsIndexed += colPosInfo.numColumns();
            }
            /* whatever is not accounted for by the index entries */
            numColumns = totalNumCols - totalColsIndexed;
        }

        return new ColumnPositionInfo((int) start, (int) end, numColumns);
    }

    /**
     * Returns the sub-ranges that contain the list of columns in columnNames.
     * Ranges are returned in the order in which they are first encountered, and
     * a range with a given start offset is returned only once.
     *
     * @param columnNames The list of columns whose subranges need to be found
     * @param columnIndexList the deserialized column indexes
     * @param dataSize the total size of data
     * @param totalNumCols the total number of columns
     * @return a list of subranges which contain all the columns in columnNames
     */
    static List<ColumnPositionInfo> getMultiColumnRangesFromIndex(List<String> columnNames, List<ColumnPositionInfo> columnIndexList, int dataSize, int totalNumCols)
    {
        List<ColumnPositionInfo> columnPosInfoList = new ArrayList<ColumnPositionInfo>();
        /* start offset -> end offset of the ranges already collected; used to drop duplicates */
        Map<Integer, Integer> startPositions = new HashMap<Integer, Integer>();

        for (String column : columnNames)
        {
            ColumnPositionInfo colPosInfo = getColumnRangeFromIndex(column, columnIndexList, dataSize, totalNumCols);
            if (!startPositions.containsKey(colPosInfo.start()))
            {
                columnPosInfoList.add(colPosInfo);
                startPositions.put(colPosInfo.start(), colPosInfo.end());
            }
        }

        return columnPosInfoList;
    }

    /**
     * A helper class to keep track of column positions.
     *
     * <p>It is built in one of two ways: as an index entry (key, position and
     * column count) or as a range (start, end and column count). Only the
     * fields supplied to the constructor used are set.
     */
    public static class ColumnPositionInfo extends KeyPositionInfo
    {
        private int start_;
        private int end_;
        private int numColumns_;

        /**
         * Creates an index entry.
         *
         * @param key the column name of the entry
         * @param position the offset recorded for the entry
         * @param numColumns the number of columns recorded for the entry
         */
        public ColumnPositionInfo(String key, long position, int numColumns)
        {
            super(key, position);
            numColumns_ = numColumns;
        }

        /**
         * Creates a range; the key is set to the empty string.
         *
         * @param start the start offset of the range
         * @param end the end offset of the range
         * @param numColumns the number of columns attributed to the range
         */
        public ColumnPositionInfo(int start, int end, int numColumns)
        {
            super("");
            start_ = start;
            end_ = end;
            numColumns_ = numColumns;
        }

        /** @return the start offset of the range */
        public int start()
        {
            return start_;
        }

        /** @return the end offset of the range */
        public int end()
        {
            return end_;
        }

        /** @return the number of columns recorded for this entry or range */
        public int numColumns()
        {
            return numColumns_;
        }

        /**
         * Compares two ranges by their start offset only.
         *
         * <p>Note that this is an overload and not an override of
         * {@link Object#equals(Object)}, so collections do not use it.
         *
         * @param colPosInfo the range to compare with; may be null
         * @return true if colPosInfo is non-null and has the same start offset
         */
        boolean equals(ColumnPositionInfo colPosInfo)
        {
            return colPosInfo != null && colPosInfo.start() == this.start();
        }
    }
}