/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cassandra.db; import java.io.IOException; import java.io.DataOutput; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Iterator; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.util.DataOutputBuffer; import org.apache.cassandra.io.IndexHelper; import org.apache.cassandra.utils.BloomFilter; import org.apache.cassandra.db.marshal.AbstractType; /** * Help to create an index for a column family based on size of columns */ public class ColumnIndexer { /** * Given a column family this, function creates an in-memory structure that represents the * column index for the column family, and subsequently writes it to disk. * @param columnFamily Column family to create index for * @param dos data output stream * @throws IOException */ public static void serialize(ColumnFamily columnFamily, DataOutput dos) { Collection<IColumn> columns = columnFamily.getSortedColumns(); BloomFilter bf = createColumnBloomFilter(columns); /* Write out the bloom filter. */ DataOutputBuffer bufOut = new DataOutputBuffer(); try { BloomFilter.serializer().serialize(bf, bufOut); /* write the length of the serialized bloom filter. */ dos.writeInt(bufOut.getLength()); /* write out the serialized bytes. */ dos.write(bufOut.getData(), 0, bufOut.getLength()); /* Do the indexing */ doIndexing(columnFamily.getComparator(), columns, dos); } catch (IOException e) { throw new RuntimeException(e); } } /** * Create a bloom filter that contains the subcolumns and the columns that * make up this Column Family. * @param columns columns of the ColumnFamily * @return BloomFilter with the summarized information. */ private static BloomFilter createColumnBloomFilter(Collection<IColumn> columns) { int columnCount = 0; for (IColumn column : columns) { columnCount += column.getObjectCount(); } BloomFilter bf = BloomFilter.getFilter(columnCount, 4); for (IColumn column : columns) { bf.add(column.name()); /* If this is SuperColumn type Column Family we need to get the subColumns too. */ if (column instanceof SuperColumn) { Collection<IColumn> subColumns = column.getSubColumns(); for (IColumn subColumn : subColumns) { bf.add(subColumn.name()); } } } return bf; } /** * Given the collection of columns in the Column Family, * the name index is generated and written into the provided * stream * @param columns for whom the name index needs to be generated * @param dos stream into which the serialized name index needs * to be written. * @throws IOException */ private static void doIndexing(AbstractType comparator, Collection<IColumn> columns, DataOutput dos) throws IOException { if (columns.isEmpty()) { dos.writeInt(0); return; } /* * Maintains a list of ColumnIndexInfo objects for the columns in this * column family. The key is the column name and the position is the * relative offset of that column name from the start of the list. * We do this so that we don't read all the columns into memory. */ List<IndexHelper.IndexInfo> indexList = new ArrayList<IndexHelper.IndexInfo>(); int endPosition = 0, startPosition = -1; int indexSizeInBytes = 0; IColumn column = null, firstColumn = null; /* column offsets at the right thresholds into the index map. */ for (Iterator<IColumn> it = columns.iterator(); it.hasNext();) { column = it.next(); if (firstColumn == null) { firstColumn = column; startPosition = endPosition; } endPosition += column.serializedSize(); /* if we hit the column index size that we have to index after, go ahead and index it. */ if (endPosition - startPosition >= DatabaseDescriptor.getColumnIndexSize()) { IndexHelper.IndexInfo cIndexInfo = new IndexHelper.IndexInfo(firstColumn.name(), column.name(), startPosition, endPosition - startPosition); indexList.add(cIndexInfo); indexSizeInBytes += cIndexInfo.serializedSize(); firstColumn = null; } } // the last column may have fallen on an index boundary already. if not, index it explicitly. if (indexList.isEmpty() || comparator.compare(indexList.get(indexList.size() - 1).lastName, column.name()) != 0) { IndexHelper.IndexInfo cIndexInfo = new IndexHelper.IndexInfo(firstColumn.name(), column.name(), startPosition, endPosition - startPosition); indexList.add(cIndexInfo); indexSizeInBytes += cIndexInfo.serializedSize(); } assert indexSizeInBytes > 0; dos.writeInt(indexSizeInBytes); for (IndexHelper.IndexInfo cIndexInfo : indexList) { cIndexInfo.serialize(dos); } } }