/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.db;
import java.io.IOException;
import java.io.DataOutput;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Iterator;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.io.util.DataOutputBuffer;
import org.apache.cassandra.io.IndexHelper;
import org.apache.cassandra.utils.BloomFilter;
import org.apache.cassandra.db.marshal.AbstractType;
/**
 * Helper that serializes the column bloom filter and the column index for a
 * column family; index entries are cut based on the serialized size of the columns.
 */
public class ColumnIndexer
{
    /**
     * Writes the column bloom filter and the column index of a column family
     * to the supplied output.
     *
     * The output receives, in order: the length of the serialized bloom filter
     * (as an int), the serialized bloom filter bytes, and then the column index
     * produced by {@link #doIndexing}.
     *
     * @param columnFamily column family whose sorted columns are indexed
     * @param dos output the bloom filter and index are written to
     * @throws RuntimeException wrapping any IOException raised while writing
     */
    public static void serialize(ColumnFamily columnFamily, DataOutput dos)
    {
        Collection<IColumn> sortedColumns = columnFamily.getSortedColumns();
        BloomFilter filter = createColumnBloomFilter(sortedColumns);
        try
        {
            /* Serialize the filter into a scratch buffer first so its length can be written ahead of it. */
            DataOutputBuffer filterBytes = new DataOutputBuffer();
            BloomFilter.serializer().serialize(filter, filterBytes);
            int filterLength = filterBytes.getLength();
            dos.writeInt(filterLength);
            dos.write(filterBytes.getData(), 0, filterLength);

            /* The column index follows the bloom filter. */
            doIndexing(columnFamily.getComparator(), sortedColumns, dos);
        }
        catch (IOException ioe)
        {
            throw new RuntimeException(ioe);
        }
    }

    /**
     * Builds a bloom filter holding the name of every column and, for super
     * columns, the name of every subcolumn as well.
     *
     * The filter is sized from the sum of {@code getObjectCount()} over all columns.
     *
     * @param columns columns of the ColumnFamily
     * @return bloom filter populated with the column and subcolumn names
     */
    private static BloomFilter createColumnBloomFilter(Collection<IColumn> columns)
    {
        int expectedEntries = 0;
        for (IColumn col : columns)
        {
            expectedEntries += col.getObjectCount();
        }

        BloomFilter filter = BloomFilter.getFilter(expectedEntries, 4);
        for (IColumn col : columns)
        {
            filter.add(col.name());
            if (!(col instanceof SuperColumn))
            {
                continue;
            }
            /* Super columns contribute their subcolumn names too. */
            for (IColumn sub : col.getSubColumns())
            {
                filter.add(sub.name());
            }
        }
        return filter;
    }

    /**
     * Generates the name index for the given columns and writes it to the stream.
     *
     * Columns are walked in iteration order while accumulating their serialized
     * sizes. Each time the bytes accumulated since the start of the current run
     * reach {@code DatabaseDescriptor.getColumnIndexSize()}, an index entry is
     * recorded with the first and last column names of the run, the run's start
     * offset (relative to the first column, which is at offset 0) and its width.
     * A final entry covers any trailing columns that did not reach the threshold.
     *
     * The stream receives the total serialized size of all entries (as an int)
     * followed by the entries themselves. For an empty collection only the
     * int 0 is written.
     *
     * @param comparator comparator used to check whether the last column was already indexed
     * @param columns columns for which the name index needs to be generated
     * @param dos stream into which the serialized name index is written
     * @throws IOException if writing to the stream fails
     */
    private static void doIndexing(AbstractType comparator, Collection<IColumn> columns, DataOutput dos) throws IOException
    {
        if (columns.isEmpty())
        {
            dos.writeInt(0);
            return;
        }

        List<IndexHelper.IndexInfo> entries = new ArrayList<IndexHelper.IndexInfo>();
        int totalEntryBytes = 0;
        int runStart = -1;
        int offset = 0;
        IColumn runFirst = null;
        IColumn last = null;

        for (IColumn current : columns)
        {
            last = current;
            if (runFirst == null)
            {
                /* A new run begins at the current offset. */
                runFirst = current;
                runStart = offset;
            }
            offset += current.serializedSize();

            /* Keep accumulating until the run reaches the configured index size. */
            if (offset - runStart < DatabaseDescriptor.getColumnIndexSize())
            {
                continue;
            }
            IndexHelper.IndexInfo entry = newIndexEntry(runFirst, current, runStart, offset);
            entries.add(entry);
            totalEntryBytes += entry.serializedSize();
            runFirst = null;
        }

        /* The last column may already close an entry; otherwise index the trailing run explicitly. */
        boolean lastAlreadyIndexed = !entries.isEmpty()
                                     && comparator.compare(entries.get(entries.size() - 1).lastName, last.name()) == 0;
        if (!lastAlreadyIndexed)
        {
            IndexHelper.IndexInfo entry = newIndexEntry(runFirst, last, runStart, offset);
            entries.add(entry);
            totalEntryBytes += entry.serializedSize();
        }

        assert totalEntryBytes > 0;
        dos.writeInt(totalEntryBytes);
        for (IndexHelper.IndexInfo entry : entries)
        {
            entry.serialize(dos);
        }
    }

    /**
     * Creates the index entry for a run of columns spanning [start, end).
     *
     * @param first first column of the run
     * @param last last column of the run
     * @param start offset at which the run begins
     * @param end offset just past the end of the run
     * @return index entry carrying both column names, the start offset and the width
     */
    private static IndexHelper.IndexInfo newIndexEntry(IColumn first, IColumn last, int start, int end)
    {
        return new IndexHelper.IndexInfo(first.name(), last.name(), start, end - start);
    }
}