/**
* Copyright 2011-2012 Akiban Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.persistit;
import java.util.ArrayList;
import java.util.List;
/**
* <p>
* Accumulate and hold information about the distribution of {@link Key} objects
* in a Persistit {@link Tree}. This class is used by the
* {@link Exchange#computeHistogram(Key, Key, int, int, KeyFilter, int)} method
* to accumulate and return the result of scanning all the keys at a fixed depth
* within a Tree.
* <p>
* </p>
* The result is represented by a List of {@link KeyCount} objects, each
* representing a key and a count. The count represents the number of smaller
* keys in the tree level. With this information client applications can
* estimate the number of elements between any two keys in the Tree.
* <p>
* </p>
* Application code can specify a <code>keyDepth</code> at which sibling keys
* are grouped together. For example, suppose a Tree contains keys such as
*
* <pre>
* {"BLUE",1}
* {"BLUE",2}
* {"BLUE",3}
* {"RED",1}
* {"RED",2}
* </pre>
*
* If <code>keyDepth=2</code> the resulting histogram will have 5 buckets, each
* with a <code>count</code> value of 1. But if <code>keyDepth=1</code> the
* result will have two buckets, one each for "BLUE" and "RED". Specifying
* <code>keyDepth=0</code> turns off aggregation by partial key depth.
* <p>
* </p>
* During the aggregation process the {@link Exchange#computeHistogram} method
* invokes the {@link #addKeyCopy} method for each Key it traverses. Each key is
* analyzed to determine whether the first <code>keyDepth</code> segments are
* the same as the previously added Key. If so then the previous count is
* incremented; otherwise a new KeyCount entry is added to the sample list.
* <p>
* </p>
*
*
* @author peter
*
*/
public class KeyHistogram {

    /**
     * The sample list is thinned once it holds this many times the requested
     * sample size.
     */
    private static final int THIN_MULTIPLIER = 16;

    private final Tree _tree;
    private final Key _startKey;
    private final Key _endKey;
    private final int _requestedSampleSize;
    private final int _treeDepth;
    private final int _keyDepth;
    private final List<KeyCount> _keys = new ArrayList<KeyCount>();

    /** Only every <code>_modulus</code>-th added key is considered as a sample. */
    private int _modulus = 1;
    /** Total number of keys passed to {@link #addKeyCopy}. */
    private int _keyCount = 0;
    private int _pageCount = 0;
    private long _pageBytesTotal = 0;
    private long _pageBytesInUse = 0;

    /**
     * Element in a <code>KeyHistogram</code> that pairs the encoded bytes of a
     * key (truncated to the histogram's key depth) with a count. The count is
     * the running total of keys that had been added to the histogram at the
     * time this element was last updated, so it is cumulative rather than
     * per-bucket.
     */
    public static class KeyCount {
        final byte[] _bytes;
        int _count;

        private KeyCount(final byte[] bytes, final int count) {
            _bytes = bytes;
            _count = count;
        }

        /**
         * Get the key bytes. The returned array is the internal array, not a
         * copy.
         *
         * @return the bytes of the key
         */
        public byte[] getBytes() {
            return _bytes;
        }

        /**
         * Get the count
         *
         * @return the count
         */
        public int getCount() {
            return _count;
        }

        private void setCount(final int count) {
            this._count = count;
        }

        /**
         * @return the count, formatted with grouping separators in a field at
         *         least 10 characters wide, followed by the string form of a
         *         <code>Key</code> loaded with this element's bytes
         */
        @Override
        public String toString() {
            final Key key = new Key((Persistit) null);
            System.arraycopy(_bytes, 0, key.getEncodedBytes(), 0, _bytes.length);
            key.setEncodedSize(_bytes.length);
            return String.format("%,10d %s", _count, key);
        }
    }

    /**
     * Construct an empty histogram. The arguments are stored as supplied and
     * are available through the corresponding getters.
     *
     * @param tree
     *            the Tree being examined
     * @param start
     *            the start key of the examined range
     * @param end
     *            the end key of the examined range
     * @param sampleSize
     *            the number of samples {@link #cull()} should retain
     * @param keyDepth
     *            number of leading key segments used to group keys into a
     *            bucket, or 0 to use the whole key
     * @param treeDepth
     *            the tree depth associated with this histogram
     */
    public KeyHistogram(final Tree tree, final Key start, final Key end, final int sampleSize, final int keyDepth,
            final int treeDepth) {
        _tree = tree;
        _startKey = start;
        _endKey = end;
        _requestedSampleSize = sampleSize;
        _keyDepth = keyDepth;
        _treeDepth = treeDepth;
    }

    /**
     * @return the Tree supplied to the constructor
     */
    public Tree getTree() {
        return _tree;
    }

    /**
     * @return the start key supplied to the constructor
     */
    public Key getStartKey() {
        return _startKey;
    }

    /**
     * @return the end key supplied to the constructor
     */
    public Key getEndKey() {
        return _endKey;
    }

    /**
     * @return the total number of keys added so far, including keys that were
     *         not retained as samples
     */
    public int getKeyCount() {
        return _keyCount;
    }

    /**
     * @return the sample size supplied to the constructor
     */
    public int getRequestedSampleSize() {
        return _requestedSampleSize;
    }

    /**
     * @return the number of samples currently held
     */
    public int getSampleSize() {
        return _keys.size();
    }

    /**
     * @return the live internal list of samples (not a copy), in the order
     *         the keys were added
     */
    public List<KeyCount> getSamples() {
        return _keys;
    }

    /**
     * @return the tree depth supplied to the constructor
     */
    public int getTreeDepth() {
        return _treeDepth;
    }

    /**
     * @return the key depth supplied to the constructor
     */
    public int getKeyDepth() {
        return _keyDepth;
    }

    /**
     * @return the number of pages recorded through {@link #addPage}
     */
    public int getPageCount() {
        return _pageCount;
    }

    /**
     * @return the sum of the page sizes recorded through {@link #addPage}
     */
    public long getPageBytesTotal() {
        return _pageBytesTotal;
    }

    /**
     * @return the sum of the in-use byte counts recorded through
     *         {@link #addPage}
     */
    public long getPageBytesInUse() {
        return _pageBytesInUse;
    }

    /**
     * Add a key. Keys must be added in key-sort order. Every call increments
     * the total key count, but only every <code>modulus</code>-th key is
     * examined as a sample. If an examined key is the same as the most
     * recently retained sample up to the segment specified by the keyDepth
     * property, that sample's count is updated to the current total;
     * otherwise a new KeyCount holding a copy of the key's leading bytes is
     * appended. When the sample list reaches 16 times the requested sample
     * size, roughly every other sample is dropped and the modulus is doubled,
     * which keeps the number of retained samples relatively small. Because
     * counts are running totals, dropping a sample requires no adjustment of
     * its neighbors.
     *
     * @param key
     *            The Key to add to the sample set
     */
    void addKeyCopy(final Key key) {
        _keyCount++;
        if (_keyCount % _modulus == 0) {
            final int length = _keyDepth == 0 ? key.getEncodedSize() : key.indexTo(_keyDepth).getIndex();
            final int end = _keys.size() - 1;
            boolean same = false;
            if (end >= 0) {
                final byte[] last = _keys.get(end).getBytes();
                same = last.length == length;
                for (int index = 0; same && index < length; index++) {
                    same = last[index] == key.getEncodedBytes()[index];
                }
            }
            if (same) {
                _keys.get(end).setCount(_keyCount);
            } else {
                final byte[] bytes = new byte[length];
                System.arraycopy(key.getEncodedBytes(), 0, bytes, 0, length);
                _keys.add(new KeyCount(bytes, _keyCount));
            }
        }
        // Computed as a long, with a floor of one requested sample, so that a
        // very large or non-positive requested size cannot wrap to a
        // non-positive threshold. Such a threshold would trigger thinning on
        // every call and double the modulus until it overflowed to zero.
        final long threshold = Math.max(1L, _requestedSampleSize) * THIN_MULTIPLIER;
        if (_keys.size() >= threshold) {
            thinSamples();
            // Never let the modulus overflow; a zero modulus would make the
            // remainder operation above throw ArithmeticException.
            if (_modulus <= Integer.MAX_VALUE / 2) {
                _modulus *= 2;
            }
        }
    }

    /**
     * Drops every sample at an even index except the final one, compacting
     * the survivors in place in a single pass. The final sample is always
     * retained because {@link #addKeyCopy} compares each new key against it.
     */
    private void thinSamples() {
        final int size = _keys.size();
        final int last = size - 1;
        int kept = 0;
        for (int index = 0; index < size; index++) {
            if ((index & 1) == 1 || index == last) {
                _keys.set(kept++, _keys.get(index));
            }
        }
        _keys.subList(kept, size).clear();
    }

    /**
     * Accumulates total number of pages, bytes and bytes-in-use traversed.
     *
     * @param size
     *            Size of the page
     * @param used
     *            Number of bytes in use in the page.
     */
    void addPage(final int size, final int used) {
        _pageCount++;
        _pageBytesTotal += size;
        _pageBytesInUse += used;
    }

    /**
     * Culls the List of keys down to the requested sample size. Nothing is
     * removed when the list already holds no more than the requested number
     * of samples, and everything is removed when the requested size is not
     * positive. Otherwise the list is walked from the last sample to the
     * first with an error accumulator that spreads the retained samples
     * approximately evenly; the last sample is always retained.
     */
    void cull() {
        final int have = _keys.size();
        final int want = _requestedSampleSize;
        if (want >= have) {
            // Already small enough; also avoids overflowing the accumulator
            // when the requested size is very large.
            return;
        }
        if (want <= 0) {
            _keys.clear();
            return;
        }
        // Here 0 < want < have. The accumulator gains "want" per sample and a
        // sample is kept each time the accumulator passes "have".
        long counter = have;
        for (int index = have; --index >= 0;) {
            counter += want;
            if (counter <= have) {
                _keys.remove(index);
            } else {
                counter -= have;
            }
        }
        // Defensive trim in case one sample too many was retained.
        if (_keys.size() > want) {
            _keys.remove(0);
        }
    }
}