package com.bigdata.service.ndx.pipeline;
import org.apache.log4j.Logger;
import com.bigdata.btree.keys.KVO;
import com.bigdata.util.BytesUtil;
/**
* Implementation which retains one instance of each tuple having the same
* unsigned byte[] key and the same byte[] value. For efficiency, you may
* specify that the presence of the same non-<code>null</code> object reference
* may be used to detect duplicates without requiring the comparison of the
* byte[] values.
* <p>
* When duplicates are eliminated, {@link KVOC}s identified as duplicates are
* arranged into a linked list.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
* @param <O>
* The generic type of the object associated with the key-value pair.
*/
public class DefaultDuplicateRemover<O> implements IDuplicateRemover<O> {

    // static protected transient final Logger log = Logger
    // .getLogger(DefaultDuplicateRemover.class);

    /**
     * When <code>true</code>, two tuples having the same key and the same
     * non-<code>null</code> object reference are treated as duplicates
     * without comparing their byte[] values.
     */
    final private boolean testRefs;

    /**
     * Instance verifies the same unsigned byte[] key and the same byte[]
     * value.
     */
    public transient static final IDuplicateRemover KEY_VAL = new DefaultDuplicateRemover(
            false/* testRefs */);

    /**
     * Instance verifies the same unsigned byte[] key and will accept the same
     * non-<code>null</code> object reference as indicating the same value. If
     * the object reference is <code>null</code> then it will compare the byte[]
     * values.
     */
    public transient static final IDuplicateRemover KEY_REF_VAL = new DefaultDuplicateRemover(
            true/* testRefs */);

    /**
     * @param testRefs
     *            When <code>true</code>, {@link KVO}s having the same key
     *            and the same non-<code>null</code> object reference will be
     *            filtered without testing the byte[] values for equality.
     */
    public DefaultDuplicateRemover(final boolean testRefs) {

        this.testRefs = testRefs;

    }

    /**
     * Return an array from which duplicate tuples have been dropped. Each
     * element of <i>src</i> is compared only against the most recently
     * retained element, so only runs of adjacent duplicates are eliminated
     * (presumably the caller provides <i>src</i> in key order - verify against
     * the caller). The order of the retained elements is preserved and
     * <i>src</i> itself is not modified.
     *
     * @param src
     *            The tuples to be filtered (must not be <code>null</code>).
     *
     * @return The retained tuples, as produced by
     *         {@link KVO#dense(KVO[], int)} from the working array and the
     *         #of distinct tuples found.
     */
    public KVO<O>[] filter(final KVO<O>[] src) {

        // working array - at most src.length tuples can be retained.
        final KVO<O>[] tmp = new KVO[src.length];

        int ndistinct = 0;

        // the last tuple which was retained (null until the 1st tuple).
        KVO<O> prior = null;

        for (KVO<O> other : src) {

            if (prior != null) {

                if (filterDuplicate(prior, other)) {

                    // drop the duplicate; [prior] remains the comparison target.
                    continue;

                }

            }

            tmp[ndistinct++] = prior = other;

        }

        // Make the array dense.
        return KVO.dense(tmp, ndistinct);

    }

    /**
     * Return <code>true</code> if the <i>other</i> instance is a duplicate and
     * may be dropped. (This implementation recognizes {@link KVOList} and
     * handles it appropriately.)
     * <p>
     * Two instances are duplicates when their keys are equal and either (a)
     * reference testing is enabled and both share the same
     * non-<code>null</code> object reference; or (b) their byte[] values are
     * equal. As a side effect, when <i>prior</i> is a {@link KVOList} the
     * duplicate <i>other</i> is added to it.
     *
     * @param prior
     *            The previous {@link KVO} instance.
     * @param other
     *            Another {@link KVO} instance.
     *
     * @return <code>true</code> if the <i>other</i> is a duplicate.
     */
    protected boolean filterDuplicate(final KVO<O> prior, final KVO<O> other) {

        // same key?
        if (BytesUtil.bytesEqual(prior.key, other.key)) {

            // same reference (if ref testing) or same value?
            if ((testRefs && prior.obj != null && prior.obj == other.obj)
                    || BytesUtil.bytesEqual(prior.val, other.val)) {

                if (prior instanceof KVOList) {

                    // link the duplicates together.
                    ((KVOList) prior).add(other);

                }

                return true;

            }

        }

        return false;

    }

}