DefaultDuplicateRemover.java example

Explorer
database-master
package com.bigdata.service.ndx.pipeline;

import org.apache.log4j.Logger;

import com.bigdata.btree.keys.KVO;
import com.bigdata.util.BytesUtil;

/**
 * Implementation which retains one instance of each tuple having the same
 * unsigned byte[] key and the same byte[] value. For efficiency, you may
 * specify that the presence of the same non-<code>null</code> object reference
 * may be used to detect duplicates without requiring the comparison of the
 * byte[] values.
 * <p>
 * When duplicates are eliminated, {@link KVOC}s identified as duplicates are
 * arranged into a linked list.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 * @param <O>
 *            The generic type of the object associated with the key-value pair.
 */
public class DefaultDuplicateRemover<O> implements IDuplicateRemover<O> {

//    static protected transient final Logger log = Logger
//            .getLogger(DefaultDuplicateRemover.class);

    /**
     * When <code>true</code>, two {@link KVO}s having the same key and the
     * same non-<code>null</code> object reference are treated as duplicates
     * without comparing their byte[] values.
     */
    final private boolean testRefs;

    /**
     * Instance verifies the same unsigned byte[] key and the same byte[]
     * value.
     */
    public transient static final IDuplicateRemover KEY_VAL = new DefaultDuplicateRemover(
            false/* testRefs */);

    /**
     * Instance verifies the same unsigned byte[] key and will accept the same
     * non-<code>null</code> object reference as indicating the same value. If
     * the object reference is <code>null</code> then it will compare the byte[]
     * values.
     */
    public transient static final IDuplicateRemover KEY_REF_VAL = new DefaultDuplicateRemover(
            true/* testRefs */);
    
    /**
     * @param testRefs
     *            When <code>true</code>, {@link KVO}s having the same key
     *            and the same non-<code>null</code> object reference will be
     *            filtered without testing the byte[] values for equality.
     */
    public DefaultDuplicateRemover(final boolean testRefs) {
        
        this.testRefs = testRefs;
        
    }
    
    /**
     * Return the elements of <i>src</i> with duplicates dropped, preserving
     * the order of the retained elements.
     * <p>
     * Each element is compared only against the most recently retained
     * element (see {@link #filterDuplicate(KVO, KVO)}), so duplicates are
     * detected only when they appear in consecutive positions in <i>src</i>.
     * The first element of each run of duplicates is the one which is
     * retained.
     * 
     * @param src
     *            The source tuples (not modified by this method).
     * 
     * @return The result of {@link KVO#dense(KVO[], int)} applied to the
     *         retained elements.
     */
    public KVO<O>[] filter(final KVO<O>[] src) {

        // Generic array creation is not possible, hence the unchecked
        // conversion. Only KVO<O> references taken from [src] are stored.
        @SuppressWarnings("unchecked")
        final KVO<O>[] tmp = new KVO[src.length];

        // #of elements retained so far (also the next free index in tmp).
        int ndistinct = 0;

        // The most recently retained element (null until the first one).
        KVO<O> prior = null;

        for (KVO<O> other : src) {

            if (prior != null) {

                if (filterDuplicate(prior, other)) {

                    // drop [other]; [prior] is left unchanged.
                    continue;
                    
                }

            }

            // retain [other]; it becomes the basis for the next comparison.
            tmp[ndistinct++] = prior = other;

        }

        // Make the array dense.
        return KVO.dense(tmp, ndistinct);

    }

    /**
     * Return <code>true</code> if the <i>other</i> instance is a duplicate and
     * may be dropped. (This implementation recognizes {@link KVOList} and
     * handles it appropriately.)
     * <p>
     * <i>other</i> is a duplicate when it has the same key as <i>prior</i>
     * and either (a) reference testing is enabled and both share the same
     * non-<code>null</code> object reference; or (b) both have the same
     * byte[] value. When a duplicate is found and <i>prior</i> is a
     * {@link KVOList}, <i>other</i> is added to <i>prior</i> as a side
     * effect.
     * 
     * @param prior
     *            The previous {@link KVO} instance.
     * @param other
     *            Another {@link KVO} instance.
     *            
     * @return <code>true</code> if the <i>other</i> is a duplicate.
     */
    protected boolean filterDuplicate(final KVO<O> prior, final KVO<O> other) {

        // same key?
        if (BytesUtil.bytesEqual(prior.key, other.key)) {

            // same reference (if ref testing) or same value?
            if ((testRefs && prior.obj != null && prior.obj == other.obj)
                    || BytesUtil.bytesEqual(prior.val, other.val)) {

                if (prior instanceof KVOList) {

                    // link the duplicates together.
                    ((KVOList) prior).add(other);

                }

                return true;

            }

        }

        return false;

    }

}