KeyValueSource.java example

Explorer
haze-master
- hazelcast-master
/*
 * Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.mapreduce;

import com.hazelcast.core.IList;
import com.hazelcast.core.IMap;
import com.hazelcast.core.ISet;
import com.hazelcast.core.MultiMap;
import com.hazelcast.mapreduce.impl.ListKeyValueSource;
import com.hazelcast.mapreduce.impl.MapKeyValueSource;
import com.hazelcast.mapreduce.impl.MultiMapKeyValueSource;
import com.hazelcast.mapreduce.impl.SetKeyValueSource;
import com.hazelcast.nio.serialization.BinaryInterface;
import com.hazelcast.spi.NodeEngine;

import java.io.Closeable;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;

/**
 * Base class for custom data sources that feed mapreduce algorithms.
 * <p>
 * The implementations shipped by default cover Hazelcast data structures such as
 * {@link com.hazelcast.core.IMap} and {@link com.hazelcast.core.MultiMap}. A custom implementation
 * may be backed by external files, URLs or any other data source that can be presented as
 * key-value pairs.
 *
 * @param <K> key type
 * @param <V> value type
 * @since 3.2
 * @deprecated MapReduce is deprecated and will be removed in 4.0.
 * For map aggregations, you can use {@link com.hazelcast.aggregation.Aggregator} on IMap.
 * For general data processing, it is superseded by <a href="http://jet.hazelcast.org">Hazelcast Jet</a>.
 */
@Deprecated
@BinaryInterface
public abstract class KeyValueSource<K, V> implements Closeable {

    /**
     * Prepares this KeyValueSource for use; invoked before any key-value pair is accessed.
     *
     * @param nodeEngine nodeEngine of this cluster node
     * @return true if the operation succeeded, false otherwise
     */
    public abstract boolean open(NodeEngine nodeEngine);

    /**
     * Asks this data source whether at least one more key-value pair is available.
     * <p>
     * This method changes state: when an element is found, the index is moved to that element,
     * and subsequent calls to {@link #key()} and {@link #element()} return it.
     *
     * @return true if at least one more key-value pair is available from this
     * data source, false otherwise
     */
    public abstract boolean hasNext();

    /**
     * Returns the key at the current index so that it can be analyzed by a {@link KeyPredicate}.
     * Handing out the key on its own avoids a possible deserialization of values that are not
     * needed because their key is not interesting for the running mapreduce algorithm.
     * <p>
     * This method does not change state.
     *
     * @return the current index key for {@link KeyPredicate} analysis
     */
    public abstract K key();

    /**
     * Returns the element at the current index.
     * <p>
     * This method does not change state.
     *
     * @return the current index element
     */
    public abstract Map.Entry<K, V> element();

    /**
     * Resets all internal state so that this instance behaves like a new one.
     * <p>
     * Depending on the internal implementation, the same KeyValueSource instance may be used
     * several times in a row, especially when it implements
     * {@link com.hazelcast.mapreduce.PartitionIdAware}. When an instance is reused, the sequence
     * reset(), {@link #open(com.hazelcast.spi.NodeEngine)} and {@link #close()} is run multiple
     * times, with the other methods being called between open(...) and close().
     *
     * @return true if reset was successful, false otherwise
     */
    public abstract boolean reset();

    /**
     * Returns all clusterwide available keys, provided that {@link #isAllKeysSupported()}
     * returns true.
     * <p>
     * When the keys cannot be precollected, for example due to the partitioning of the data,
     * {@link #isAllKeysSupported()} returns false and this method throws an
     * {@link java.lang.UnsupportedOperationException}.
     * </p>
     * <p>
     * If this functionality is not available and {@link Job#onKeys(Object[])},
     * {@link Job#onKeys(Iterable)}, or {@link Job#keyPredicate(KeyPredicate)} is used, a
     * preselection of the interesting partitions / nodes is not available and the
     * overall processing speed may be degraded.
     * </p>
     *
     * @return a collection of all clusterwide available keys
     * @throws UnsupportedOperationException if {@link #isAllKeysSupported()} returns false
     */
    public final Collection<K> getAllKeys() {
        if (isAllKeysSupported()) {
            return getAllKeys0();
        }
        throw new UnsupportedOperationException("getAllKeys is unsupported for this KeyValueSource");
    }

    /**
     * Tells whether this KeyValueSource implementation is able to collect all clusterwide
     * available keys.
     * <p>
     * If true is returned, a call to {@link #getAllKeys()} must return all available keys so that
     * a preselection of interesting partitions / nodes can be executed based on the returned keys.
     * </p>
     * <p>
     * If this functionality is not available and {@link Job#onKeys(Object[])},
     * {@link Job#onKeys(Iterable)}, or {@link Job#keyPredicate(KeyPredicate)} is used, a
     * preselection of the interesting partitions / nodes is not available and the
     * overall processing speed may be degraded.
     * </p>
     * <p>
     * This base implementation always returns false.
     * </p>
     *
     * @return true if collecting clusterwide keys is available, false otherwise
     */
    public boolean isAllKeysSupported() {
        return false;
    }

    /**
     * Extension point that is meant to be overridden to collect all clusterwide available keys;
     * its result is what {@link #getAllKeys()} hands back to the caller.
     * <p>
     * This base implementation returns an empty list.
     * </p>
     *
     * @return a collection of all clusterwide available keys
     */
    protected Collection<K> getAllKeys0() {
        return Collections.<K>emptyList();
    }

    /**
     * Factory helper that builds a KeyValueSource implementation for the given {@link IMap}.
     * The created source is set up with the name of the map.
     *
     * @param map map to build a KeyValueSource implementation
     * @param <K> key type of the map
     * @param <V> value type of the map
     * @return KeyValueSource implementation based on the specified map
     */
    public static <K, V> KeyValueSource<K, V> fromMap(IMap<? super K, ? extends V> map) {
        final String mapName = map.getName();
        return new MapKeyValueSource<K, V>(mapName);
    }

    /**
     * Factory helper that builds a KeyValueSource implementation for the given {@link MultiMap}.
     * The created source is set up with the name of the multiMap.
     *
     * @param multiMap multiMap to build a KeyValueSource implementation
     * @param <K>      key type of the multiMap
     * @param <V>      value type of the multiMap
     * @return KeyValueSource implementation based on the specified multiMap
     */
    public static <K, V> KeyValueSource<K, V> fromMultiMap(MultiMap<? super K, ? extends V> multiMap) {
        final String multiMapName = multiMap.getName();
        return new MultiMapKeyValueSource<K, V>(multiMapName);
    }

    /**
     * Factory helper that builds a KeyValueSource implementation for the given {@link IList}.
     * <p>
     * The key returned by this KeyValueSource implementation is <b>ALWAYS</b> the name of the list
     * itself, whereas the values are the entries of the list, one by one. This implementation
     * behaves like a MultiMap with a single key but multiple values.
     *
     * @param list list to build a KeyValueSource implementation
     * @param <V>  value type of the list
     * @return KeyValueSource implementation based on the specified list
     */
    public static <V> KeyValueSource<String, V> fromList(IList<? extends V> list) {
        final String listName = list.getName();
        return new ListKeyValueSource<V>(listName);
    }

    /**
     * Factory helper that builds a KeyValueSource implementation for the given {@link ISet}.
     * <p>
     * The key returned by this KeyValueSource implementation is <b>ALWAYS</b> the name of the set
     * itself, whereas the values are the entries of the set, one by one. This implementation
     * behaves like a MultiMap with a single key but multiple values.
     *
     * @param set set to build a KeyValueSource implementation
     * @param <V> value type of the set
     * @return KeyValueSource implementation based on the specified set
     */
    public static <V> KeyValueSource<String, V> fromSet(ISet<? extends V> set) {
        final String setName = set.getName();
        return new SetKeyValueSource<V>(setName);
    }

}