HadoopAbstractMapReduceTest.java example

Explorer

ignite-master
- examples
  - src
    - main
      - java
        org
        apache
        ignite
        examples
        ExampleNodeStartup.java
        ExamplesUtils.java
        binary
        computegrid
        ComputeClientBinaryTaskExecutionExample.java
        ComputeClientTask.java
        package-info.java
        datagrid
        CacheClientBinaryPutGetExample.java
        CacheClientBinaryQueryExample.java
        package-info.java
        store
        auto
        CacheBinaryAutoStoreExample.java
        package-info.java
        package-info.java
        computegrid
        ComputeAsyncExample.java
        ComputeBroadcastExample.java
        ComputeCallableExample.java
        ComputeClosureExample.java
        ComputeContinuousMapperExample.java
        ComputeFibonacciContinuationExample.java
        ComputeReducerExample.java
        ComputeRunnableExample.java
        ComputeTaskMapExample.java
        ComputeTaskSplitExample.java
        cluster
        ClusterGroupExample.java
        package-info.java
        failover
        ComputeFailoverExample.java
        ComputeFailoverNodeStartup.java
        package-info.java
        montecarlo
        Credit.java
        CreditRiskExample.java
        CreditRiskManager.java
        package-info.java
        package-info.java
        datagrid
        CacheAffinityExample.java
        CacheApiExample.java
        CacheAsyncApiExample.java
        CacheContinuousAsyncQueryExample.java
        CacheContinuousQueryExample.java
        CacheDataStreamerExample.java
        CacheEntryProcessorExample.java
        CacheEventsExample.java
        CachePutGetExample.java
        CacheQueryDmlExample.java
        CacheQueryExample.java
        CacheTransactionExample.java
        MemoryPoliciesExample.java
        package-info.java
        starschema
        CacheStarSchemaExample.java
        DimProduct.java
        DimStore.java
        FactPurchase.java
        package-info.java
        store
        CacheLoadOnlyStoreExample.java
        auto
        CacheAutoStoreExample.java
        package-info.java
        jdbc
        CacheJdbcPersonStore.java
        CacheJdbcStoreExample.java
        package-info.java
        package-info.java
        spring
        CacheSpringPersonStore.java
        CacheSpringStoreExample.java
        package-info.java
        datastructures
        IgniteAtomicLongExample.java
        IgniteAtomicReferenceExample.java
        IgniteAtomicSequenceExample.java
        IgniteAtomicStampedExample.java
        IgniteCountDownLatchExample.java
        IgniteExecutorServiceExample.java
        IgniteLockExample.java
        IgniteQueueExample.java
        IgniteSemaphoreExample.java
        IgniteSetExample.java
        package-info.java
        events
        EventsExample.java
        package-info.java
        igfs
        IgfsExample.java
        IgfsMapReduceExample.java
        IgfsNodeStartup.java
        package-info.java
        messaging
        MessagingExample.java
        MessagingPingPongExample.java
        MessagingPingPongListenActorExample.java
        package-info.java
        misc
        client
        memcache
        MemcacheRestExample.java
        MemcacheRestExampleNodeStartup.java
        package-info.java
        package-info.java
        deployment
        DeploymentExample.java
        package-info.java
        lifecycle
        LifecycleExample.java
        package-info.java
        package-info.java
        springbean
        SpringBeanExample.java
        package-info.java
        model
        Address.java
        Employee.java
        EmployeeKey.java
        Organization.java
        OrganizationType.java
        Person.java
        package-info.java
        package-info.java
        servicegrid
        ServicesExample.java
        SimpleMapService.java
        SimpleMapServiceImpl.java
        package-info.java
        springdata
        PersonRepository.java
        SpringAppCfg.java
        SpringDataExample.java
        streaming
        StreamTransformerExample.java
        StreamVisitorExample.java
        package-info.java
        wordcount
        CacheConfig.java
        QueryWords.java
        StreamWords.java
        package-info.java
        socket
        WordsSocketStreamerClient.java
        WordsSocketStreamerServer.java
        package-info.java
        util
        DbH2ServerStartup.java
        package-info.java
      - java-lgpl
        org
        apache
        ignite
        examples
        datagrid
        SpatialQueryExample.java
        hibernate
        HibernateL2CacheExample.java
        Post.java
        User.java
        package-info.java
        store
        hibernate
        CacheHibernatePersonStore.java
        CacheHibernateStoreExample.java
        package-info.java
        misc
        schedule
        ComputeScheduleExample.java
        package-info.java
      - java8
        org
        apache
        ignite
        examples
        java8
        cluster
        ClusterGroupExample.java
        package-info.java
        computegrid
        ComputeAsyncExample.java
        ComputeBroadcastExample.java
        ComputeCallableExample.java
        ComputeClosureExample.java
        ComputeRunnableExample.java
        package-info.java
        datagrid
        CacheAffinityExample.java
        CacheApiExample.java
        CacheAsyncApiExample.java
        CacheEntryProcessorExample.java
        package-info.java
        datastructures
        IgniteExecutorServiceExample.java
        package-info.java
        events
        EventsExample.java
        package-info.java
        messaging
        MessagingExample.java
        MessagingPingPongExample.java
        package-info.java
        package-info.java
        streaming
        StreamTransformerExample.java
        StreamVisitorExample.java
        package-info.java
      - ml
        org
        apache
        ignite
        examples
        ml
        math
        decompositions
        CholeskyDecompositionExample.java
        EigenDecompositionExample.java
        LUDecompositionExample.java
        SingularValueDecompositionExample.java
        package-info.java
        matrix
        CacheMatrixExample.java
        ExampleMatrixStorage.java
        MatrixCustomStorageExample.java
        MatrixExample.java
        MatrixExampleUtil.java
        OffHeapMatrixExample.java
        SparseDistributedMatrixExample.java
        SparseMatrixExample.java
        package-info.java
        package-info.java
        tracer
        TracerExample.java
        package-info.java
        vector
        CacheVectorExample.java
        ExampleVectorStorage.java
        OffHeapVectorExample.java
        SparseVectorExample.java
        VectorCustomStorageExample.java
        VectorExample.java
        package-info.java
      - spark
        org
        apache
        ignite
        examples
        spark
        SharedRDDExample.java
        package-info.java
    - test
- modules

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite.internal.processors.hadoop.impl;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.UUID;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.cache.CacheWriteSynchronizationMode;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.FileSystemConfiguration;
import org.apache.ignite.configuration.HadoopConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.hadoop.fs.IgniteHadoopFileSystemCounterWriter;
import org.apache.ignite.hadoop.fs.IgniteHadoopIgfsSecondaryFileSystem;
import org.apache.ignite.igfs.IgfsFile;
import org.apache.ignite.igfs.IgfsGroupDataBlocksKeyMapper;
import org.apache.ignite.igfs.IgfsIpcEndpointConfiguration;
import org.apache.ignite.igfs.IgfsMode;
import org.apache.ignite.igfs.IgfsPath;
import org.apache.ignite.igfs.IgfsUserContext;
import org.apache.ignite.igfs.secondary.IgfsSecondaryFileSystem;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.processors.hadoop.HadoopCommonUtils;
import org.apache.ignite.internal.processors.hadoop.HadoopJobId;
import org.apache.ignite.internal.processors.hadoop.counter.HadoopCounters;
import org.apache.ignite.internal.processors.hadoop.counter.HadoopPerformanceCounter;
import org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount1;
import org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount2;
import org.apache.ignite.internal.processors.igfs.IgfsEx;
import org.apache.ignite.internal.processors.igfs.IgfsUtils;
import org.apache.ignite.internal.util.lang.GridAbsPredicate;
import org.apache.ignite.internal.util.typedef.G;
import org.apache.ignite.internal.util.typedef.T2;
import org.apache.ignite.lang.IgniteOutClosure;
import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
import org.apache.ignite.testframework.GridTestUtils;
import org.jetbrains.annotations.Nullable;

import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
import static org.apache.ignite.cache.CacheMode.PARTITIONED;
import static org.apache.ignite.cache.CacheMode.REPLICATED;
import static org.apache.ignite.igfs.IgfsMode.PRIMARY;
import static org.apache.ignite.internal.processors.hadoop.impl.HadoopUtils.createJobInfo;

/**
 * Abstract test of whole cycle of map-reduce processing via Job tracker.
 */
public class HadoopAbstractMapReduceTest extends HadoopAbstractWordCountTest {
    /** IGFS block size. */
    protected static final int IGFS_BLOCK_SIZE = 512 * 1024;

    /** Amount of blocks to prefetch. */
    protected static final int PREFETCH_BLOCKS = 1;

    /** Amount of sequential block reads before prefetch is triggered. */
    protected static final int SEQ_READS_BEFORE_PREFETCH = 2;

    /** Secondary file system URI. */
    protected static final String SECONDARY_URI = "igfs://igfs-secondary@127.0.0.1:11500/";

    /** Secondary file system configuration path. */
    protected static final String SECONDARY_CFG = "modules/core/src/test/config/hadoop/core-site-loopback-secondary.xml";

    /** The user to run Hadoop job on behalf of. */
    protected static final String USER = "vasya";

    /** Secondary IGFS name. */
    protected static final String SECONDARY_IGFS_NAME = "igfs-secondary";

    /** Red constant. */
    protected static final int red = 10_000;

    /** Blue constant. */
    protected static final int blue = 20_000;

    /** Green constant. */
    protected static final int green = 15_000;

    /** Yellow constant. */
    protected static final int yellow = 7_000;

    /** The secondary Ignite node. */
    protected Ignite igniteSecondary;

    /** The secondary Fs. */
    protected IgfsSecondaryFileSystem secondaryFs;

    /** {@inheritDoc} */
    @Override protected int gridCount() {
        return 3;
    }

    /**
     * Gets owner of a IgfsEx path.
     * @param p The path.
     * @return The owner.
     */
    private static String getOwner(final IgfsEx i, final IgfsPath p) {
        return IgfsUserContext.doAs(USER, new IgniteOutClosure<String>() {
            @Override public String apply() {
                IgfsFile f = i.info(p);

                assert f != null;

                return f.property(IgfsUtils.PROP_USER_NAME);
            }
        });
    }

    /**
     * Gets owner of a secondary Fs path.
     * @param secFs The sec Fs.
     * @param p The path.
     * @return The owner.
     */
    private static String getOwnerSecondary(final IgfsSecondaryFileSystem secFs, final IgfsPath p) {
        return IgfsUserContext.doAs(USER, new IgniteOutClosure<String>() {
            @Override public String apply() {
                return secFs.info(p).property(IgfsUtils.PROP_USER_NAME);
            }
        });
    }

    /**
     * Checks owner of the path.
     * @param p The path.
     */
    private void checkOwner(IgfsPath p) {
        String ownerPrim = getOwner(igfs, p);
        assertEquals(USER, ownerPrim);

        String ownerSec = getOwnerSecondary(secondaryFs, p);
        assertEquals(USER, ownerSec);
    }

    /**
     * Does actual test job
     *
     * @param useNewMapper flag to use new mapper API.
     * @param useNewCombiner flag to use new combiner API.
     * @param useNewReducer flag to use new reducer API.
     */
    protected final void doTest(IgfsPath inFile, boolean useNewMapper, boolean useNewCombiner, boolean useNewReducer)
        throws Exception {
        log.info("useNewMapper=" + useNewMapper + ", useNewCombiner=" + useNewCombiner + ", useNewReducer=" + useNewReducer);

        igfs.delete(new IgfsPath(PATH_OUTPUT), true);

        JobConf jobConf = new JobConf();

        jobConf.set(HadoopCommonUtils.JOB_COUNTER_WRITER_PROPERTY, IgniteHadoopFileSystemCounterWriter.class.getName());
        jobConf.setUser(USER);
        jobConf.set(IgniteHadoopFileSystemCounterWriter.COUNTER_WRITER_DIR_PROPERTY, "/xxx/${USER}/zzz");

        //To split into about 40 items for v2
        jobConf.setInt(FileInputFormat.SPLIT_MAXSIZE, 65000);

        //For v1
        jobConf.setInt("fs.local.block.size", 65000);

        // File system coordinates.
        setupFileSystems(jobConf);

        HadoopWordCount1.setTasksClasses(jobConf, !useNewMapper, !useNewCombiner, !useNewReducer);

        Job job = Job.getInstance(jobConf);

        HadoopWordCount2.setTasksClasses(job, useNewMapper, useNewCombiner, useNewReducer, compressOutputSnappy());

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(igfsScheme() + inFile.toString()));
        FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT));

        job.setJarByClass(HadoopWordCount2.class);

        HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

        IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

        fut.get();

        checkJobStatistics(jobId);

        final String outFile = PATH_OUTPUT + "/" + (useNewReducer ? "part-r-" : "part-") + "00000";

        checkOwner(new IgfsPath(PATH_OUTPUT + "/" + "_SUCCESS"));

        checkOwner(new IgfsPath(outFile));

        String actual = readAndSortFile(outFile, job.getConfiguration());

        assertEquals("Use new mapper: " + useNewMapper + ", new combiner: " + useNewCombiner + ", new reducer: " +
                useNewReducer,
            "blue\t" + blue + "\n" +
                "green\t" + green + "\n" +
                "red\t" + red + "\n" +
                "yellow\t" + yellow + "\n",
            actual
        );
    }

    /**
     * Gets if to compress output data with Snappy.
     *
     * @return If to compress output data with Snappy.
     */
    protected boolean compressOutputSnappy() {
        return false;
    }

    /**
     * Simple test job statistics.
     *
     * @param jobId Job id.
     * @throws IgniteCheckedException
     */
    private void checkJobStatistics(HadoopJobId jobId) throws IgniteCheckedException, IOException {
        HadoopCounters cntrs = grid(0).hadoop().counters(jobId);

        HadoopPerformanceCounter perfCntr = HadoopPerformanceCounter.getCounter(cntrs, null);

        Map<String, SortedMap<Integer,Long>> tasks = new TreeMap<>();

        Map<String, Integer> phaseOrders = new HashMap<>();
        phaseOrders.put("submit", 0);
        phaseOrders.put("prepare", 1);
        phaseOrders.put("start", 2);
        phaseOrders.put("Cstart", 3);
        phaseOrders.put("finish", 4);

        String prevTaskId = null;

        long apiEvtCnt = 0;

        for (T2<String, Long> evt : perfCntr.evts()) {
            //We expect string pattern: COMBINE 1 run 7fa86a14-5a08-40e3-a7cb-98109b52a706
            String[] parsedEvt = evt.get1().split(" ");

            String taskId;
            String taskPhase;

            if ("JOB".equals(parsedEvt[0])) {
                taskId = parsedEvt[0];
                taskPhase = parsedEvt[1];
            }
            else {
                taskId = ("COMBINE".equals(parsedEvt[0]) ? "MAP" : parsedEvt[0].substring(0, 3)) + parsedEvt[1];
                taskPhase = ("COMBINE".equals(parsedEvt[0]) ? "C" : "") + parsedEvt[2];
            }

            if (!taskId.equals(prevTaskId))
                tasks.put(taskId, new TreeMap<Integer,Long>());

            Integer pos = phaseOrders.get(taskPhase);

            assertNotNull("Invalid phase " + taskPhase, pos);

            tasks.get(taskId).put(pos, evt.get2());

            prevTaskId = taskId;

            apiEvtCnt++;
        }

        for (Map.Entry<String ,SortedMap<Integer,Long>> task : tasks.entrySet()) {
            Map<Integer, Long> order = task.getValue();

            long prev = 0;

            for (Map.Entry<Integer, Long> phase : order.entrySet()) {
                assertTrue("Phase order of " + task.getKey() + " is invalid", phase.getValue() >= prev);

                prev = phase.getValue();
            }
        }

        final IgfsPath statPath = new IgfsPath("/xxx/" + USER + "/zzz/" + jobId + "/performance");

        assert GridTestUtils.waitForCondition(new GridAbsPredicate() {
            @Override public boolean apply() {
                return igfs.exists(statPath);
            }
        }, 20_000);

        final long apiEvtCnt0 = apiEvtCnt;

        boolean res = GridTestUtils.waitForCondition(new GridAbsPredicate() {
            @Override public boolean apply() {
                try {
                    try (BufferedReader reader = new BufferedReader(new InputStreamReader(igfs.open(statPath)))) {
                        return apiEvtCnt0 == HadoopTestUtils.simpleCheckJobStatFile(reader);
                    }
                }
                catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        }, 10000);

        if (!res) {
            BufferedReader reader = new BufferedReader(new InputStreamReader(igfs.open(statPath)));

            assert false : "Invalid API events count [exp=" + apiEvtCnt0 +
                ", actual=" + HadoopTestUtils.simpleCheckJobStatFile(reader) + ']';
        }
    }

    /** {@inheritDoc} */
    @Override protected void beforeTest() throws Exception {
        igniteSecondary = startGridWithIgfs("grid-secondary", SECONDARY_IGFS_NAME, PRIMARY, null, SECONDARY_REST_CFG);

        super.beforeTest();
    }

    /**
     * Start grid with IGFS.
     *
     * @param igniteInstanceName Ignite instance name.
     * @param igfsName IGFS name
     * @param mode IGFS mode.
     * @param secondaryFs Secondary file system (optional).
     * @param restCfg Rest configuration string (optional).
     * @return Started grid instance.
     * @throws Exception If failed.
     */
    protected Ignite startGridWithIgfs(String igniteInstanceName, String igfsName, IgfsMode mode,
        @Nullable IgfsSecondaryFileSystem secondaryFs, @Nullable IgfsIpcEndpointConfiguration restCfg) throws Exception {
        FileSystemConfiguration igfsCfg = new FileSystemConfiguration();

        igfsCfg.setName(igfsName);
        igfsCfg.setBlockSize(IGFS_BLOCK_SIZE);
        igfsCfg.setDefaultMode(mode);
        igfsCfg.setIpcEndpointConfiguration(restCfg);
        igfsCfg.setSecondaryFileSystem(secondaryFs);
        igfsCfg.setPrefetchBlocks(PREFETCH_BLOCKS);
        igfsCfg.setSequentialReadsBeforePrefetch(SEQ_READS_BEFORE_PREFETCH);

        CacheConfiguration dataCacheCfg = defaultCacheConfiguration();

        dataCacheCfg.setName("dataCache");
        dataCacheCfg.setCacheMode(PARTITIONED);
        dataCacheCfg.setNearConfiguration(null);
        dataCacheCfg.setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC);
        dataCacheCfg.setAffinityMapper(new IgfsGroupDataBlocksKeyMapper(2));
        dataCacheCfg.setBackups(0);
        dataCacheCfg.setAtomicityMode(TRANSACTIONAL);

        CacheConfiguration metaCacheCfg = defaultCacheConfiguration();

        metaCacheCfg.setName("metaCache");
        metaCacheCfg.setCacheMode(REPLICATED);
        metaCacheCfg.setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC);
        metaCacheCfg.setAtomicityMode(TRANSACTIONAL);

        igfsCfg.setDataCacheConfiguration(dataCacheCfg);
        igfsCfg.setMetaCacheConfiguration(metaCacheCfg);

        IgniteConfiguration cfg = new IgniteConfiguration();

        cfg.setIgniteInstanceName(igniteInstanceName);

        TcpDiscoverySpi discoSpi = new TcpDiscoverySpi();

        discoSpi.setIpFinder(new TcpDiscoveryVmIpFinder(true));

        cfg.setDiscoverySpi(discoSpi);
        cfg.setFileSystemConfiguration(igfsCfg);

        cfg.setLocalHost("127.0.0.1");
        cfg.setConnectorConfiguration(null);

        HadoopConfiguration hadoopCfg = createHadoopConfiguration();

        if (hadoopCfg != null)
            cfg.setHadoopConfiguration(hadoopCfg);

        return G.start(cfg);
    }

    /**
     * Creates custom Hadoop configuration.
     *
     * @return The Hadoop configuration.
     */
    protected HadoopConfiguration createHadoopConfiguration() {
        return null;
    }

    /** {@inheritDoc} */
    @Override public FileSystemConfiguration igfsConfiguration() throws Exception {
        FileSystemConfiguration fsCfg = super.igfsConfiguration();

        secondaryFs = new IgniteHadoopIgfsSecondaryFileSystem(SECONDARY_URI, SECONDARY_CFG);

        fsCfg.setSecondaryFileSystem(secondaryFs);

        return fsCfg;
    }
}