/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite.internal.processors.hadoop.impl;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.UUID;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.cache.CacheWriteSynchronizationMode;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.FileSystemConfiguration;
import org.apache.ignite.configuration.HadoopConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.hadoop.fs.IgniteHadoopFileSystemCounterWriter;
import org.apache.ignite.hadoop.fs.IgniteHadoopIgfsSecondaryFileSystem;
import org.apache.ignite.igfs.IgfsFile;
import org.apache.ignite.igfs.IgfsGroupDataBlocksKeyMapper;
import org.apache.ignite.igfs.IgfsIpcEndpointConfiguration;
import org.apache.ignite.igfs.IgfsIpcEndpointType;
import org.apache.ignite.igfs.IgfsMode;
import org.apache.ignite.igfs.IgfsPath;
import org.apache.ignite.igfs.IgfsUserContext;
import org.apache.ignite.igfs.secondary.IgfsSecondaryFileSystem;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.processors.hadoop.HadoopCommonUtils;
import org.apache.ignite.internal.processors.hadoop.HadoopJobId;
import org.apache.ignite.internal.processors.hadoop.counter.HadoopCounters;
import org.apache.ignite.internal.processors.hadoop.counter.HadoopPerformanceCounter;
import org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount1;
import org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount2;
import org.apache.ignite.internal.processors.igfs.IgfsEx;
import org.apache.ignite.internal.processors.igfs.IgfsUtils;
import org.apache.ignite.internal.util.lang.GridAbsPredicate;
import org.apache.ignite.internal.util.typedef.G;
import org.apache.ignite.internal.util.typedef.T2;
import org.apache.ignite.lang.IgniteOutClosure;
import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
import org.apache.ignite.testframework.GridTestUtils;
import org.jetbrains.annotations.Nullable;

import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
import static org.apache.ignite.cache.CacheMode.PARTITIONED;
import static org.apache.ignite.cache.CacheMode.REPLICATED;
import static org.apache.ignite.igfs.IgfsMode.PRIMARY;
import static org.apache.ignite.internal.processors.hadoop.impl.HadoopUtils.createJobInfo;

/**
 * Abstract test of the whole cycle of map-reduce processing via the job tracker.
 */
public class HadoopAbstractMapReduceTest extends HadoopAbstractWordCountTest {
    /** IGFS block size. */
    protected static final int IGFS_BLOCK_SIZE = 512 * 1024;

    /** Amount of blocks to prefetch. */
    protected static final int PREFETCH_BLOCKS = 1;

    /** Amount of sequential block reads before prefetch is triggered. */
    protected static final int SEQ_READS_BEFORE_PREFETCH = 2;

    /** Secondary file system URI. */
    protected static final String SECONDARY_URI = "igfs://igfs-secondary@127.0.0.1:11500/";

    /** Secondary file system configuration path. */
    protected static final String SECONDARY_CFG =
        "modules/core/src/test/config/hadoop/core-site-loopback-secondary.xml";

    /** The user to run Hadoop job on behalf of. */
    protected static final String USER = "vasya";

    /** Secondary IGFS name. */
    protected static final String SECONDARY_IGFS_NAME = "igfs-secondary";

    /**
     * Secondary file system REST endpoint configuration. Referenced in {@link #beforeTest()} but its definition was
     * missing here; reconstructed assuming a TCP loopback endpoint on port 11500 to match {@code SECONDARY_URI}.
     */
    protected static final IgfsIpcEndpointConfiguration SECONDARY_REST_CFG;

    static {
        SECONDARY_REST_CFG = new IgfsIpcEndpointConfiguration();

        SECONDARY_REST_CFG.setType(IgfsIpcEndpointType.TCP);
        SECONDARY_REST_CFG.setPort(11500);
    }

    /** Red constant. */
    protected static final int red = 10_000;

    /** Blue constant. */
    protected static final int blue = 20_000;

    /** Green constant. */
    protected static final int green = 15_000;

    /** Yellow constant. */
    protected static final int yellow = 7_000;

    /** The secondary Ignite node. */
    protected Ignite igniteSecondary;

    /** The secondary file system. */
    protected IgfsSecondaryFileSystem secondaryFs;

    /** {@inheritDoc} */
    @Override protected int gridCount() {
        return 3;
    }

    /**
     * Gets the owner of an IGFS path.
     *
     * @param i The IGFS instance.
     * @param p The path.
     * @return The owner.
     */
    private static String getOwner(final IgfsEx i, final IgfsPath p) {
        return IgfsUserContext.doAs(USER, new IgniteOutClosure<String>() {
            @Override public String apply() {
                IgfsFile f = i.info(p);

                assert f != null;

                return f.property(IgfsUtils.PROP_USER_NAME);
            }
        });
    }

    /**
     * Gets the owner of a secondary file system path.
     *
     * @param secFs The secondary file system.
     * @param p The path.
     * @return The owner.
     */
    private static String getOwnerSecondary(final IgfsSecondaryFileSystem secFs, final IgfsPath p) {
        return IgfsUserContext.doAs(USER, new IgniteOutClosure<String>() {
            @Override public String apply() {
                return secFs.info(p).property(IgfsUtils.PROP_USER_NAME);
            }
        });
    }

    /**
     * Checks the owner of the path in both the primary and the secondary file system.
     *
     * @param p The path.
     */
    private void checkOwner(IgfsPath p) {
        String ownerPrim = getOwner(igfs, p);
        assertEquals(USER, ownerPrim);

        String ownerSec = getOwnerSecondary(secondaryFs, p);
        assertEquals(USER, ownerSec);
    }

    /**
     * Does the actual test job.
     *
     * @param inFile Input file path.
     * @param useNewMapper Flag to use new mapper API.
     * @param useNewCombiner Flag to use new combiner API.
     * @param useNewReducer Flag to use new reducer API.
     * @throws Exception If failed.
     */
    protected final void doTest(IgfsPath inFile, boolean useNewMapper, boolean useNewCombiner, boolean useNewReducer)
        throws Exception {
        log.info("useNewMapper=" + useNewMapper + ", useNewCombiner=" + useNewCombiner + ", useNewReducer=" +
            useNewReducer);

        igfs.delete(new IgfsPath(PATH_OUTPUT), true);

        JobConf jobConf = new JobConf();

        jobConf.set(HadoopCommonUtils.JOB_COUNTER_WRITER_PROPERTY,
            IgniteHadoopFileSystemCounterWriter.class.getName());
        jobConf.setUser(USER);
        jobConf.set(IgniteHadoopFileSystemCounterWriter.COUNTER_WRITER_DIR_PROPERTY, "/xxx/${USER}/zzz");

        // To split into about 40 items for v2.
        jobConf.setInt(FileInputFormat.SPLIT_MAXSIZE, 65000);

        // For v1.
        jobConf.setInt("fs.local.block.size", 65000);

        // File system coordinates.
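        // setupFileSystems() is a helper inherited from the parent test class; presumably it registers the IGFS
        // Hadoop file system implementation classes in the configuration so the igfs:// scheme resolves.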
        setupFileSystems(jobConf);

        HadoopWordCount1.setTasksClasses(jobConf, !useNewMapper, !useNewCombiner, !useNewReducer);

        Job job = Job.getInstance(jobConf);

        HadoopWordCount2.setTasksClasses(job, useNewMapper, useNewCombiner, useNewReducer, compressOutputSnappy());

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(igfsScheme() + inFile.toString()));
        FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT));

        job.setJarByClass(HadoopWordCount2.class);

        HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

        IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration()));

        fut.get();

        checkJobStatistics(jobId);

        final String outFile = PATH_OUTPUT + "/" + (useNewReducer ? "part-r-" : "part-") + "00000";

        checkOwner(new IgfsPath(PATH_OUTPUT + "/" + "_SUCCESS"));
        checkOwner(new IgfsPath(outFile));

        String actual = readAndSortFile(outFile, job.getConfiguration());

        assertEquals("Use new mapper: " + useNewMapper + ", new combiner: " + useNewCombiner + ", new reducer: " +
            useNewReducer,
            "blue\t" + blue + "\n" +
            "green\t" + green + "\n" +
            "red\t" + red + "\n" +
            "yellow\t" + yellow + "\n",
            actual
        );
    }

    /**
     * Whether to compress output data with Snappy.
     *
     * @return {@code True} if output data should be compressed with Snappy.
     */
    protected boolean compressOutputSnappy() {
        return false;
    }

    /**
     * Simple check of the test job statistics.
     *
     * @param jobId Job id.
     * @throws IgniteCheckedException If failed.
     * @throws IOException If failed.
     */
    private void checkJobStatistics(HadoopJobId jobId) throws IgniteCheckedException, IOException {
        HadoopCounters cntrs = grid(0).hadoop().counters(jobId);

        HadoopPerformanceCounter perfCntr = HadoopPerformanceCounter.getCounter(cntrs, null);

        Map<String, SortedMap<Integer, Long>> tasks = new TreeMap<>();

        Map<String, Integer> phaseOrders = new HashMap<>();
        phaseOrders.put("submit", 0);
        phaseOrders.put("prepare", 1);
        phaseOrders.put("start", 2);
        phaseOrders.put("Cstart", 3);
        phaseOrders.put("finish", 4);

        String prevTaskId = null;

        long apiEvtCnt = 0;

        for (T2<String, Long> evt : perfCntr.evts()) {
            // We expect the string pattern: COMBINE 1 run 7fa86a14-5a08-40e3-a7cb-98109b52a706
            String[] parsedEvt = evt.get1().split(" ");

            String taskId;
            String taskPhase;

            if ("JOB".equals(parsedEvt[0])) {
                taskId = parsedEvt[0];
                taskPhase = parsedEvt[1];
            }
            else {
                taskId = ("COMBINE".equals(parsedEvt[0]) ? "MAP" : parsedEvt[0].substring(0, 3)) + parsedEvt[1];
                taskPhase = ("COMBINE".equals(parsedEvt[0]) ? "C" : "") + parsedEvt[2];
            }
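
            // Group phase timestamps by task: COMBINE events were folded into their parent MAP task above,
            // so a combiner phase such as "start" is recorded as the "Cstart" phase of the corresponding MAP task.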
"C" : "") + parsedEvt[2]; } if (!taskId.equals(prevTaskId)) tasks.put(taskId, new TreeMap<Integer,Long>()); Integer pos = phaseOrders.get(taskPhase); assertNotNull("Invalid phase " + taskPhase, pos); tasks.get(taskId).put(pos, evt.get2()); prevTaskId = taskId; apiEvtCnt++; } for (Map.Entry<String ,SortedMap<Integer,Long>> task : tasks.entrySet()) { Map<Integer, Long> order = task.getValue(); long prev = 0; for (Map.Entry<Integer, Long> phase : order.entrySet()) { assertTrue("Phase order of " + task.getKey() + " is invalid", phase.getValue() >= prev); prev = phase.getValue(); } } final IgfsPath statPath = new IgfsPath("/xxx/" + USER + "/zzz/" + jobId + "/performance"); assert GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { return igfs.exists(statPath); } }, 20_000); final long apiEvtCnt0 = apiEvtCnt; boolean res = GridTestUtils.waitForCondition(new GridAbsPredicate() { @Override public boolean apply() { try { try (BufferedReader reader = new BufferedReader(new InputStreamReader(igfs.open(statPath)))) { return apiEvtCnt0 == HadoopTestUtils.simpleCheckJobStatFile(reader); } } catch (IOException e) { throw new RuntimeException(e); } } }, 10000); if (!res) { BufferedReader reader = new BufferedReader(new InputStreamReader(igfs.open(statPath))); assert false : "Invalid API events count [exp=" + apiEvtCnt0 + ", actual=" + HadoopTestUtils.simpleCheckJobStatFile(reader) + ']'; } } /** {@inheritDoc} */ @Override protected void beforeTest() throws Exception { igniteSecondary = startGridWithIgfs("grid-secondary", SECONDARY_IGFS_NAME, PRIMARY, null, SECONDARY_REST_CFG); super.beforeTest(); } /** * Start grid with IGFS. * * @param igniteInstanceName Ignite instance name. * @param igfsName IGFS name * @param mode IGFS mode. * @param secondaryFs Secondary file system (optional). * @param restCfg Rest configuration string (optional). * @return Started grid instance. * @throws Exception If failed. 
     */
    protected Ignite startGridWithIgfs(String igniteInstanceName, String igfsName, IgfsMode mode,
        @Nullable IgfsSecondaryFileSystem secondaryFs, @Nullable IgfsIpcEndpointConfiguration restCfg)
        throws Exception {
        FileSystemConfiguration igfsCfg = new FileSystemConfiguration();

        igfsCfg.setName(igfsName);
        igfsCfg.setBlockSize(IGFS_BLOCK_SIZE);
        igfsCfg.setDefaultMode(mode);
        igfsCfg.setIpcEndpointConfiguration(restCfg);
        igfsCfg.setSecondaryFileSystem(secondaryFs);
        igfsCfg.setPrefetchBlocks(PREFETCH_BLOCKS);
        igfsCfg.setSequentialReadsBeforePrefetch(SEQ_READS_BEFORE_PREFETCH);

        CacheConfiguration dataCacheCfg = defaultCacheConfiguration();

        dataCacheCfg.setName("dataCache");
        dataCacheCfg.setCacheMode(PARTITIONED);
        dataCacheCfg.setNearConfiguration(null);
        dataCacheCfg.setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC);
        dataCacheCfg.setAffinityMapper(new IgfsGroupDataBlocksKeyMapper(2));
        dataCacheCfg.setBackups(0);
        dataCacheCfg.setAtomicityMode(TRANSACTIONAL);

        CacheConfiguration metaCacheCfg = defaultCacheConfiguration();

        metaCacheCfg.setName("metaCache");
        metaCacheCfg.setCacheMode(REPLICATED);
        metaCacheCfg.setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC);
        metaCacheCfg.setAtomicityMode(TRANSACTIONAL);

        igfsCfg.setDataCacheConfiguration(dataCacheCfg);
        igfsCfg.setMetaCacheConfiguration(metaCacheCfg);

        IgniteConfiguration cfg = new IgniteConfiguration();

        cfg.setIgniteInstanceName(igniteInstanceName);

        TcpDiscoverySpi discoSpi = new TcpDiscoverySpi();

        discoSpi.setIpFinder(new TcpDiscoveryVmIpFinder(true));

        cfg.setDiscoverySpi(discoSpi);
        cfg.setFileSystemConfiguration(igfsCfg);
        cfg.setLocalHost("127.0.0.1");
        cfg.setConnectorConfiguration(null);

        HadoopConfiguration hadoopCfg = createHadoopConfiguration();

        if (hadoopCfg != null)
            cfg.setHadoopConfiguration(hadoopCfg);

        return G.start(cfg);
    }

    /**
     * Creates custom Hadoop configuration.
     *
     * @return The Hadoop configuration.
     */
    protected HadoopConfiguration createHadoopConfiguration() {
        return null;
    }

    /** {@inheritDoc} */
    @Override public FileSystemConfiguration igfsConfiguration() throws Exception {
        FileSystemConfiguration fsCfg = super.igfsConfiguration();

        secondaryFs = new IgniteHadoopIgfsSecondaryFileSystem(SECONDARY_URI, SECONDARY_CFG);

        fsCfg.setSecondaryFileSystem(secondaryFs);

        return fsCfg;
    }
}