/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.igfs.mapreduce;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteException;
import org.apache.ignite.IgniteFileSystem;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.compute.ComputeJob;
import org.apache.ignite.compute.ComputeTaskAdapter;
import org.apache.ignite.igfs.IgfsBlockLocation;
import org.apache.ignite.igfs.IgfsFile;
import org.apache.ignite.igfs.IgfsPath;
import org.apache.ignite.internal.IgniteKernal;
import org.apache.ignite.internal.processors.igfs.IgfsProcessorAdapter;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.resources.IgniteInstanceResource;
import org.jetbrains.annotations.Nullable;
/**
* IGFS task which can be executed on the grid using one of {@code IgniteFs.execute()} methods. Essentially IGFS task
* is regular {@link org.apache.ignite.compute.ComputeTask} with different map logic. Instead of implementing
* {@link org.apache.ignite.compute.ComputeTask#map(List, Object)} method to split task into jobs, you must implement
* {@link IgfsTask#createJob(org.apache.ignite.igfs.IgfsPath, IgfsFileRange, IgfsTaskArgs)} method.
* <p>
 * Each file participating in IGFS task is split into {@link IgfsFileRange}s first. Normally range is a number of
 * consecutive bytes located on a single node (see {@code IgfsGroupDataBlocksKeyMapper}). In case maximum range size
 * is provided (either through {@link org.apache.ignite.configuration.FileSystemConfiguration#getMaximumTaskRangeLength()} or {@code IgniteFs.execute()}
 * argument), then ranges could be further divided into smaller chunks.
* <p>
* Once file is split into ranges, each range is passed to {@code IgfsTask.createJob()} method in order to create a
* {@link IgfsJob}.
* <p>
* Finally all generated jobs are sent to Grid nodes for execution.
* <p>
* As with regular {@code ComputeTask} you can define your own logic for results handling and reduce step.
* <p>
* Here is an example of such a task:
* <pre name="code" class="java">
 * public class WordCountTask extends IgfsTask&lt;String, Integer&gt; {
 *     &#64;Override
 *     public IgfsJob createJob(IgfsPath path, IgfsFileRange range, IgfsTaskArgs&lt;T&gt; args) throws IgniteCheckedException {
* // New job will be created for each range within each file.
* // We pass user-provided argument (which is essentially a word to look for) to that job.
* return new WordCountJob(args.userArgument());
* }
*
* // Aggregate results into one compound result.
 *     public Integer reduce(List&lt;ComputeJobResult&gt; results) throws IgniteCheckedException {
* Integer total = 0;
*
* for (ComputeJobResult res : results) {
* Integer cnt = res.getData();
*
* // Null can be returned for non-existent file in case we decide to ignore such situations.
* if (cnt != null)
* total += cnt;
* }
*
* return total;
* }
* }
* </pre>
*/
public abstract class IgfsTask<T, R> extends ComputeTaskAdapter<IgfsTaskArgs<T>, R> {
    /** */
    private static final long serialVersionUID = 0L;

    /** Auto-injected grid instance. */
    @IgniteInstanceResource
    private Ignite ignite;

    /** {@inheritDoc} */
    @Nullable @Override public final Map<? extends ComputeJob, ClusterNode> map(List<ClusterNode> subgrid,
        @Nullable IgfsTaskArgs<T> args) {
        assert ignite != null;
        assert args != null;

        IgniteFileSystem fs = ignite.fileSystem(args.igfsName());
        IgfsProcessorAdapter igfsProc = ((IgniteKernal)ignite).context().igfs();

        // Resulting job -> execution node mapping.
        Map<ComputeJob, ClusterNode> jobMap = new HashMap<>();

        // Index subgrid nodes by ID for fast affinity lookups.
        Map<UUID, ClusterNode> nodesById = mapSubgrid(subgrid);

        for (IgfsPath path : args.paths()) {
            IgfsFile file = fs.info(path);

            if (file == null) {
                if (!args.skipNonExistentFiles())
                    throw new IgniteException("Failed to process IGFS file because it doesn't exist: " + path);

                continue;
            }

            Collection<IgfsBlockLocation> affinity = fs.affinity(path, 0, file.length(), args.maxRangeLength());

            long coveredLen = 0;

            for (IgfsBlockLocation loc : affinity) {
                // Pick the first affinity node of this block that belongs to the subgrid.
                ClusterNode node = null;

                for (UUID nodeId : loc.nodeIds()) {
                    ClusterNode candidate = nodesById.get(nodeId);

                    if (candidate != null) {
                        node = candidate;

                        break;
                    }
                }

                if (node == null)
                    throw new IgniteException("Failed to find any of block affinity nodes in subgrid [loc=" + loc +
                        ", subgrid=" + subgrid + ']');

                IgfsJob job = createJob(path, new IgfsFileRange(file.path(), loc.start(), loc.length()), args);

                // A null job means the user chose to skip this range.
                if (job != null) {
                    ComputeJob jobImpl = igfsProc.createJob(job, fs.name(), file.path(), loc.start(),
                        loc.length(), args.recordResolver());

                    jobMap.put(jobImpl, node);
                }

                coveredLen += loc.length();
            }

            // Sanity check: block locations must cover the file exactly.
            assert coveredLen == file.length();
        }

        return jobMap;
    }

    /**
     * Callback invoked during task map procedure to create job that will process specified split
     * for IGFS file.
     *
     * @param path Path.
     * @param range File range based on consecutive blocks. This range will be further
     *      realigned to record boundaries on destination node.
     * @param args Task argument.
     * @return IGFS job. If {@code null} is returned, the passed in file range will be skipped.
     * @throws IgniteException If job creation failed.
     */
    @Nullable public abstract IgfsJob createJob(IgfsPath path, IgfsFileRange range,
        IgfsTaskArgs<T> args) throws IgniteException;

    /**
     * Indexes subgrid nodes by their IDs.
     *
     * @param subgrid Subgrid.
     * @return Node ID to node mapping.
     */
    private Map<UUID, ClusterNode> mapSubgrid(Collection<ClusterNode> subgrid) {
        Map<UUID, ClusterNode> byId = U.newHashMap(subgrid.size());

        for (ClusterNode n : subgrid)
            byId.put(n.id(), n);

        return byId;
    }
}