/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.internal.processors.hadoop.impl;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.UUID;
import org.apache.hadoop.fs.AbstractFileSystem;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.serializer.JavaSerialization;
import org.apache.hadoop.io.serializer.JavaSerializationComparator;
import org.apache.hadoop.io.serializer.WritableSerialization;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.ignite.configuration.HadoopConfiguration;
import org.apache.ignite.internal.processors.hadoop.HadoopJobId;
import org.apache.ignite.internal.util.typedef.X;

import static org.apache.ignite.internal.processors.hadoop.impl.HadoopUtils.createJobInfo;

/**
 * Tests that map-reduce output is sorted correctly: random UUIDs are generated, sorted by a
 * map-reduce job using {@link JavaSerializationComparator}, and the output is verified to be in order.
 */
public class HadoopSortingTest extends HadoopAbstractSelfTest {
    /** */
    private static final String PATH_INPUT = "/test-in";

    /** */
    private static final String PATH_OUTPUT = "/test-out";

    /** {@inheritDoc} */
    @Override protected int gridCount() {
        return 3;
    }

    /**
     * @return {@code True} if IGFS is enabled on Hadoop nodes.
     */
    @Override protected boolean igfsEnabled() {
        return true;
    }

    /** {@inheritDoc} */
    @Override protected void beforeTest() throws Exception {
        startGrids(gridCount());
    }

    /** {@inheritDoc} */
    @Override protected void afterTest() throws Exception {
        stopAllGrids(true);
    }

    /** {@inheritDoc} */
    @Override public HadoopConfiguration hadoopConfiguration(String igniteInstanceName) {
        HadoopConfiguration cfg = super.hadoopConfiguration(igniteInstanceName);

        // TODO: IGNITE-404: Uncomment when fixed.
        //cfg.setExternalExecution(false);

        return cfg;
    }

    /**
     * @throws Exception If failed.
     */
    public void testSortSimple() throws Exception {
        // Generate test data.
        Job job = Job.getInstance();
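
        // Phase 1: the identity Mapper with zero reduce tasks simply materializes the random
        // UUIDs produced by InFormat into files under PATH_INPUT for the sort job to consume.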
        job.setInputFormatClass(InFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);

        setupFileSystems(job.getConfiguration());

        FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_INPUT));
X.printerrln("Data generation started.");
grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
createJobInfo(job.getConfiguration())).get(180000);
X.printerrln("Data generation complete.");

        // Run main map-reduce job.
        job = Job.getInstance();

        setupFileSystems(job.getConfiguration());
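
        // java.util.UUID is Serializable but not a Writable, so JavaSerialization is registered
        // alongside the default WritableSerialization to let Hadoop serialize the UUID map output keys.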
        job.getConfiguration().set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, JavaSerialization.class.getName() +
            "," + WritableSerialization.class.getName());

        FileInputFormat.setInputPaths(job, new Path(igfsScheme() + PATH_INPUT));
        FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT));
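
        // JavaSerializationComparator in effect deserializes each key and delegates to
        // Comparable, so the shuffle orders the UUID keys by UUID's natural ordering
        // rather than by raw serialized bytes.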
        job.setSortComparatorClass(JavaSerializationComparator.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setNumReduceTasks(2);

        job.setMapOutputKeyClass(UUID.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
X.printerrln("Job started.");
grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2),
createJobInfo(job.getConfiguration())).get(180000);
X.printerrln("Job complete.");

        // Check result.
        Path outDir = new Path(igfsScheme() + PATH_OUTPUT);

        AbstractFileSystem fs = AbstractFileSystem.get(new URI(igfsScheme()), job.getConfiguration());
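
        // Each of the two reducers writes one output file; every non-empty file must contain
        // strictly increasing UUIDs, proving the keys reached the reducers in sorted order.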
        for (FileStatus file : fs.listStatus(outDir)) {
            X.printerrln("__ file: " + file);

            if (file.getLen() == 0)
                continue;

            FSDataInputStream in = fs.open(file.getPath());

            Scanner sc = new Scanner(in);

            UUID prev = null;

            while (sc.hasNextLine()) {
                UUID next = UUID.fromString(sc.nextLine());

                // X.printerrln("___ check: " + next);

                if (prev != null)
                    assertTrue(prev.compareTo(next) < 0);

                prev = next;
            }

            sc.close(); // Closes the underlying input stream as well.
        }
    }
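
    /**
     * Input format that fabricates the test data: 10 splits of 20 records each, where every
     * record key is a random UUID rendered as text.
     */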
    public static class InFormat extends InputFormat<Text, NullWritable> {
        /** {@inheritDoc} */
        @Override public List<InputSplit> getSplits(JobContext ctx) throws IOException, InterruptedException {
            List<InputSplit> res = new ArrayList<>();

            FakeSplit split = new FakeSplit(20);
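
            // The same split instance can be added repeatedly because FakeSplit carries no
            // per-split state beyond its length: 10 splits of 20 records, 200 UUIDs in total.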
            for (int i = 0; i < 10; i++)
                res.add(split);

            return res;
        }

        /** {@inheritDoc} */
        @Override public RecordReader<Text, NullWritable> createRecordReader(final InputSplit split,
            TaskAttemptContext ctx) throws IOException, InterruptedException {
            return new RecordReader<Text, NullWritable>() {
                /** */
                int cnt;

                /** */
                Text txt = new Text();

                @Override public void initialize(InputSplit split, TaskAttemptContext ctx) {
                    // No-op.
                }
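
                // Keys are generated lazily: nextKeyValue() only advances the counter, while
                // getCurrentKey() mints a fresh random UUID on each call, so the data is random
                // but the record count still matches the split length.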
                @Override public boolean nextKeyValue() throws IOException, InterruptedException {
                    return ++cnt <= split.getLength();
                }

                @Override public Text getCurrentKey() {
                    txt.set(UUID.randomUUID().toString());

                    // X.printerrln("___ read: " + txt);

                    return txt;
                }

                @Override public NullWritable getCurrentValue() {
                    return NullWritable.get();
                }

                @Override public float getProgress() throws IOException, InterruptedException {
                    return (float)cnt / split.getLength();
                }

                @Override public void close() {
                    // No-op.
                }
            };
        }
    }
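
    /**
     * Mapper that parses each input line back into a {@link UUID} and emits it as the map output
     * key, leaving the actual sorting to the shuffle phase.
     */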
    public static class MyMapper extends Mapper<LongWritable, Text, UUID, NullWritable> {
        /** {@inheritDoc} */
        @Override protected void map(LongWritable key, Text val, Context ctx) throws IOException, InterruptedException {
            // X.printerrln("___ map: " + val);

            ctx.write(UUID.fromString(val.toString()), NullWritable.get());
        }
    }
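
    /**
     * Reducer that renders the already sorted {@link UUID} keys back to text, one UUID per line.
     */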
    public static class MyReducer extends Reducer<UUID, NullWritable, Text, NullWritable> {
        /** */
        private Text text = new Text();

        /** {@inheritDoc} */
        @Override protected void reduce(UUID key, Iterable<NullWritable> vals, Context ctx)
            throws IOException, InterruptedException {
            // X.printerrln("___ rdc: " + key);

            text.set(key.toString());

            ctx.write(text, NullWritable.get());
        }
    }
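
    /**
     * Synthetic input split that only carries a record count; it implements {@link Writable}
     * so it can be serialized to remote map tasks.
     */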
    public static class FakeSplit extends InputSplit implements Writable {
        /** */
        private static final String[] HOSTS = {"127.0.0.1"};

        /** */
        private int len;

        /**
         * @param len Length.
         */
        public FakeSplit(int len) {
            this.len = len;
        }

        /**
         * Default constructor required for {@link Writable} deserialization.
         */
        public FakeSplit() {
            // No-op.
        }

        /** {@inheritDoc} */
        @Override public long getLength() throws IOException, InterruptedException {
            return len;
        }

        /** {@inheritDoc} */
        @Override public String[] getLocations() throws IOException, InterruptedException {
            return HOSTS;
        }

        /** {@inheritDoc} */
        @Override public void write(DataOutput out) throws IOException {
            out.writeInt(len);
        }

        /** {@inheritDoc} */
        @Override public void readFields(DataInput in) throws IOException {
            len = in.readInt();
        }
    }
}