/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.FileNotFoundException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URI;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.filecache.DistributedCache;

/**
 * Tests that job submission rejects DistributedCache configurations that
 * contain a genuinely duplicated URI, while URIs that merely look alike
 * (qualified vs. unqualified, local vs. DFS) are not rejected up front.
 */
public class TestDuplicateArchiveFileCachedURLMinicluster
    extends ClusterMapReduceTestCase {

  enum EnumCounter { MAP_RECORDS }

  public void testDuplicationsMinicluster() throws Exception {
    // Write a small input file into the test cluster's input directory.
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.write("hello4\n");
    wr.close();

    // Configure a trivial identity map/reduce job over that input.
    JobConf conf = createJobConf();
    conf.setJobName("counters");

    conf.setInputFormat(TextInputFormat.class);

    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    FileInputFormat.setInputPaths(conf, getInputDir());
    FileOutputFormat.setOutputPath(conf, getOutputDir());

    Path inputRoot = getInputDir().makeQualified(getFileSystem());
    Path unqualifiedInputRoot = getInputDir();
    System.out.println("The qualified input dir is " + inputRoot.toString());
    System.out.println("The unqualified input dir is "
                       + unqualifiedInputRoot.toString());

    Path duplicatedPath = new Path(inputRoot, "text.txt");
    URI duplicatedURI = duplicatedPath.toUri();

    Path unqualifiedDuplicatedPath = new Path(unqualifiedInputRoot, "text.txt");
    URI unqualifiedDuplicatedURI = unqualifiedDuplicatedPath.toUri();

    System.out.println("The duplicated Path is " + duplicatedPath);
    System.out.println("The duplicated URI is " + duplicatedURI);
    System.out.println("The unqualified duplicated URI is "
                       + unqualifiedDuplicatedURI);

    // Register the same underlying file twice: once as a cache archive with
    // a qualified URI, once as a cache file with an unqualified URI.  This
    // is a true duplication, so job submission should fail.
    DistributedCache.addCacheArchive(duplicatedURI, conf);
    DistributedCache.addCacheFile(unqualifiedDuplicatedURI, conf);

    try {
      JobClient.runJob(conf);
      fail("The job completed, which is wrong since there's a duplication");
    } catch (InvalidJobConfException e) {
      System.out.println("We expect to see a stack trace here.");
      e.printStackTrace(System.out);
    }
  }

  public void testApparentDuplicationsMinicluster() throws Exception {
    // Write a small input file into the test cluster's input directory.
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text2.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.write("hello4\n");
    wr.close();

    // Configure the same trivial identity map/reduce job.
    JobConf conf = createJobConf();
    conf.setJobName("counters");

    conf.setInputFormat(TextInputFormat.class);

    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    final FileSystem lfs = FileSystem.getLocal(conf);

    FileInputFormat.setInputPaths(conf, getInputDir());
    FileOutputFormat.setOutputPath(conf, getOutputDir());

    Path localInputRoot = getInputDir().makeQualified(lfs);
    Path dfsInputRoot = getInputDir().makeQualified(getFileSystem());
    Path unqualifiedInputRoot = getInputDir();
    System.out.println("The qualified input dir is " + dfsInputRoot.toString());
    System.out.println("The unqualified input dir is "
                       + unqualifiedInputRoot.toString());

    // Only dfsUnqualPath names the file written above; the two qualified
    // paths reference "test2.text", which was never created, so the cache
    // entries built from them point at nonexistent files.
    Path dfsUnqualPath = new Path(unqualifiedInputRoot, "text2.txt");
    Path dfsQualPath = new Path(dfsInputRoot, "test2.text");
    Path localQualPath = new Path(localInputRoot, "test2.text");
    System.out.println("The dfs unqualified Path is " + dfsUnqualPath);
    System.out.println("The dfs qualified Path is " + dfsQualPath);
    System.out.println("The local qualified path is " + localQualPath);

    // These URIs merely look alike: they differ in file system and/or
    // qualification, so submission should not reject them as duplicates.
    DistributedCache.addCacheArchive(localQualPath.toUri(), conf);
    DistributedCache.addCacheFile(dfsUnqualPath.toUri(), conf);
    DistributedCache.addCacheFile(dfsQualPath.toUri(), conf);

    try {
      JobClient.runJob(conf);
      fail("The job completed, which is wrong since there's no local cached file");
    } catch (InvalidJobConfException e) {
      // An apparent duplication must not be treated as a real one.
      System.out.println("We expect to see a stack trace here.");
      e.printStackTrace(System.out);
      fail("This error should not occur.");
    } catch (FileNotFoundException e) {
      System.out.println("got an expected FileNotFoundException"
                         + " because we didn't provide cached files");
    }
  }
}