/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.FileNotFoundException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URI;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.filecache.DistributedCache;

/**
 * Tests that job submission rejects DistributedCache configurations that
 * contain a genuinely duplicated URI, while URIs that merely look alike
 * (qualified vs. unqualified, local vs. DFS) are not rejected up front.
 */
public class TestDuplicateArchiveFileCachedURLMinicluster
    extends ClusterMapReduceTestCase {

  enum EnumCounter { MAP_RECORDS }

  public void testDuplicationsMinicluster() throws Exception {
    // Write a small input file into the test cluster's input directory.
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.write("hello4\n");
    wr.close();

    // Configure a trivial identity map/reduce job over that input.
    JobConf conf = createJobConf();
    conf.setJobName("counters");

    conf.setInputFormat(TextInputFormat.class);

    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    FileInputFormat.setInputPaths(conf, getInputDir());
    FileOutputFormat.setOutputPath(conf, getOutputDir());

    Path inputRoot = getInputDir().makeQualified(getFileSystem());
    Path unqualifiedInputRoot = getInputDir();
    System.out.println("The qualified input dir is " + inputRoot.toString());
    System.out.println("The unqualified input dir is "
                       + unqualifiedInputRoot.toString());

    Path duplicatedPath = new Path(inputRoot, "text.txt");
    URI duplicatedURI = duplicatedPath.toUri();

    Path unqualifiedDuplicatedPath = new Path(unqualifiedInputRoot, "text.txt");
    URI unqualifiedDuplicatedURI = unqualifiedDuplicatedPath.toUri();

    System.out.println("The duplicated Path is " + duplicatedPath);
    System.out.println("The duplicated URI is " + duplicatedURI);
    System.out.println("The unqualified duplicated URI is "
                       + unqualifiedDuplicatedURI);

    // Register the same underlying file twice: once as a cache archive with
    // a qualified URI, once as a cache file with an unqualified URI.  This
    // is a true duplication, so job submission should fail.
    DistributedCache.addCacheArchive(duplicatedURI, conf);
    DistributedCache.addCacheFile(unqualifiedDuplicatedURI, conf);

    try {
      JobClient.runJob(conf);
      fail("The job completed, which is wrong since there's a duplication");
    } catch (InvalidJobConfException e) {
      System.out.println("We expect to see a stack trace here.");
      e.printStackTrace(System.out);
    }
  }

  public void testApparentDuplicationsMinicluster() throws Exception {
    // Write a small input file into the test cluster's input directory.
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text2.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.write("hello4\n");
    wr.close();

    // Configure the same trivial identity map/reduce job.
    JobConf conf = createJobConf();
    conf.setJobName("counters");

    conf.setInputFormat(TextInputFormat.class);

    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    final FileSystem lfs = FileSystem.getLocal(conf);

    FileInputFormat.setInputPaths(conf, getInputDir());
    FileOutputFormat.setOutputPath(conf, getOutputDir());

    Path localInputRoot = getInputDir().makeQualified(lfs);
    Path dfsInputRoot = getInputDir().makeQualified(getFileSystem());
    Path unqualifiedInputRoot = getInputDir();
    System.out.println("The qualified input dir is " + dfsInputRoot.toString());
    System.out.println("The unqualified input dir is "
                       + unqualifiedInputRoot.toString());

    // Only dfsUnqualPath names the file written above; the two qualified
    // paths reference "test2.text", which was never created, so the cache
    // entries built from them point at nonexistent files.
    Path dfsUnqualPath = new Path(unqualifiedInputRoot, "text2.txt");
    Path dfsQualPath = new Path(dfsInputRoot, "test2.text");
    Path localQualPath = new Path(localInputRoot, "test2.text");
    System.out.println("The dfs unqualified Path is " + dfsUnqualPath);
    System.out.println("The dfs qualified Path is " + dfsQualPath);
    System.out.println("The local qualified path is " + localQualPath);

    // These URIs merely look alike: they differ in file system and/or
    // qualification, so submission should not reject them as duplicates.
    DistributedCache.addCacheArchive(localQualPath.toUri(), conf);
    DistributedCache.addCacheFile(dfsUnqualPath.toUri(), conf);
    DistributedCache.addCacheFile(dfsQualPath.toUri(), conf);

    try {
      JobClient.runJob(conf);
      fail("The job completed, which is wrong since there's no local cached file");
    } catch (InvalidJobConfException e) {
      // An apparent duplication must not be treated as a real one.
      System.out.println("We expect to see a stack trace here.");
      e.printStackTrace(System.out);
      fail("This error should not occur.");
    } catch (FileNotFoundException e) {
      System.out.println("got an expected FileNotFoundException"
                         + " because we didn't provide cached files");
    }
  }
}