/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kiji.mapreduce.kvstore.lib;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Formatter;
import java.util.List;
import java.util.Locale;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.mapreduce.kvstore.framework.KeyValueStoreConfiguration;
import org.kiji.mapreduce.util.Lists;

/**
 * Helper class that manages filenames and distributed cache functionality
 * for KeyValueStore implementations that work with files or collections
 * of files.
 *
 * <p>Your KeyValueStore can use a FileStoreHelper to manage all aspects of
 * configuration and deserialization of file names in a MapReduce job.</p>
 *
 * <p>Create a FileStoreHelper.Builder object using the builder() method;
 * your own KeyValueStore's Builder should use composition to delegate
 * responsibility for Configuration, file, and distributed cache control
 * to this one. You should use this object's storeToConf() and initFromConf()
 * methods within your own KeyValueStore's storeToConf() and initFromConf()
 * methods.</p>
 *
 * <p>When reading files, use getExpandedInputPaths() to get a complete list
 * of files to read. If the user has enabled the distributed cache, you will
 * receive a set of local files to read. Otherwise, the initial HDFS paths
 * will be used.</p>
 */
@ApiAudience.Public
@ApiStability.Evolving
public final class FileStoreHelper implements Configurable {
  private static final Logger LOG = LoggerFactory.getLogger(FileStoreHelper.class.getName());

  /**
   * Configuration key for the KeyValueStore definition that sets whether input files are
   * stored in the DistributedCache. If empty, then DCache is disabled. If non-empty,
   * then DCache target file names are expected to be prefixed by the string in this
   * configuration key.
   */
  public static final String CONF_DCACHE_PREFIX_KEY = "dcache.prefix";

  /**
   * Boolean flag used in XML Configuration files only to state that the files specified
   * are HDFS files, but should be loaded into the DistributedCache as part of the job.
   * This flag is not recorded as part of addToConfiguration().
   */
  public static final String CONF_USE_DCACHE_KEY = "dcache"; // This flag sets mUseDCache.
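  // Illustrative usage sketch (hypothetical caller code, not part of this class): a
  // file-backed KeyValueStore's Builder would compose a FileStoreHelper.Builder and
  // delegate to it, roughly like:
  //
  //   FileStoreHelper helper = FileStoreHelper.builder()
  //       .withConfiguration(conf)
  //       .withInputPath(new Path("hdfs:///data/lookup-table/"))
  //       .withDistributedCache(true)
  //       .build();
  //
  // The store's own storeToConf()/initFromConf() methods would then call
  // helper.storeToConf(kvConf) and helper.initFromConf(kvConf) respectively.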
  /**
   * By default, it is assumed that the user wants to load this KeyValueStore
   * through the DistributedCache.
   */
  public static final boolean USE_DCACHE_DEFAULT = true;

  /**
   * Suffix for the KeyValueStore definition that is set to the list of
   * input paths. This may be multiple comma-delimited paths.
   */
  public static final String CONF_PATHS_KEY = "paths";

  /** The Hadoop configuration. */
  private Configuration mConf;

  /** True if we should distribute the input files via the DistributedCache. */
  private boolean mUseDCache;

  /** Files stored in the distributed cache have this as their prefix. */
  private String mDCachePrefix;

  /** List of input paths to files to include in the store. */
  private List<Path> mInputPaths;

  /**
   * A class that builds configured FileStoreHelper instances.
   *
   * <p>This object is not exposed to users directly. It is used via composition
   * in other (FooFile)KeyValueStore.Builder instances. If you add a method here,
   * you should reflect this via composition in the other file-backed store builder
   * APIs.</p>
   */
  @ApiAudience.Public
  @ApiStability.Evolving
  public static final class Builder {
    private Configuration mConf;
    private List<Path> mInputPaths;
    private boolean mUseDCache;

    /** Private constructor. */
    private Builder() {
      mInputPaths = new ArrayList<Path>();
      mUseDCache = USE_DCACHE_DEFAULT;
      mConf = new Configuration();
    }

    /**
     * Sets the Hadoop configuration instance to use.
     *
     * @param conf The configuration.
     * @return This builder instance.
     */
    public Builder withConfiguration(Configuration conf) {
      mConf = conf;
      return this;
    }

    /**
     * Adds a path to the list of files to load.
     *
     * @param path The input file/directory path.
     * @return This builder instance.
     */
    public Builder withInputPath(Path path) {
      mInputPaths.add(path);
      return this;
    }

    /**
     * Replaces the current list of files to load with the set of files
     * specified as an argument.
     *
     * @param paths The input file/directory paths.
     * @return This builder instance.
     */
    public Builder withInputPaths(List<Path> paths) {
      mInputPaths.clear();
      mInputPaths.addAll(paths);
      return this;
    }

    /**
     * Sets a flag indicating the use of the DistributedCache to distribute
     * input files.
     *
     * @param enabled true if the DistributedCache should be used, false otherwise.
     * @return This builder instance.
     */
    public Builder withDistributedCache(boolean enabled) {
      mUseDCache = enabled;
      return this;
    }

    /**
     * Builds and returns a new FileStoreHelper instance.
     *
     * @return a new, configured FileStoreHelper.
     */
    public FileStoreHelper build() {
      return new FileStoreHelper(this);
    }
  }

  /** @return a new FileStoreHelper.Builder instance. */
  public static Builder builder() {
    return new Builder();
  }

  /**
   * Constructor invoked by Builder.build().
   *
   * @param builder the builder to configure from.
   */
  private FileStoreHelper(Builder builder) {
    mConf = builder.mConf;
    mInputPaths = builder.mInputPaths;
    mUseDCache = builder.mUseDCache;
    mDCachePrefix = "";
  }

  /** {@inheritDoc} */
  @Override
  public void setConf(Configuration conf) {
    mConf = conf;
  }

  /** {@inheritDoc} */
  @Override
  public Configuration getConf() {
    return mConf;
  }

  /**
   * An aggregator for use with Lists.foldLeft() that expands a list of paths that
   * may include directories to include only files; directories are expanded to multiple
   * file entries that are the files in this directory.
   */
  @ApiAudience.Private
  private final class DirExpandAggregator extends Lists.Aggregator<Path, List<Path>> {
    // Note: This class could be factored out to assist other places in Kiji where we need
    // to expand a list of files, dirs and globs into just a list of files, if necessary.
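    // Illustrative example of the expansion (hypothetical paths): an input list of
    //   { hdfs:///data/part-*, hdfs:///lookup/dir/ }
    // would fold down to the individual files matching the glob, plus every file inside
    // hdfs:///lookup/dir/, with MapReduce "bonus" outputs (e.g. _SUCCESS, _logs) filtered
    // out by OutputFilesFilter.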
    /** Last exception encountered during file stat lookups for the input paths. */
    private IOException mLastExn;

    /**
     * For each input path, modify the 'outputs' list to include the path
     * itself (if it is a file), or all the files in the directory (if it
     * is a directory). Also expands globs with FileSystem.globStatus().
     *
     * @param inputPath the input path to expand.
     * @param outputs list of output paths being accumulated.
     * @return the 'outputs' list.
     */
    @Override
    public List<Path> eval(Path inputPath, List<Path> outputs) {
      try {
        FileSystem fs = inputPath.getFileSystem(getConf());
        FileStatus[] matches = fs.globStatus(inputPath);
        if (null == matches) {
          mLastExn = new IOException("No such input path: " + inputPath);
        } else if (matches.length == 0) {
          mLastExn = new IOException("Input pattern \"" + inputPath + "\" matches 0 files.");
        } else {
          for (FileStatus match : matches) {
            if (match.isDir()) {
              // Match all the files in this dir, except the "bonus" files generated by
              // MapReduce.
              for (FileStatus subFile : fs.listStatus(match.getPath(),
                  new org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter())) {
                outputs.add(subFile.getPath());
                LOG.debug("Added file: " + subFile.getPath());
              }
            } else {
              // Just a file; add directly.
              outputs.add(match.getPath());
              LOG.debug("Added file: " + match.getPath());
            }
          }
        }
      } catch (IOException ioe) {
        mLastExn = ioe;
      }
      return outputs;
    }

    /**
     * Returns the last exception encountered during FS operation while expanding
     * directories.
     *
     * @return the last exception encountered, or null if none was encountered.
     */
    private IOException getLastException() {
      return mLastExn;
    }
  }

  /**
   * Returns the set of raw input path(s) that were specified for read. This may
   * include wildcards or directories. You should use getExpandedInputPaths()
   * to determine the set of files to actually read.
   *
   * @return a copy of the set of raw input paths specified for read.
   */
  public List<Path> getInputPaths() {
    return Collections.unmodifiableList(new ArrayList<Path>(mInputPaths));
  }

  /**
   * Returns the set of input path(s) that should be actually opened for read.
   * This set of paths may be on local disk (e.g., if the DistributedCache was used
   * to transmit the files), or in HDFS. This will not contain directory names nor
   * globs; it is expanded to the literal set of files to open.
   *
   * <p>Each Path object returned is fully qualified, and represents an absolute
   * path that should be opened by its associated FileSystem object.</p>
   *
   * @return an unmodifiable list of input paths, backed by the underlying collection
   *     within this KeyValueStore.
   * @throws IOException if there is an error communicating with the underlying
   *     FileSystem while expanding paths and globs.
   */
  public List<Path> getExpandedInputPaths() throws IOException {
    // If we've read a bunch of files from the DistributedCache's local dir,
    // no further unglobbing is necessary. Just return the values.
    if (!mDCachePrefix.isEmpty()) {
      return Collections.unmodifiableList(mInputPaths);
    }

    // Otherwise, these are "raw" user inputs. Unglob and expand them.
    DirExpandAggregator expander = new DirExpandAggregator();
    List<Path> actualInputPaths = Lists.distinct(Lists.foldLeft(
        new ArrayList<Path>(), mInputPaths, expander));

    IOException savedException = expander.getLastException();
    if (null != savedException) {
      // Rethrow the saved exception from this context.
      throw savedException;
    }

    return Collections.unmodifiableList(actualInputPaths);
  }
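  // Illustrative usage sketch (hypothetical store code, not part of this class): a store
  // would typically open and parse each expanded path when initializing its lookup data:
  //
  //   for (Path path : helper.getExpandedInputPaths()) {
  //     FSDataInputStream in = path.getFileSystem(helper.getConf()).open(path);
  //     ... // parse the file's records into the store's lookup structure, then close it.
  //   }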
  /**
   * If the cache URI prefix is already set, return this value. Otherwise create
   * a new unique cache URI prefix. This does not memoize its return value;
   * if mDCachePrefix is empty/null, multiple calls to this method will return
   * unique values.
   *
   * @return the DistributedCache URI prefix for files used by this store.
   */
  private String getCachePrefix() {
    if (mDCachePrefix.isEmpty()) {
      // We need to put the files for this KVStore into the distributed cache. They
      // should be given symlink names that do not conflict with the names associated
      // with other KeyValueStores. Pick a symlink prefix that is unique to this store.
      long prefixId = System.currentTimeMillis() ^ (((long) this.hashCode()) << 8);
      StringBuilder sb = new StringBuilder();
      Formatter formatter = new Formatter(sb, Locale.US);
      formatter.format("%08x", prefixId);
      String newPrefix = sb.toString();
      LOG.debug("This KeyValueStore uses Distributed cache files in namespace: " + newPrefix);
      return newPrefix;
    } else {
      return mDCachePrefix; // Prefix is already set.
    }
  }

  /**
   * Serializes file- and DistributedCache-specific properties associated
   * with the KeyValueStore that owns this FileStoreHelper to the specified configuration.
   *
   * @param conf the configuration to populate.
   * @throws IOException if there's an error serializing the state.
   */
  public void storeToConf(KeyValueStoreConfiguration conf) throws IOException {
    if (mInputPaths.isEmpty()) {
      throw new IOException("Required attribute not set: input path");
    }

    if (mUseDCache && !"local".equals(conf.get("mapreduce.jobtracker.address", ""))) {
      // If we're scheduled to use the distributed cache, and we're not in the LocalJobRunner,
      // add these files to the DistributedCache.

      // TODO(aaron): This does not handle any sort of MapperTester, etc.
      // We need a separate flag that tells this to ignore mUseDCache if we're in a test
      // environment, and just use the original input file specs.
      final String dCachePrefix = getCachePrefix();

      // Associate this randomly chosen prefix id with this KVStore implementation.
      conf.set(CONF_DCACHE_PREFIX_KEY, dCachePrefix);

      // Add the input paths to the DistributedCache and translate path names.
      int uniqueId = 0;
      // TODO: getExpandedInputPaths() should use the Configuration from conf, not our getConf().
      for (Path inputPath : getExpandedInputPaths()) {
        FileSystem fs = inputPath.getFileSystem(conf.getDelegate());
        Path absolutePath = inputPath.makeQualified(fs);
        String uriStr = absolutePath.toString() + "#" + dCachePrefix + "-" + uniqueId;
        LOG.debug("Adding to DistributedCache: " + uriStr);
        uniqueId++;
        try {
          DistributedCache.addCacheFile(new URI(uriStr), conf.getDelegate());
        } catch (URISyntaxException use) {
          throw new IOException("Could not construct URI for file: " + uriStr, use);
        }
      }

      // Ensure that symlinks are created for cached files.
      DistributedCache.createSymlink(conf.getDelegate());

      // Now save the cache prefix into the local state. We couldn't set this earlier,
      // because we wanted getExpandedInputPaths() to actually unglob things. That
      // function will behave differently if mDCachePrefix is already initialized.
      mDCachePrefix = dCachePrefix;
    } else {
      // Just put the regular HDFS paths in the Configuration.
      conf.setStrings(CONF_PATHS_KEY,
          Lists.toArray(Lists.map(mInputPaths, new Lists.ToStringFn<Path>()), String.class));
    }
  }
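  // Illustrative example (hypothetical values): with a generated prefix of "3a9f00c1", an
  // input file hdfs://ns/data/part-00000 is added to the cache via the URI
  //   hdfs://ns/data/part-00000#3a9f00c1-0
  // The URI fragment becomes the symlink name in the task's working directory, so
  // initFromConf() can later recover this store's files by globbing "3a9f00c1-*".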
  /**
   * Deserializes file- and DistributedCache-specific properties associated
   * with the KeyValueStore that owns this FileStoreHelper from the specified configuration.
   *
   * <p>This retains a reference to the KeyValueStoreConfiguration's backing Configuration
   * instance to use when opening files specified by this configuration.</p>
   *
   * @param conf the configuration to read.
   * @throws IOException if there's an error deserializing the configuration.
   */
  public void initFromConf(KeyValueStoreConfiguration conf) throws IOException {
    setConf(conf.getDelegate());

    mDCachePrefix = conf.get(CONF_DCACHE_PREFIX_KEY, "");
    LOG.debug("Input dCachePrefix: " + mDCachePrefix);
    if (mDCachePrefix.isEmpty()) {
      // Read an ordinary list of files from the Configuration.
      // These may include directories and globs to expand.
      mInputPaths = Lists.map(Arrays.asList(conf.getStrings(CONF_PATHS_KEY, new String[0])),
          new Lists.Func<String, Path>() {
            @Override
            public Path eval(String in) {
              LOG.debug("File input: " + in);
              return new Path(in);
            }
          });
    } else {
      // Use the dcache prefix to get the names of the files for this store.
      // The symlinks are already present in the working dir of the task.
      final FileSystem localFs = FileSystem.getLocal(conf.getDelegate());
      FileStatus[] statuses = localFs.globStatus(new Path(mDCachePrefix + "-*"));
      if (null == statuses || statuses.length == 0) {
        throw new IOException("No files associated with the job in the DistributedCache");
      }

      // Get the (absolute) input file paths to use.
      mInputPaths = Lists.map(Arrays.asList(statuses), new Lists.Func<FileStatus, Path>() {
        @Override
        public Path eval(FileStatus status) {
          Path out = status.getPath().makeQualified(localFs);
          LOG.debug("Loaded from DistributedCache: " + out);
          return out;
        }
      });
    }

    // If we are initializing a client-side instance to later serialize, the user may have
    // specified HDFS files, but also an intent to put the files in the DistributedCache. Set
    // this flag now, which will generate mDCachePrefix when addToConfiguration() is called
    // later.
    mUseDCache = conf.getBoolean(CONF_USE_DCACHE_KEY, USE_DCACHE_DEFAULT);
  }
}
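// Illustrative round trip (hypothetical sketch of how the pieces fit together):
//
//   Client side:  helper.storeToConf(kvConf)
//     -> expands globs/dirs, adds each file to the DistributedCache under a unique
//        symlink prefix, and records that prefix in CONF_DCACHE_PREFIX_KEY.
//
//   Task side:    helper.initFromConf(kvConf)
//     -> reads CONF_DCACHE_PREFIX_KEY; if set, globs "<prefix>-*" on the local
//        filesystem to find the cached symlinks, otherwise reads the raw HDFS
//        paths from CONF_PATHS_KEY. getExpandedInputPaths() then returns the
//        literal files to open in either case.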