/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.howl.mapreduce;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.util.StringUtils;

import org.apache.howl.data.HowlRecord;
import org.apache.howl.data.schema.HowlSchema;

/**
 * The abstract class to be extended by underlying storage drivers to enable
 * data access from Howl through HowlInputFormat.
 */
public abstract class HowlInputStorageDriver {

  /**
   * Initializes the storage driver. The default implementation does nothing;
   * drivers needing driver-specific setup should override this.
   * @param context the job context object
   * @param storageDriverArgs the arguments supplied to the storage driver
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public void initialize(JobContext context, Properties storageDriverArgs) throws IOException {
    // trivial do nothing
  }

  /**
   * Returns the InputFormat to use with this storage driver.
   * @param howlProperties the properties containing parameters required for initialization of the InputFormat
   * @return the InputFormat instance
   */
  public abstract InputFormat<? extends WritableComparable, ? extends Writable> getInputFormat(
      Properties howlProperties);

  /**
   * Converts a key/value pair read from the underlying InputFormat into the HowlRecord
   * format returned by HowlInputFormat. Implementers of a storage driver must provide
   * this conversion from their value type to HowlRecord; for storage drivers built on
   * an underlying InputFormat that already produces HowlRecord tuples, the
   * implementation can simply pass the value through.
   * @param baseKey the key read from the underlying InputFormat
   * @param baseValue the value to convert to HowlRecord
   * @return the converted HowlRecord
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public abstract HowlRecord convertToHowlRecord(WritableComparable baseKey, Writable baseValue)
      throws IOException;
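
  // A minimal sketch (not part of the original file) of what a concrete driver's
  // convertToHowlRecord() might look like. It assumes a hypothetical driver whose
  // underlying InputFormat yields delimited Text values, and it assumes a
  // DefaultHowlRecord constructor taking a List<Object> of column values in
  // output-schema order; both names are illustrative assumptions, not confirmed API.
  //
  //   @Override
  //   public HowlRecord convertToHowlRecord(WritableComparable baseKey, Writable baseValue)
  //       throws IOException {
  //     // Split the raw line into fields; this driver ignores the key.
  //     String[] fields = baseValue.toString().split(",");
  //     List<Object> columns = new ArrayList<Object>(fields.length);
  //     for (String field : fields) {
  //       columns.add(field);
  //     }
  //     // Columns present in the output schema but absent from the data would be
  //     // padded with nulls here, per the setOutputSchema() contract below.
  //     return new DefaultHowlRecord(columns);
  //   }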
  /**
   * Sets the data location for the input. This default implementation works for
   * FileInputFormat-based InputFormats; override it for other InputFormats.
   * @param jobContext the job context object
   * @param location the data location
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public void setInputPath(JobContext jobContext, String location) throws IOException {

    // Ideally we should just call FileInputFormat.setInputPaths() here, but that
    // won't work since FileInputFormat.setInputPaths() needs a Job object rather
    // than the JobContext we are handed here.
    //
    // The location string is a comma-separated list of paths, where a comma
    // inside a glob pattern such as {a,b} must not be treated as a separator.

    int length = location.length();
    int curlyOpen = 0;
    int pathStart = 0;
    boolean globPattern = false;
    List<String> pathStrings = new ArrayList<String>();

    for (int i = 0; i < length; i++) {
      char ch = location.charAt(i);
      switch (ch) {
      case '{':
        curlyOpen++;
        if (!globPattern) {
          globPattern = true;
        }
        break;
      case '}':
        curlyOpen--;
        if (curlyOpen == 0 && globPattern) {
          globPattern = false;
        }
        break;
      case ',':
        if (!globPattern) {
          pathStrings.add(location.substring(pathStart, i));
          pathStart = i + 1;
        }
        break;
      }
    }
    pathStrings.add(location.substring(pathStart, length));

    Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0]));

    // Qualify each path against the default FileSystem, escape it, and join the
    // results back into the comma-separated form expected by FileInputFormat.
    Configuration conf = jobContext.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path path = paths[0].makeQualified(fs);
    StringBuilder str = new StringBuilder(StringUtils.escapeString(path.toString()));
    for (int i = 1; i < paths.length; i++) {
      str.append(StringUtils.COMMA_STR);
      path = paths[i].makeQualified(fs);
      str.append(StringUtils.escapeString(path.toString()));
    }

    conf.set("mapred.input.dir", str.toString());
  }

  /**
   * Sets the schema of the data as originally published in Howl. The storage driver
   * might validate that this matches the schema it has for the data (as Zebra does),
   * or it might use this schema to create HowlRecords matching the output schema.
   * @param jobContext the job context object
   * @param howlSchema the schema published in Howl for this data
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public abstract void setOriginalSchema(JobContext jobContext, HowlSchema howlSchema) throws IOException;

  /**
   * Sets the consolidated schema for the HowlRecord data returned by the storage driver.
   * All tuples returned by the RecordReader should have this schema. Nulls should be
   * inserted for columns not present in the data.
   * @param jobContext the job context object
   * @param howlSchema the schema to use as the consolidated schema
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public abstract void setOutputSchema(JobContext jobContext, HowlSchema howlSchema) throws IOException;

  /**
   * Sets the partition key values for the current partition. The storage driver is passed
   * these so that it can add the partition key values to the output HowlRecord when they
   * are not present on disk.
   * @param jobContext the job context object
   * @param partitionValues a map with the partition key name as key and the partition value as value
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public abstract void setPartitionValues(JobContext jobContext, Map<String, String> partitionValues) throws IOException;

}
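
// A rough sketch (an assumption based on the javadoc above, not taken from the
// Howl sources) of the order in which HowlInputFormat might drive a storage
// driver instance when reading a partition:
//
//   HowlInputStorageDriver driver = ...;              // instantiated per partition
//   driver.initialize(jobContext, storageDriverArgs);
//   driver.setInputPath(jobContext, partitionLocation);
//   driver.setOriginalSchema(jobContext, schemaAsPublished);
//   driver.setOutputSchema(jobContext, consolidatedSchema);
//   driver.setPartitionValues(jobContext, partitionKeyValues);
//   InputFormat<?, ?> inputFormat = driver.getInputFormat(howlProperties);
//   // Each key/value pair read from inputFormat's RecordReader is then passed
//   // through driver.convertToHowlRecord(key, value) before being handed to
//   // the caller.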