/*
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hive.storage.jdbc;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hive.storage.jdbc.dao.DatabaseAccessor;
import org.apache.hive.storage.jdbc.dao.DatabaseAccessorFactory;

import java.io.IOException;

public class JdbcInputFormat extends HiveInputFormat<LongWritable, MapWritable> {

  private static final Logger LOGGER = LoggerFactory.getLogger(JdbcInputFormat.class);
  private DatabaseAccessor dbAccessor = null;


  /**
   * {@inheritDoc}
   */
  @Override
  public RecordReader<LongWritable, MapWritable>
      getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {

    if (!(split instanceof JdbcInputSplit)) {
      throw new RuntimeException("Incompatible split type " + split.getClass().getName() + ".");
    }

    return new JdbcRecordReader(job, (JdbcInputSplit) split);
  }


  /**
   * {@inheritDoc}
   */
  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    try {
      LOGGER.debug("Creating {} input splits", numSplits);
      if (dbAccessor == null) {
        dbAccessor = DatabaseAccessorFactory.getAccessor(job);
      }

      int numRecords = dbAccessor.getTotalNumberOfRecords(job);

      // Never create more splits than there are records, but always create at least one.
      if (numRecords < numSplits) {
        numSplits = numRecords;
      }

      if (numSplits <= 0) {
        numSplits = 1;
      }

      int numRecordsPerSplit = numRecords / numSplits;
      int numSplitsWithExtraRecords = numRecords % numSplits;

      LOGGER.debug("Num records = {}", numRecords);
      InputSplit[] splits = new InputSplit[numSplits];
      Path[] tablePaths = FileInputFormat.getInputPaths(job);

      // Distribute the remainder: the first (numRecords % numSplits) splits each
      // take one extra record, so every record is assigned to exactly one split.
      int offset = 0;
      for (int i = 0; i < numSplits; i++) {
        int numRecordsInThisSplit = numRecordsPerSplit;
        if (i < numSplitsWithExtraRecords) {
          numRecordsInThisSplit++;
        }

        splits[i] = new JdbcInputSplit(numRecordsInThisSplit, offset, tablePaths[0]);
        offset += numRecordsInThisSplit;
      }

      dbAccessor = null;
      return splits;
    }
    catch (Exception e) {
      LOGGER.error("Error while splitting input data.", e);
      throw new IOException(e);
    }
  }


  /**
   * For testing purposes only
   *
   * @param dbAccessor
   *          DatabaseAccessor object
   */
  public void setDbAccessor(DatabaseAccessor dbAccessor) {
    this.dbAccessor = dbAccessor;
  }

}