/*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hive.storage.jdbc;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hive.storage.jdbc.dao.DatabaseAccessor;
import org.apache.hive.storage.jdbc.dao.DatabaseAccessorFactory;
import java.io.IOException;
public class JdbcInputFormat extends HiveInputFormat<LongWritable, MapWritable> {

  private static final Logger LOGGER = LoggerFactory.getLogger(JdbcInputFormat.class);

  /** Cached accessor; lazily created per getSplits() call and discarded afterwards. */
  private DatabaseAccessor dbAccessor = null;


  /**
   * {@inheritDoc}
   */
  @Override
  public RecordReader<LongWritable, MapWritable>
    getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {

    if (!(split instanceof JdbcInputSplit)) {
      throw new RuntimeException("Incompatible split type " + split.getClass().getName() + ".");
    }

    return new JdbcRecordReader(job, (JdbcInputSplit) split);
  }


  /**
   * Partitions the backing table into at most {@code numSplits} contiguous row
   * ranges of near-equal size: each split receives {@code numRecords / numSplits}
   * rows, and the first {@code numRecords % numSplits} splits carry one extra row
   * so that every record is covered exactly once.
   *
   * <p>If the table holds fewer records than the requested split count, the number
   * of splits is reduced to the record count (minimum one split).</p>
   *
   * @param job       job configuration; supplies the database accessor settings
   *                  and the table input path
   * @param numSplits requested number of splits (a hint; may be reduced)
   * @return one {@link JdbcInputSplit} per row range
   * @throws IOException if the record count cannot be obtained, no input path is
   *                     configured, or any other failure occurs while splitting
   */
  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    try {
      LOGGER.debug("Creating {} input splits", numSplits);
      if (dbAccessor == null) {
        dbAccessor = DatabaseAccessorFactory.getAccessor(job);
      }

      int numRecords = dbAccessor.getTotalNumberOfRecords(job);

      // Never create more splits than there are records, and always at least one
      // (even for an empty table, so downstream code sees a well-formed array).
      if (numRecords < numSplits) {
        numSplits = numRecords;
      }
      if (numSplits <= 0) {
        numSplits = 1;
      }

      int numRecordsPerSplit = numRecords / numSplits;
      int numSplitsWithExtraRecords = numRecords % numSplits;

      LOGGER.debug("Num records = {}", numRecords);

      // Fail with a clear message instead of an ArrayIndexOutOfBoundsException
      // when the job has no input path configured.
      Path[] tablePaths = FileInputFormat.getInputPaths(job);
      if (tablePaths.length == 0) {
        throw new IOException("No input path configured for JDBC input format.");
      }

      InputSplit[] splits = new InputSplit[numSplits];
      int offset = 0;
      for (int i = 0; i < numSplits; i++) {
        int numRecordsInThisSplit = numRecordsPerSplit;
        // Distribute the remainder: the first (numRecords % numSplits) splits
        // each take one additional record.
        if (i < numSplitsWithExtraRecords) {
          numRecordsInThisSplit++;
        }
        splits[i] = new JdbcInputSplit(numRecordsInThisSplit, offset, tablePaths[0]);
        offset += numRecordsInThisSplit;
      }

      return splits;
    }
    catch (IOException e) {
      // Already carries a meaningful message/cause; don't double-wrap.
      LOGGER.error("Error while splitting input data.", e);
      throw e;
    }
    catch (Exception e) {
      LOGGER.error("Error while splitting input data.", e);
      throw new IOException(e);
    }
    finally {
      // Drop the cached accessor on BOTH success and failure paths so a stale
      // (or test-injected) instance is never silently reused by a later call.
      // The original reset it only on success, leaking it across failed calls.
      dbAccessor = null;
    }
  }


  /**
   * For testing purposes only
   *
   * @param dbAccessor
   *          DatabaseAccessor object
   */
  public void setDbAccessor(DatabaseAccessor dbAccessor) {
    this.dbAccessor = dbAccessor;
  }

}