/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.output.framework;
import static org.apache.hadoop.hbase.util.Bytes.toBytes;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.mapreduce.framework.HFileKeyValue;
import org.kiji.mapreduce.framework.KijiConfKeys;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiURI;
import org.kiji.schema.layout.KijiTableLayout;
import org.kiji.schema.layout.KijiTableLayout.LocalityGroupLayout;
import org.kiji.schema.layout.impl.ColumnId;
import org.kiji.schema.platform.SchemaPlatformBridge;
import org.kiji.schema.util.ResourceUtils;
/**
* Hadoop output format that writes HFiles that can be loaded directly into HBase region servers.
*
* <p> Allows writing entries to any HBase family of the target table.
* Each reduce task generates a set of files per HBase family.
*
* <p> HFile entries must be properly ordered. This is achieved through the shuffle/sort phase
* of the M/R job, combined with an identity reducer.
*
* <p> Entries should be partitioned into chunks that fit within an existing region of the target
* HTable.
*
* <p> The generated HFiles can be loaded into the target HTable with the
* {@link org.kiji.mapreduce.HFileLoader}.
*/
@ApiAudience.Framework
@ApiStability.Stable
public final class KijiHFileOutputFormat
    extends FileOutputFormat<HFileKeyValue, NullWritable> {
  private static final Logger LOG = LoggerFactory.getLogger(KijiHFileOutputFormat.class);

  /** Extension appended to the per-task output directory name. */
  public static final String OUTPUT_EXTENSION = ".hfile";

  /** Configuration key for the best-effort maximum HFile size, in bytes. */
  public static final String CONF_HREGION_MAX_FILESIZE = "hbase.hregion.max.filesize";
  public static final long DEFAULT_HREGION_MAX_FILESIZE = 256L * 1024L * 1024L;

  /** Configuration key for the HFile block size, in bytes. */
  public static final String CONF_HFILE_BLOCKSIZE = "hbase.mapreduce.hfileoutputformat.blocksize";
  public static final int DEFAULT_HFILE_BLOCKSIZE = 64 * 1024;

  /** Configuration key for the timestamp substituted for HConstants.LATEST_TIMESTAMP. */
  public static final String CONF_LATEST_TIMESTAMP = "kiji.hfile.latest.timestamp";

  /** {@inheritDoc} */
  @Override
  public RecordWriter<HFileKeyValue, NullWritable> getRecordWriter(TaskAttemptContext context)
      throws IOException {
    return new TableRecordWriter(this, context);
  }

  /**
   * Record writer processing all puts targeted at an entire table.
   *
   * Dispatches records to the appropriate family record writer.
   */
  private static class TableRecordWriter
      extends RecordWriter<HFileKeyValue, NullWritable> {

    // ---------------------------------------------------------------------------------------------

    /**
     * Writes KeyValue records to HFiles.
     *
     * <p>The MapReduce framework will call this writer once for each record that has been
     * emitted from a reducer. It is assumed that these records will be written in sorted
     * order based on the key (row, family, qualifier, timestamp) of the KeyValue.</p>
     *
     * <p>We will attempt to write these sorted KeyValue objects sequentially into HFiles no
     * larger than <code>maxFileSizeBytes</code>. However, we will not split a row across
     * multiple HFiles, so exceeding <code>maxFileSizeBytes</code> might be required.</p>
     *
     * <p>Any KeyValue objects that did not have a timestamp specified by the client will be
     * assigned a write time of <code>currentTimestamp</code>.</p>
     */
    private class LocalityGroupRecordWriter
        extends RecordWriter<HFileKeyValue, NullWritable> {
      /** Layout of the locality group this writer writes to. */
      private final LocalityGroupLayout mLGLayout;

      /** HBase family name. */
      private final String mFamily;

      /** Directory path where to write HFiles. */
      private final Path mFamilyDir;

      /** Maximum HFile size, in bytes (best-effort). */
      private final long mMaxFileSizeBytes;

      /** HFile block size, in bytes. */
      private final int mBlockSizeBytes;

      /** The compression algorithm to use for the HFiles. */
      private final String mCompressionType;

      /** The HFile writer we currently have open to write KeyValues to. */
      private HFile.Writer mWriter;

      /** The current size of the HFile <code>mWriter</code> is writing to. */
      private long mCurrentHFileSize = 0;

      /** Tracks the [min, max] timestamp range of the KeyValues in the current HFile. */
      private TimeRangeTracker mTimeRangeTracker = null;

      /** Key of the last written row. */
      private byte[] mCurrentRow = null;

      /** Counter for HFile file names. */
      private int mHFileCounter = 0;

      /**
       * Constructor.
       *
       * @param context Task attempt context.
       * @param lgLayout Layout of the locality group.
       * @throws IOException on I/O error.
       */
      public LocalityGroupRecordWriter(TaskAttemptContext context, LocalityGroupLayout lgLayout)
          throws IOException {
        mLGLayout = Preconditions.checkNotNull(lgLayout);
        mFamily = lgLayout.getId().toString();

        // These parameters might be specific to each locality group:
        mMaxFileSizeBytes = mConf.getLong(CONF_HREGION_MAX_FILESIZE, DEFAULT_HREGION_MAX_FILESIZE);
        mBlockSizeBytes = mConf.getInt(CONF_HFILE_BLOCKSIZE, DEFAULT_HFILE_BLOCKSIZE);

        mFamilyDir = new Path(mOutputDir, mFamily);
        if (!mFileSystem.exists(mFamilyDir)) {
          if (!mFileSystem.mkdirs(mFamilyDir)) {
            throw new IOException(String.format(
                "Unable to create output directory: %s", mFamilyDir));
          }
        }

        // Locale.ROOT keeps the algorithm name locale-independent for HBase's lookup.
        mCompressionType =
            mLGLayout.getDesc().getCompressionType().toString().toLowerCase(Locale.ROOT);

        mWriter = openNewWriter();
      }

      /** {@inheritDoc} */
      @Override
      public void write(HFileKeyValue entry, NullWritable unused)
          throws IOException {
        final KeyValue kv = entry.getKeyValue();
        kv.updateLatestStamp(mLatestTimestampBytes);

        // getRow() copies the row bytes on every call; fetch it once per record.
        final byte[] row = kv.getRow();
        final long recordLength = kv.getLength();
        if (mCurrentHFileSize + recordLength >= mMaxFileSizeBytes) {
          // We can't fit this record in the current HFile without exceeding the max file size.
          if (Arrays.equals(mCurrentRow, row)) {
            // But we're still adding data for a single row, so we can't close this HFile yet.
            LOG.debug("Reached max HFile size, but waiting to finish this row before closing.");
          } else {
            // Close it and open a new one.
            closeWriter(mWriter);
            mWriter = openNewWriter();
          }
        }

        mWriter.append(kv);
        mTimeRangeTracker.includeTimestamp(kv);
        mCurrentHFileSize += recordLength;

        // Remember the row so we know when we are transitioning.
        mCurrentRow = row;
      }

      /** {@inheritDoc} */
      @Override
      public void close(TaskAttemptContext context) throws IOException {
        closeWriter(mWriter);
      }

      /**
       * Opens a new HFile writer for the current column family.
       *
       * <p>Also resets the timerange tracker and the current file size, which are both
       * scoped to a single HFile.
       *
       * @return A new HFile writer.
       * @throws IOException If there is an error.
       */
      private HFile.Writer openNewWriter() throws IOException {
        // Create a unique file in the family directory (created by the constructor):
        final Path hfilePath = new Path(mFamilyDir, String.format("%05d", mHFileCounter));
        mHFileCounter += 1;

        // Create the writer.
        LOG.info("Opening HFile.Writer for family {} at {}", mFamily, hfilePath);
        final HFile.Writer hfileWriter =
            SchemaPlatformBridge.get().createHFileWriter(mConf, mFileSystem, hfilePath,
                mBlockSizeBytes, mCompressionType);

        mTimeRangeTracker = new TimeRangeTracker();

        // Reset the current file size.
        mCurrentHFileSize = 0L;
        return hfileWriter;
      }

      /**
       * Closes an HFile writer, appending the bulk-load metadata HBase expects.
       *
       * @param hfileWriter The writer to close.
       * @throws IOException If there is an error.
       */
      private void closeWriter(HFile.Writer hfileWriter) throws IOException {
        LOG.info("Closing HFile {}", hfileWriter.getPath());

        // Write file metadata:
        hfileWriter.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, toBytes(mLatestTimestamp));

        final String taskAttemptID = mContext.getTaskAttemptID().toString();
        hfileWriter.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, toBytes(taskAttemptID));
        hfileWriter.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, toBytes(true));

        // Write out a timerange.
        // See: HBASE-8055 and KIJIMR-204.
        hfileWriter.appendFileInfo(StoreFile.TIMERANGE_KEY,
            WritableUtils.toByteArray(mTimeRangeTracker));
        hfileWriter.close();
      }
    }

    // ---------------------------------------------------------------------------------------------

    /** Context of the task. */
    private final TaskAttemptContext mContext;

    /** Task configuration. */
    private final Configuration mConf;

    /** Map from locality group column ID to locality group record writer. */
    private final Map<ColumnId, LocalityGroupRecordWriter> mLGWriter = Maps.newHashMap();

    /** Actual timestamp to substitute HConstants.LATEST_TIMESTAMP with. */
    private final long mLatestTimestamp;

    /** Byte-array form of {@link #mLatestTimestamp}, precomputed once per task. */
    private final byte[] mLatestTimestampBytes;

    /** URI of the output table. */
    private final KijiURI mTableURI;

    /** Layout of the output table. */
    private final KijiTableLayout mLayout;

    /** Filesystem the HFiles are written to. */
    private final FileSystem mFileSystem;

    /** Task-specific output directory under which per-family directories are created. */
    private final Path mOutputDir;

    /**
     * Initializes a new table-wide record writer.
     *
     * @param oformat KijiHFileOutputFormat this writer is built from.
     * @param context Context of the task.
     * @throws IOException on I/O error.
     */
    public TableRecordWriter(KijiHFileOutputFormat oformat, TaskAttemptContext context)
        throws IOException {
      mContext = Preconditions.checkNotNull(context);
      mConf = mContext.getConfiguration();
      mLatestTimestamp = mConf.getLong(CONF_LATEST_TIMESTAMP, System.currentTimeMillis());
      mLatestTimestampBytes = toBytes(mLatestTimestamp);

      mOutputDir = oformat.getDefaultWorkFile(mContext, OUTPUT_EXTENSION);
      mFileSystem = mOutputDir.getFileSystem(mConf);

      mTableURI = KijiURI.newBuilder(mConf.get(KijiConfKeys.KIJI_OUTPUT_TABLE_URI)).build();

      // Release the Kiji/table handles even if opening the table or reading the layout fails:
      final Kiji kiji = Kiji.Factory.open(mTableURI, mConf);
      try {
        final KijiTable table = kiji.openTable(mTableURI.getTable());
        try {
          mLayout = table.getLayout();
        } finally {
          ResourceUtils.releaseOrLog(table);
        }
      } finally {
        ResourceUtils.releaseOrLog(kiji);
      }
    }

    /** {@inheritDoc} */
    @Override
    public void write(HFileKeyValue entry, NullWritable unused)
        throws IOException {
      final ColumnId lgId = ColumnId.fromByteArray(entry.getFamily());
      getWriter(lgId).write(entry, unused);
    }

    /**
     * Gets the record writer for a given locality group, creating it lazily on first use.
     *
     * @param lgId Locality group ID.
     * @return the record writer for the specified locality group.
     * @throws IOException on I/O error.
     */
    private synchronized LocalityGroupRecordWriter getWriter(ColumnId lgId)
        throws IOException {
      final LocalityGroupRecordWriter writer = mLGWriter.get(lgId);
      if (writer != null) {
        return writer;
      }
      final String lgName = mLayout.getLocalityGroupIdNameMap().get(lgId);
      // Varargs overload formats the message only when the check actually fails:
      Preconditions.checkArgument(lgName != null,
          "Locality group ID '%s' does not exist in table '%s'.", lgId, mTableURI);
      final LocalityGroupLayout lgroup = mLayout.getLocalityGroupMap().get(lgName);
      final LocalityGroupRecordWriter newWriter = new LocalityGroupRecordWriter(mContext, lgroup);
      mLGWriter.put(lgId, newWriter);
      return newWriter;
    }

    /** {@inheritDoc} */
    @Override
    public void close(TaskAttemptContext context)
        throws IOException {
      // Attempt to close every locality-group writer even if one of them fails,
      // then rethrow the first failure.
      IOException firstFailure = null;
      for (LocalityGroupRecordWriter writer : mLGWriter.values()) {
        try {
          writer.close(context);
        } catch (IOException ioe) {
          if (firstFailure == null) {
            firstFailure = ioe;
          } else {
            LOG.warn("Suppressed error closing locality group HFile writer", ioe);
          }
        }
      }
      if (firstFailure != null) {
        throw firstFailure;
      }
    }
  }
}