/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.impl;
import java.io.IOException;
import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.kiji.annotations.ApiAudience;
import org.kiji.mapreduce.KijiTableContext;
import org.kiji.mapreduce.framework.HFileKeyValue;
import org.kiji.mapreduce.framework.KijiConfKeys;
import org.kiji.schema.EntityId;
import org.kiji.schema.EntityIdFactory;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiCellEncoder;
import org.kiji.schema.KijiColumnName;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiDataRequestBuilder;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiTableReader;
import org.kiji.schema.KijiURI;
import org.kiji.schema.NoSuchColumnException;
import org.kiji.schema.filter.StripValueColumnFilter;
import org.kiji.schema.hbase.HBaseColumnName;
import org.kiji.schema.impl.DefaultKijiCellEncoderFactory;
import org.kiji.schema.layout.CellSpec;
import org.kiji.schema.layout.HBaseColumnNameTranslator;
import org.kiji.schema.layout.KijiTableLayout.LocalityGroupLayout;
import org.kiji.schema.layout.KijiTableLayout.LocalityGroupLayout.FamilyLayout;
import org.kiji.schema.layout.KijiTableLayout.LocalityGroupLayout.FamilyLayout.ColumnLayout;
import org.kiji.schema.util.ResourceUtils;
/**
 * Kiji context that emits puts for the configured output table to HFiles.
 *
 * <p>This is the recommended way for writing to an HBase table.</p>
 * <ul>
 *   <li> This context provides some level of atomicity and isolation
 *       (no partial writes to the table while the M/R job runs, or if the M/R job fails).
 *   <li> Region servers are not hammered by a sustained stream of puts while the M/R job runs.
 *   <li> After the M/R job completed successfully, the output is committed to the HBase table
 *       using the HFileLoader.
 * </ul>
 */
@ApiAudience.Private
public final class HFileWriterContext
    extends InternalKijiContext
    implements KijiTableContext {

  /** NullWritable shortcut. */
  private static final NullWritable NULL = NullWritable.get();

  /** Kiji instance opened from the configured output table URI. */
  private final Kiji mKiji;

  /** Output table, opened from the configured output table URI. */
  private final KijiTable mTable;

  /** Reader on the output table, used to enumerate qualifiers when deleting map-type families. */
  private final KijiTableReader mReader;

  /** Translates Kiji column names into HBase column names, built from the table layout. */
  private final HBaseColumnNameTranslator mColumnNameTranslator;

  /** Entity ID factory built from the layout of the output table. */
  private final EntityIdFactory mEntityIdFactory;

  /**
   * Constructs a new context that can write cells to an HFile that can be loaded into an HBase
   * table.
   *
   * <p>The output table is identified by the {@link KijiConfKeys#KIJI_OUTPUT_TABLE_URI}
   * entry of the job configuration. If any step of the initialization fails, the resources
   * opened so far are released before the failure is propagated.</p>
   *
   * @param hadoopContext is the Hadoop {@link TaskInputOutputContext} that will be used to perform
   *    the writes.
   * @throws IOException on I/O error.
   */
  public HFileWriterContext(TaskInputOutputContext<?, ?, ?, ?> hadoopContext)
      throws IOException {
    super(hadoopContext);
    final Configuration conf = new Configuration(hadoopContext.getConfiguration());
    final KijiURI outputURI =
        KijiURI.newBuilder(conf.get(KijiConfKeys.KIJI_OUTPUT_TABLE_URI)).build();

    // Acquire resources into locals first, so that a failure half-way through does not
    // leak the Kiji instance, the table or the reader opened before the failure.
    Kiji kiji = null;
    KijiTable table = null;
    KijiTableReader reader = null;
    final HBaseColumnNameTranslator columnNameTranslator;
    final EntityIdFactory entityIdFactory;
    boolean initialized = false;
    try {
      kiji = Kiji.Factory.open(outputURI, conf);
      table = kiji.openTable(outputURI.getTable());
      reader = table.openTableReader();
      columnNameTranslator = HBaseColumnNameTranslator.from(table.getLayout());
      entityIdFactory = EntityIdFactory.getFactory(table.getLayout());
      initialized = true;
    } finally {
      if (!initialized) {
        // Release in reverse order of acquisition; mirrors close().
        if (null != reader) {
          ResourceUtils.closeOrLog(reader);
        }
        if (null != table) {
          ResourceUtils.releaseOrLog(table);
        }
        if (null != kiji) {
          ResourceUtils.releaseOrLog(kiji);
        }
      }
    }

    mKiji = kiji;
    mTable = table;
    mReader = reader;
    mColumnNameTranslator = columnNameTranslator;
    mEntityIdFactory = entityIdFactory;
  }

  /**
   * Creates a new context that can write cells to an HFile that can be loaded into an HBase table.
   *
   * @param hadoopContext is the Hadoop {@link TaskInputOutputContext} that will be used to perform
   *    the writes.
   * @return a new context that can write cells to an HFile that can be loaded into an HBase table.
   * @throws IOException if there is an I/O error.
   */
  public static HFileWriterContext create(TaskInputOutputContext<?, ?, ?, ?> hadoopContext)
      throws IOException {
    return new HFileWriterContext(hadoopContext);
  }

  /**
   * Write the given HFileKeyValue to the MR context.
   *
   * <p>The key value is written as the mapper key with a null writable value.</p>
   *
   * <p>If the write is interrupted, the interrupt status of the current thread is restored
   * before the interruption is reported as an {@link IOException}.</p>
   *
   * @param mrKey The HFileKeyValue to write out.
   * @throws IOException on I/O error or interruption.
   */
  private void write(final HFileKeyValue mrKey) throws IOException {
    try {
      getMapReduceContext().write(mrKey, NULL);
    } catch (InterruptedException ie) {
      // Do not swallow the interruption: callers only see an IOException, so the
      // interrupt flag is the only way for the task runtime to notice it.
      Thread.currentThread().interrupt();
      throw new IOException(ie);
    }
  }

  /**
   * {@inheritDoc}
   *
   * <p>The cell is emitted with the timestamp {@link HConstants#LATEST_TIMESTAMP}.</p>
   */
  @Override
  public <T> void put(EntityId entityId, String family, String qualifier, T value)
      throws IOException {
    put(entityId, family, qualifier, HConstants.LATEST_TIMESTAMP, value);
  }

  /**
   * {@inheritDoc}
   *
   * <p>The value is encoded with an encoder created from the cell specification of the
   * target column, then emitted as an HFile key-value.</p>
   */
  @SuppressWarnings("unchecked")
  @Override
  public <T> void put(EntityId entityId, String family, String qualifier, long timestamp, T value)
      throws IOException {
    final KijiColumnName kijiColumn = new KijiColumnName(family, qualifier);
    final HBaseColumnName hbaseColumn = mColumnNameTranslator.toHBaseColumnName(kijiColumn);

    // The cell spec needs the schema table of the Kiji instance to build the encoder.
    final CellSpec cellSpec = mTable.getLayout().getCellSpec(kijiColumn)
        .setSchemaTable(mKiji.getSchemaTable());
    final KijiCellEncoder encoder = DefaultKijiCellEncoderFactory.get().create(cellSpec);

    final HFileKeyValue mrKey = new HFileKeyValue(
        entityId.getHBaseRowKey(),
        hbaseColumn.getFamily(),
        hbaseColumn.getQualifier(),
        timestamp,
        encoder.encode(value));
    write(mrKey);
  }

  /**
   * Deletes an entire row.
   *
   * <p>Note HBase does not represent row deletions with individual (cross-family)
   * tombstones. Instead, this method issues a family delete for each locality group
   * individually.</p>
   *
   * @param entityId Entity ID of the row to delete.
   * @throws IOException on I/O error.
   */
  @Override
  public void deleteRow(EntityId entityId) throws IOException {
    deleteRow(entityId, HConstants.LATEST_TIMESTAMP);
  }

  /**
   * Delete all cells from a row with a timestamp less than or equal to the specified timestamp.
   *
   * <p>Note HBase does not represent row deletions with individual (cross-family)
   * tombstones. Instead, this method issues a family delete for each locality group
   * individually.</p>
   *
   * @param entityId Entity ID of the row to delete data from.
   * @param upToTimestamp Delete cells with a timestamp older or equal to this parameter.
   * @throws IOException on I/O error.
   */
  @Override
  public void deleteRow(EntityId entityId, long upToTimestamp) throws IOException {
    final byte[] hbaseRow = entityId.getHBaseRowKey();
    for (LocalityGroupLayout localityGroupLayout : mTable.getLayout().getLocalityGroups()) {
      // One family-delete marker per locality group, keyed by the locality group ID bytes.
      final HFileKeyValue mrKey = new HFileKeyValue(
          hbaseRow,
          localityGroupLayout.getId().toByteArray(),
          HConstants.EMPTY_BYTE_ARRAY,
          upToTimestamp,
          HFileKeyValue.Type.DeleteFamily,
          HConstants.EMPTY_BYTE_ARRAY);
      write(mrKey);
    }
  }

  /**
   * Deletes all versions of all cells in a family.
   *
   * <p>When the deleted kiji family is the only one in the enclosing locality group, a
   * single (hbase) family delete is issued.</p>
   *
   * <p>However this won't work when there are other families within the locality group:
   * the delete would affect them too. In this case, this method instead issues a
   * sequence of column deletes in a way depending on the family type.</p>
   *
   * <p>Group-type families are handled by issuing a column delete for each of their
   * columns as declared by their fixed layout.</p>
   *
   * <p>Map-type families are handled by enumerating the existing cells of the family and
   * issuing column deletes. This strategy is susceptible to races with writers: a new
   * put occurring after MR job execution but before bulk load will not be deleted.</p>
   *
   * <p>Again, notice this limitation does not apply if the map-type family is the only
   * one in its locality group. As stated above, in that case a single family delete is
   * issued and no leaks are possible.</p>
   *
   * @param entityId Entity ID of the row to delete data from.
   * @param family Column family.
   * @throws IOException on I/O error.
   */
  @Override
  public void deleteFamily(EntityId entityId, String family) throws IOException {
    deleteFamily(entityId, family, HConstants.LATEST_TIMESTAMP);
  }

  /**
   * Deletes all cells from a family with a timestamp less than or equal to the specified timestamp.
   *
   * <p>When the deleted kiji family is the only one in the enclosing locality group, a
   * single (hbase) family delete is issued.</p>
   *
   * <p>However this won't work when there are other families within the locality group:
   * the delete would affect them too. In this case, this method instead issues a
   * sequence of column deletes in a way depending on the family type.</p>
   *
   * <p>Group-type families are handled by issuing a column delete for each of their
   * columns as declared by their fixed layout.</p>
   *
   * <p>Map-type families are handled by enumerating the existing cells of the family and
   * issuing column deletes. This strategy is susceptible to races with writers: a new
   * put occurring after MR job execution but before bulk load will not be deleted.</p>
   *
   * <p>Again, notice this limitation does not apply if the map-type family is the only
   * one in its locality group. As stated above, in that case a single family delete is
   * issued and no leaks are possible.</p>
   *
   * @param entityId Entity ID of the row to delete data from.
   * @param family Column family.
   * @param upToTimestamp Delete cells with a timestamp older or equal to this parameter.
   * @throws IOException on I/O error; in particular a {@link NoSuchColumnException} is thrown
   *     when the family does not exist in the table layout.
   */
  @Override
  public void deleteFamily(EntityId entityId, String family, long upToTimestamp)
      throws IOException {
    final FamilyLayout familyLayout = mTable.getLayout().getFamilyMap().get(family);
    if (null == familyLayout) {
      throw new NoSuchColumnException(String.format("Family '%s' not found.", family));
    }

    // Same strategy as HBaseKijiBufferedWriter#deleteFamily.
    if (familyLayout.getLocalityGroup().getFamilyMap().size() > 1) {
      // There are multiple families within the locality group, so a plain HBase family
      // delete would wipe the sibling Kiji families too: delete column by column instead.
      if (familyLayout.isGroupType()) {
        deleteGroupFamily(entityId, familyLayout, upToTimestamp);
      } else if (familyLayout.isMapType()) {
        deleteMapFamily(entityId, familyLayout, upToTimestamp);
      } else {
        throw new RuntimeException("Internal error: family is neither map-type nor group-type.");
      }
      return;
    }

    // The only data in this HBase family is the one Kiji family, so we can delete everything.
    final KijiColumnName kijiColumn = new KijiColumnName(family, null);
    final HBaseColumnName hbaseColumn = mColumnNameTranslator.toHBaseColumnName(kijiColumn);
    final HFileKeyValue mrKey = new HFileKeyValue(
        entityId.getHBaseRowKey(),
        hbaseColumn.getFamily(),
        HConstants.EMPTY_BYTE_ARRAY,
        upToTimestamp,
        HFileKeyValue.Type.DeleteFamily,
        HConstants.EMPTY_BYTE_ARRAY);
    write(mrKey);
  }

  /**
   * Deletes all cells from a group-type family with a timestamp less than or equal to a
   * specified timestamp.
   *
   * @param entityId The entity (row) to delete from.
   * @param familyLayout The family layout.
   * @param upToTimestamp A timestamp.
   * @throws IOException If there is an IO error.
   */
  private void deleteGroupFamily(
      EntityId entityId,
      FamilyLayout familyLayout,
      long upToTimestamp)
      throws IOException {
    final String familyName = Preconditions.checkNotNull(familyLayout.getName());
    // Delete each column in the group according to the layout.
    for (ColumnLayout columnLayout : familyLayout.getColumnMap().values()) {
      deleteColumn(entityId, familyName, columnLayout.getName(), upToTimestamp);
    }
  }

  /**
   * Deletes all cells from a map-type family with a timestamp less than or equal to a
   * specified timestamp.
   *
   * <p>The qualifiers to delete are discovered by reading the row from the output table,
   * so qualifiers written after this read are not deleted.</p>
   *
   * @param entityId The entity (row) to delete from.
   * @param familyLayout A family layout.
   * @param upToTimestamp A timestamp.
   * @throws IOException If there is an IO error.
   */
  private void deleteMapFamily(EntityId entityId, FamilyLayout familyLayout, long upToTimestamp)
      throws IOException {
    // Since multiple Kiji column families are mapped into a single HBase column family,
    // we have to do this delete in two steps:
    //
    // 1. Send a get() to retrieve the names of all qualifiers that belong to the Kiji
    //    column family and hold data at or before the requested timestamp.
    // 2. Emit a column delete for each of the qualifiers found in the previous step.
    final String familyName = Preconditions.checkNotNull(familyLayout.getName());

    // Step 1.
    // The upper bound of a data request time range is exclusive, whereas this delete must
    // cover cells written exactly at upToTimestamp: widen the bound by one, unless doing so
    // would overflow (which is the case for HConstants.LATEST_TIMESTAMP).
    final long exclusiveMaxTimestamp =
        (upToTimestamp == Long.MAX_VALUE) ? Long.MAX_VALUE : upToTimestamp + 1;
    final KijiDataRequestBuilder dataRequestBuilder = KijiDataRequest.builder();
    dataRequestBuilder
        .withTimeRange(0, exclusiveMaxTimestamp)
        .newColumnsDef()
        // Only the qualifiers are needed, not the cell content.
        .withFilter(new StripValueColumnFilter())
        .addFamily(familyName);
    final KijiDataRequest dataRequest = dataRequestBuilder.build();
    final KijiRowData rowData = mReader.get(entityId, dataRequest);

    // Step 2.
    for (String qualifier : rowData.getQualifiers(familyName)) {
      deleteColumn(entityId, familyName, qualifier, upToTimestamp);
    }
  }

  /**
   * {@inheritDoc}
   *
   * <p>The delete is emitted with the timestamp {@link HConstants#LATEST_TIMESTAMP}.</p>
   */
  @Override
  public void deleteColumn(EntityId entityId, String family, String qualifier) throws IOException {
    deleteColumn(entityId, family, qualifier, HConstants.LATEST_TIMESTAMP);
  }

  /**
   * {@inheritDoc}
   *
   * <p>Emits a single column-delete marker for the HBase column backing the Kiji column.</p>
   */
  @Override
  public void deleteColumn(EntityId entityId, String family, String qualifier, long upToTimestamp)
      throws IOException {
    final KijiColumnName kijiColumn = new KijiColumnName(family, qualifier);
    final HBaseColumnName hbaseColumn = mColumnNameTranslator.toHBaseColumnName(kijiColumn);
    final HFileKeyValue mrKey = new HFileKeyValue(
        entityId.getHBaseRowKey(),
        hbaseColumn.getFamily(),
        hbaseColumn.getQualifier(),
        upToTimestamp,
        HFileKeyValue.Type.DeleteColumn,
        HConstants.EMPTY_BYTE_ARRAY);
    write(mrKey);
  }

  /**
   * {@inheritDoc}
   *
   * <p>The delete is emitted with the timestamp {@link HConstants#LATEST_TIMESTAMP}.</p>
   */
  @Override
  public void deleteCell(EntityId entityId, String family, String qualifier) throws IOException {
    deleteCell(entityId, family, qualifier, HConstants.LATEST_TIMESTAMP);
  }

  /**
   * {@inheritDoc}
   *
   * <p>Emits a single cell-delete marker for the HBase column backing the Kiji column.</p>
   */
  @Override
  public void deleteCell(EntityId entityId, String family, String qualifier, long timestamp)
      throws IOException {
    final KijiColumnName kijiColumn = new KijiColumnName(family, qualifier);
    final HBaseColumnName hbaseColumn = mColumnNameTranslator.toHBaseColumnName(kijiColumn);
    final HFileKeyValue mrKey = new HFileKeyValue(
        entityId.getHBaseRowKey(),
        hbaseColumn.getFamily(),
        hbaseColumn.getQualifier(),
        timestamp,
        HFileKeyValue.Type.DeleteCell,
        HConstants.EMPTY_BYTE_ARRAY);
    write(mrKey);
  }

  /** {@inheritDoc} */
  @Override
  public EntityIdFactory getEntityIdFactory() {
    return mEntityIdFactory;
  }

  /** {@inheritDoc} */
  @Override
  public EntityId getEntityId(Object... components) {
    return mEntityIdFactory.getEntityId(components);
  }

  /**
   * Closes the table reader, releases the table and the Kiji instance, then closes the
   * parent context.
   *
   * @throws IOException on I/O error while closing the parent context.
   */
  @Override
  public void close() throws IOException {
    // Release in reverse order of acquisition.
    ResourceUtils.closeOrLog(mReader);
    ResourceUtils.releaseOrLog(mTable);
    ResourceUtils.releaseOrLog(mKiji);
    super.close();
  }
}