/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.addons.hbase; import java.io.IOException; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.TaskType; import eu.stratosphere.api.common.io.OutputFormat; import eu.stratosphere.configuration.Configuration; import eu.stratosphere.types.Record; public abstract class GenericTableOutputFormat implements OutputFormat<Record> { private static final long serialVersionUID = 1L; public static final String JT_ID_KEY = "pact.hbase.jtkey"; public static final String JOB_ID_KEY = "pact.job.id"; private RecordWriter<ImmutableBytesWritable, KeyValue> writer; private Configuration config; private org.apache.hadoop.conf.Configuration hadoopConfig; private TaskAttemptContext context; private String jtID; private int jobId; @Override public void configure(Configuration parameters) { this.config = parameters; // get the ID parameters this.jtID = parameters.getString(JT_ID_KEY, null); if (this.jtID == null) { throw new RuntimeException("Missing JT_ID entry in hbase config."); } this.jobId = parameters.getInteger(JOB_ID_KEY, -1); if (this.jobId < 0) { throw new RuntimeException("Missing or invalid job id in input config."); } } @Override public void open(int taskNumber, int numTasks) throws IOException { this.hadoopConfig = getHadoopConfig(this.config); /** * PLASE NOTE: * If you are a Eclipse+Maven Integration user and you have two (or more) warnings here, please * close the pact-hbase project OR set the maven profile to hadoop_yarn * * pact-hbase requires hadoop_yarn, but Eclipse is not able to parse maven profiles properly. Therefore, * it imports the pact-hbase project even if it is not included in the standard profile (hadoop_v1) */ final TaskAttemptID attemptId = new TaskAttemptID(this.jtID, this.jobId, TaskType.MAP, taskNumber - 1, 0); this.context = new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(this.hadoopConfig, attemptId); final HFileOutputFormat outFormat = new HFileOutputFormat(); try { this.writer = outFormat.getRecordWriter(this.context); } catch (InterruptedException iex) { throw new IOException("Opening the writer was interrupted.", iex); } } @Override public void close() throws IOException { final RecordWriter<ImmutableBytesWritable, KeyValue> writer = this.writer; this.writer = null; if (writer != null) { try { writer.close(this.context); } catch (InterruptedException iex) { throw new IOException("Closing was interrupted.", iex); } } } public void collectKeyValue(KeyValue kv) throws IOException { try { this.writer.write(null, kv); } catch (InterruptedException iex) { throw new IOException("Write request was interrupted.", iex); } } public abstract org.apache.hadoop.conf.Configuration getHadoopConfig(Configuration config); }