/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.fileformat.base64;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Properties;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.util.Progressable;
/**
* FileOutputFormat for base64 encoded text files.
*
* Each line is a base64-encoded record. The key is ignored. The value is a Text
* or BytesWritable whose bytes are base64-encoded before being written.
*
* This class accepts a configurable parameter:
* "base64.text.output.format.signature"
*
* The UTF-8 encoded signature will be prepended to each BytesWritable before we
* do base64 encoding.
*/
public class Base64TextOutputFormat<K extends WritableComparable, V extends Writable>
    extends HiveIgnoreKeyTextOutputFormat<K, V> {

  /**
   * RecordWriter that prepends the configured signature to each record,
   * base64-encodes the result and forwards it to the wrapped writer as a
   * BytesWritable.
   */
  public static class Base64RecordWriter implements RecordWriter,
      JobConfigurable {

    /** Underlying writer that receives the base64-encoded records. */
    RecordWriter writer;

    /** Reusable holder for the encoded bytes handed to {@link #writer}. */
    BytesWritable bytesWritable;

    /**
     * Signature bytes prepended to every record before encoding. Defaults to
     * empty so that write() works even if configure() has not been called.
     */
    private byte[] signature = new byte[0];

    private final Base64 base64 = Base64TextInputFormat.createBase64();

    /**
     * @param writer
     *          the writer that the encoded records are delegated to
     */
    public Base64RecordWriter(RecordWriter writer) {
      this.writer = writer;
      bytesWritable = new BytesWritable();
    }

    /**
     * Prepends the signature to the record's bytes, base64-encodes the result
     * and writes it to the wrapped writer.
     *
     * @param w
     *          the record to write; must be a Text or a BytesWritable
     * @throws IOException
     *           if w is neither a Text nor a BytesWritable, or if the wrapped
     *           writer fails
     */
    @Override
    public void write(Writable w) throws IOException {
      // Get input data. Only the first getLength() bytes of the array returned
      // by getBytes() are used.
      final byte[] input;
      final int inputLength;
      if (w instanceof Text) {
        Text text = (Text) w;
        input = text.getBytes();
        inputLength = text.getLength();
      } else if (w instanceof BytesWritable) {
        BytesWritable bytes = (BytesWritable) w;
        input = bytes.getBytes();
        inputLength = bytes.getLength();
      } else {
        // An explicit check instead of an assert: assertions are normally
        // disabled at runtime, which would otherwise surface as a bare
        // ClassCastException or NullPointerException.
        throw new IOException(
            "Base64RecordWriter can only write Text or BytesWritable, but got "
                + (w == null ? "null" : w.getClass().getName()));
      }

      // Add signature
      byte[] wrapped = new byte[signature.length + inputLength];
      System.arraycopy(signature, 0, wrapped, 0, signature.length);
      System.arraycopy(input, 0, wrapped, signature.length, inputLength);

      // Encode
      byte[] output = base64.encode(wrapped);
      bytesWritable.set(output, 0, output.length);
      writer.write(bytesWritable);
    }

    /**
     * Closes the wrapped writer.
     *
     * @param abort
     *          passed through unchanged to the wrapped writer
     * @throws IOException
     *           if the wrapped writer fails to close
     */
    @Override
    public void close(boolean abort) throws IOException {
      writer.close(abort);
    }

    /**
     * Reads the "base64.text.output.format.signature" property from the job
     * and stores its UTF-8 bytes as the signature. If the property is not set,
     * the signature is empty.
     *
     * @param job
     *          the job configuration to read the signature from
     * @throws IllegalStateException
     *           if the UTF-8 charset is not available
     */
    @Override
    public void configure(JobConf job) {
      String signatureString = job.get("base64.text.output.format.signature");
      if (signatureString == null) {
        signature = new byte[0];
        return;
      }
      try {
        signature = signatureString.getBytes("UTF-8");
      } catch (UnsupportedEncodingException e) {
        // Fail loudly rather than continuing with an unset signature, which
        // would only show up later as an unrelated failure in write().
        throw new IllegalStateException(
            "UTF-8 encoding is not supported; cannot encode signature", e);
      }
    }
  }

  /**
   * Creates a Base64RecordWriter wrapping the record writer of the parent
   * output format, and configures it from the given job.
   *
   * The parent writer is always requested with BytesWritable as the value
   * class, regardless of valueClass, because Base64RecordWriter hands it
   * BytesWritable instances.
   */
  @Override
  public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
      Class<? extends Writable> valueClass, boolean isCompressed,
      Properties tableProperties, Progressable progress) throws IOException {
    RecordWriter delegate = super.getHiveRecordWriter(jc, finalOutPath,
        BytesWritable.class, isCompressed, tableProperties, progress);
    Base64RecordWriter writer = new Base64RecordWriter(delegate);
    writer.configure(jc);
    return writer;
  }
}