package org.openstack.atlas.logs.hadoop.reducers;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.mapreduce.Reducer;
import org.openstack.atlas.logs.hadoop.counters.LogCounters;
import org.openstack.atlas.logs.hadoop.writables.LogMapperOutputKey;
import org.openstack.atlas.logs.hadoop.writables.LogMapperOutputValue;
import org.openstack.atlas.logs.hadoop.writables.LogReducerOutputKey;
import org.openstack.atlas.logs.hadoop.writables.LogReducerOutputValue;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import java.util.zip.CRC32;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.openstack.atlas.util.staticutils.StaticFileUtils;
import org.openstack.atlas.logs.hadoop.util.StaticLogUtils;
import org.openstack.atlas.logs.hadoop.util.LogFileNameBuilder;
/**
 * Reducer that collects every log line belonging to one
 * (accountId, loadbalancerId) key into a single-entry zip file written through
 * the Hadoop {@link FileSystem} API, then emits one summary record per key
 * describing that zip (line count, uncompressed byte count, CRC32 and path).
 */
public class LogReducer extends Reducer<LogMapperOutputKey, LogMapperOutputValue, LogReducerOutputKey, LogReducerOutputValue> {

    /** Buffer size, in bytes, passed to {@code FileSystem.create}. */
    private static final int BUFFER_SIZE = 1024 * 128;

    /** Replication factor passed to {@code FileSystem.create}. */
    private static final short REPL_COUNT = 3;

    /** Block size, in bytes, passed to {@code FileSystem.create}. */
    private static final int HDFS_BLOCK_SIZE = 1024 * 1024 * 64;

    /** Value of the "fileHour" job property; -1 until {@link #setup} has run. */
    private int fileHour = -1;

    /** Value of the "hdfs_user_name" job property; the user the file system is opened as. */
    private String hdfsUserName;

    /** Value of the "hdfs_zip_dir" job property; base directory used for the reported zip path. */
    private String hdfsZipDir;

    /** Raw path of this task's work output directory; zips are written beneath it. */
    private String workDir;

    /** File system handle opened in {@link #setup} and used to create the zip files. */
    private FileSystem fs;

    /**
     * Reads the job configuration and opens the file system used by {@link #reduce}.
     *
     * @param ctx the reducer context supplying the configuration and counters
     * @throws IOException if the file system or work output path cannot be obtained
     * @throws InterruptedException if interrupted while obtaining the file system
     * @throws NumberFormatException if the "fileHour" property is missing or not an integer
     */
    @Override
    public void setup(Context ctx) throws IOException, InterruptedException {
        ctx.getCounter(LogCounters.REDUCER_SETUP_CALLS).increment(1);
        String fileHourString = ctx.getConfiguration().get("fileHour");
        fileHour = Integer.parseInt(fileHourString);
        hdfsUserName = ctx.getConfiguration().get("hdfs_user_name");
        hdfsZipDir = ctx.getConfiguration().get("hdfs_zip_dir");
        URI defaultHdfsUri = FileSystem.getDefaultUri(ctx.getConfiguration());
        fs = FileSystem.get(defaultHdfsUri, ctx.getConfiguration(), hdfsUserName);
        workDir = FileOutputFormat.getWorkOutputPath(ctx).toUri().getRawPath();
    }

    /**
     * Writes all log lines for one key into a zip file under the task work
     * directory and emits a summary record for it.
     *
     * <p>The zip is physically written to {@code <workDir>/zips/<zipFileName>},
     * while the emitted value reports {@code <hdfsZipDir>/<zipFileName>} as the
     * log file location. NOTE(review): presumably a later step relocates the
     * zip from the work directory to {@code hdfsZipDir}; confirm against the
     * job driver.
     *
     * <p>If writing fails, both streams are closed on a best-effort basis before
     * the original exception propagates, so a failing key does not leak the
     * open output stream or the zip stream's compressor.
     *
     * @param rKey  key carrying the account id and load balancer id
     * @param rVals log lines for that key, written in iteration order
     * @param ctx   reducer context used for counters, job info and output
     * @throws IOException if the zip cannot be created or written
     * @throws InterruptedException if interrupted while emitting the output record
     */
    @Override
    public void reduce(LogMapperOutputKey rKey, Iterable<LogMapperOutputValue> rVals, Context ctx) throws IOException, InterruptedException {
        int accountId = rKey.getAccountId();
        int loadbalancerId = rKey.getLoadbalancerId();

        LogReducerOutputKey oKey = new LogReducerOutputKey();
        LogReducerOutputValue oVal = new LogReducerOutputValue();
        oKey.setAccountId(accountId);
        oKey.setLoadbalancerId(loadbalancerId);
        oVal.setAccountId(accountId);
        oVal.setLoadbalancerId(loadbalancerId);

        String zipFileName = LogFileNameBuilder.getZipFileName(loadbalancerId, fileHour);
        String zipContentsName = LogFileNameBuilder.getZipContentsName(loadbalancerId, fileHour);

        // Path reported in the output record.
        String partitionZipPath = StaticFileUtils.joinPath(hdfsZipDir, zipFileName);
        // Path the zip is actually written to, inside this task's work directory.
        String fullZipPath = getTempWorkZipPath(workDir, "zips", zipFileName);

        CRC32 crc = new CRC32(); // Checksum over the uncompressed log bytes
        int nLines = 0;
        long fileSize = 0; // Total uncompressed bytes written into the zip entry

        FSDataOutputStream os = fs.create(new Path(fullZipPath), true, BUFFER_SIZE, REPL_COUNT, HDFS_BLOCK_SIZE);
        ZipOutputStream zos = new ZipOutputStream(os);
        boolean completed = false;
        try {
            String comment = String.format("Produced by HadoopJob %s: JobId=%s", ctx.getJobName(), ctx.getJobID().toString());
            zos.setComment(comment);
            zos.putNextEntry(new ZipEntry(zipContentsName));
            for (LogMapperOutputValue rVal : rVals) {
                byte[] bytes = rVal.getLogLine().getBytes("utf-8");
                fileSize += bytes.length;
                zos.write(bytes);
                crc.update(bytes);
                ctx.getCounter(LogCounters.REDUCER_REDUCTIONS).increment(1);
                ctx.getCounter(LogCounters.LOG_BYTE_COUNT).increment(bytes.length);
                nLines++;
            }
            zos.closeEntry(); // Closes the zip contents
            zos.finish(); // Marks this as the last file in the zip archive
            zos.close(); // Closes the zip file
            os.close(); // Just in case the HDFS file is still open this closes it too.
            completed = true;
        } finally {
            if (!completed) {
                // Failure path: release both streams without masking the
                // exception that is already propagating.
                closeQuietly(zos);
                closeQuietly(os);
            }
        }

        oVal.setnLines(nLines);
        oVal.setLogFile(partitionZipPath);
        oVal.setCrc(crc.getValue());
        oVal.setFileSize(fileSize);
        ctx.getCounter(LogCounters.REDUCER_WRITES).increment(1);
        ctx.write(oKey, oVal);
    }

    /**
     * Closes a stream during error cleanup, suppressing any secondary failure.
     *
     * @param stream the stream to close; may be {@code null}
     */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Intentionally ignored: the caller is already propagating the
            // primary exception and this close is best-effort cleanup.
        }
    }

    /**
     * Builds the path of a zip file from the given directory components using
     * the {@link StaticFileUtils} path helpers.
     *
     * @param workDir     base directory component
     * @param midDir      intermediate directory component
     * @param zipFileName file name component
     * @return the combined path as produced by {@code StaticFileUtils}
     */
    private String getTempWorkZipPath(String workDir, String midDir, String zipFileName) {
        List<String> pathComps = new ArrayList<String>();
        pathComps.add(workDir);
        pathComps.add(midDir);
        pathComps.add(zipFileName);
        return StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(pathComps));
    }
}