package org.openstack.atlas.scheduler.execution;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.openstack.atlas.config.CloudFilesZipInfo;
import org.openstack.atlas.config.HadoopLogsConfigs;
import org.openstack.atlas.exception.ExecutionException;
import org.openstack.atlas.exception.SchedulingException;
import org.openstack.atlas.logs.hadoop.sequencefiles.SequenceFileReaderException;
import org.openstack.atlas.logs.hadoop.writables.LogReducerOutputValue;
import org.openstack.atlas.scheduler.ArchiveLoadBalancerLogsJob;
import org.openstack.atlas.scheduler.JobScheduler;
import org.openstack.atlas.service.domain.entities.JobName;
import org.openstack.atlas.service.domain.entities.JobState;
import org.openstack.atlas.tools.QuartzSchedulerConfigs;
import org.openstack.atlas.util.common.VerboseLogger;
import org.openstack.atlas.util.debug.Debug;
import org.openstack.atlas.util.staticutils.StaticFileUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class SplitLoadBalancerLogsJobExecution extends LoggableJobExecution implements QuartzExecutable {

    private static final VerboseLogger vlog = new VerboseLogger(SplitLoadBalancerLogsJobExecution.class, VerboseLogger.LogLevel.INFO);
    private static final Log LOG = LogFactory.getLog(SplitLoadBalancerLogsJobExecution.class);

    @Override
    public void execute(JobScheduler scheduler, QuartzSchedulerConfigs schedulerConfigs) throws ExecutionException {
        // Build the reducer output directory for this hour.
        String fileHour = schedulerConfigs.getInputString();
        JobState state = createJob(JobName.FILES_SPLIT, fileHour);
        List<LogReducerOutputValue> zipFileInfoList;
        List<String> reducerOutdirComponents = new ArrayList<String>();
        reducerOutdirComponents.add(HadoopLogsConfigs.getMapreduceOutputPrefix());
        reducerOutdirComponents.add("lb_logs_split");
        reducerOutdirComponents.add(fileHour);
        String hdfsReducerOutputDirectory = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(reducerOutdirComponents));
        String localCacheDir = HadoopLogsConfigs.getCacheDir();
        try {
            zipFileInfoList = hdfsUtils.getZipFileInfoList(hdfsReducerOutputDirectory);
        } catch (SequenceFileReaderException ex) {
            String excMsg = Debug.getExtendedStackTrace(ex);
            LOG.error(String.format("Could not list sequence files for directory %s: %s", hdfsReducerOutputDirectory, excMsg));
            failJob(state);
            throw new ExecutionException(ex);
        }
        for (LogReducerOutputValue zipFileInfo : zipFileInfoList) {
            int accountId = zipFileInfo.getAccountId();
            int loadbalancerId = zipFileInfo.getLoadbalancerId();
            long crc = zipFileInfo.getCrc();
            long uncompressedSize = zipFileInfo.getFileSize();
            int nLines = zipFileInfo.getnLines();
            String hdfsZipFilePath = zipFileInfo.getLogFile();
            String zipFileNameNoDir = StaticFileUtils.stripDirectoryFromFileName(hdfsZipFilePath);

            // Build the local cache file name.
            List<String> localZipPathComponents = new ArrayList<String>();
            localZipPathComponents.add(localCacheDir);
            if (accountId < 0 || loadbalancerId < 0) {
                localZipPathComponents.add("unknown");
            } else {
                localZipPathComponents.add(fileHour);
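                // Known entry: cached under <cacheDir>/<fileHour>/<accountId>/<zipName>.
                // Entries whose account or load balancer id could not be resolved
                // land under <cacheDir>/unknown/<zipName> instead (see the branch above).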
                localZipPathComponents.add(Integer.toString(accountId));
            }
            localZipPathComponents.add(zipFileNameNoDir);
            List<String> mergedCacheZipPathComponents = StaticFileUtils.joinPath(localZipPathComponents);
            String fullCacheZipPath = StaticFileUtils.splitPathToString(mergedCacheZipPathComponents);

            // Download the zip file from HDFS into the local cache.
            FSDataInputStream zipFileInputStream;
            FSDataOutputStream zipfileCacheOutputStream;
            vlog.printf("Downloading hdfs %s -> %s", hdfsZipFilePath, fullCacheZipPath);
            try {
                zipFileInputStream = hdfsUtils.openHdfsInputFile(hdfsZipFilePath, false);
            } catch (IOException ex) {
                String msg = String.format("Error opening hdfs zip file %s for reading. Skipping this entry", hdfsZipFilePath);
                String excMsg = Debug.getExtendedStackTrace(ex);
                LOG.error(String.format("%s:%s", msg, excMsg), ex);
                continue;
            }
            try {
                zipfileCacheOutputStream = hdfsUtils.openHdfsOutputFile(fullCacheZipPath, true, true);
            } catch (IOException ex) {
                String msg = String.format("Error opening cache zip file %s for writing. Skipping this entry", fullCacheZipPath);
                String excMsg = Debug.getExtendedStackTrace(ex);
                LOG.error(String.format("%s:%s", msg, excMsg), ex);
                StaticFileUtils.close(zipFileInputStream);
                continue;
            }
            try {
                StaticFileUtils.copyStreams(zipFileInputStream, zipfileCacheOutputStream, null, hdfsUtils.getBufferSize());
            } catch (IOException ex) {
                String msg = String.format("Error copying data from %s -> %s", hdfsZipFilePath, fullCacheZipPath);
                String excMsg = Debug.getExtendedStackTrace(ex);
                LOG.error(String.format("%s:%s", msg, excMsg), ex);
                StaticFileUtils.close(zipFileInputStream);
                StaticFileUtils.close(zipfileCacheOutputStream);
                continue;
            }
            StaticFileUtils.close(zipFileInputStream);
            StaticFileUtils.close(zipfileCacheOutputStream);

            if (accountId < 0 || loadbalancerId < 0) {
                // This is the unknown zip file; do not upload it to Cloud Files.
                vlog.printf("Found file %s; not scheduling for upload to Cloud Files. This is expected", fullCacheZipPath);
                continue;
            }

            // Build the CloudFilesZipInfo entry and put it on the schedulerConfigs
            // list for the ArchiveLoadBalancerLogsJob.
            CloudFilesZipInfo cloudFileZipEntry = new CloudFilesZipInfo();
            cloudFileZipEntry.setAccountId(accountId);
            cloudFileZipEntry.setLoadbalancerId(loadbalancerId);
            cloudFileZipEntry.setCrc(crc);
            cloudFileZipEntry.setUncompressedSize(uncompressedSize);
            cloudFileZipEntry.setnLines(nLines);
            cloudFileZipEntry.setHdfsFile(hdfsZipFilePath);
            cloudFileZipEntry.setCacheFile(fullCacheZipPath);
            cloudFileZipEntry.setLocalCacheDir(localCacheDir);
            schedulerConfigs.getCloudFilesZipInfoList().add(cloudFileZipEntry);
            vlog.printf("Added %s", cloudFileZipEntry.toString());
        }
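        // All cache files are now staged locally; hand the batch off to the
        // archive job, which uploads the per-loadbalancer zips to Cloud Files.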
        try {
            scheduleArchiveLoadBalancerLogsJob(scheduler, schedulerConfigs);
        } catch (SchedulingException ex) {
            String msg = "Error attempting to schedule Archive job. This job shall be marked as a failure";
            String excMsg = Debug.getExtendedStackTrace(ex);
            LOG.error(String.format("%s:%s", msg, excMsg), ex);
            failJob(state);
            throw new ExecutionException(msg, ex);
        }
        finishJob(state);
    }

    private String getFileName(String lbId, String rawlogsFileDate) {
        StringBuilder sb = new StringBuilder();
        sb.append("access log ");
        sb.append(lbId).append(" ");
        sb.append(rawlogsFileDate);
        return getFormattedName(sb.toString());
    }

    private String getFormattedName(String name) {
        return name.replaceAll(" ", "_");
    }

    private String getAccount(String key) {
        return key.split(":")[0];
    }

    private String getLoadBalancerId(String key) {
        return key.split(":")[1];
    }

    private void scheduleArchiveLoadBalancerLogsJob(JobScheduler scheduler, QuartzSchedulerConfigs schedulerConfigs) throws SchedulingException {
        scheduler.scheduleJob(ArchiveLoadBalancerLogsJob.class, schedulerConfigs);
    }
}