/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * <p> * http://www.apache.org/licenses/LICENSE-2.0 * <p> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ambari.view.hive2.resources.jobs; import akka.actor.ActorRef; import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.collect.FluentIterable; import org.apache.ambari.view.hive2.actor.message.job.SaveDagInformation; import org.apache.ambari.view.hive2.persistence.utils.FilteringStrategy; import org.apache.ambari.view.hive2.persistence.utils.Indexed; import org.apache.ambari.view.hive2.persistence.utils.ItemNotFound; import org.apache.ambari.view.hive2.resources.IResourceManager; import org.apache.ambari.view.hive2.resources.files.FileService; import org.apache.ambari.view.hive2.resources.jobs.atsJobs.HiveQueryId; import org.apache.ambari.view.hive2.resources.jobs.atsJobs.IATSParser; import org.apache.ambari.view.hive2.resources.jobs.atsJobs.TezDagId; import org.apache.ambari.view.hive2.resources.jobs.viewJobs.Job; import org.apache.ambari.view.hive2.resources.jobs.viewJobs.JobImpl; import org.apache.ambari.view.hive2.resources.jobs.viewJobs.JobInfo; import org.apache.commons.beanutils.PropertyUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; import java.lang.reflect.InvocationTargetException; import java.util.LinkedList; import java.util.List; /** * View Jobs and ATS Jobs aggregator. * There are 4 options: * 1) ATS ExecuteJob without operationId * *Meaning*: executed outside of HS2 * - ExecuteJob info only from ATS * 2) ATS ExecuteJob with operationId * a) Hive View ExecuteJob with same operationId is not present * *Meaning*: executed with HS2 * - ExecuteJob info only from ATS * b) Hive View ExecuteJob with operationId is present (need to merge) * *Meaning*: executed with HS2 through Hive View * - ExecuteJob info merged from ATS and from Hive View DataStorage * 3) ExecuteJob present only in Hive View, ATS does not have it * *Meaning*: executed through Hive View, but Hadoop ExecuteJob was not created * it can happen if user executes query without aggregation, like just "select * from TABLE" * - ExecuteJob info only from Hive View */ public class Aggregator { protected final static Logger LOG = LoggerFactory.getLogger(Aggregator.class); private final IATSParser ats; private IResourceManager<Job> viewJobResourceManager; private final ActorRef operationController; public Aggregator(IResourceManager<Job> jobResourceManager, IATSParser ats, ActorRef operationController) { this.viewJobResourceManager = jobResourceManager; this.ats = ats; this.operationController = operationController; } /** * gets all the jobs for 'username' where the job submission time is between 'startTime' (inclusive) * and endTime (exclusive). * Fetches the jobs from ATS and DB merges and update DB. returns the combined list. * * @param username: username for which jobs have to be fetched. * @param startTime: inclusive, time in secs from epoch * @param endTime: exclusive, time in secs from epoch * @return: list of jobs */ public List<Job> readAllForUserByTime(String username, Long startTime, Long endTime) { List<Job> jobs = readDBJobs(username, startTime, endTime); return jobs; } /** * fetches the new state of jobs from ATS and from DB. Does merging/updating as required. * @param jobInfos: infos of job to get * @return: list of updated Job */ public List<Job> readJobsByIds(final List<JobInfo> jobInfos) { List<String> jobIds = FluentIterable.from(jobInfos).filter(new Predicate<JobInfo>() { @Override public boolean apply(@Nullable JobInfo input) { return !Strings.isNullOrEmpty(input.getJobId()); } }).transform(new Function<JobInfo, String>() { @Nullable @Override public String apply(@Nullable JobInfo input) { return input.getJobId(); } }).toList(); List<Job> dbJobs = readJobsFromDbByJobId(jobIds); LOG.debug("readJobsByIds: dbJobs : {}", dbJobs); return dbJobs; } /** * gets the jobs from the Database given their id * @param jobsIds: list of ids of jobs * @return: list of all the jobs found */ private List<Job> readJobsFromDbByJobId(final List<String> jobsIds) { LOG.info("Reading jobs from db with ids : {} ", jobsIds); List<Job> jobs = viewJobResourceManager.readAll(new FilteringStrategy() { @Override public boolean isConform(Indexed item) { JobImpl job = (JobImpl) item; return jobsIds.contains(job.getId()); } @Override public String whereStatement() { String query = " id in ( " + Joiner.on(",").join(jobsIds) + " ) "; LOG.debug("where clause for jobsIds : {}", query); return query; } }); LOG.debug("jobs returned from DB : {}" , jobs); return jobs; } /** * fetches the job from DB given its id * @param jobId: the id of the job to fetch * @return: the job * @throws ItemNotFound: if job with given id is not found in db */ private Job getJobFromDbByJobId(final String jobId) throws ItemNotFound { if (null == jobId) return null; List<Job> jobs = viewJobResourceManager.readAll(new FilteringStrategy() { @Override public boolean isConform(Indexed item) { return item.getId().equals(jobId); } @Override public String whereStatement() { return "id = '" + jobId + "'"; // even IDs are string } }); if (null != jobs && !jobs.isEmpty()) return jobs.get(0); throw new ItemNotFound(String.format("Job with id %s not found.", jobId)); } /** * returns all the jobs from ATS and DB (for this instance) for the given user. * @param username * @return */ public List<Job> readAll(String username) { return readAllForUserByTime(username, null, null); } /** * reads all the jobs from DB for username and excludes the jobs mentioned in queries list * @param username : username for which the jobs are to be read. * @param startTime: can be null, if not then the window start time for job * @param endTime: can be null, if not then the window end time for job * @return : the jobs in db that are not in the queries */ private List<Job> readDBJobs(final String username, final Long startTime, final Long endTime) { List<Job> jobs = viewJobResourceManager.readAll( new FilteringStrategy() { @Override public boolean isConform(Indexed item) { JobImpl job = (JobImpl) item; return job.getOwner().compareTo(username) == 0 && ( (null == startTime || job.getDateSubmitted() >= startTime ) && ( null == endTime || job.getDateSubmitted() < endTime ) ); } @Override public String whereStatement() { StringBuilder sb = new StringBuilder( "owner = '" ).append( username ).append( "'" ); if( null != startTime || null != endTime ) { sb.append(" AND ( " ); if( null != startTime ) { sb.append( " dateSubmitted >= " ).append( startTime ); } if( null != endTime ){ if(null != startTime){ sb.append(" AND "); } sb.append(" dateSubmitted < ").append(endTime); } sb.append( " ) " ); } String where = sb.toString(); LOG.debug("where statement : {}", where); return where; } }); LOG.debug("returning jobs: {}", jobs); return jobs; } private List<Job> fetchDagsAndMergeJobs(List<HiveQueryId> queries) { List<Job> allJobs = new LinkedList<Job>(); for (HiveQueryId atsHiveQuery : queries) { JobImpl atsJob = null; if (hasOperationId(atsHiveQuery)) { try { Job viewJob = getJobByOperationId(atsHiveQuery.operationId); TezDagId atsTezDag = getTezDagFromHiveQueryId(atsHiveQuery); atsJob = mergeHiveAtsTez(atsHiveQuery, atsTezDag, viewJob); } catch (ItemNotFound itemNotFound) { LOG.error("Ignore : {}", itemNotFound.getMessage()); continue; } } else { TezDagId atsTezDag = getTezDagFromHiveQueryId(atsHiveQuery); atsJob = atsOnlyJob(atsHiveQuery, atsTezDag); } atsJob.setHiveQueryId(atsHiveQuery.entity); allJobs.add(atsJob); } return allJobs; } /** * @param atsHiveQuery * @param atsTezDag * @param viewJob * @return */ private JobImpl mergeHiveAtsTez(HiveQueryId atsHiveQuery, TezDagId atsTezDag, Job viewJob) throws ItemNotFound { saveJobInfoIfNeeded(atsHiveQuery, atsTezDag, viewJob); return mergeAtsJobWithViewJob(atsHiveQuery, atsTezDag, viewJob); } public Job readATSJob(Job viewJob) throws ItemNotFound { if (viewJob.getStatus().equals(Job.JOB_STATE_INITIALIZED) || viewJob.getStatus().equals(Job.JOB_STATE_UNKNOWN)) return viewJob; String hexGuid = viewJob.getGuid(); HiveQueryId atsHiveQuery = ats.getHiveQueryIdByOperationId(hexGuid); TezDagId atsTezDag = getTezDagFromHiveQueryId(atsHiveQuery); saveJobInfoIfNeeded(atsHiveQuery, atsTezDag, viewJob, true); return mergeAtsJobWithViewJob(atsHiveQuery, atsTezDag, viewJob); } private TezDagId getTezDagFromHiveQueryId(HiveQueryId atsHiveQuery) { TezDagId atsTezDag; if (atsHiveQuery.version >= HiveQueryId.ATS_15_RESPONSE_VERSION) { atsTezDag = ats.getTezDAGByEntity(atsHiveQuery.entity); } else if (atsHiveQuery.dagNames != null && atsHiveQuery.dagNames.size() > 0) { String dagName = atsHiveQuery.dagNames.get(0); atsTezDag = ats.getTezDAGByName(dagName); } else { atsTezDag = new TezDagId(); } return atsTezDag; } protected boolean hasOperationId(HiveQueryId atsHiveQuery) { return atsHiveQuery.operationId != null; } protected JobImpl mergeAtsJobWithViewJob(HiveQueryId atsHiveQuery, TezDagId atsTezDag, Job viewJob) { JobImpl atsJob; try { atsJob = new JobImpl(PropertyUtils.describe(viewJob)); } catch (IllegalAccessException e) { LOG.error("Can't instantiate JobImpl", e); return null; } catch (InvocationTargetException e) { LOG.error("Can't instantiate JobImpl", e); return null; } catch (NoSuchMethodException e) { LOG.error("Can't instantiate JobImpl", e); return null; } fillAtsJobFields(atsJob, atsHiveQuery, atsTezDag); return atsJob; } protected void saveJobInfoIfNeeded(HiveQueryId hiveQueryId, TezDagId tezDagId, Job viewJob) throws ItemNotFound { saveJobInfoIfNeeded(hiveQueryId, tezDagId, viewJob, false); } protected void saveJobInfoIfNeeded(HiveQueryId hiveQueryId, TezDagId tezDagId, Job viewJob, boolean useActorSystem) throws ItemNotFound { boolean updateDb = false; String dagName = null; String dagId = null; String applicationId = null; if (viewJob.getDagName() == null || viewJob.getDagName().isEmpty()) { if (hiveQueryId.dagNames != null && hiveQueryId.dagNames.size() > 0) { dagName = hiveQueryId.dagNames.get(0); updateDb = true; } } if (tezDagId.status != null && (tezDagId.status.compareToIgnoreCase(Job.JOB_STATE_UNKNOWN) != 0) && !viewJob.getStatus().equalsIgnoreCase(tezDagId.status)) { dagId = tezDagId.entity; applicationId = tezDagId.applicationId; updateDb = true; } if(updateDb) { if (useActorSystem) { LOG.info("Saving DAG information via actor system for job id: {}", viewJob.getId()); operationController.tell(new SaveDagInformation(viewJob.getId(), dagName, dagId, applicationId), ActorRef.noSender()); } else { viewJob.setDagName(dagName); viewJob.setDagId(dagId); viewJob.setApplicationId(applicationId); viewJobResourceManager.update(viewJob, viewJob.getId()); } } } protected JobImpl atsOnlyJob(HiveQueryId atsHiveQuery, TezDagId atsTezDag) { JobImpl atsJob = new JobImpl(); atsJob.setId(atsHiveQuery.entity); fillAtsJobFields(atsJob, atsHiveQuery, atsTezDag); String query = atsHiveQuery.query; atsJob.setTitle(query.substring(0, (query.length() > 42) ? 42 : query.length())); atsJob.setQueryFile(FileService.JSON_PATH_FILE + atsHiveQuery.url + "#otherinfo.QUERY!queryText"); return atsJob; } protected JobImpl fillAtsJobFields(JobImpl atsJob, HiveQueryId atsHiveQuery, TezDagId atsTezDag) { atsJob.setApplicationId(atsTezDag.applicationId); if (atsHiveQuery.dagNames != null && atsHiveQuery.dagNames.size() > 0) atsJob.setDagName(atsHiveQuery.dagNames.get(0)); atsJob.setDagId(atsTezDag.entity); if (atsHiveQuery.starttime != 0) atsJob.setDateSubmitted(atsHiveQuery.starttime); atsJob.setDuration(atsHiveQuery.duration); return atsJob; } protected Job getJobByOperationId(final String opId) throws ItemNotFound { List<Job> jobs = viewJobResourceManager.readAll(new FilteringStrategy() { @Override public boolean isConform(Indexed item) { Job opHandle = (Job) item; return opHandle.getGuid().equals(opId); } @Override public String whereStatement() { return "guid='" + opId + "'"; } }); if (jobs.size() != 1) throw new ItemNotFound(); return jobs.get(0); } }