/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * <p> * http://www.apache.org/licenses/LICENSE-2.0 * <p> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ambari.view.hive20.resources.jobs; import akka.actor.ActorRef; import org.apache.ambari.view.hive20.actor.message.job.SaveDagInformation; import org.apache.ambari.view.hive20.persistence.utils.FilteringStrategy; import org.apache.ambari.view.hive20.persistence.utils.Indexed; import org.apache.ambari.view.hive20.persistence.utils.ItemNotFound; import org.apache.ambari.view.hive20.persistence.utils.OnlyOwnersFilteringStrategy; import org.apache.ambari.view.hive20.resources.IResourceManager; import org.apache.ambari.view.hive20.resources.files.FileService; import org.apache.ambari.view.hive20.resources.jobs.atsJobs.HiveQueryId; import org.apache.ambari.view.hive20.resources.jobs.atsJobs.IATSParser; import org.apache.ambari.view.hive20.resources.jobs.atsJobs.TezDagId; import org.apache.ambari.view.hive20.resources.jobs.viewJobs.Job; import org.apache.ambari.view.hive20.resources.jobs.viewJobs.JobImpl; import org.apache.ambari.view.hive20.resources.jobs.viewJobs.JobInfo; import org.apache.commons.beanutils.PropertyUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.lang.reflect.InvocationTargetException; import java.util.HashMap; import 
java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * View Jobs and ATS Jobs aggregator.
 * There are 4 options:
 * 1) ATS ExecuteJob without operationId
 *    *Meaning*: executed outside of HS2
 *    - ExecuteJob info only from ATS
 * 2) ATS ExecuteJob with operationId
 *    a) Hive View ExecuteJob with same operationId is not present
 *       *Meaning*: executed with HS2
 *       - ExecuteJob info only from ATS
 *    b) Hive View ExecuteJob with operationId is present (need to merge)
 *       *Meaning*: executed with HS2 through Hive View
 *       - ExecuteJob info merged from ATS and from Hive View DataStorage
 * 3) ExecuteJob present only in Hive View, ATS does not have it
 *    *Meaning*: executed through Hive View, but Hadoop ExecuteJob was not created
 *    it can happen if user executes query without aggregation, like just "select * from TABLE"
 *    - ExecuteJob info only from Hive View
 */
public class Aggregator {
  protected final static Logger LOG = LoggerFactory.getLogger(Aggregator.class);

  /** Maximum number of characters of the query text used as the title of an ATS-only job. */
  private static final int MAX_TITLE_LENGTH = 42;

  private final IATSParser ats;
  private final IResourceManager<Job> viewJobResourceManager;
  private final ActorRef operationController;

  public Aggregator(IResourceManager<Job> jobResourceManager, IATSParser ats, ActorRef operationController) {
    this.viewJobResourceManager = jobResourceManager;
    this.ats = ats;
    this.operationController = operationController;
  }

  /**
   * Gets all the jobs for 'username' where the job submission time is between 'startTime' (inclusive)
   * and 'endTime' (exclusive).
   * Fetches the jobs from ATS and DB, merges and updates DB. Returns the combined list.
   *
   * @param username  username for which jobs have to be fetched
   * @param startTime inclusive, time in secs from epoch
   * @param endTime   exclusive, time in secs from epoch
   * @return list of jobs
   */
  public List<Job> readAllForUserByTime(String username, long startTime, long endTime) {
    List<HiveQueryId> queryIdList = ats.getHiveQueryIdsForUserByTime(username, startTime, endTime);
    List<Job> allJobs = fetchDagsAndMergeJobs(queryIdList);
    List<Job> dbOnlyJobs = readDBOnlyJobs(username, queryIdList, startTime, endTime);
    allJobs.addAll(dbOnlyJobs);
    return allJobs;
  }

  /**
   * Fetches the new state of jobs from ATS and from DB. Does merging/updating as required.
   *
   * @param jobInfos infos of jobs to get
   * @return list of updated Jobs
   */
  public List<Job> readJobsByIds(List<JobInfo> jobInfos) {
    // Categorize jobs: those with a Hive id are resolved through ATS, the rest straight from the DB.
    List<String> jobsWithHiveIds = new LinkedList<>();
    List<String> dbOnlyJobs = new LinkedList<>();

    for (JobInfo jobInfo : jobInfos) {
      if (null == jobInfo.getHiveId() || jobInfo.getHiveId().trim().isEmpty()) {
        dbOnlyJobs.add(jobInfo.getJobId());
      } else {
        jobsWithHiveIds.add(jobInfo.getHiveId());
      }
    }

    List<HiveQueryId> queryIdList = ats.getHiveQueryIdByEntityList(jobsWithHiveIds);
    List<Job> allJobs = fetchDagsAndMergeJobs(queryIdList);
    List<Job> dbJobs = readJobsFromDbByJobId(dbOnlyJobs);
    allJobs.addAll(dbJobs);
    return allJobs;
  }

  /**
   * Gets the jobs from the Database given their id.
   * Ids that cannot be found are logged and skipped.
   *
   * @param jobsIds list of ids of jobs
   * @return list of all the jobs found
   */
  private List<Job> readJobsFromDbByJobId(List<String> jobsIds) {
    List<Job> jobs = new LinkedList<>();
    for (final String jid : jobsIds) {
      try {
        Job job = getJobFromDbByJobId(jid);
        // getJobFromDbByJobId returns null for a null id; never propagate null entries.
        if (null != job) {
          jobs.add(job);
        }
      } catch (ItemNotFound itemNotFound) {
        LOG.error("Error while finding job with id : {}", jid, itemNotFound);
      }
    }
    return jobs;
  }

  /**
   * Fetches the job from DB given its id.
   *
   * @param jobId the id of the job to fetch; a null id short-circuits to a null result
   * @return the job, or null when jobId is null
   * @throws ItemNotFound if job with given id is not found in db
   */
  private Job getJobFromDbByJobId(final String jobId) throws ItemNotFound {
    if (null == jobId) {
      return null;
    }

    List<Job> jobs = viewJobResourceManager.readAll(new FilteringStrategy() {
      @Override
      public boolean isConform(Indexed item) {
        return item.getId().equals(jobId);
      }

      @Override
      public String whereStatement() {
        return "id = '" + jobId + "'"; // even IDs are string
      }
    });

    if (null != jobs && !jobs.isEmpty()) {
      return jobs.get(0);
    }

    throw new ItemNotFound(String.format("Job with id %s not found.", jobId));
  }

  /**
   * Returns all the jobs from ATS and DB (for this instance) for the given user.
   *
   * @param username user whose jobs are fetched
   * @return combined list of ATS-merged and DB-only jobs
   */
  public List<Job> readAll(String username) {
    List<HiveQueryId> queries = ats.getHiveQueryIdsForUser(username);
    LOG.debug("HiveQueryIds fetched : {}", queries);
    List<Job> allJobs = fetchDagsAndMergeJobs(queries);
    List<Job> dbOnlyJobs = readDBOnlyJobs(username, queries, null, null);
    LOG.debug("Jobs only present in DB: {}", dbOnlyJobs);
    allJobs.addAll(dbOnlyJobs);
    return allJobs;
  }

  /**
   * Reads all the jobs from DB for username and excludes the jobs mentioned in queries list.
   *
   * @param username  username for which the jobs are to be read
   * @param queries   the jobs to exclude
   * @param startTime can be null, if not then the window start time for job
   * @param endTime   can be null, if not then the window end time for job
   * @return the jobs in db that are not in the queries
   */
  private List<Job> readDBOnlyJobs(String username, List<HiveQueryId> queries, Long startTime, Long endTime) {
    List<Job> dbOnlyJobs = new LinkedList<>();
    Map<String, String> operationIdVsHiveId = new HashMap<>();

    for (HiveQueryId hqid : queries) {
      operationIdVsHiveId.put(hqid.operationId, hqid.entity);
    }
    LOG.debug("operationIdVsHiveId : {} ", operationIdVsHiveId);

    // Cover case when operationId is present, but not exists in ATS;
    // e.g. optimized queries without executing jobs, like "SELECT * FROM TABLE".
    List<Job> jobs = viewJobResourceManager.readAll(new OnlyOwnersFilteringStrategy(username));
    for (Job job : jobs) {
      // NOTE(review): when startTime/endTime are null (the readAll path) the whole exclusion
      // check — including the operationIdVsHiveId lookup — is skipped, so jobs already merged
      // from ATS appear to be added a second time. Looks unintended; confirm before changing.
      if (null != startTime && null != endTime && null != job.getDateSubmitted()
          && (job.getDateSubmitted() < startTime
              || job.getDateSubmitted() >= endTime
              || operationIdVsHiveId.containsKey(job.getGuid()))
      ) {
        continue; // don't include this in the result
      } else {
        dbOnlyJobs.add(job);
      }
    }

    return dbOnlyJobs;
  }

  /**
   * Resolves the Tez DAG for each HiveQueryId and merges it with the matching
   * Hive View job when one exists (by operationId); builds an ATS-only job otherwise.
   *
   * @param queries HiveQueryIds fetched from ATS
   * @return merged/ATS-only jobs; queries whose View job cannot be found are skipped
   */
  private List<Job> fetchDagsAndMergeJobs(List<HiveQueryId> queries) {
    List<Job> allJobs = new LinkedList<Job>();

    for (HiveQueryId atsHiveQuery : queries) {
      JobImpl atsJob = null;
      if (hasOperationId(atsHiveQuery)) {
        try {
          Job viewJob = getJobByOperationId(atsHiveQuery.operationId);
          TezDagId atsTezDag = getTezDagFromHiveQueryId(atsHiveQuery);
          atsJob = mergeHiveAtsTez(atsHiveQuery, atsTezDag, viewJob);
        } catch (ItemNotFound itemNotFound) {
          LOG.error("Ignore : {}", itemNotFound.getMessage());
          continue;
        }
      } else {
        TezDagId atsTezDag = getTezDagFromHiveQueryId(atsHiveQuery);
        atsJob = atsOnlyJob(atsHiveQuery, atsTezDag);
      }
      atsJob.setHiveQueryId(atsHiveQuery.entity);
      allJobs.add(atsJob);
    }

    return allJobs;
  }

  /**
   * Persists any newly-discovered DAG information for the View job, then merges
   * the ATS query/DAG data onto a copy of it.
   *
   * @param atsHiveQuery the ATS-side query
   * @param atsTezDag    the Tez DAG resolved for the query
   * @param viewJob      the Hive View job with the same operationId
   * @return merged job
   */
  private JobImpl mergeHiveAtsTez(HiveQueryId atsHiveQuery, TezDagId atsTezDag, Job viewJob) throws ItemNotFound {
    saveJobInfoIfNeeded(atsHiveQuery, atsTezDag, viewJob);
    return mergeAtsJobWithViewJob(atsHiveQuery, atsTezDag, viewJob);
  }

  /**
   * Refreshes a single View job from ATS. Jobs still INITIALIZED/UNKNOWN are
   * returned unchanged; otherwise ATS is queried by the job's guid (operationId)
   * and the result merged, saving DAG info via the actor system when needed.
   *
   * @param viewJob the Hive View job to refresh
   * @return the merged job, or viewJob itself when not yet started
   * @throws ItemNotFound if ATS lookup fails
   */
  public Job readATSJob(Job viewJob) throws ItemNotFound {
    if (viewJob.getStatus().equals(Job.JOB_STATE_INITIALIZED) || viewJob.getStatus().equals(Job.JOB_STATE_UNKNOWN)) {
      return viewJob;
    }

    String hexGuid = viewJob.getGuid();
    HiveQueryId atsHiveQuery = ats.getHiveQueryIdByOperationId(hexGuid);
    TezDagId atsTezDag = getTezDagFromHiveQueryId(atsHiveQuery);
    saveJobInfoIfNeeded(atsHiveQuery, atsTezDag, viewJob, true);
    return mergeAtsJobWithViewJob(atsHiveQuery, atsTezDag, viewJob);
  }

  /**
   * Resolves the Tez DAG for a Hive query: by entity for ATS >= 1.5, by the first
   * DAG name for older ATS, or an empty TezDagId when no DAG name is available.
   *
   * @param atsHiveQuery the ATS query to resolve the DAG for
   * @return the resolved DAG, never null
   */
  private TezDagId getTezDagFromHiveQueryId(HiveQueryId atsHiveQuery) {
    TezDagId atsTezDag;
    if (atsHiveQuery.version >= HiveQueryId.ATS_15_RESPONSE_VERSION) {
      atsTezDag = ats.getTezDAGByEntity(atsHiveQuery.entity);
    } else if (atsHiveQuery.dagNames != null && !atsHiveQuery.dagNames.isEmpty()) {
      String dagName = atsHiveQuery.dagNames.get(0);
      atsTezDag = ats.getTezDAGByName(dagName);
    } else {
      atsTezDag = new TezDagId();
    }
    return atsTezDag;
  }

  /** Whether the ATS query carries an HS2 operation id (i.e. may map to a View job). */
  protected boolean hasOperationId(HiveQueryId atsHiveQuery) {
    return atsHiveQuery.operationId != null;
  }

  /**
   * Copies the View job via its bean properties and overlays the ATS query/DAG fields.
   *
   * @return the merged copy, or null when the bean copy fails (logged)
   */
  protected JobImpl mergeAtsJobWithViewJob(HiveQueryId atsHiveQuery, TezDagId atsTezDag, Job viewJob) {
    JobImpl atsJob;
    try {
      atsJob = new JobImpl(PropertyUtils.describe(viewJob));
    } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
      LOG.error("Can't instantiate JobImpl", e);
      return null;
    }
    fillAtsJobFields(atsJob, atsHiveQuery, atsTezDag);
    return atsJob;
  }

  /** Persists DAG info synchronously (see the 4-arg overload). */
  protected void saveJobInfoIfNeeded(HiveQueryId hiveQueryId, TezDagId tezDagId, Job viewJob) throws ItemNotFound {
    saveJobInfoIfNeeded(hiveQueryId, tezDagId, viewJob, false);
  }

  /**
   * Persists the DAG name/id/applicationId onto the View job when ATS knows more
   * than the DB does: a DAG name the job lacks, or a non-UNKNOWN DAG status that
   * differs from the job's status.
   *
   * @param useActorSystem when true, the update is sent to the operation controller
   *                       actor instead of being written through the resource manager
   * @throws ItemNotFound if the direct DB update cannot find the job
   */
  protected void saveJobInfoIfNeeded(HiveQueryId hiveQueryId, TezDagId tezDagId, Job viewJob,
                                     boolean useActorSystem) throws ItemNotFound {
    boolean updateDb = false;
    String dagName = null;
    String dagId = null;
    String applicationId = null;

    if (viewJob.getDagName() == null || viewJob.getDagName().isEmpty()) {
      if (hiveQueryId.dagNames != null && !hiveQueryId.dagNames.isEmpty()) {
        dagName = hiveQueryId.dagNames.get(0);
        updateDb = true;
      }
    }
    if (tezDagId.status != null && (tezDagId.status.compareToIgnoreCase(Job.JOB_STATE_UNKNOWN) != 0)
        && !viewJob.getStatus().equalsIgnoreCase(tezDagId.status)) {
      dagId = tezDagId.entity;
      applicationId = tezDagId.applicationId;
      updateDb = true;
    }

    if (updateDb) {
      if (useActorSystem) {
        LOG.info("Saving DAG information via actor system for job id: {}", viewJob.getId());
        operationController.tell(new SaveDagInformation(viewJob.getId(), dagName, dagId, applicationId),
            ActorRef.noSender());
      } else {
        viewJob.setDagName(dagName);
        viewJob.setDagId(dagId);
        viewJob.setApplicationId(applicationId);
        viewJobResourceManager.update(viewJob, viewJob.getId());
      }
    }
  }

  /**
   * Builds a job purely from ATS data (no matching View job). The title is the
   * query text truncated to {@link #MAX_TITLE_LENGTH} characters.
   *
   * @return the ATS-only job
   */
  protected JobImpl atsOnlyJob(HiveQueryId atsHiveQuery, TezDagId atsTezDag) {
    JobImpl atsJob = new JobImpl();
    atsJob.setId(atsHiveQuery.entity);
    fillAtsJobFields(atsJob, atsHiveQuery, atsTezDag);

    String query = atsHiveQuery.query;
    // ATS may return no query text; previously this NPE'd on substring().
    if (query != null) {
      atsJob.setTitle(query.substring(0, Math.min(query.length(), MAX_TITLE_LENGTH)));
    }

    atsJob.setQueryFile(FileService.JSON_PATH_FILE + atsHiveQuery.url + "#otherinfo.QUERY!queryText");
    return atsJob;
  }

  /**
   * Overlays ATS query/DAG fields onto the given job: applicationId, DAG name/id,
   * submission time (when known) and duration.
   *
   * @return atsJob, for chaining
   */
  protected JobImpl fillAtsJobFields(JobImpl atsJob, HiveQueryId atsHiveQuery, TezDagId atsTezDag) {
    atsJob.setApplicationId(atsTezDag.applicationId);

    if (atsHiveQuery.dagNames != null && !atsHiveQuery.dagNames.isEmpty()) {
      atsJob.setDagName(atsHiveQuery.dagNames.get(0));
    }
    atsJob.setDagId(atsTezDag.entity);
    if (atsHiveQuery.starttime != 0) {
      atsJob.setDateSubmitted(atsHiveQuery.starttime);
    }
    atsJob.setDuration(atsHiveQuery.duration);
    return atsJob;
  }

  /**
   * Finds the single View job whose guid equals the given HS2 operation id.
   *
   * @param opId the operation id (job guid)
   * @return the matching job
   * @throws ItemNotFound when zero or more than one job matches
   */
  protected Job getJobByOperationId(final String opId) throws ItemNotFound {
    List<Job> jobs = viewJobResourceManager.readAll(new FilteringStrategy() {
      @Override
      public boolean isConform(Indexed item) {
        Job opHandle = (Job) item;
        return opHandle.getGuid().equals(opId);
      }

      @Override
      public String whereStatement() {
        return "guid='" + opId + "'";
      }
    });

    if (jobs.size() != 1) {
      throw new ItemNotFound(String.format("Expected exactly one job with guid %s, found %d.", opId, jobs.size()));
    }

    return jobs.get(0);
  }
}