/* * Copyright © 2014-2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.internal.app.store; import co.cask.cdap.api.app.ApplicationSpecification; import co.cask.cdap.api.common.Bytes; import co.cask.cdap.api.data.stream.StreamSpecification; import co.cask.cdap.api.dataset.table.Table; import co.cask.cdap.api.workflow.NodeStatus; import co.cask.cdap.api.workflow.WorkflowToken; import co.cask.cdap.app.runtime.ProgramController; import co.cask.cdap.common.app.RunIds; import co.cask.cdap.common.conf.CConfiguration; import co.cask.cdap.data2.dataset2.lib.table.MDSKey; import co.cask.cdap.data2.dataset2.lib.table.MetadataStoreDataset; import co.cask.cdap.internal.app.ApplicationSpecificationAdapter; import co.cask.cdap.internal.app.DefaultApplicationSpecification; import co.cask.cdap.internal.app.runtime.ProgramOptionConstants; import co.cask.cdap.internal.app.runtime.workflow.BasicWorkflowToken; import co.cask.cdap.proto.Id; import co.cask.cdap.proto.NamespaceMeta; import co.cask.cdap.proto.ProgramRunStatus; import co.cask.cdap.proto.ProgramType; import co.cask.cdap.proto.WorkflowNodeStateDetail; import co.cask.cdap.proto.WorkflowNodeThrowable; import co.cask.cdap.proto.id.ApplicationId; import co.cask.cdap.proto.id.Ids; import co.cask.cdap.proto.id.ProgramId; import co.cask.cdap.proto.id.ProgramRunId; import co.cask.tephra.TxConstants; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.base.Stopwatch; import com.google.common.base.Ticker; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.reflect.TypeToken; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import org.apache.twill.api.RunId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.lang.reflect.Type; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; import javax.annotation.Nullable; import static com.google.common.base.Predicates.and; /** * Store for application metadata */ public class AppMetadataStore extends MetadataStoreDataset { private static final Logger LOG = LoggerFactory.getLogger(AppMetadataStore.class); private static final Gson GSON = ApplicationSpecificationAdapter.addTypeAdapters(new GsonBuilder()).create(); private static final Type MAP_STRING_STRING_TYPE = new TypeToken<Map<String, String>>() { }.getType(); private static final String TYPE_APP_META = "appMeta"; private static final String TYPE_STREAM = "stream"; private static final String TYPE_RUN_RECORD_STARTED = "runRecordStarted"; private static final String TYPE_RUN_RECORD_SUSPENDED = "runRecordSuspended"; private static final String TYPE_RUN_RECORD_COMPLETED = "runRecordCompleted"; private static final String TYPE_WORKFLOW_NODE_STATE = "wns"; private static final String TYPE_WORKFLOW_TOKEN = "wft"; private static final String TYPE_NAMESPACE = "namespace"; private final CConfiguration cConf; private static final Function<RunRecordMeta, RunId> RUN_RECORD_META_TO_RUN_ID_FUNCTION = new Function<RunRecordMeta, RunId>() { @Override public RunId apply(RunRecordMeta runRecordMeta) { return RunIds.fromString(runRecordMeta.getPid()); } }; public AppMetadataStore(Table table, CConfiguration cConf) { super(table); this.cConf = cConf; } @Override protected <T> byte[] serialize(T value) { return Bytes.toBytes(GSON.toJson(value)); } @Override protected <T> T deserialize(byte[] serialized, Type typeOfT) { return GSON.fromJson(Bytes.toString(serialized), typeOfT); } @Nullable public ApplicationMeta getApplication(String namespaceId, String appId) { return getFirst(new MDSKey.Builder().add(TYPE_APP_META, namespaceId, appId).build(), ApplicationMeta.class); } public List<ApplicationMeta> getAllApplications(String namespaceId) { return list(new MDSKey.Builder().add(TYPE_APP_META, namespaceId).build(), ApplicationMeta.class); } public void writeApplication(String namespaceId, String appId, ApplicationSpecification spec, String archiveLocation) { // NOTE: we use Gson underneath to do serde, as it doesn't serialize inner classes (which we use everywhere for // specs - see forwarding specs), we want to wrap spec with DefaultApplicationSpecification spec = DefaultApplicationSpecification.from(spec); write(new MDSKey.Builder().add(TYPE_APP_META, namespaceId, appId).build(), new ApplicationMeta(appId, spec, archiveLocation)); } public void deleteApplication(String namespaceId, String appId) { deleteAll(new MDSKey.Builder().add(TYPE_APP_META, namespaceId, appId).build()); } public void deleteApplications(String namespaceId) { deleteAll(new MDSKey.Builder().add(TYPE_APP_META, namespaceId).build()); } // todo: do we need appId? may be use from appSpec? public void updateAppSpec(String namespaceId, String appId, ApplicationSpecification spec) { // NOTE: we use Gson underneath to do serde, as it doesn't serialize inner classes (which we use everywhere for // specs - see forwarding specs), we want to wrap spec with DefaultApplicationSpecification spec = DefaultApplicationSpecification.from(spec); LOG.trace("App spec to be updated: id: {}: spec: {}", appId, GSON.toJson(spec)); MDSKey key = new MDSKey.Builder().add(TYPE_APP_META, namespaceId, appId).build(); ApplicationMeta existing = getFirst(key, ApplicationMeta.class); if (existing == null) { String msg = String.format("No meta for namespace %s app %s exists", namespaceId, appId); LOG.error(msg); throw new IllegalArgumentException(msg); } LOG.trace("Application exists in mds: id: {}, spec: {}", existing); ApplicationMeta updated = ApplicationMeta.updateSpec(existing, spec); write(key, updated); } /** * Return the {@link List} of {@link WorkflowNodeStateDetail} for a given Workflow run. */ public List<WorkflowNodeStateDetail> getWorkflowNodeStates(ProgramRunId workflowRunId) { MDSKey key = getProgramKeyBuilder(TYPE_WORKFLOW_NODE_STATE, workflowRunId.getParent().toId()) .add(workflowRunId.getRun()).build(); return list(key, WorkflowNodeStateDetail.class); } /** * This method is called to associate node state of custom action with the Workflow run. * * @param workflowRunId the run for which node state is to be added * @param nodeStateDetail node state details to be added */ public void addWorkflowNodeState(ProgramRunId workflowRunId, WorkflowNodeStateDetail nodeStateDetail) { // Node states will be stored with following key: // workflowNodeState.namespace.app.WORKFLOW.workflowName.workflowRun.workflowNodeId MDSKey key = getProgramKeyBuilder(TYPE_WORKFLOW_NODE_STATE, workflowRunId.getParent().toId()) .add(workflowRunId.getRun()).add(nodeStateDetail.getNodeId()).build(); write(key, nodeStateDetail); } private void addWorkflowNodeState(ProgramId programId, String pid, Map<String, String> systemArgs, ProgramRunStatus status, @Nullable Throwable failureCause) { String workflowNodeId = systemArgs.get(ProgramOptionConstants.WORKFLOW_NODE_ID); String workflowName = systemArgs.get(ProgramOptionConstants.WORKFLOW_NAME); String workflowRun = systemArgs.get(ProgramOptionConstants.WORKFLOW_RUN_ID); ApplicationId appId = Ids.namespace(programId.getNamespace()).app(programId.getApplication()); ProgramRunId workflowRunId = appId.workflow(workflowName).run(workflowRun); // Node states will be stored with following key: // workflowNodeState.namespace.app.WORKFLOW.workflowName.workflowRun.workflowNodeId MDSKey key = getProgramKeyBuilder(TYPE_WORKFLOW_NODE_STATE, workflowRunId.getParent().toId()) .add(workflowRun).add(workflowNodeId).build(); WorkflowNodeThrowable defaultThrowable = failureCause == null ? null : new WorkflowNodeThrowable(failureCause); WorkflowNodeStateDetail nodeStateDetail = new WorkflowNodeStateDetail(workflowNodeId, ProgramRunStatus.toNodeStatus(status), pid, defaultThrowable); write(key, nodeStateDetail); // Get the run record of the Workflow which started this program key = getWorkflowRunRecordKey(workflowRunId.getParent().toId(), workflowRunId.getRun()); RunRecordMeta record = get(key, RunRecordMeta.class); if (record != null) { // Update the parent Workflow run record by adding node id and program run id in the properties Map<String, String> properties = record.getProperties(); properties.put(workflowNodeId, pid); write(key, new RunRecordMeta(record, properties)); } } public void recordProgramStart(Id.Program program, String pid, long startTs, String twillRunId, Map<String, String> runtimeArgs, Map<String, String> systemArgs) { String workflowrunId = null; if (systemArgs != null && systemArgs.containsKey(ProgramOptionConstants.WORKFLOW_NAME)) { // Program is started by Workflow. Add row corresponding to its node state. ProgramId programId = program.toEntityId(); addWorkflowNodeState(programId, pid, systemArgs, ProgramRunStatus.RUNNING, null); workflowrunId = systemArgs.get(ProgramOptionConstants.WORKFLOW_RUN_ID); } MDSKey key = new MDSKey.Builder() .add(TYPE_RUN_RECORD_STARTED) .add(program.getNamespaceId()) .add(program.getApplicationId()) .add(program.getType().name()) .add(program.getId()) .add(pid) .build(); ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); builder.put("runtimeArgs", GSON.toJson(runtimeArgs, MAP_STRING_STRING_TYPE)); if (workflowrunId != null) { builder.put("workflowrunid", workflowrunId); } RunRecordMeta meta = new RunRecordMeta(pid, startTs, null, ProgramRunStatus.RUNNING, builder.build(), systemArgs, twillRunId); write(key, meta); } public void recordProgramSuspend(Id.Program program, String pid) { recordProgramSuspendResume(program, pid, "suspend"); } public void recordProgramResumed(Id.Program program, String pid) { recordProgramSuspendResume(program, pid, "resume"); } private void recordProgramSuspendResume(Id.Program program, String pid, String action) { String fromType = TYPE_RUN_RECORD_STARTED; String toType = TYPE_RUN_RECORD_SUSPENDED; ProgramRunStatus toStatus = ProgramRunStatus.SUSPENDED; if (action.equals("resume")) { fromType = TYPE_RUN_RECORD_SUSPENDED; toType = TYPE_RUN_RECORD_STARTED; toStatus = ProgramRunStatus.RUNNING; } MDSKey key = new MDSKey.Builder() .add(fromType) .add(program.getNamespaceId()) .add(program.getApplicationId()) .add(program.getType().name()) .add(program.getId()) .add(pid) .build(); RunRecordMeta record = get(key, RunRecordMeta.class); if (record == null) { String msg = String.format("No meta for %s run record for namespace %s app %s program type %s " + "program %s pid %s exists", action.equals("suspend") ? "started" : "suspended", program.getNamespaceId(), program.getApplicationId(), program.getType().name(), program.getId(), pid); LOG.error(msg); throw new IllegalArgumentException(msg); } deleteAll(key); key = new MDSKey.Builder() .add(toType) .add(program.getNamespaceId()) .add(program.getApplicationId()) .add(program.getType().name()) .add(program.getId()) .add(pid) .build(); write(key, new RunRecordMeta(record, null, toStatus)); } public void recordProgramStop(Id.Program program, String pid, long stopTs, ProgramRunStatus runStatus, @Nullable Throwable failureCause) { MDSKey key = new MDSKey.Builder() .add(TYPE_RUN_RECORD_STARTED) .add(program.getNamespaceId()) .add(program.getApplicationId()) .add(program.getType().name()) .add(program.getId()) .add(pid) .build(); RunRecordMeta started = getFirst(key, RunRecordMeta.class); if (started == null) { String msg = String.format("No meta for started run record for namespace %s app %s program type %s " + "program %s pid %s exists", program.getNamespaceId(), program.getApplicationId(), program.getType().name(), program.getId(), pid); LOG.error(msg); throw new IllegalArgumentException(msg); } if (started.getSystemArgs() != null && started.getSystemArgs().containsKey(ProgramOptionConstants.WORKFLOW_NAME)) { ProgramId programId = program.toEntityId(); addWorkflowNodeState(programId, pid, started.getSystemArgs(), runStatus, failureCause); } deleteAll(key); key = new MDSKey.Builder() .add(TYPE_RUN_RECORD_COMPLETED) .add(program.getNamespaceId()) .add(program.getApplicationId()) .add(program.getType().name()) .add(program.getId()) .add(getInvertedTsKeyPart(started.getStartTs())) .add(pid).build(); write(key, new RunRecordMeta(started, stopTs, runStatus)); } public List<RunRecordMeta> getRuns(ProgramRunStatus status, Predicate<RunRecordMeta> filter) { return getRuns(null, status, Long.MIN_VALUE, Long.MAX_VALUE, Integer.MAX_VALUE, filter); } private MDSKey.Builder getProgramKeyBuilder(String recordType, @Nullable Id.Program program) { MDSKey.Builder builder = new MDSKey.Builder().add(recordType); if (program != null) { builder.add(program.getNamespaceId()); builder.add(program.getApplicationId()); builder.add(program.getType().name()); builder.add(program.getId()); } return builder; } public List<RunRecordMeta> getRuns(@Nullable Id.Program program, ProgramRunStatus status, long startTime, long endTime, int limit, @Nullable Predicate<RunRecordMeta> filter) { if (status.equals(ProgramRunStatus.ALL)) { List<RunRecordMeta> resultRecords = Lists.newArrayList(); resultRecords.addAll(getActiveRuns(program, startTime, endTime, limit, filter)); resultRecords.addAll(getSuspendedRuns(program, startTime, endTime, limit - resultRecords.size(), filter)); resultRecords.addAll(getHistoricalRuns(program, status, startTime, endTime, limit - resultRecords.size(), filter)); return resultRecords; } else if (status.equals(ProgramRunStatus.RUNNING)) { return getActiveRuns(program, startTime, endTime, limit, filter); } else if (status.equals(ProgramRunStatus.SUSPENDED)) { return getSuspendedRuns(program, startTime, endTime, limit, filter); } else { return getHistoricalRuns(program, status, startTime, endTime, limit, filter); } } // TODO: getRun is duplicated in cdap-watchdog AppMetadataStore class. // Any changes made here will have to be made over there too. // JIRA https://issues.cask.co/browse/CDAP-2172 public RunRecordMeta getRun(Id.Program program, final String runid) { // Query active run record first RunRecordMeta running = getUnfinishedRun(program, TYPE_RUN_RECORD_STARTED, runid); // If program is running, this will be non-null if (running != null) { return running; } // If program is not running, query completed run records RunRecordMeta complete = getCompletedRun(program, runid); if (complete != null) { return complete; } // Else query suspended run records return getUnfinishedRun(program, TYPE_RUN_RECORD_SUSPENDED, runid); } /** * @return run records for runs that do not have start time in mds key for the run record. */ private RunRecordMeta getUnfinishedRun(Id.Program program, String recordType, String runid) { MDSKey runningKey = new MDSKey.Builder() .add(recordType) .add(program.getNamespaceId()) .add(program.getApplicationId()) .add(program.getType().name()) .add(program.getId()) .add(runid) .build(); return get(runningKey, RunRecordMeta.class); } private RunRecordMeta getCompletedRun(Id.Program program, final String runid) { MDSKey completedKey = new MDSKey.Builder() .add(TYPE_RUN_RECORD_COMPLETED) .add(program.getNamespaceId()) .add(program.getApplicationId()) .add(program.getType().name()) .add(program.getId()) .build(); // Get start time from RunId long programStartSecs = RunIds.getTime(RunIds.fromString(runid), TimeUnit.SECONDS); if (programStartSecs > -1) { // If start time is found, run a get MDSKey key = new MDSKey.Builder(completedKey) .add(getInvertedTsKeyPart(programStartSecs)) .add(runid) .build(); return get(key, RunRecordMeta.class); } else { // If start time is not found, scan the table (backwards compatibility when run ids were random UUIDs) MDSKey startKey = new MDSKey.Builder(completedKey).add(getInvertedTsScanKeyPart(Long.MAX_VALUE)).build(); MDSKey stopKey = new MDSKey.Builder(completedKey).add(getInvertedTsScanKeyPart(0)).build(); List<RunRecordMeta> runRecords = list(startKey, stopKey, RunRecordMeta.class, 1, // Should have only one record for this runid new Predicate<RunRecordMeta>() { @Override public boolean apply(RunRecordMeta input) { return input.getPid().equals(runid); } }); return Iterables.getFirst(runRecords, null); } } private List<RunRecordMeta> getSuspendedRuns(Id.Program program, long startTime, long endTime, int limit, @Nullable Predicate<RunRecordMeta> filter) { return getNonCompleteRuns(program, TYPE_RUN_RECORD_SUSPENDED, startTime, endTime, limit, filter); } private List<RunRecordMeta> getActiveRuns(Id.Program program, final long startTime, final long endTime, int limit, @Nullable Predicate<RunRecordMeta> filter) { return getNonCompleteRuns(program, TYPE_RUN_RECORD_STARTED, startTime, endTime, limit, filter); } private List<RunRecordMeta> getNonCompleteRuns(Id.Program program, String recordType, final long startTime, final long endTime, int limit, Predicate<RunRecordMeta> filter) { MDSKey activeKey = getProgramKeyBuilder(recordType, program).build(); return list(activeKey, null, RunRecordMeta.class, limit, andPredicate(new Predicate<RunRecordMeta>() { @Override public boolean apply(RunRecordMeta input) { return input.getStartTs() >= startTime && input.getStartTs() < endTime; } }, filter)); } private List<RunRecordMeta> getHistoricalRuns(Id.Program program, ProgramRunStatus status, final long startTime, final long endTime, int limit, @Nullable Predicate<RunRecordMeta> filter) { MDSKey historyKey = getProgramKeyBuilder(TYPE_RUN_RECORD_COMPLETED, program).build(); MDSKey start = new MDSKey.Builder(historyKey).add(getInvertedTsScanKeyPart(endTime)).build(); MDSKey stop = new MDSKey.Builder(historyKey).add(getInvertedTsScanKeyPart(startTime)).build(); if (status.equals(ProgramRunStatus.ALL)) { //return all records (successful and failed) return list(start, stop, RunRecordMeta.class, limit, filter == null ? Predicates.<RunRecordMeta>alwaysTrue() : filter); } if (status.equals(ProgramRunStatus.COMPLETED)) { return list(start, stop, RunRecordMeta.class, limit, andPredicate(getPredicate(ProgramController.State.COMPLETED), filter)); } if (status.equals(ProgramRunStatus.KILLED)) { return list(start, stop, RunRecordMeta.class, limit, andPredicate(getPredicate(ProgramController.State.KILLED), filter)); } return list(start, stop, RunRecordMeta.class, limit, andPredicate(getPredicate(ProgramController.State.ERROR), filter)); } private Predicate<RunRecordMeta> getPredicate(final ProgramController.State state) { return new Predicate<RunRecordMeta>() { @Override public boolean apply(RunRecordMeta record) { return record.getStatus().equals(state.getRunStatus()); } }; } private Predicate<RunRecordMeta> andPredicate(Predicate<RunRecordMeta> first, @Nullable Predicate<RunRecordMeta> second) { if (second != null) { return and(first, second); } return first; } private long getInvertedTsKeyPart(long endTime) { return Long.MAX_VALUE - endTime; } /** * Returns inverted scan key for given time. The scan key needs to be adjusted to maintain the property that * start key is inclusive and end key is exclusive on a scan. Since when you invert start key, it becomes end key and * vice-versa. */ private long getInvertedTsScanKeyPart(long time) { long invertedTsKey = getInvertedTsKeyPart(time); return invertedTsKey < Long.MAX_VALUE ? invertedTsKey + 1 : invertedTsKey; } public void writeStream(String namespaceId, StreamSpecification spec) { write(new MDSKey.Builder().add(TYPE_STREAM, namespaceId, spec.getName()).build(), spec); } public StreamSpecification getStream(String namespaceId, String name) { return getFirst(new MDSKey.Builder().add(TYPE_STREAM, namespaceId, name).build(), StreamSpecification.class); } public List<StreamSpecification> getAllStreams(String namespaceId) { return list(new MDSKey.Builder().add(TYPE_STREAM, namespaceId).build(), StreamSpecification.class); } public void deleteAllStreams(String namespaceId) { deleteAll(new MDSKey.Builder().add(TYPE_STREAM, namespaceId).build()); } public void deleteStream(String namespaceId, String name) { deleteAll(new MDSKey.Builder().add(TYPE_STREAM, namespaceId, name).build()); } public void deleteProgramHistory(String namespaceId, String appId) { deleteAll(new MDSKey.Builder().add(TYPE_RUN_RECORD_STARTED, namespaceId, appId).build()); deleteAll(new MDSKey.Builder().add(TYPE_RUN_RECORD_COMPLETED, namespaceId, appId).build()); deleteAll(new MDSKey.Builder().add(TYPE_RUN_RECORD_SUSPENDED, namespaceId, appId).build()); } public void deleteProgramHistory(String namespaceId) { deleteAll(new MDSKey.Builder().add(TYPE_RUN_RECORD_STARTED, namespaceId).build()); deleteAll(new MDSKey.Builder().add(TYPE_RUN_RECORD_COMPLETED, namespaceId).build()); deleteAll(new MDSKey.Builder().add(TYPE_RUN_RECORD_SUSPENDED, namespaceId).build()); } public void createNamespace(NamespaceMeta metadata) { write(getNamespaceKey(metadata.getName()), metadata); } public NamespaceMeta getNamespace(Id.Namespace id) { return getFirst(getNamespaceKey(id.getId()), NamespaceMeta.class); } public void deleteNamespace(Id.Namespace id) { deleteAll(getNamespaceKey(id.getId())); } public List<NamespaceMeta> listNamespaces() { return list(getNamespaceKey(null), NamespaceMeta.class); } private MDSKey getNamespaceKey(@Nullable String name) { MDSKey.Builder builder = new MDSKey.Builder().add(TYPE_NAMESPACE); if (null != name) { builder.add(name); } return builder.build(); } public void updateWorkflowToken(ProgramRunId workflowRunId, WorkflowToken workflowToken) { // Workflow token will be stored with following key: // [wft][namespace][app][WORKFLOW][workflowName][workflowRun] MDSKey key = getProgramKeyBuilder(TYPE_WORKFLOW_TOKEN, workflowRunId.getParent().toId()) .add(workflowRunId.getRun()).build(); write(key, workflowToken); } public WorkflowToken getWorkflowToken(Id.Workflow workflowId, String workflowRunId) { // Workflow token is stored with following key: // [wft][namespace][app][WORKFLOW][workflowName][workflowRun] MDSKey key = getProgramKeyBuilder(TYPE_WORKFLOW_TOKEN, workflowId).add(workflowRunId).build(); BasicWorkflowToken workflowToken = get(key, BasicWorkflowToken.class); if (workflowToken == null) { LOG.debug("No workflow token available for workflow: {}, runId: {}", workflowId, workflowRunId); // Its ok to not allow any updates by returning a 0 size token. return new BasicWorkflowToken(0); } return workflowToken; } private MDSKey getWorkflowRunRecordKey(Id.Program workflowId, String workflowRunId) { return new MDSKey.Builder() .add(TYPE_RUN_RECORD_STARTED) .add(workflowId.getNamespaceId()) .add(workflowId.getApplicationId()) .add(ProgramType.WORKFLOW.name()) .add(workflowId.getId()) .add(workflowRunId) .build(); } /** * @return programs that were running between given start and end time */ public Set<RunId> getRunningInRange(long startTimeInSecs, long endTimeInSecs) { // We have scan timeout to be half of transaction timeout to eliminate transaction timeouts during large scans. long scanTimeoutMills = TimeUnit.SECONDS.toMillis(cConf.getLong(TxConstants.Manager.CFG_TX_TIMEOUT)) / 2; LOG.trace("Scan timeout = {}ms", scanTimeoutMills); Set<RunId> runIds = new HashSet<>(); Iterables.addAll(runIds, getRunningInRangeForStatus(TYPE_RUN_RECORD_COMPLETED, startTimeInSecs, endTimeInSecs, scanTimeoutMills)); Iterables.addAll(runIds, getRunningInRangeForStatus(TYPE_RUN_RECORD_SUSPENDED, startTimeInSecs, endTimeInSecs, scanTimeoutMills)); Iterables.addAll(runIds, getRunningInRangeForStatus(TYPE_RUN_RECORD_STARTED, startTimeInSecs, endTimeInSecs, scanTimeoutMills)); return runIds; } private Iterable<RunId> getRunningInRangeForStatus(String statusKey, final long startTimeInSecs, final long endTimeInSecs, long maxScanTimeMillis) { List<Iterable<RunId>> batches = getRunningInRangeForStatus(statusKey, startTimeInSecs, endTimeInSecs, maxScanTimeMillis, Ticker.systemTicker()); return Iterables.concat(batches); } @VisibleForTesting List<Iterable<RunId>> getRunningInRangeForStatus(String statusKey, final long startTimeInSecs, final long endTimeInSecs, long maxScanTimeMillis, Ticker ticker) { // Create time filter to get running programs between start and end time Predicate<RunRecordMeta> timeFilter = new Predicate<RunRecordMeta>() { @Override public boolean apply(RunRecordMeta runRecordMeta) { // Program is running in range [startTime, endTime) if the program started before endTime // or program's stop time was after startTime return runRecordMeta.getStartTs() < endTimeInSecs && (runRecordMeta.getStopTs() == null || runRecordMeta.getStopTs() >= startTimeInSecs); } }; // Break up scans into smaller batches to prevent transaction timeout List<Iterable<RunId>> batches = new ArrayList<>(); MDSKey startKey = new MDSKey.Builder().add(statusKey).build(); MDSKey endKey = new MDSKey(Bytes.stopKeyForPrefix(startKey.getKey())); while (true) { ScanFunction scanFunction = new ScanFunction(timeFilter, ticker, maxScanTimeMillis); scanFunction.start(); scan(startKey, endKey, RunRecordMeta.class, scanFunction); // stop when scan returns zero elements if (scanFunction.getNumProcessed() == 0) { break; } batches.add(Iterables.transform(scanFunction.getValues(), RUN_RECORD_META_TO_RUN_ID_FUNCTION)); // key for next scan is the last key + 1 from the previous scan startKey = new MDSKey(Bytes.stopKeyForPrefix(scanFunction.getLastKey().getKey())); } return batches; } private static class ScanFunction implements Function<MetadataStoreDataset.KeyValue<RunRecordMeta>, Boolean> { private final Predicate<RunRecordMeta> filter; private final Stopwatch stopwatch; private final long maxScanTimeMillis; private final List<RunRecordMeta> values = new ArrayList<>(); private int numProcessed = 0; private MDSKey lastKey; public ScanFunction(Predicate<RunRecordMeta> filter, Ticker ticker, long maxScanTimeMillis) { this.filter = filter; this.maxScanTimeMillis = maxScanTimeMillis; this.stopwatch = new Stopwatch(ticker); } public void start() { stopwatch.start(); } public List<RunRecordMeta> getValues() { return Collections.unmodifiableList(values); } public int getNumProcessed() { return numProcessed; } public MDSKey getLastKey() { return lastKey; } @Override public Boolean apply(MetadataStoreDataset.KeyValue<RunRecordMeta> input) { long elapsedMillis = stopwatch.elapsedMillis(); if (elapsedMillis > maxScanTimeMillis) { return false; } ++numProcessed; lastKey = input.getKey(); if (filter.apply(input.getValue())) { values.add(input.getValue()); } return true; } } /** * Upgrade the Workflow run records. This method iterate over all Workflow run records * and create new records for Workflow token and Workflow node states based on the properties. */ public void upgradeWorkflowRunRecords() { final String workflowTokenPropertyName = "workflowToken"; String runtimeArgsPropertyName = "runtimeArgs"; MDSKey startKey = new MDSKey.Builder().add(TYPE_RUN_RECORD_COMPLETED).build(); MDSKey endKey = new MDSKey(Bytes.stopKeyForPrefix(startKey.getKey())); Predicate<RunRecordMeta> predicate = new Predicate<RunRecordMeta>() { @Override public boolean apply(@Nullable RunRecordMeta input) { return input != null && input.getProperties().containsKey(workflowTokenPropertyName); } }; Map<MDSKey, RunRecordMeta> wfRunRecords = listKV(startKey, endKey, RunRecordMeta.class, Integer.MAX_VALUE, predicate); for (Map.Entry<MDSKey, RunRecordMeta> wfRunRecord : wfRunRecords.entrySet()) { String runId = wfRunRecord.getValue().getPid(); ProgramRunId workflowRunId = getProgramIdFromRunRecordKey(wfRunRecord.getKey()).run(runId); Map<String, String> runRecordProperties = wfRunRecord.getValue().getProperties(); String workflowToken = runRecordProperties.get(workflowTokenPropertyName); updateWorkflowToken(workflowRunId, GSON.fromJson(workflowToken, BasicWorkflowToken.class)); for (Map.Entry<String, String> property : runRecordProperties.entrySet()) { if (property.getKey().equals(workflowTokenPropertyName) || property.getKey().equals(runtimeArgsPropertyName)) { // property is for workflow token or runtime argument continue; } // Property is of type - <program name, program run id> String programName = property.getKey(); String programRunId = property.getValue(); ProgramId programId = Ids.namespace(workflowRunId.getNamespace()).app(workflowRunId.getApplication()) .mr(programName); // Check if the current property is MapReduce program RunRecordMeta completedRun = getCompletedRun(programId.toId(), programRunId); if (completedRun == null) { // Check if current property is for Spark program programId = Ids.namespace(workflowRunId.getNamespace()).app(workflowRunId.getApplication()) .spark(programName); completedRun = getCompletedRun(programId.toId(), programRunId); } if (completedRun == null) { continue; } NodeStatus nodeStatus = ProgramRunStatus.toNodeStatus(completedRun.getStatus()); WorkflowNodeStateDetail nodeStateDetail = new WorkflowNodeStateDetail(programName, nodeStatus, programRunId, null); addWorkflowNodeState(workflowRunId, nodeStateDetail); } } } private ProgramId getProgramIdFromRunRecordKey(MDSKey key) { MDSKey.Splitter splitter = key.split(); // Skip the RunRecord type. splitter.skipString(); // Namespace id is the next part. String namespaceId = splitter.getString(); // Application id is the next part. String applicationId = splitter.getString(); // Program type is the next part. String programType = splitter.getString(); // Program id is the next part. String programId = splitter.getString(); return Ids.namespace(namespaceId).app(applicationId).program(ProgramType.valueOf(programType), programId); } }