/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.configuration; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.Properties; import java.util.Set; import com.google.common.base.Strings; import com.google.common.collect.Sets; import com.google.gson.Gson; import com.google.gson.JsonElement; import com.google.gson.JsonParser; import gobblin.broker.gobblin_scopes.GobblinScopeTypes; import gobblin.broker.iface.SharedResourcesBroker; import gobblin.broker.iface.SubscopedBrokerBuilder; import gobblin.source.extractor.Watermark; import gobblin.source.workunit.Extract; import gobblin.source.workunit.ImmutableWorkUnit; import gobblin.source.workunit.WorkUnit; import javax.annotation.Nullable; import lombok.Getter; /** * This class encapsulates a {@link WorkUnit} instance and additionally holds all the * task runtime state of that {@link WorkUnit}. * * <p> * Properties set in the encapsulated {@link WorkUnit} can be overridden at runtime, * with the original values available through the {@link #getWorkunit()} method. * Getters will return values set at task runtime if available, or the corresponding * values from encapsulated {@link WorkUnit} if they are not set at task runtime. * </p> * * @author kgoodhop */ public class WorkUnitState extends State { private static final String FINAL_CONSTRUCT_STATE_PREFIX = "construct.final.state."; private static final JsonParser JSON_PARSER = new JsonParser(); private static final Gson GSON = new Gson(); /** * Runtime state of the {@link WorkUnit}. * * <p> * The final state indicating successfully completed work is COMMITTED. * SUCCESSFUL only implies a task has finished, but doesn't imply the work * has been committed. * </p> */ public enum WorkingState { PENDING, RUNNING, SUCCESSFUL, COMMITTED, FAILED, CANCELLED, SKIPPED } private final WorkUnit workUnit; @Getter private State jobState; transient private final SharedResourcesBroker<GobblinScopeTypes> taskBroker; /** * Default constructor used for deserialization. */ public WorkUnitState() { this.workUnit = WorkUnit.createEmpty(); this.jobState = new State(); // Not available on deserialization this.taskBroker = null; } /** * Constructor. * * @param workUnit a {@link WorkUnit} instance based on which a {@link WorkUnitState} instance is constructed * @deprecated It is recommended to use {@link #WorkUnitState(WorkUnit, State)} rather than combining properties * in the job state into the workunit. */ @Deprecated public WorkUnitState(WorkUnit workUnit) { this.workUnit = workUnit; this.jobState = new State(); this.taskBroker = null; } /** * If creating a {@link WorkUnitState} for use by a task, use {@link #WorkUnitState(WorkUnit, State, SharedResourcesBroker)} * instead. */ public WorkUnitState(WorkUnit workUnit, State jobState) { this(workUnit, jobState, buildTaskBroker(null, jobState, workUnit)); } public WorkUnitState(WorkUnit workUnit, State jobState, SubscopedBrokerBuilder<GobblinScopeTypes, ?> taskBrokerBuilder) { this(workUnit, jobState, buildTaskBroker(taskBrokerBuilder, jobState, workUnit)); } public WorkUnitState(WorkUnit workUnit, State jobState, SharedResourcesBroker<GobblinScopeTypes> taskBroker) { this.workUnit = workUnit; this.jobState = jobState; this.taskBroker = taskBroker; } private static SharedResourcesBroker<GobblinScopeTypes> buildTaskBroker( SubscopedBrokerBuilder<GobblinScopeTypes, ?> taskBrokerBuilder, State jobState, WorkUnit workUnit) { return taskBrokerBuilder == null ? null : taskBrokerBuilder.build(); } /** * Get a {@link SharedResourcesBroker} scoped for this task. */ public SharedResourcesBroker<GobblinScopeTypes> getTaskBroker() { if (this.taskBroker == null) { throw new UnsupportedOperationException("Task broker is only available within a task. If this exception was thrown " + "from within a task, the JobLauncher did not specify a task broker."); } return this.taskBroker; } /** * Get a {@link SharedResourcesBroker} scoped for this task or null if it doesn't exist. This is used for internal calls. */ @Nullable public SharedResourcesBroker<GobblinScopeTypes> getTaskBrokerNullable() { return this.taskBroker; } /** * Get an {@link ImmutableWorkUnit} that wraps the internal {@link WorkUnit}. * * @return an {@link ImmutableWorkUnit} that wraps the internal {@link WorkUnit} */ public WorkUnit getWorkunit() { return new ImmutableWorkUnit(this.workUnit); } /** * Override {@link #workUnit}'s properties with new commonProps and specProps. */ public void setWuProperties(Properties commonProps, Properties specProps) { this.workUnit.setProps(commonProps, specProps); } /** * Get the current runtime state of the {@link WorkUnit}. * * @return {@link WorkingState} of the {@link WorkUnit} */ public WorkingState getWorkingState() { return WorkingState .valueOf(getProp(ConfigurationKeys.WORK_UNIT_WORKING_STATE_KEY, WorkingState.PENDING.toString())); } /** * Set the current runtime state of the {@link WorkUnit}. * * @param state {@link WorkingState} of the {@link WorkUnit} */ public void setWorkingState(WorkingState state) { setProp(ConfigurationKeys.WORK_UNIT_WORKING_STATE_KEY, state.toString()); } /** * Get the actual high {@link Watermark} as a {@link JsonElement}. * * @return a {@link JsonElement} representing the actual high {@link Watermark}, * or {@code null} if the actual high {@link Watermark} is not set. */ public JsonElement getActualHighWatermark() { if (!contains(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY)) { return null; } return JSON_PARSER.parse(getProp(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY)); } /** * Get the actual high {@link Watermark}. If the {@code WorkUnitState} does not contain the actual high watermark * (which may be caused by task failures), the low watermark in the corresponding {@link WorkUnit} will be returned. * * @param watermarkClass the watermark class for this {@code WorkUnitState}. * @param gson a {@link Gson} object used to deserialize the watermark. * @return the actual high watermark in this {@code WorkUnitState}. null is returned if this {@code WorkUnitState} * does not contain an actual high watermark, and the corresponding {@code WorkUnit} does not contain a low * watermark. */ public <T extends Watermark> T getActualHighWatermark(Class<T> watermarkClass, Gson gson) { JsonElement json = getActualHighWatermark(); if (json == null) { json = this.workUnit.getLowWatermark(); if (json == null) { return null; } } return gson.fromJson(json, watermarkClass); } /** * Get the actual high {@link Watermark}. If the {@code WorkUnitState} does not contain the actual high watermark * (which may be caused by task failures), the low watermark in the corresponding {@link WorkUnit} will be returned. * * <p>A default {@link Gson} object will be used to deserialize the watermark.</p> * * @param watermarkClass the watermark class for this {@code WorkUnitState}. * @return the actual high watermark in this {@code WorkUnitState}. null is returned if this {@code WorkUnitState} * does not contain an actual high watermark, and the corresponding {@code WorkUnit} does not contain a low * watermark. */ public <T extends Watermark> T getActualHighWatermark(Class<T> watermarkClass) { return getActualHighWatermark(watermarkClass, GSON); } /** * This method should set the actual, runtime high {@link Watermark} for this {@link WorkUnitState}. A high * {@link Watermark} indicates that all data for the source has been pulled up to a specific point. * * <p> * This method should be called inside the {@link gobblin.source.extractor.Extractor} class, during the initialization * of the class, before any calls to {@link gobblin.source.extractor.Extractor#readRecord(Object)} are executed. This * method keeps a local point to the given {@link Watermark} and expects the following invariant to always be upheld. * The invariant for this {@link Watermark} is that it should cover all records up to and including the most recent * record returned by {@link gobblin.source.extractor.Extractor#readRecord(Object)}. * </p> * <p> * The {@link Watermark} set in this method may be polled by the framework multiple times, in order to track the * progress of how the {@link Watermark} changes. This is important for reporting percent completion of a * {@link gobblin.source.workunit.WorkUnit}. * </p> * * TODO - Once we are ready to make a backwards incompatible change to the {@link gobblin.source.extractor.Extractor} * interface, this method should become part of the {@link gobblin.source.extractor.Extractor} interface. For example, * a method such as getCurrentHighWatermark() should be added. */ public void setActualHighWatermark(Watermark watermark) { /** * TODO * * Hack until a state-store migration can be done. The watermark is converted to a {@link String} and then stored * internally in via a configuration key. Once a state-store migration can be done, the {@link Watermark} can be * stored as Binary JSON. */ setProp(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY, watermark.toJson().toString()); } /** * Backoff the actual high watermark to the low watermark returned by {@link WorkUnit#getLowWatermark()}. */ public void backoffActualHighWatermark() { JsonElement lowWatermark = this.workUnit.getLowWatermark(); if (lowWatermark == null) { return; } setProp(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY, lowWatermark.toString()); } /** * Get the high watermark as set in {@link gobblin.source.extractor.Extractor}. * * @return high watermark * @deprecated use {@link #getActualHighWatermark}. */ @Deprecated public long getHighWaterMark() { return getPropAsLong(ConfigurationKeys.WORK_UNIT_STATE_RUNTIME_HIGH_WATER_MARK, ConfigurationKeys.DEFAULT_WATERMARK_VALUE); } /** * Set the high watermark. * * @param value high watermark * @deprecated use {@link #setActualHighWatermark(Watermark)}. */ @Deprecated public void setHighWaterMark(long value) { setProp(ConfigurationKeys.WORK_UNIT_STATE_RUNTIME_HIGH_WATER_MARK, value); } @Override public Properties getProperties() { Properties props = new Properties(); props.putAll(this.jobState.getProperties()); props.putAll(this.workUnit.getProperties()); props.putAll(super.getProperties()); return props; } @Override public String getProp(String key) { String value = super.getProp(key); if (value == null) { value = this.workUnit.getProp(key); } if (value == null) { value = this.jobState.getProp(key); } return value; } @Override public String getProp(String key, String def) { String value = super.getProp(key); if (value == null) { value = this.workUnit.getProp(key); } if (value == null) { value = this.jobState.getProp(key, def); } return value; } /** * @deprecated Use {@link #getProp(String)} */ @Deprecated @Override protected String getProperty(String key) { return getProp(key); } /** * @deprecated Use {@link #getProp(String, String)} */ @Deprecated @Override protected String getProperty(String key, String def) { return getProp(key, def); } @Override public Set<String> getPropertyNames() { Set<String> set = Sets.newHashSet(super.getPropertyNames()); set.addAll(this.workUnit.getPropertyNames()); set.addAll(this.jobState.getPropertyNames()); return set; } @Override public boolean contains(String key) { return super.contains(key) || this.workUnit.contains(key) || this.jobState.contains(key); } /** * Get the {@link gobblin.source.workunit.Extract} associated with the {@link WorkUnit}. * * @return {@link gobblin.source.workunit.Extract} associated with the {@link WorkUnit} */ public Extract getExtract() { return new Extract(this.workUnit.getExtract()); } /** * Get properties set in the previous run for the same table as the {@link WorkUnit}. * * @return properties as a {@link State} object */ public State getPreviousTableState() { return getExtract().getPreviousTableState(); } public void setJobState(State jobState) { this.jobState = jobState; } @Override public void readFields(DataInput in) throws IOException { this.workUnit.readFields(in); super.readFields(in); } @Override public void write(DataOutput out) throws IOException { this.workUnit.write(out); super.write(out); } @Override public boolean equals(Object object) { if (!(object instanceof WorkUnitState)) { return false; } WorkUnitState other = (WorkUnitState) object; return ((this.workUnit == null && other.workUnit == null) || (this.workUnit != null && this.workUnit.equals(other.workUnit))) && ((this.jobState == null && other.jobState == null) || (this.jobState != null && this.jobState.equals(other.jobState))) && super.equals(other); } @Override public int hashCode() { final int prime = 31; int result = super.hashCode(); result = prime * result + (this.workUnit == null ? 0 : this.workUnit.hashCode()); return result; } @Override public String toString() { return super.toString() + "\nWorkUnit: " + getWorkunit().toString() + "\nExtract: " + getExtract().toString() + "\nJobState: " + this.jobState.toString(); } /** * Adds all properties from {@link gobblin.configuration.State} to this {@link gobblin.configuration.WorkUnitState}. * * <p> * A property with name "property" will be added to this object with the key * "{@link #FINAL_CONSTRUCT_STATE_PREFIX}[.<infix>].property" * </p> * * @param infix Optional infix used for the name of the property in the {@link gobblin.configuration.WorkUnitState}. * @param finalConstructState {@link gobblin.configuration.State} for which all properties should be added to this * object. */ public void addFinalConstructState(String infix, State finalConstructState) { for (String property : finalConstructState.getPropertyNames()) { if (Strings.isNullOrEmpty(infix)) { setProp(FINAL_CONSTRUCT_STATE_PREFIX + property, finalConstructState.getProp(property)); } else { setProp(FINAL_CONSTRUCT_STATE_PREFIX + infix + "." + property, finalConstructState.getProp(property)); } } } /** * Builds a State containing all properties added with {@link #addFinalConstructState} * to this {@link gobblin.configuration.WorkUnitState}. All such properties will be stripped of * {@link #FINAL_CONSTRUCT_STATE_PREFIX} but not of any infixes. * * <p> * For example, if state={sample.property: sampleValue} * then * <pre> * {@code * this.addFinalConstructState("infix",state); * this.getFinalConstructState(); * } * </pre> * will return state={infix.sample.property: sampleValue} * </p> * * @return State containing all properties added with {@link #addFinalConstructState}. */ public State getFinalConstructStates() { State constructState = new State(); for (String property : getPropertyNames()) { if (property.startsWith(FINAL_CONSTRUCT_STATE_PREFIX)) { constructState.setProp(property.substring(FINAL_CONSTRUCT_STATE_PREFIX.length()), getProp(property)); } } return constructState; } }