/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.source.workunit; import java.util.List; import java.util.Locale; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import com.google.common.base.Joiner; import com.google.common.base.Strings; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.SourceState; import gobblin.configuration.State; import gobblin.configuration.WorkUnitState; /** * A class representing all the base attributes required by all tables types. Subclasses * will be expected to validate each table type for their respective required attributes. * * <p> * The extract ID only needs to be unique for {@link Extract}s belonging to the same * namespace/table. One or more {@link WorkUnit}s can share the same extract ID. * {@link WorkUnit}s that do share an extract ID will be considered parts of a single * {@link Extract} for the purpose of applying publishing policies. * </p> * * @author kgoodhop * */ public class Extract extends State { public enum TableType { SNAPSHOT_ONLY, SNAPSHOT_APPEND, APPEND_ONLY } private static final DateTimeFormatter DTF = DateTimeFormat.forPattern("yyyyMMddHHmmss").withLocale(Locale.US).withZone(DateTimeZone.UTC); private final State previousTableState = new State(); /** * Constructor. * * @param state a {@link SourceState} carrying properties needed to construct an {@link Extract} * @param namespace dot separated namespace path * @param type {@link TableType} * @param table table name * * @deprecated Extract does not use any property in {@link SourceState}. * Use {@link #Extract(TableType, String, String)} */ @Deprecated public Extract(SourceState state, TableType type, String namespace, String table) { // Values should only be null for deserialization if (state != null && type != null && !Strings.isNullOrEmpty(namespace) && !Strings.isNullOrEmpty(table)) { String extractId = DTF.print(new DateTime()); super.addAll(state); super.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, type.toString()); super.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, namespace); super.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, table); super.setProp(ConfigurationKeys.EXTRACT_EXTRACT_ID_KEY, extractId); for (WorkUnitState pre : state.getPreviousWorkUnitStates()) { Extract previousExtract = pre.getWorkunit().getExtract(); if (previousExtract.getNamespace().equals(namespace) && previousExtract.getTable().equals(table)) { this.previousTableState.addAll(pre); } } // Setting full drop date if not already specified, the value can still be overridden if required. if (state.getPropAsBoolean(ConfigurationKeys.EXTRACT_IS_FULL_KEY) && !state.contains(ConfigurationKeys.EXTRACT_FULL_RUN_TIME_KEY)) { super.setProp(ConfigurationKeys.EXTRACT_FULL_RUN_TIME_KEY, System.currentTimeMillis()); } } } /** * Constructor. * * @param type {@link TableType} * @param namespace dot separated namespace path * @param table table name */ public Extract(TableType type, String namespace, String table) { this(new SourceState(), type, namespace, table); } /** * Deep copy constructor. * * @param extract the other {@link Extract} instance */ public Extract(Extract extract) { super.addAll(extract.getProperties()); } @Override public boolean equals(Object object) { if (!(object instanceof Extract)) { return false; } Extract other = (Extract) object; return super.equals(other) && this.getNamespace().equals(other.getNamespace()) && this.getTable().equals(other.getTable()) && this.getExtractId().equals(other.getExtractId()); } @Override public int hashCode() { return (this.getNamespace() + this.getTable() + this.getExtractId()).hashCode(); } /** * Get the writer output file path corresponding to this {@link Extract}. * * @return writer output file path corresponding to this {@link Extract} */ public String getOutputFilePath() { return this.getNamespace().replaceAll("\\.", "/") + "/" + this.getTable() + "/" + this.getExtractId() + "_" + (this.getIsFull() ? "full" : "append"); } /** * If this {@link Extract} has extract table type defined. * * @return <code>true</code> if it has, <code>false</code> otherwise. */ public boolean hasType() { return contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY); } /** * Get the {@link TableType} of the table. * * @return {@link TableType} of the table */ public TableType getType() { return TableType.valueOf(getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)); } /** * Get the dot-separated namespace of the table. * * @return dot-separated namespace of the table */ public String getNamespace() { return getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, ""); } /** * Get the name of the table. * * @return name of the table */ public String getTable() { return getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, ""); } /** * Get a (non-globally) unique ID for this {@link Extract}. * * @return unique ID for this {@link Extract} */ public String getExtractId() { return getProp(ConfigurationKeys.EXTRACT_EXTRACT_ID_KEY, ""); } /** * Set a (non-globally) unique ID for this {@link Extract}. * * @param extractId unique ID for this {@link Extract} */ public void setExtractId(String extractId) { setProp(ConfigurationKeys.EXTRACT_EXTRACT_ID_KEY, extractId); } /** * Check if this {@link Extract} represents the full contents of the source table. * * @return <code>true</code> if this {@link Extract} represents the full contents * of the source table and <code>false</code> otherwise * @deprecated It is recommend to get this information from {@code WorkUnit} instead of {@code Extract}. */ @Deprecated public boolean getIsFull() { return getPropAsBoolean(ConfigurationKeys.EXTRACT_IS_FULL_KEY, false); } /** * Set full drop date from the given time. * * @param extractFullRunTime full extract time * @deprecated It is recommend to set this information in {@code WorkUnit} instead of {@code Extract}. */ @Deprecated public void setFullTrue(long extractFullRunTime) { setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true); setProp(ConfigurationKeys.EXTRACT_FULL_RUN_TIME_KEY, extractFullRunTime); } /** * Set primary keys. * * <p> * The order of primary keys does not matter. * </p> * * @param primaryKeyFieldName primary key names * @deprecated It is recommended to set primary keys in {@code WorkUnit} instead of {@code Extract}. */ @Deprecated public void setPrimaryKeys(String... primaryKeyFieldName) { setProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY, Joiner.on(",").join(primaryKeyFieldName)); } /** * Add more primary keys to the existing set of primary keys. * * @param primaryKeyFieldName primary key names * @deprecated @deprecated It is recommended to add primary keys in {@code WorkUnit} instead of {@code Extract}. */ @Deprecated public void addPrimaryKey(String... primaryKeyFieldName) { StringBuilder sb = new StringBuilder(getProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY, "")); Joiner.on(",").appendTo(sb, primaryKeyFieldName); setProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY, sb.toString()); } /** * Get the list of primary keys. * * @return list of primary keys * @deprecated It is recommended to obtain primary keys from {@code WorkUnit} instead of {@code Extract}. */ @Deprecated public List<String> getPrimaryKeys() { return getPropAsList(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY); } /** * Set delta fields. * * <p> * The order of delta fields does not matter. * </p> * * @param deltaFieldName delta field names * @deprecated It is recommended to set delta fields in {@code WorkUnit} instead of {@code Extract}. */ @Deprecated public void setDeltaFields(String... deltaFieldName) { setProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY, Joiner.on(",").join(deltaFieldName)); } /** * Add more delta fields to the existing set of delta fields. * * @param deltaFieldName delta field names * @deprecated It is recommended to add delta fields in {@code WorkUnit} instead of {@code Extract}. */ @Deprecated public void addDeltaField(String... deltaFieldName) { StringBuilder sb = new StringBuilder(getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY, "")); Joiner.on(",").appendTo(sb, deltaFieldName); setProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY, sb.toString()); } /** * Get the list of delta fields. * * @return list of delta fields * @deprecated It is recommended to obtain delta fields from {@code WorkUnit} instead of {@code Extract}. */ @Deprecated public List<String> getDeltaFields() { return getPropAsList(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY); } /** * Get the previous table {@link State}. * * @return previous table {@link State} */ public State getPreviousTableState() { return this.previousTableState; } }