/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hive.hcatalog.api.repl;
import com.google.common.base.Function;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.hcatalog.api.HCatClient;
import org.apache.hive.hcatalog.api.HCatNotificationEvent;
import org.apache.hive.hcatalog.messaging.MessageFactory;
/**
 * ReplicationTask captures the concept of what it'd take to replicate changes from
 * one warehouse to another given a notification event that captures what changed.
 *
 * <p>Instances are obtained through {@link #create(HCatClient, HCatNotificationEvent)},
 * which delegates to a lazily instantiated {@link Factory}. The static factory state
 * is guarded by the {@code ReplicationTask.class} monitor, so {@code create(...)} and
 * {@link #resetFactory(Class)} may be called from different threads.
 */
@InterfaceStability.Evolving
public abstract class ReplicationTask {

  protected HCatNotificationEvent event;
  protected StagingDirectoryProvider srcStagingDirProvider = null;
  protected StagingDirectoryProvider dstStagingDirProvider = null;
  protected Function<String,String> tableNameMapping = null;
  protected Function<String,String> dbNameMapping = null;
  protected static MessageFactory messageFactory = MessageFactory.getInstance();

  // Both fields below are read and written only while holding the ReplicationTask.class monitor.
  private static Factory factoryInstance = null;
  private static String factoryClassName = null;

  /**
   * Creates the concrete {@link ReplicationTask} for a notification event.
   */
  public interface Factory {
    public ReplicationTask create(HCatClient client, HCatNotificationEvent event);
  }

  /**
   * Returns the current factory, instantiating it first if there is none.
   *
   * Synchronized so that the null check, the instantiation and the final read all see a
   * consistent value even if resetFactory() is running concurrently.
   */
  private static synchronized Factory getFactoryInstance(HCatClient client) {
    if (factoryInstance == null){
      createFactoryInstance(client);
    }
    return factoryInstance;
  }

  /**
   * Create factory instance for instantiating ReplicationTasks.
   *
   * The order precedence is as follows:
   *
   * a) If a factory has already been instantiated, and is valid, use it.
   * b) If a factoryClassName has been provided, through .resetFactory(), attempt to instantiate that.
   * c) If a hive.repl.task.factory has been set in the default hive conf, use that.
   * d) If none of the above methods work, instantiate an anonymous factory that will return an error
   *    whenever called, till a user calls resetFactory.
   */
  private synchronized static void createFactoryInstance(HCatClient client) {
    if (factoryInstance != null){
      // current factory is still valid, nothing to do.
      return;
    }
    if (factoryClassName == null){
      // figure out which factory we're instantiating from HiveConf iff it's not been set on us directly.
      factoryClassName = client.getConfVal(HiveConf.ConfVars.HIVE_REPL_TASK_FACTORY.varname,"");
    }
    try {
      // asSubclass() rejects a class that is not a Factory with a ClassCastException,
      // which is handled below like any other instantiation failure.
      Class<? extends Factory> factoryClass =
          Class.forName(factoryClassName).asSubclass(Factory.class);
      factoryInstance = factoryClass.newInstance();
    } catch (Exception e) {
      // Remember what failed and why, so that the error raised later names the class that
      // actually failed to load and carries the root cause.
      final String failedClassName = factoryClassName;
      final Exception cause = e;
      factoryInstance = new Factory() {
        @Override
        public ReplicationTask create(HCatClient client, HCatNotificationEvent event) {
          throw new IllegalStateException("Error instantiating ReplicationTask.Factory " +
              HiveConf.ConfVars.HIVE_REPL_TASK_FACTORY.varname+"="+failedClassName +
              ". Call resetFactory() if you need to reset to a valid one.", cause);
        }
      };
    }
  }

  /**
   * Resets the ReplicationTask.Factory used by {@link #create(HCatClient, HCatNotificationEvent)}.
   * Primarily intended for testing. The currently cached factory instance is discarded and a
   * new one is instantiated on the next call to create(...).
   *
   * @param factoryClass The new ReplicationTask.Factory class to use, or null to fall back to
   *                     the factory configured through hive.repl.task.factory.
   */
  public static synchronized void resetFactory(Class<? extends Factory> factoryClass) {
    factoryClassName = (factoryClass != null) ? factoryClass.getName() : null;
    factoryInstance = null;
  }

  /**
   * Factory method to return appropriate subtype of ReplicationTask for given event
   * @param client HCatClient handed to the factory; also used to look up the configured
   *               factory class name if none has been set through resetFactory()
   * @param event HCatNotificationEvent returned by the notification subsystem
   * @return corresponding ReplicationTask
   * @throws IllegalArgumentException if event is null
   * @throws IllegalStateException if no valid ReplicationTask.Factory could be instantiated
   */
  public static ReplicationTask create(HCatClient client, HCatNotificationEvent event){
    if (event == null){
      throw new IllegalArgumentException("event should not be null");
    }
    return getFactoryInstance(client).create(client,event);
  }

  // Primary entry point is a factory method instead of ctor
  // to allow for future ctor mutability in design
  protected ReplicationTask(HCatNotificationEvent event) {
    this.event = event;
  }

  /**
   * Returns the event that this ReplicationTask is attempting to replicate
   * @return underlying event
   */
  public HCatNotificationEvent getEvent(){
    return this.event;
  }

  /**
   * Returns true if the replication task in question needs to create staging
   * directories to complete its operation. This will mean that you will need
   * to copy these directories over to the destination warehouse for each
   * source-destination warehouse pair.
   * If this is true, you will need to call .withSrcStagingDirProvider(...)
   * and .withDstStagingDirProvider(...) before this ReplicationTask is usable
   */
  public abstract boolean needsStagingDirs();

  /**
   * Returns true if this ReplicationTask is prepared with all info it needs, and is
   * ready to be used. That is the case when no staging directories are needed, or
   * when both the source and the destination staging directory providers are set.
   */
  public boolean isActionable(){
    return !this.needsStagingDirs()
        || ((srcStagingDirProvider != null) && (dstStagingDirProvider != null));
  }

  /**
   * See {@link org.apache.hive.hcatalog.api.repl.StagingDirectoryProvider}
   * @param srcStagingDirProvider Staging Directory Provider for the source warehouse
   * @return this replication task
   */
  public ReplicationTask withSrcStagingDirProvider(StagingDirectoryProvider srcStagingDirProvider){
    this.srcStagingDirProvider = srcStagingDirProvider;
    return this;
  }

  /**
   * See {@link org.apache.hive.hcatalog.api.repl.StagingDirectoryProvider}
   * @param dstStagingDirProvider Staging Directory Provider for the destination warehouse
   * @return this replication task
   */
  public ReplicationTask withDstStagingDirProvider(StagingDirectoryProvider dstStagingDirProvider){
    this.dstStagingDirProvider = dstStagingDirProvider;
    return this;
  }

  /**
   * Allows a user to specify a table name mapping, where the function provided maps the name of
   * the table in the source warehouse to the name of the table in the dest warehouse. It is expected
   * that if the mapping does not exist, it should return the same name sent in. Or, if the function
   * throws an IllegalArgumentException as well, a ReplicationTask will use the same key sent in.
   * That way, the default will then be that the destination table name is the same as the src table name
   *
   * If you want to use a {@code Map<String,String>} mapping instead of a {@code Function<String,String>},
   * simply call this function as .withTableNameMapping(ReplicationUtils.mapBasedFunction(tableMap))
   * @param tableNameMapping function from source table name to destination table name
   * @return this replication task
   */
  public ReplicationTask withTableNameMapping(Function<String,String> tableNameMapping){
    this.tableNameMapping = tableNameMapping;
    return this;
  }

  /**
   * Allows a user to specify a db name mapping, where the function provided maps the name of
   * the db in the source warehouse to the name of the db in the dest warehouse. It is expected
   * that if the mapping does not exist, it should return the same name sent in. Or, if the function
   * throws an IllegalArgumentException as well, a ReplicationTask will use the same key sent in.
   * That way, the default will then be that the destination db name is the same as the src db name
   *
   * If you want to use a {@code Map<String,String>} mapping instead of a {@code Function<String,String>},
   * simply call this function as .withDbNameMapping(ReplicationUtils.mapBasedFunction(dbMap))
   * @param dbNameMapping function from source db name to destination db name
   * @return this replication task
   */
  public ReplicationTask withDbNameMapping(Function<String,String> dbNameMapping){
    this.dbNameMapping = dbNameMapping;
    return this;
  }

  /**
   * Guard for operations that require an actionable task.
   * @throws IllegalStateException if {@link #isActionable()} returns false
   */
  protected void verifyActionable() {
    if (!this.isActionable()){
      throw new IllegalStateException("actionable command on task called when ReplicationTask is still not actionable.");
    }
  }

  /**
   * Returns an {@code Iterable<Command>} to send to a hive driver on the source warehouse
   *
   * If you *need* a {@code List<Command>} instead, you can use guava's
   * ImmutableList.copyOf(iterable) or Lists.newArrayList(iterable) to
   * get the underlying list, but this defeats the purpose of making this
   * interface an Iterable rather than a List, since it is very likely
   * that the number of Commands returned here will cause your process
   * to run OOM.
   */
  abstract public Iterable<? extends Command> getSrcWhCommands();

  /**
   * Returns an {@code Iterable<Command>} to send to a hive driver on the destination warehouse
   *
   * If you *need* a {@code List<Command>} instead, you can use guava's
   * ImmutableList.copyOf(iterable) or Lists.newArrayList(iterable) to
   * get the underlying list, but this defeats the purpose of making this
   * interface an Iterable rather than a List, since it is very likely
   * that the number of Commands returned here will cause your process
   * to run OOM.
   */
  abstract public Iterable<? extends Command> getDstWhCommands();

  /**
   * Checks that the given event is non-null and of the expected type.
   * @param event event to check
   * @param allowedEventType the only event type this task accepts
   * @throws IllegalStateException if event is null or its type differs from allowedEventType
   */
  protected void validateEventType(HCatNotificationEvent event, String allowedEventType) {
    if (event == null || !allowedEventType.equals(event.getEventType())){
      throw new IllegalStateException(this.getClass().getName() + " valid only for " +
          allowedEventType + " events.");
    }
  }
}