/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.publisher;

import java.io.IOException;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.Path;

import com.google.common.base.Preconditions;

import gobblin.util.ParallelRunner;
import gobblin.util.WriterUtils;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;


/**
 * A {@link BaseDataPublisher} that rewrites each destination path so that it is keyed by
 * "dbname.tablename" and by the time at which this publisher was constructed.
 *
 * Path expected from writer:
 *    {writerfinaldir}/{topicname}/{dbname_tablename_xxxxx}
 *
 * Publisher output path:
 *    {publisherfinaldir}/{dbname.tablename}/{currenttimestamp}
 */
public class TimestampDataPublisher extends BaseDataPublisher {

  /** Shape a schema name must have: at least three non-empty parts separated by underscores. */
  private static final Pattern SCHEMA_NAME_PATTERN = Pattern.compile(".+_.+_.+");

  /**
   * Epoch milliseconds captured once in the constructor, so every path moved by this instance
   * ends up under the same timestamp directory name.
   */
  private final String timestamp;

  /**
   * Creates the publisher and records the current time as the timestamp directory name.
   *
   * @param state job state handed to {@link BaseDataPublisher}
   * @throws IOException if the parent constructor fails
   */
  public TimestampDataPublisher(State state) throws IOException {
    super(state);
    this.timestamp = String.valueOf(System.currentTimeMillis());
  }

  /**
   * Make sure directory exists before running {@link BaseDataPublisher#publishData(WorkUnitState, int, boolean, Set)}
   * so that tables will be moved one at a time rather than all at once
   *
   * @param state work unit state whose data is being published
   * @param branchId index of the branch whose file system and permissions are used
   * @param publishSingleTaskData passed through unchanged to the parent implementation
   * @param writerOutputPathsMoved passed through unchanged to the parent implementation
   * @throws IOException if the existence check, directory creation or the parent publish fails
   */
  @Override
  protected void publishData(WorkUnitState state, int branchId, boolean publishSingleTaskData,
      Set<Path> writerOutputPathsMoved) throws IOException {
    Path publisherOutputDir = getPublisherOutputDir(state, branchId);

    if (!this.publisherFileSystemByBranches.get(branchId).exists(publisherOutputDir)) {
      WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId),
          publisherOutputDir, this.permissions.get(branchId));
    }

    super.publishData(state, branchId, publishSingleTaskData, writerOutputPathsMoved);
  }

  /**
   * Update destination path to put db and table name in format "dbname.tablename" using {@link #getDbTableName(String)}
   * and include timestamp
   *
   * Input dst format: {finaldir}/{schemaName}
   * Output dst format: {finaldir}/{dbname.tablename}/{currenttimestamp}
   *
   * @param parallelRunner passed through unchanged to the parent implementation
   * @param state passed through unchanged to the parent implementation
   * @param src path to move from
   * @param dst original destination; its last component is read as the schema name
   * @param branchId index of the branch whose file system and permissions are used
   * @throws IOException if the existence check, directory creation or the parent move fails
   * @throws IllegalArgumentException if the last component of {@code dst} is not a valid schema name
   */
  @Override
  protected void movePath(ParallelRunner parallelRunner, State state, Path src, Path dst, int branchId)
      throws IOException {
    String outputDir = dst.getParent().toString();
    String schemaName = dst.getName();
    Path newDst = new Path(new Path(outputDir, getDbTableName(schemaName)), this.timestamp);

    // Only the parent ({dbname.tablename}) is created here; the timestamp directory itself is
    // left for the parent implementation's move to produce.
    if (!this.publisherFileSystemByBranches.get(branchId).exists(newDst)) {
      WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId),
          newDst.getParent(), this.permissions.get(branchId));
    }

    super.movePath(parallelRunner, state, src, newDst, branchId);
  }

  /**
   * Translate schema name to "dbname.tablename" to use in path
   *
   * The first underscore becomes a dot and everything from the last underscore onwards is
   * dropped; any underscores in between are kept as they are.
   *
   * @param schemaName In format "dbname_tablename_xxxxx"
   * @return db and table name in format "dbname.tablename"
   * @throws IllegalArgumentException if {@code schemaName} does not have the expected format
   */
  private String getDbTableName(String schemaName) {
    Preconditions.checkArgument(SCHEMA_NAME_PATTERN.matcher(schemaName).matches(),
        "Schema name '%s' is not in the expected format dbname_tablename_xxxxx", schemaName);

    // Replacing one character with one character keeps the length, so the index of the last
    // underscore in the original string is still valid for the replaced string.
    int suffixStart = schemaName.lastIndexOf('_');
    return schemaName.replaceFirst("_", ".").substring(0, suffixStart);
  }
}