/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hive.hcatalog.api.repl;

import com.google.common.base.Function;
import com.google.common.base.Objects;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOExceptionWithCause;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hive.hcatalog.api.HCatDatabase;
import org.apache.hive.hcatalog.api.HCatPartition;
import org.apache.hive.hcatalog.api.HCatTable;
import org.apache.hive.hcatalog.data.ReaderWriter;

import javax.annotation.Nullable;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Static helpers used by the HCatalog replication API: reading the replication
 * state id from database/table/partition properties, building staging-directory
 * keys, rendering partition clauses, and (de)serializing {@link Command}s.
 */
public class ReplicationUtils {

  /** Property key under which the replication state id is looked up. */
  public static final String REPL_STATE_ID = ReplicationSpec.KEY.CURR_STATE_ID.toString();

  /** State id reported when no replication state has been recorded. */
  private static final long DEFAULT_REPL_STATE_ID = 0L;

  /** Matches every character that is not a letter, digit or underscore. Compiled once. */
  private static final Pattern NON_WORD_CHARS = Pattern.compile("[\\W]");

  private ReplicationUtils() {
    // Not instantiable: this class is a collection of static utility methods.
  }

  /**
   * Gets the last known replication state of this db. This is
   * applicable only if it is the destination of a replication
   * and has had data replicated into it via imports previously.
   * Defaults to 0.
   *
   * @param db database whose properties are inspected
   * @return the recorded replication state id, or 0 if none is recorded
   * @throws NumberFormatException if the recorded value is not a valid long
   */
  public static long getLastReplicationId(HCatDatabase db) {
    Long recorded = readReplicationId(db.getProperties());
    return (recorded != null) ? recorded : DEFAULT_REPL_STATE_ID;
  }

  /**
   * Gets the last known replication state of the provided table. This
   * is applicable only if it is the destination of a replication
   * and has had data replicated into it via imports previously.
   * Defaults to 0.
   *
   * @param tbl table whose table properties are inspected
   * @return the recorded replication state id, or 0 if none is recorded
   * @throws NumberFormatException if the recorded value is not a valid long
   */
  public static long getLastReplicationId(HCatTable tbl) {
    Long recorded = readReplicationId(tbl.getTblProps());
    return (recorded != null) ? recorded : DEFAULT_REPL_STATE_ID;
  }

  /**
   * Gets the last known replication state of the provided partition.
   * This is applicable only if it is the destination of a replication
   * and has had data replicated into it via imports previously.
   * If that is not available, but parent table is provided,
   * defaults to parent table's replication state. If that is also
   * unknown, defaults to 0.
   *
   * @param ptn partition whose parameters are inspected first
   * @param parentTable optional table consulted when the partition records no state
   * @return the partition's state id, else the parent table's, else 0
   * @throws NumberFormatException if the value that is consulted is not a valid long
   */
  public static long getLastReplicationId(HCatPartition ptn, @Nullable HCatTable parentTable) {
    Long recorded = readReplicationId(ptn.getParameters());
    if (recorded != null) {
      return recorded;
    }
    // The parent table is only consulted when the partition itself has no state,
    // so a malformed table value cannot mask a valid partition value.
    if (parentTable != null) {
      return getLastReplicationId(parentTable);
    }
    return DEFAULT_REPL_STATE_ID;
  }

  /**
   * Looks up and parses {@link #REPL_STATE_ID} in the given property map.
   *
   * @param props property map, may be null
   * @return the parsed id, or null when the map is null, lacks the key, or maps it to null
   * @throws NumberFormatException if the stored value is not a valid long
   */
  private static Long readReplicationId(Map<String, String> props) {
    if (props == null) {
      return null;
    }
    // Single lookup; a key mapped to null is treated the same as a missing key.
    String raw = props.get(REPL_STATE_ID);
    return (raw == null) ? null : Long.valueOf(Long.parseLong(raw));
  }

  /**
   * Used to generate a unique key for a combination of given event id, dbname,
   * tablename and partition keyvalues. This is used to feed in a name for creating
   * staging directories for exports and imports. This should be idempotent given
   * the same values, i.e. hashcode-like, but at the same time, be guaranteed to be
   * different for every possible partition, while being "readable-ish". Basically,
   * we concat the alphanumeric versions of all of the above, along with a hashcode
   * of the db, tablename and ptn key-value pairs.
   *
   * <p>The readable partition segment is rendered in the iteration order of
   * {@code ptnDesc}, so callers wanting a stable key should pass a map with a
   * stable iteration order.
   *
   * @param eventId event id, emitted as the first segment
   * @param db database name, may be null
   * @param table table name, may be null
   * @param ptnDesc partition key/value pairs, may be null
   * @return the dot-separated key
   */
  public static String getUniqueKey(long eventId, String db, String table,
      Map<String, String> ptnDesc) {
    StringBuilder sb = new StringBuilder();
    sb.append(eventId).append('.');
    sb.append(toStringWordCharsOnly(db)).append('.');
    sb.append(toStringWordCharsOnly(table)).append('.');
    sb.append(toStringWordCharsOnly(ptnDesc)).append('.');
    // The hash is taken over the raw (unsanitized) values.
    sb.append(Objects.hashCode(db, table, ptnDesc));
    return sb.toString();
  }

  /**
   * Return alphanumeric (and '_') representation of a {@code Map<String,String>}:
   * sanitized {@code key=value} pairs joined by commas, or {@code "null"} for a null map.
   */
  private static String toStringWordCharsOnly(Map<String, String> map) {
    if (map == null) {
      return "null";
    }
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (Map.Entry<String, String> e : map.entrySet()) {
      if (!first) {
        sb.append(',');
      }
      sb.append(toStringWordCharsOnly(e.getKey()));
      sb.append('=');
      sb.append(toStringWordCharsOnly(e.getValue()));
      first = false;
    }
    return sb.toString();
  }

  /**
   * Return alphanumeric (and '_') chars only of a string, lowercased.
   *
   * @param s input string, may be null
   * @return {@code "null"} for a null input, otherwise the input with every
   *         non-word character removed and the remainder lowercased
   */
  public static String toStringWordCharsOnly(String s) {
    if (s == null) {
      return "null";
    }
    // Locale.ROOT keeps the result independent of the JVM default locale
    // (e.g. a Turkish locale would otherwise lowercase 'I' to a dotless i).
    return NON_WORD_CHARS.matcher(s).replaceAll("").toLowerCase(Locale.ROOT);
  }

  /**
   * Utility function to use in conjunction with .withDbNameMapping / .withTableNameMapping,
   * if we desire usage of a {@code Map<String,String>} instead of implementing a
   * {@code Function<String,String>}.
   *
   * <p>Declared {@code public static}: this class cannot be instantiated, so an
   * instance method here could never be invoked.
   *
   * @param m backing map, may be null
   * @return a function returning the mapped value for keys present in {@code m},
   *         and the key itself when {@code m} is null or does not contain it
   */
  public static Function<String, String> mapBasedFunction(final Map<String, String> m) {
    return new Function<String, String>() {
      @Nullable
      @Override
      public String apply(@Nullable String s) {
        if ((m == null) || (!m.containsKey(s))) {
          return s;
        }
        return m.get(s);
      }
    };
  }

  /**
   * Return a mapping from a given map function if available, and the key itself if not.
   *
   * @param s key to map
   * @param mapping mapping function, may be null
   * @return the function's result when a function is given and it does not throw
   *         {@link IllegalArgumentException}; otherwise {@code s}
   */
  public static String mapIfMapAvailable(String s, Function<String, String> mapping) {
    try {
      if (mapping != null) {
        return mapping.apply(s);
      }
    } catch (IllegalArgumentException ignored) {
      // The key wasn't present in the mapping, and the function didn't
      // return a default value - ignore, and use our default.
    }
    // We return the key itself, since no mapping was available/returned
    return s;
  }

  /**
   * Renders a partition description as a {@code  PARTITION (k1="v1", k2="v2")} clause
   * (with a leading space).
   *
   * @param ptnDesc partition key/value pairs, may be null
   * @return the clause, or an empty string when {@code ptnDesc} is null or empty
   */
  public static String partitionDescriptor(Map<String, String> ptnDesc) {
    StringBuilder sb = new StringBuilder();
    if ((ptnDesc != null) && (!ptnDesc.isEmpty())) {
      boolean first = true;
      sb.append(" PARTITION (");
      for (Map.Entry<String, String> e : ptnDesc.entrySet()) {
        if (!first) {
          sb.append(", ");
        } else {
          first = false;
        }
        sb.append(e.getKey()); // TODO : verify if any quoting is needed for keys
        sb.append('=');
        sb.append('"');
        sb.append(e.getValue()); // TODO : verify if any escaping is needed for values
        sb.append('"');
      }
      sb.append(')');
    }
    return sb.toString();
  }

  /**
   * Command implements Writable, but that's not terribly easy to use compared
   * to String, even if it plugs in easily into the rest of Hadoop. Provide
   * utility methods to easily serialize and deserialize Commands
   *
   * serializeCommand returns a base64 String representation of given command
   *
   * @param command command to serialize; its class name is written first, then its fields
   * @return URL-safe base64 encoding of the written bytes
   * @throws IOException if writing the command fails
   */
  public static String serializeCommand(Command command) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutput dataOutput = new DataOutputStream(baos);
    // The class name goes first so deserializeCommand knows what to instantiate.
    ReaderWriter.writeDatum(dataOutput, command.getClass().getName());
    command.write(dataOutput);
    return Base64.encodeBase64URLSafeString(baos.toByteArray());
  }

  /**
   * Command implements Writable, but that's not terribly easy to use compared
   * to String, even if it plugs in easily into the rest of Hadoop. Provide
   * utility methods to easily serialize and deserialize Commands
   *
   * deserializeCommand instantiates a concrete Command and initializes it,
   * given a base64 String representation of it.
   *
   * <p>NOTE(review): the class to instantiate is named by the payload itself, so
   * this should only be fed strings from a trusted source.
   *
   * @param s base64 string as produced by {@link #serializeCommand(Command)}
   * @return the reconstructed command
   * @throws IOException if the named class cannot be loaded, is not a {@link Command},
   *         cannot be instantiated, or its fields cannot be read
   */
  public static Command deserializeCommand(String s) throws IOException {
    DataInput dataInput = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(s)));
    String clazz = (String) ReaderWriter.readDatum(dataInput);
    Command cmd;
    try {
      // Load without initializing and check the type BEFORE instantiating, so that
      // a payload naming a non-Command class never gets any of its code executed.
      Class<? extends Command> commandClass =
          Class.forName(clazz, false, ReplicationUtils.class.getClassLoader())
              .asSubclass(Command.class);
      cmd = commandClass.getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      // Deliberately broad: every failure to build the command (including runtime
      // exceptions) is reported to callers as an IOException.
      throw new IOExceptionWithCause("Error instantiating class " + clazz, e);
    }
    cmd.readFields(dataInput);
    return cmd;
  }
}