/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.metastore.txn;

import org.apache.hadoop.hive.common.ValidCompactorTxnList;
import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.TxnInfo;
import org.apache.hadoop.hive.metastore.api.TxnState;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Map;

/**
 * Static helpers for building valid-transaction lists from metastore responses, locating the
 * configured {@link TxnStore}, checking the transactional table property, and splitting large
 * IN / NOT IN lists across several SQL query strings.
 */
public class TxnUtils {
  private static final Logger LOG = LoggerFactory.getLogger(TxnUtils.class);

  /**
   * Transform a {@link org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse} to a
   * {@link org.apache.hadoop.hive.common.ValidTxnList}.  This assumes that the caller intends to
   * read the files, and thus treats both open and aborted transactions as invalid.
   * @param txns txn list from the metastore
   * @param currentTxn Current transaction that the user has open.  If this is greater than 0 it
   *                   will be removed from the exceptions list so that the user sees his own
   *                   transaction as valid.
   * @return a valid txn list.
   */
  public static ValidTxnList createValidReadTxnList(GetOpenTxnsResponse txns, long currentTxn) {
    /*todo: should highWater be min(currentTxn,txns.getTxn_high_water_mark()) assuming currentTxn>0
     * otherwise if currentTxn=7 and 8 commits before 7, then 7 will see result of 8 which
     * doesn't make sense for Snapshot Isolation.  Of course for Read Committed, the list should
     * include the latest committed set.*/
    long highWater = txns.getTxn_high_water_mark();
    List<Long> open = txns.getOpen_txns();
    // NOTE(review): the aborted bits are passed through unchanged even when currentTxn is dropped
    // from the exceptions below; if the bits are positional over the open list, they may no
    // longer line up with the exceptions array - confirm against ValidReadTxnList.
    BitSet abortedBits = BitSet.valueOf(txns.getAbortedBits());

    // Size for the worst case and trim afterwards.  Pre-sizing to "size - 1" whenever
    // currentTxn > 0 overflows the array (or asks for a negative length) when currentTxn is not
    // actually present in the open list.
    long[] exceptions = new long[open.size()];
    int i = 0;
    for (long txn : open) {
      if (currentTxn > 0 && currentTxn == txn) {
        continue;
      }
      exceptions[i++] = txn;
    }
    if (i < exceptions.length) {
      exceptions = Arrays.copyOf(exceptions, i);
    }

    if (txns.isSetMin_open_txn()) {
      return new ValidReadTxnList(exceptions, abortedBits, highWater, txns.getMin_open_txn());
    } else {
      return new ValidReadTxnList(exceptions, abortedBits, highWater);
    }
  }

  /**
   * Transform a {@link org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse} to a
   * {@link org.apache.hadoop.hive.common.ValidTxnList}.  This assumes that the caller intends to
   * compact the files, and thus treats only open transactions as invalid.  If any transaction is
   * still open, the high water mark is lowered to just below the lowest open txnId, so that
   * every txnId at or above it is also invalid.  This is to avoid creating something like
   * delta_17_120 where txnId 80, for example, is still open.
   * @param txns txn list from the metastore
   * @return a valid txn list.
   */
  public static ValidTxnList createValidCompactTxnList(GetOpenTxnsInfoResponse txns) {
    long highWater = txns.getTxn_high_water_mark();
    long minOpenTxn = Long.MAX_VALUE;
    long[] exceptions = new long[txns.getOpen_txnsSize()];
    int i = 0;
    for (TxnInfo txn : txns.getOpen_txns()) {
      if (txn.getState() == TxnState.OPEN) {
        minOpenTxn = Math.min(minOpenTxn, txn.getId());
      } else {
        //only need aborted since we don't consider anything above minOpenTxn
        exceptions[i++] = txn.getId();
      }
    }
    if (i < exceptions.length) {
      exceptions = Arrays.copyOf(exceptions, i);
    }
    highWater = minOpenTxn == Long.MAX_VALUE ? highWater : minOpenTxn - 1;
    BitSet bitSet = new BitSet(exceptions.length);
    // for ValidCompactorTxnList, everything in exceptions are aborted.
    // Use exceptions.length as the upper bound: BitSet.length() is the index of the highest set
    // bit plus one, which is 0 for a new BitSet, so set(0, bitSet.length()) would set nothing.
    bitSet.set(0, exceptions.length);
    return new ValidCompactorTxnList(exceptions, bitSet, highWater);
  }

  /**
   * Get an instance of the TxnStore that is appropriate for this store.  The implementation
   * class name is read from {@code METASTORE_TXN_STORE_IMPL}, instantiated through its no-arg
   * constructor and handed the configuration.
   * @param conf configuration
   * @return txn store
   * @throws RuntimeException wrapping any failure to load or instantiate the class
   */
  public static TxnStore getTxnStore(HiveConf conf) {
    String className = conf.getVar(HiveConf.ConfVars.METASTORE_TXN_STORE_IMPL);
    try {
      TxnStore handler = ((Class<? extends TxnHandler>) MetaStoreUtils.getClass(
        className)).newInstance();
      handler.setConf(conf);
      return handler;
    } catch (Exception e) {
      LOG.error("Unable to instantiate raw store directly in fastpath mode", e);
      throw new RuntimeException(e);
    }
  }

  /** Checks if a table is a valid ACID table.
   * Note, users are responsible for using the correct TxnManager. We do not look at
   * SessionState.get().getTxnMgr().supportsAcid() here
   * @param table table
   * @return true if table is a legit ACID table, false otherwise (including when the table or
   *         its parameter map is null)
   */
  public static boolean isAcidTable(Table table) {
    if (table == null) {
      return false;
    }
    Map<String, String> parameters = table.getParameters();
    if (parameters == null) {
      // No parameters at all means the transactional property cannot be set.
      return false;
    }
    String tableIsTransactional = parameters.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
    return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
  }

  /**
   * Build a query (or queries if one query is too big) with specified "prefix" and "suffix",
   * while populating the IN list into multiple OR clauses, e.g. id in (1,2,3) OR id in (4,5,6)
   * For NOT IN case, NOT IN list is broken into multiple AND clauses.
   * @param conf      configuration supplying the maximum number of elements per IN clause and
   *                  the maximum query length
   * @param queries   array of complete query strings
   * @param prefix    part of the query that comes before IN list
   * @param suffix    part of the query that comes after IN list
   * @param inList    the list containing IN list values
   * @param inColumn  column name of IN list operator
   * @param addParens add a pair of parenthesis outside the IN lists
   *                  e.g. ( id in (1,2,3) OR id in (4,5,6) )
   * @param notIn     clause to be broken up is NOT IN
   * @throws IllegalArgumentException if {@code inList} is null or empty
   */
  public static void buildQueryWithINClause(HiveConf conf, List<String> queries,
                                            StringBuilder prefix, StringBuilder suffix,
                                            List<Long> inList, String inColumn,
                                            boolean addParens, boolean notIn) {
    if (inList == null || inList.size() == 0) {
      throw new IllegalArgumentException("The IN list is empty!");
    }
    int batchSize = conf.getIntVar(HiveConf.ConfVars.METASTORE_DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE);
    int numWholeBatches = inList.size() / batchSize;

    // Connective and operator text depend only on notIn, so pick them once.
    final String joiner = notIn ? " and " : " or ";
    final String operator = notIn ? " not in (" : " in (";

    StringBuilder buf = new StringBuilder();
    buf.append(prefix);
    if (addParens) {
      buf.append("(");
    }
    buf.append(inColumn);
    buf.append(operator);

    for (int i = 0; i <= numWholeBatches; i++) {
      if (i * batchSize == inList.size()) {
        // At this point we just realized we don't need another query
        break;
      }

      // NOTE(review): if this fires on the first batch (i == 0), the query header is flushed
      // without values and is not rebuilt below, because the rebuild is guarded by i > 0.
      if (needNewQuery(conf, buf)) {
        // Wrap up current query string
        if (addParens) {
          buf.append(")");
        }
        buf.append(suffix);
        queries.add(buf.toString());

        // Prepare a new query string
        buf.setLength(0);
      }

      if (i > 0) {
        if (buf.length() == 0) {
          // A new query was just started: repeat the header.
          buf.append(prefix);
          if (addParens) {
            buf.append("(");
          }
        } else {
          // Same query: chain this batch onto the previous one.
          buf.append(joiner);
        }
        buf.append(inColumn);
        buf.append(operator);
      }

      for (int j = i * batchSize; j < (i + 1) * batchSize && j < inList.size(); j++) {
        buf.append(inList.get(j)).append(",");
      }
      // Turn the trailing comma into the closing parenthesis of this IN list.
      buf.setCharAt(buf.length() - 1, ')');
    }

    if (addParens) {
      buf.append(")");
    }
    buf.append(suffix);
    queries.add(buf.toString());
  }

  /**
   * Estimate if the size of a string will exceed certain limit.  The estimated in-memory size
   * of the buffer's contents, divided by 1024, is compared against
   * {@code METASTORE_DIRECT_SQL_MAX_QUERY_LENGTH}.
   */
  private static boolean needNewQuery(HiveConf conf, StringBuilder sb) {
    int queryMemoryLimit = conf.getIntVar(HiveConf.ConfVars.METASTORE_DIRECT_SQL_MAX_QUERY_LENGTH);
    // http://www.javamex.com/tutorials/memory/string_memory_usage.shtml
    long sizeInBytes = 8 * (((sb.length() * 2) + 45) / 8);
    return sizeInBytes / 1024 > queryMemoryLimit;
  }
}