/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.io.orc;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.MetadataPpdResult;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.io.HdfsUtils;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.FooterCache;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId;
import org.apache.orc.impl.OrcTail;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Output;
import com.google.common.collect.Lists;

/** Metastore-based footer cache storing serialized footers. Also has a local cache. */
public class ExternalCache implements FooterCache {
  private static final Logger LOG = LoggerFactory.getLogger(ExternalCache.class);
  private static boolean isDebugEnabled = LOG.isDebugEnabled();

  private final LocalCache localCache;
  private final ExternalFooterCachesByConf externalCacheSrc;
  private boolean isWarnLogged = false;

  // Configuration and things set from it.
  private HiveConf conf;
  private boolean isInTest;
  private SearchArgument sarg;
  private ByteBuffer sargIsOriginal, sargNotIsOriginal;
  private boolean isPpdEnabled;

  public ExternalCache(LocalCache lc, ExternalFooterCachesByConf efcf) {
    localCache = lc;
    externalCacheSrc = efcf;
  }

  @Override
  public void put(OrcInputFormat.FooterCacheKey key, OrcTail orcTail) throws IOException {
    localCache.put(key.getPath(), orcTail);
    if (key.getFileId() != null) {
      try {
        externalCacheSrc.getCache(conf).putFileMetadata(Lists.newArrayList(key.getFileId()),
            Lists.newArrayList(orcTail.getSerializedTail()));
      } catch (HiveException e) {
        throw new IOException(e);
      }
    }
  }

  @Override
  public boolean isBlocking() {
    return true;
  }

  @Override
  public boolean hasPpd() {
    return isPpdEnabled;
  }

  public void configure(HiveConf queryConfig) {
    this.conf = queryConfig;
    this.sarg = ConvertAstToSearchArg.createFromConf(conf);
    this.isPpdEnabled = HiveConf.getBoolVar(conf, ConfVars.HIVEOPTINDEXFILTER)
        && HiveConf.getBoolVar(conf, ConfVars.HIVE_ORC_MS_FOOTER_CACHE_PPD);
    this.isInTest = HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST);
    this.sargIsOriginal = this.sargNotIsOriginal = null;
  }

  @Override
  public void getAndValidate(List<HdfsFileStatusWithId> files, boolean isOriginal,
      OrcTail[] result, ByteBuffer[] ppdResult) throws IOException, HiveException {
    assert result.length == files.size();
    assert ppdResult == null || ppdResult.length == files.size();

    // First, check the local cache.
    localCache.getAndValidate(files, isOriginal, result, ppdResult);

    // posMap is an unfortunate consequence of batching/iterating thru MS results.
    HashMap<Long, Integer> posMap = new HashMap<Long, Integer>();

    // We won't do metastore-side PPD for the things we have locally.
    List<Long> fileIds = determineFileIdsToQuery(files, result, posMap);

    // Need to get a new one, see the comment wrt threadlocals.
    ExternalFooterCachesByConf.Cache cache = externalCacheSrc.getCache(conf);

    ByteBuffer serializedSarg = null;
    if (isPpdEnabled) {
      serializedSarg = getSerializedSargForMetastore(isOriginal);
    }
    if (serializedSarg != null) {
      Iterator<Entry<Long, MetadataPpdResult>> iter = cache.getFileMetadataByExpr(
          fileIds, serializedSarg, false); // don't fetch the footer, PPD happens in MS.
      while (iter.hasNext()) {
        Entry<Long, MetadataPpdResult> e = iter.next();
        int ix = getAndVerifyIndex(posMap, files, result, e.getKey());
        processPpdResult(e.getValue(), files.get(ix), ix, result, ppdResult);
      }
    } else {
      // Only populate corrupt IDs for the things we couldn't deserialize if we are not using
      // ppd. We assume that PPD makes sure the cached values are correct (or fails otherwise);
      // also, we don't use the footers in PPD case.
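      // Footers that fail to deserialize below are collected and cleared from the external
      // cache at the end of this branch, so stale entries are not served again.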
      List<Long> corruptIds = null;
      Iterator<Entry<Long, ByteBuffer>> iter = cache.getFileMetadata(fileIds);
      while (iter.hasNext()) {
        Entry<Long, ByteBuffer> e = iter.next();
        int ix = getAndVerifyIndex(posMap, files, result, e.getKey());
        if (!processBbResult(e.getValue(), ix, files.get(ix), result)) {
          if (corruptIds == null) {
            corruptIds = new ArrayList<>();
          }
          corruptIds.add(e.getKey());
        }
      }
      if (corruptIds != null) {
        cache.clearFileMetadata(corruptIds);
      }
    }
  }

  private int getAndVerifyIndex(HashMap<Long, Integer> posMap,
      List<HdfsFileStatusWithId> files, OrcTail[] result, Long fileId) {
    int ix = posMap.get(fileId);
    assert result[ix] == null;
    assert fileId != null && fileId.equals(files.get(ix).getFileId());
    return ix;
  }

  private boolean processBbResult(
      ByteBuffer bb, int ix, HdfsFileStatusWithId file, OrcTail[] result) throws IOException {
    if (bb == null) return true;
    result[ix] = createOrcTailFromMs(file, bb);
    if (result[ix] == null) {
      return false;
    }
    localCache.put(file.getFileStatus().getPath(), result[ix]);
    return true;
  }

  private void processPpdResult(MetadataPpdResult mpr, HdfsFileStatusWithId file, int ix,
      OrcTail[] result, ByteBuffer[] ppdResult) throws IOException {
    if (mpr == null) return; // This file is unknown to metastore.

    ppdResult[ix] = mpr.isSetIncludeBitset() ? mpr.bufferForIncludeBitset() : NO_SPLIT_AFTER_PPD;
    if (mpr.isSetMetadata()) {
      result[ix] = createOrcTailFromMs(file, mpr.bufferForMetadata());
      if (result[ix] != null) {
        localCache.put(file.getFileStatus().getPath(), result[ix]);
      }
    }
  }

  private List<Long> determineFileIdsToQuery(
      List<HdfsFileStatusWithId> files, OrcTail[] result, HashMap<Long, Integer> posMap) {
    for (int i = 0; i < result.length; ++i) {
      if (result[i] != null) continue;
      HdfsFileStatusWithId file = files.get(i);
      final FileStatus fs = file.getFileStatus();
      Long fileId = file.getFileId();
      if (fileId == null) {
        if (!isInTest) {
          if (!isWarnLogged || isDebugEnabled) {
            LOG.warn("Not using metastore cache because fileId is missing: " + fs.getPath());
            isWarnLogged = true;
          }
          continue;
        }
        fileId = generateTestFileId(fs, files, i);
        LOG.info("Generated file ID " + fileId + " at " + i);
      }
      posMap.put(fileId, i);
    }
    return Lists.newArrayList(posMap.keySet());
  }

  private Long generateTestFileId(final FileStatus fs, List<HdfsFileStatusWithId> files, int i) {
    final Long fileId = HdfsUtils.createFileId(fs.getPath().toUri().getPath(), fs, false, null);
    files.set(i, new HdfsFileStatusWithId() {
      @Override
      public FileStatus getFileStatus() {
        return fs;
      }

      @Override
      public Long getFileId() {
        return fileId;
      }
    });
    return fileId;
  }

  private ByteBuffer getSerializedSargForMetastore(boolean isOriginal) {
    if (sarg == null) return null;
    ByteBuffer serializedSarg = isOriginal ? sargIsOriginal : sargNotIsOriginal;
    if (serializedSarg != null) return serializedSarg;
    SearchArgument sarg2 = sarg;
    Kryo kryo = SerializationUtilities.borrowKryo();
    try {
      if ((isOriginal ? sargNotIsOriginal : sargIsOriginal) == null) {
        sarg2 = kryo.copy(sarg2); // In case we need it for the other case.
      }
      translateSargToTableColIndexes(sarg2, conf, OrcInputFormat.getRootColumn(isOriginal));
      ExternalCache.Baos baos = new Baos();
      Output output = new Output(baos);
      kryo.writeObject(output, sarg2);
      output.flush();
      serializedSarg = baos.get();
      if (isOriginal) {
        sargIsOriginal = serializedSarg;
      } else {
        sargNotIsOriginal = serializedSarg;
      }
    } finally {
      SerializationUtilities.releaseKryo(kryo);
    }
    return serializedSarg;
  }

  /**
   * Modifies the SARG, replacing column names with column indexes in target table schema. This
   * basically does the same thing as all the shenanigans with included columns, except for the
   * last step where ORC gets direct subtypes of the root column and uses the ordered match to
   * map table columns to file columns. The numbers put into the predicate leaves should allow
   * going into said subtypes directly by index to get the proper index in the file.
   * This won't work with schema evolution, although it's probably much easier to reason about
   * if schema evolution were to be supported, because this is a clear boundary between table
   * schema columns and all things ORC. None of the ORC stuff is used here and none of the
   * table schema stuff is used after that - ORC doesn't need a bunch of extra crap to apply
   * the SARG thus modified.
   */
  public static void translateSargToTableColIndexes(
      SearchArgument sarg, Configuration conf, int rootColumn) {
    String nameStr = OrcInputFormat.getNeededColumnNamesString(conf),
        idStr = OrcInputFormat.getSargColumnIDsString(conf);
    String[] knownNames = nameStr.split(",");
    String[] idStrs = (idStr == null) ? null : idStr.split(",");
    assert idStrs == null || knownNames.length == idStrs.length;
    HashMap<String, Integer> nameIdMap = new HashMap<>();
    for (int i = 0; i < knownNames.length; ++i) {
      Integer newId = (idStrs != null) ? Integer.parseInt(idStrs[i]) : i;
      Integer oldId = nameIdMap.put(knownNames[i], newId);
      if (oldId != null && oldId.intValue() != newId.intValue()) {
        throw new RuntimeException("Multiple IDs for " + knownNames[i] + " in column strings: ["
            + idStr + "], [" + nameStr + "]");
      }
    }
    List<PredicateLeaf> leaves = sarg.getLeaves();
    for (int i = 0; i < leaves.size(); ++i) {
      PredicateLeaf pl = leaves.get(i);
      Integer colId = nameIdMap.get(pl.getColumnName());
      String newColName = RecordReaderImpl.encodeTranslatedSargColumn(rootColumn, colId);
      SearchArgumentFactory.setPredicateLeafColumn(pl, newColName);
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("SARG translated into " + sarg);
    }
  }

  private static OrcTail createOrcTailFromMs(
      HdfsFileStatusWithId file, ByteBuffer bb) throws IOException {
    if (bb == null) return null;
    FileStatus fs = file.getFileStatus();
    ByteBuffer copy = bb.duplicate();
    try {
      OrcTail orcTail = ReaderImpl.extractFileTail(copy, fs.getLen(), fs.getModificationTime());
      // Trigger lazy read of metadata to make sure the serialized data is readable and not corrupted.
      orcTail.getStripeStatistics();
      return orcTail;
    } catch (Exception ex) {
      byte[] data = new byte[bb.remaining()];
      System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), data, 0, data.length);
      String msg = "Failed to parse the footer stored in cache for file ID "
          + file.getFileId() + " " + bb + " [ " + Hex.encodeHexString(data) + " ]";
      LOG.error(msg, ex);
      return null;
    }
  }

  private static final class Baos extends ByteArrayOutputStream {
    public ByteBuffer get() {
      return ByteBuffer.wrap(buf, 0, count);
    }
  }

  /** An abstraction for testing ExternalCache in OrcInputFormat. */
  public interface ExternalFooterCachesByConf {
    public interface Cache {
      Iterator<Map.Entry<Long, MetadataPpdResult>> getFileMetadataByExpr(List<Long> fileIds,
          ByteBuffer serializedSarg, boolean doGetFooters) throws HiveException;
      void clearFileMetadata(List<Long> fileIds) throws HiveException;
      Iterator<Map.Entry<Long, ByteBuffer>> getFileMetadata(List<Long> fileIds) throws HiveException;
      void putFileMetadata(
          ArrayList<Long> keys, ArrayList<ByteBuffer> values) throws HiveException;
    }

    public Cache getCache(HiveConf conf) throws IOException;
  }
}
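
/**
 * A minimal in-memory sketch of the ExternalFooterCachesByConf test abstraction above.
 * Illustrative only: this class, its name, and its behavior are assumptions for the sketch and
 * are not part of the original file. Serialized footers are kept in a plain map keyed by file ID,
 * and the metastore-side PPD lookup (getFileMetadataByExpr) is not modeled.
 */
class InMemoryFooterCachesByConf implements ExternalCache.ExternalFooterCachesByConf {
  private final Map<Long, ByteBuffer> store = new HashMap<>();

  @Override
  public ExternalCache.ExternalFooterCachesByConf.Cache getCache(HiveConf conf) throws IOException {
    return new ExternalCache.ExternalFooterCachesByConf.Cache() {
      @Override
      public Iterator<Map.Entry<Long, MetadataPpdResult>> getFileMetadataByExpr(
          List<Long> fileIds, ByteBuffer serializedSarg, boolean doGetFooters) throws HiveException {
        // A real implementation would evaluate the serialized SARG in the metastore.
        throw new HiveException("PPD lookup is not modeled in this sketch");
      }

      @Override
      public void clearFileMetadata(List<Long> fileIds) throws HiveException {
        // Used by ExternalCache to evict entries whose footers failed to deserialize.
        for (Long id : fileIds) {
          store.remove(id);
        }
      }

      @Override
      public Iterator<Map.Entry<Long, ByteBuffer>> getFileMetadata(List<Long> fileIds)
          throws HiveException {
        // Return only the footers we actually have; IDs that are absent simply stay
        // unfilled in the caller's result array and are treated as cache misses.
        Map<Long, ByteBuffer> found = new HashMap<>();
        for (Long id : fileIds) {
          ByteBuffer bb = store.get(id);
          if (bb != null) {
            found.put(id, bb.duplicate());
          }
        }
        return found.entrySet().iterator();
      }

      @Override
      public void putFileMetadata(ArrayList<Long> keys, ArrayList<ByteBuffer> values)
          throws HiveException {
        for (int i = 0; i < keys.size(); ++i) {
          store.put(keys.get(i), values.get(i).duplicate());
        }
      }
    };
  }
}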