/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.llap.cache; import org.apache.hadoop.hive.common.io.DiskRange; import org.apache.hadoop.hive.common.io.DiskRangeList; import org.apache.hadoop.hive.common.io.DataCache.BooleanRef; import org.apache.hadoop.hive.common.io.DataCache.DiskRangeListFactory; import org.apache.hadoop.hive.common.io.encoded.MemoryBuffer; public interface LowLevelCache extends LlapOomDebugDump { public enum Priority { NORMAL, HIGH } /** * Gets file data for particular offsets. The range list is modified in place; it is then * returned (since the list head could have changed). Ranges are replaced with cached ranges. * In case of partial overlap with cached data, full cache blocks are always returned; * there's no capacity for partial matches in return type. The rules are as follows: * 1) If the requested range starts in the middle of a cached range, that cached range will not * be returned by default (e.g. if [100,200) and [200,300) are cached, the request for * [150,300) will only return [200,300) from cache). This may be configurable in impls. * This is because we assume well-known range start offsets are used (rg/stripe offsets), so * a request from the middle of the start doesn't make sense. * 2) If the requested range ends in the middle of a cached range, that entire cached range will * be returned (e.g. if [100,200) and [200,300) are cached, the request for [100,250) will * return both ranges). It should really be same as #1, however currently ORC uses estimated * end offsets; we do in fact know in such cases that partially-matched cached block (rg) * can be thrown away, the reader will never touch it; but we need code in the reader to * handle such cases to avoid disk reads for these "tails" vs real unmatched ranges. * Some sort of InvalidCacheChunk could be placed to avoid them. TODO * @param base base offset for the ranges (stripe/stream offset in case of ORC). */ DiskRangeList getFileData(Object fileKey, DiskRangeList range, long baseOffset, DiskRangeListFactory factory, LowLevelCacheCounters qfCounters, BooleanRef gotAllData); /** * Puts file data into cache. * @return null if all data was put; bitmask indicating which chunks were not put otherwise; * the replacement chunks from cache are updated directly in the array. */ long[] putFileData(Object fileKey, DiskRange[] ranges, MemoryBuffer[] chunks, long baseOffset, Priority priority, LowLevelCacheCounters qfCounters); /** Notifies the cache that a particular buffer should be removed due to eviction. */ void notifyEvicted(MemoryBuffer buffer); }