/* * Copyright © 2014 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data.stream; import co.cask.cdap.api.common.Bytes; import co.cask.cdap.common.io.BinaryDecoder; import com.google.common.collect.Maps; import com.google.common.io.ByteStreams; import com.google.common.io.InputSupplier; import com.google.common.primitives.Longs; import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongList; import it.unimi.dsi.fastutil.longs.LongListIterator; import it.unimi.dsi.fastutil.longs.LongLists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import java.util.Collections; import java.util.Map; /** * This class is for loading stream index file and lookup of the index. */ final class StreamDataFileIndex { private static final Logger LOG = LoggerFactory.getLogger(StreamDataFileIndex.class); private static final byte[] INDEX_MAGIC_HEADER = {'I', '1'}; // Parallel array list for holding timestamps and corresponding positions in the index. private final LongList timestamps; private final LongList positions; /** * Constructs with the given input. * * @param indexInputSupplier Provides {@link InputStream} for reading the index. */ StreamDataFileIndex(InputSupplier<? extends InputStream> indexInputSupplier) { LongList timestamps; LongList positions; // Load the whole index into memory. try { Map.Entry<LongList, LongList> index; try (InputStream indexInput = indexInputSupplier.getInput()) { index = loadIndex(indexInput); } timestamps = LongLists.unmodifiable(index.getKey()); positions = LongLists.unmodifiable(index.getValue()); } catch (IOException e) { LOG.error("Failed to load stream index. Default to empty index.", e); timestamps = LongLists.EMPTY_LIST; positions = LongLists.EMPTY_LIST; } this.timestamps = timestamps; this.positions = positions; } /** * Finds the largest event file position recorded in the index that has timestamp smaller than or equal to the given * timestamp. * * @param timestamp Stream event timestamp to search for. * @return The file position or {@code -1} if no record satisfied the requirement can be found. */ long floorPositionByTime(long timestamp) { if (timestamps.isEmpty()) { return -1; } // Binary search for a timestamp that is larger than or equals to the given timestamp. int idx = binarySearch(timestamps, timestamp); if (idx >= 0) { return positions.getLong(idx); } // Return the position that has smaller timestamp than the one to search for. // If every timestamp in the index is larger than the given one, return -1. return idx == -1 ? -1 : positions.getLong(-idx - 2); } /** * Finds the largest event file position recorded in the index that is smaller than or equal to a given offset. * * @param offset an arbitrary file offset. * @return largest event file position that is smaller than or equal to the given offset. * */ long floorPosition(long offset) { if (positions.isEmpty()) { return 0L; } int idx = binarySearch(positions, offset); if (idx >= 0) { return offset; } return idx == -1 ? 0 : positions.getLong(-idx - 2); } /** * Returns a {@link StreamDataFileIndexIterator} for iterating over all (timestamp, position) pairs. */ StreamDataFileIndexIterator indexIterator() { final LongListIterator timestampIter = timestamps.iterator(); final LongListIterator positionIter = positions.iterator(); return new StreamDataFileIndexIterator() { private long timestamp; private long position; @Override public boolean nextIndexEntry() { if (timestampIter.hasNext() && positionIter.hasNext()) { timestamp = timestampIter.nextLong(); position = positionIter.nextLong(); return true; } return false; } @Override public long currentTimestamp() { return timestamp; } @Override public long currentPosition() { return position; } }; } /** * Same contract as {@link Collections#binarySearch(java.util.List, Object)}, except that it works on LongList. */ private int binarySearch(LongList list, long target) { // Binary search for a value that is larger than or equals to the given target int low = 0; int high = list.size() - 1; while (low <= high) { int mid = ((high - low) >> 1) + low; long value = list.getLong(mid); if (value < target) { low = mid + 1; } else if (value > target) { high = mid - 1; } else { // Found, returns the index return mid; } } return -(low + 1); } private Map.Entry<LongList, LongList> loadIndex(InputStream input) throws IOException { byte[] magic = new byte[INDEX_MAGIC_HEADER.length]; ByteStreams.readFully(input, magic); if (!Arrays.equals(magic, INDEX_MAGIC_HEADER)) { throw new IOException("Unsupported index file format. Expected magic bytes as 'I' '1'"); } // Decode the properties map. In current version, it is not used. StreamUtils.decodeMap(new BinaryDecoder(input)); // Read in all index (timestamp, position pairs). LongList timestamps = new LongArrayList(1000); LongList positions = new LongArrayList(1000); byte[] buf = new byte[Longs.BYTES * 2]; while (ByteStreams.read(input, buf, 0, buf.length) == buf.length) { timestamps.add(Bytes.toLong(buf, 0)); positions.add(Bytes.toLong(buf, Longs.BYTES)); } return Maps.immutableEntry(timestamps, positions); } }