/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.store;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.lucene.util.BitUtil;

/**
 * Abstract base class for performing read operations of Lucene's low-level
 * data types.
 *
 * <p>{@code DataInput} may only be used from one thread, because it is not
 * thread safe (it keeps internal state like file position). To allow
 * multithreaded use, every {@code DataInput} instance must be cloned before
 * used in another thread. Subclasses must therefore implement {@link #clone()},
 * returning a new {@code DataInput} which operates on the same underlying
 * resource, but positioned independently.
 */
public abstract class DataInput implements Cloneable {

  /** Size, in bytes, of the scratch buffer used by the default {@link #skipBytes(long)}. */
  private static final int SKIP_BUFFER_SIZE = 1024;

  /* This buffer is used to skip over bytes with the default implementation of
   * skipBytes. The reason why we need to use an instance member instead of
   * sharing a single instance across threads is that some delegating
   * implementations of DataInput might want to reuse the provided buffer in
   * order to e.g. update the checksum. If we shared the same buffer across
   * threads, then another thread might update the buffer while the checksum is
   * being computed, making it invalid. See LUCENE-5583 for more information.
   *
   * The buffer is allocated lazily on the first call to skipBytes and is
   * never handed over to a clone (see clone()).
   */
  private byte[] skipBuffer;

  /**
   * Reads and returns a single byte.
   *
   * @return the next byte of this input
   * @throws IOException if the underlying read fails
   * @see DataOutput#writeByte(byte)
   */
  public abstract byte readByte() throws IOException;

  /**
   * Reads a specified number of bytes into an array at the specified offset.
   *
   * @param b the array to read bytes into
   * @param offset the offset in the array to start storing bytes
   * @param len the number of bytes to read
   * @throws IOException if the underlying read fails
   * @see DataOutput#writeBytes(byte[],int)
   */
  public abstract void readBytes(byte[] b, int offset, int len) throws IOException;

  /**
   * Reads a specified number of bytes into an array at the
   * specified offset with control over whether the read
   * should be buffered (callers who have their own buffer
   * should pass in "false" for useBuffer). Currently only
   * {@link BufferedIndexInput} respects this parameter.
   *
   * @param b the array to read bytes into
   * @param offset the offset in the array to start storing bytes
   * @param len the number of bytes to read
   * @param useBuffer set to false if the caller will handle
   *        buffering.
   * @throws IOException if the underlying read fails
   * @see DataOutput#writeBytes(byte[],int)
   */
  public void readBytes(byte[] b, int offset, int len, boolean useBuffer) throws IOException {
    // Default to ignoring useBuffer entirely
    readBytes(b, offset, len);
  }

  /**
   * Reads two bytes and returns a short. The first byte read becomes the
   * most significant byte of the result.
   *
   * @return the decoded short
   * @throws IOException if the underlying read fails
   * @see DataOutput#writeShort(short)
   */
  public short readShort() throws IOException {
    return (short) (((readByte() & 0xFF) << 8) | (readByte() & 0xFF));
  }

  /**
   * Reads four bytes and returns an int. The first byte read becomes the
   * most significant byte of the result.
   *
   * @return the decoded int
   * @throws IOException if the underlying read fails
   * @see DataOutput#writeInt(int)
   */
  public int readInt() throws IOException {
    return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16)
         | ((readByte() & 0xFF) <<  8) |  (readByte() & 0xFF);
  }

  /**
   * Reads an int stored in variable-length format. Reads between one and
   * five bytes. Smaller values take fewer bytes. Negative numbers are not
   * supported.
   * <p>
   * The format is described further in {@link DataOutput#writeVInt(int)}.
   *
   * @return the decoded int
   * @throws IOException if the underlying read fails, or if the fifth byte
   *         carries bits that do not fit into 32 bits
   * @see DataOutput#writeVInt(int)
   */
  public int readVInt() throws IOException {
    /* This is the original code of this method,
     * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
     * readByte() is inlined. So the loop was unrolled!
    byte b = readByte();
    int i = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = readByte();
      i |= (b & 0x7F) << shift;
    }
    return i;
    */
    // A non-negative byte has its high (continuation) bit clear: last byte.
    byte b = readByte();
    if (b >= 0) return b;
    int i = b & 0x7F;
    b = readByte();
    i |= (b & 0x7F) << 7;
    if (b >= 0) return i;
    b = readByte();
    i |= (b & 0x7F) << 14;
    if (b >= 0) return i;
    b = readByte();
    i |= (b & 0x7F) << 21;
    if (b >= 0) return i;
    b = readByte();
    // Warning: the next ands use 0x0F / 0xF0 - beware copy/paste errors:
    // only the low four bits of the fifth byte fit into a 32-bit value.
    i |= (b & 0x0F) << 28;
    if ((b & 0xF0) == 0) return i;
    throw new IOException("Invalid vInt detected (too many bits)");
  }

  /**
   * Read a {@link BitUtil#zigZagDecode(int) zig-zag}-encoded
   * {@link #readVInt() variable-length} integer.
   *
   * @return the decoded int
   * @throws IOException if the underlying read fails or the vInt is invalid
   * @see DataOutput#writeZInt(int)
   */
  public int readZInt() throws IOException {
    return BitUtil.zigZagDecode(readVInt());
  }

  /**
   * Reads eight bytes and returns a long. The first four bytes read become
   * the upper 32 bits of the result.
   *
   * @return the decoded long
   * @throws IOException if the underlying read fails
   * @see DataOutput#writeLong(long)
   */
  public long readLong() throws IOException {
    return (((long) readInt()) << 32) | (readInt() & 0xFFFFFFFFL);
  }

  /**
   * Reads a long stored in variable-length format. Reads between one and
   * nine bytes. Smaller values take fewer bytes. Negative numbers are not
   * supported.
   * <p>
   * The format is described further in {@link DataOutput#writeVInt(int)}.
   *
   * @return the decoded long
   * @throws IOException if the underlying read fails, or if the ninth byte
   *         still has its continuation bit set
   * @see DataOutput#writeVLong(long)
   */
  public long readVLong() throws IOException {
    return readVLong(false);
  }

  /**
   * Shared decoder behind {@link #readVLong()} and {@link #readZLong()}.
   *
   * @param allowNegative when {@code true}, a tenth byte may follow the ninth;
   *        it must be 0 or 1 and supplies bit 63 of the result. When
   *        {@code false}, a continuation bit on the ninth byte is an error.
   * @return the decoded long
   * @throws IOException if the underlying read fails or the encoding is invalid
   */
  private long readVLong(boolean allowNegative) throws IOException {
    /* This is the original code of this method,
     * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
     * readByte() is inlined. So the loop was unrolled!
    byte b = readByte();
    long i = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = readByte();
      i |= (b & 0x7FL) << shift;
    }
    return i;
    */
    byte b = readByte();
    if (b >= 0) return b;
    long i = b & 0x7FL;
    b = readByte();
    i |= (b & 0x7FL) << 7;
    if (b >= 0) return i;
    b = readByte();
    i |= (b & 0x7FL) << 14;
    if (b >= 0) return i;
    b = readByte();
    i |= (b & 0x7FL) << 21;
    if (b >= 0) return i;
    b = readByte();
    i |= (b & 0x7FL) << 28;
    if (b >= 0) return i;
    b = readByte();
    i |= (b & 0x7FL) << 35;
    if (b >= 0) return i;
    b = readByte();
    i |= (b & 0x7FL) << 42;
    if (b >= 0) return i;
    b = readByte();
    i |= (b & 0x7FL) << 49;
    if (b >= 0) return i;
    b = readByte();
    i |= (b & 0x7FL) << 56;
    if (b >= 0) return i;
    if (allowNegative) {
      // Tenth byte: only its lowest bit survives the shift and lands on bit 63.
      b = readByte();
      i |= (b & 0x7FL) << 63;
      if (b == 0 || b == 1) return i;
      throw new IOException("Invalid vLong detected (more than 64 bits)");
    } else {
      throw new IOException("Invalid vLong detected (negative values disallowed)");
    }
  }

  /**
   * Read a {@link BitUtil#zigZagDecode(long) zig-zag}-encoded
   * {@link #readVLong() variable-length} integer. Reads between one and ten
   * bytes.
   *
   * @return the decoded long
   * @throws IOException if the underlying read fails or the encoding is invalid
   * @see DataOutput#writeZLong(long)
   */
  public long readZLong() throws IOException {
    return BitUtil.zigZagDecode(readVLong(true));
  }

  /**
   * Reads a string: a {@link #readVInt() vInt} byte length followed by that
   * many bytes, which are decoded as UTF-8.
   *
   * @return the decoded string
   * @throws IOException if the underlying read fails
   * @see DataOutput#writeString(String)
   */
  public String readString() throws IOException {
    int length = readVInt();
    final byte[] bytes = new byte[length];
    readBytes(bytes, 0, length);
    return new String(bytes, 0, length, StandardCharsets.UTF_8);
  }

  /**
   * Returns a clone of this stream.
   *
   * <p>Clones of a stream access the same data, and are positioned at the same
   * point as the stream they were cloned from.
   *
   * <p>Expert: Subclasses must ensure that clones may be positioned at
   * different points in the input from each other and from the stream they
   * were cloned from.
   *
   * <p>The scratch buffer used by the default {@link #skipBytes(long)} is not
   * carried over to the clone; the clone allocates its own on first use.
   */
  @Override
  public DataInput clone() {
    try {
      final DataInput copy = (DataInput) super.clone();
      // Object.clone() is a shallow copy, so without this reset the clone
      // would alias this instance's skipBuffer array. Clones are meant to be
      // used from other threads, and the buffer must not be shared across
      // threads (LUCENE-5583), so let the clone lazily allocate its own.
      copy.skipBuffer = null;
      return copy;
    } catch (CloneNotSupportedException e) {
      // Keep the cause so the (theoretically impossible) failure is diagnosable.
      throw new Error("This cannot happen: Failing to clone DataInput", e);
    }
  }

  /**
   * Reads a {@code Map<String,String>} previously written
   * with {@link DataOutput#writeMapOfStrings(Map)}.
   *
   * @return An immutable map containing the written contents.
   * @throws IOException if the underlying read fails
   */
  public Map<String,String> readMapOfStrings() throws IOException {
    int count = readVInt();
    if (count == 0) {
      return Collections.emptyMap();
    } else if (count == 1) {
      // Java evaluates arguments left to right: key first, then value.
      return Collections.singletonMap(readString(), readString());
    } else {
      Map<String,String> map = count > 10 ? new HashMap<>() : new TreeMap<>();
      for (int i = 0; i < count; i++) {
        final String key = readString();
        final String val = readString();
        map.put(key, val);
      }
      return Collections.unmodifiableMap(map);
    }
  }

  /**
   * Reads a {@code Set<String>} previously written
   * with {@link DataOutput#writeSetOfStrings(Set)}.
   *
   * @return An immutable set containing the written contents.
   * @throws IOException if the underlying read fails
   */
  public Set<String> readSetOfStrings() throws IOException {
    int count = readVInt();
    if (count == 0) {
      return Collections.emptySet();
    } else if (count == 1) {
      return Collections.singleton(readString());
    } else {
      Set<String> set = count > 10 ? new HashSet<>() : new TreeSet<>();
      for (int i = 0; i < count; i++) {
        set.add(readString());
      }
      return Collections.unmodifiableSet(set);
    }
  }

  /**
   * Skip over <code>numBytes</code> bytes. The contract on this method is that it
   * should have the same behavior as reading the same number of bytes into a
   * buffer and discarding its content. Negative values of <code>numBytes</code>
   * are not supported.
   *
   * @param numBytes the number of bytes to skip; must be {@code >= 0}
   * @throws IllegalArgumentException if {@code numBytes} is negative
   * @throws IOException if the underlying read fails
   */
  public void skipBytes(final long numBytes) throws IOException {
    if (numBytes < 0) {
      throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
    }
    if (skipBuffer == null) {
      skipBuffer = new byte[SKIP_BUFFER_SIZE];
    }
    assert skipBuffer.length == SKIP_BUFFER_SIZE;
    // Read and discard in chunks of at most SKIP_BUFFER_SIZE bytes.
    for (long skipped = 0; skipped < numBytes; ) {
      final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped);
      readBytes(skipBuffer, 0, step, false);
      skipped += step;
    }
  }
}