/*
 * Copyright 1998-2009 University Corporation for Atmospheric Research/Unidata
 *
 * Portions of this software were developed by the Unidata Program at the
 * University Corporation for Atmospheric Research.
 *
 * Access and use of this software shall impose the following obligations
 * and understandings on the user. The user is granted the right, without
 * any fee or cost, to use, copy, modify, alter, enhance and distribute
 * this software, and any derivative works thereof, and its supporting
 * documentation for any purpose whatsoever, provided that this entire
 * notice appears in all copies of the software, derivative works and
 * supporting documentation. Further, UCAR requests that the user credit
 * UCAR/Unidata in any publications that result from the use of this
 * software or in any product that includes this software. The names UCAR
 * and/or Unidata, however, may not be used in any advertising or publicity
 * to endorse or promote any products or commercial entity unless specific
 * written permission is obtained from UCAR/Unidata. The user also
 * understands that UCAR/Unidata is not obligated to provide the user with
 * any support, consulting, training or assistance of any kind with regard
 * to the use, operation and performance of this software nor to provide
 * the user with any updates, revisions, new versions or "bug fixes."
 *
 * THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY SPECIAL,
 * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
 * WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE.
 */
package ucar.nc2.iosp.hdf5;

import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import ucar.ma2.DataType;
import ucar.ma2.InvalidRangeException;
import ucar.ma2.Section;
import ucar.nc2.Variable;
import ucar.nc2.iosp.LayoutBB;
import ucar.nc2.iosp.LayoutBBTiled;
import ucar.nc2.util.IO;
import ucar.unidata.io.RandomAccessFile;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * Iterator to read/write subsets of an array.
 * This calculates byte offsets for HDF5 chunked datasets that have filters applied.
 * Assumes that the data is stored in chunks, indexed by a B-tree.
 *
 * @author caron
 */
class H5tiledLayoutBB implements LayoutBB {
  private LayoutBBTiled delegate;

  private RandomAccessFile raf;
  private H5header.Filter[] filters;
  private ByteOrder byteOrder;

  private Section want;
  private int[] chunkSize; // from the StorageLayout message (excludes the elemSize)
  private int elemSize;    // last dimension of the StorageLayout message
  private int nChunkDims;

  private boolean debug = false;
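  // Usage sketch (illustrative; not part of this class): an IOSP reads a
  // filtered, chunked variable by iterating the layout and copying each decoded
  // chunk into the destination array. The Chunk accessor names below are those
  // of the ucar.nc2.iosp.LayoutBB interface as commonly used; treat this as a
  // sketch and consult that interface for the exact signatures.
  //
  //   LayoutBB layout = new H5tiledLayoutBB(v2, wantSection, raf, filters, byteOrder);
  //   while (layout.hasNext()) {
  //     LayoutBB.Chunk chunk = layout.next();    // next chunk intersecting the want section
  //     ByteBuffer bb = chunk.getByteBuffer();   // chunk bytes with filters already undone
  //     // copy chunk.getNelems() elements from bb, starting at element
  //     // chunk.getSrcElem(), to destination element chunk.getDestElem()
  //   }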
  /**
   * Constructor.
   * This is for HDF5 chunked data storage. The data is read by chunk, for efficiency.
   *
   * @param v2 Variable to index over; assumes that vinfo is the data object
   * @param wantSection the wanted section of data, contains a List of Range objects. Must be complete.
   * @param raf the RandomAccessFile
   * @param filters set of filters that have been applied to the data
   * @param byteOrder byte order of the stored data
   * @throws InvalidRangeException if section invalid for this variable
   * @throws java.io.IOException on io error
   */
  H5tiledLayoutBB(Variable v2, Section wantSection, RandomAccessFile raf, H5header.Filter[] filters,
                  ByteOrder byteOrder) throws InvalidRangeException, IOException {
    wantSection = Section.fill(wantSection, v2.getShape());

    H5header.Vinfo vinfo = (H5header.Vinfo) v2.getSPobject();
    assert vinfo.isChunked;
    assert vinfo.btree != null;

    this.raf = raf;
    this.filters = filters;
    this.byteOrder = byteOrder;

    // We have to translate the want section into the same rank as the storageSize, in order to be able to call
    // Section.intersect(). It appears that storageSize (actually msl.chunkSize) may have an extra dimension, relative
    // to the Variable.
    DataType dtype = v2.getDataType();
    if ((dtype == DataType.CHAR) && (wantSection.getRank() < vinfo.storageSize.length))
      this.want = new Section(wantSection).appendRange(1);
    else
      this.want = wantSection;

    // one less chunk dimension, except in the case of char
    nChunkDims = (dtype == DataType.CHAR) ? vinfo.storageSize.length : vinfo.storageSize.length - 1;
    this.chunkSize = new int[nChunkDims];
    System.arraycopy(vinfo.storageSize, 0, chunkSize, 0, nChunkDims);
    this.elemSize = vinfo.storageSize[vinfo.storageSize.length - 1]; // last one is always the element size

    // create the data chunk iterator
    DataBTree.DataChunkIterator iter = vinfo.btree.getDataChunkIteratorFilter(this.want);
    DataChunkIterator dcIter = new DataChunkIterator(iter);
    delegate = new LayoutBBTiled(dcIter, chunkSize, elemSize, this.want);

    if (debug) System.out.println(" H5tiledLayout: " + this);
  }

  public long getTotalNelems() {
    return delegate.getTotalNelems();
  }

  public int getElemSize() {
    return delegate.getElemSize();
  }

  public boolean hasNext() {
    return delegate.hasNext();
  }

  public Chunk next() throws IOException {
    return delegate.next();
  }

  public String toString() {
    StringBuilder sbuff = new StringBuilder();
    sbuff.append("want=").append(want).append("; ");
    sbuff.append("chunkSize=[");
    for (int i = 0; i < chunkSize.length; i++) {
      if (i > 0) sbuff.append(",");
      sbuff.append(chunkSize[i]);
    }
    sbuff.append("] totalNelems=").append(getTotalNelems());
    sbuff.append(" elemSize=").append(elemSize);
    return sbuff.toString();
  }

  private class DataChunkIterator implements LayoutBBTiled.DataChunkIterator {
    DataBTree.DataChunkIterator delegate;

    DataChunkIterator(DataBTree.DataChunkIterator delegate) {
      this.delegate = delegate;
    }

    public boolean hasNext() {
      return delegate.hasNext();
    }

    public LayoutBBTiled.DataChunk next() throws IOException {
      return new DataChunk(delegate.next());
    }
  }
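  // Worked example of the unsigned-size overflow handling in DataChunk below
  // (illustrative numbers): HDF5 stores the chunk byte count as an unsigned
  // 32-bit int. A stored value of 3_000_000_000 (0xB2D05E00) arrives in Java's
  // signed delegate.size as -1_294_967_296. Copying its four bytes into the low
  // half of an 8-byte array and reading that back as a long recovers the true
  // value for the error message:
  //
  //   byte[] intBytes  = Ints.toByteArray(-1_294_967_296);  // {B2, D0, 5E, 00}
  //   byte[] longBytes = new byte[8];                       // high 4 bytes stay zero
  //   System.arraycopy(intBytes, 0, longBytes, 4, 4);
  //   long size = Longs.fromByteArray(longBytes);           // 3_000_000_000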
  private class DataChunk implements ucar.nc2.iosp.LayoutBBTiled.DataChunk {
    // Copied from ArrayList.
    private static final int MAX_ARRAY_LEN = Integer.MAX_VALUE - 8;

    DataBTree.DataChunk delegate;

    DataChunk(DataBTree.DataChunk delegate) {
      this.delegate = delegate;

      // Check that the chunk length (delegate.size) isn't greater than the maximum array length that we can
      // allocate (MAX_ARRAY_LEN). This condition manifests in two ways.
      // 1) According to the HDF docs (https://www.hdfgroup.org/HDF5/doc/Advanced/Chunking/, "Chunk Maximum Limits"),
      //    max chunk length is 4GB (i.e. representable in an unsigned int). Java, however, only has signed ints.
      //    So, if we try to store a large unsigned int in a signed int, it'll overflow, and the signed int will come
      //    out negative. We're trusting here that the chunk size read from the HDF file is never negative.
      // 2) In most JVM implementations MAX_ARRAY_LEN is actually less than Integer.MAX_VALUE (see note in ArrayList).
      //    So, we could have: "MAX_ARRAY_LEN < chunkSize <= Integer.MAX_VALUE".
      if (delegate.size < 0 || delegate.size > MAX_ARRAY_LEN) {
        // We want to report the size of the chunk, but we may be in an arithmetic overflow situation. So to get the
        // correct value, we're going to reinterpret the integer's bytes as long bytes.
        byte[] intBytes = Ints.toByteArray(delegate.size);
        byte[] longBytes = new byte[8];
        System.arraycopy(intBytes, 0, longBytes, 4, 4); // Copy int bytes to the lowest 4 positions.
        long chunkSize = Longs.fromByteArray(longBytes); // Method requires an array of length 8.

        throw new IllegalArgumentException(String.format("Filtered data chunk is %s bytes and we must load it all " +
                "into memory. However, the maximum length of a byte array in Java is %s.", chunkSize, MAX_ARRAY_LEN));
      }
    }

    public int[] getOffset() {
      int[] offset = delegate.offset;
      if (offset.length > nChunkDims) { // may have to eliminate the last offset
        offset = new int[nChunkDims];
        System.arraycopy(delegate.offset, 0, offset, 0, nChunkDims);
      }
      return offset;
    }

    public ByteBuffer getByteBuffer() throws IOException {
      try {
        // read the data
        byte[] data = new byte[delegate.size];
        raf.seek(delegate.filePos);
        raf.readFully(data);

        // apply filters backwards
        for (int i = filters.length - 1; i >= 0; i--) {
          H5header.Filter f = filters[i];
          if (isBitSet(delegate.filterMask, i)) {
            if (debug) System.out.println("skip for chunk " + delegate);
            continue;
          }
          if (f.id == 1) {
            data = inflate(data);
          } else if (f.id == 2) {
            data = shuffle(data, f.data[0]);
          } else if (f.id == 3) {
            data = checkfletcher32(data);
          } else
            throw new RuntimeException("Unknown filter type=" + f.id);
        }

        ByteBuffer result = ByteBuffer.wrap(data);
        result.order(byteOrder);
        return result;
      } catch (OutOfMemoryError e) {
        Error oom = new OutOfMemoryError("Ran out of memory trying to read HDF5 filtered chunk. Either increase the " +
                "JVM's heap size (use the -Xmx switch) or reduce the size of the dataset's chunks (use nccopy -c).");
        oom.initCause(e); // OutOfMemoryError lacks a constructor with a cause parameter.
        throw oom;
      }
    }
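    // Worked example of the backwards filter loop above (illustrative): HDF5
    // records filters in the order they were applied on write, e.g.
    // [shuffle (id=2), deflate (id=1), fletcher32 (id=3)]. Reading undoes them
    // in reverse: strip the fletcher32 checksum, then inflate, then unshuffle.
    // A set bit in delegate.filterMask marks a filter that was skipped when the
    // chunk was written: with filterMask == 0b100, bit 2 is set, so
    // checkfletcher32() is skipped and only inflate() and shuffle() run.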
    /**
     * inflate data
     *
     * @param compressed compressed data
     * @return uncompressed data
     * @throws IOException on I/O error
     */
    private byte[] inflate(byte[] compressed) throws IOException {
      // run it through the Inflater
      ByteArrayInputStream in = new ByteArrayInputStream(compressed);
      java.util.zip.InflaterInputStream inflater = new java.util.zip.InflaterInputStream(in);
      ByteArrayOutputStream out = new ByteArrayOutputStream(
              Math.min(8 * compressed.length, MAX_ARRAY_LEN)); // Fixes KXL-349288
      IO.copy(inflater, out);

      byte[] uncomp = out.toByteArray();
      if (debug) System.out.println(" inflate bytes in= " + compressed.length + " bytes out= " + uncomp.length);
      return uncomp;
    }

    // just strip off the 4-byte fletcher32 checksum at the end; the checksum itself is not verified
    private byte[] checkfletcher32(byte[] org) throws IOException {
      byte[] result = new byte[org.length - 4];
      System.arraycopy(org, 0, result, 0, result.length);
      if (debug) System.out.println(" checkfletcher32 bytes in= " + org.length + " bytes out= " + result.length);
      return result;
    }

    // undo the shuffle filter: the chunk stores all the first bytes of each n-byte
    // element, then all the second bytes, etc.; re-interleave them per element
    private byte[] shuffle(byte[] data, int n) throws IOException {
      if (debug) System.out.println(" shuffle bytes in= " + data.length + " n= " + n);

      assert data.length % n == 0;
      if (n <= 1) return data;

      int m = data.length / n;  // number of elements in the chunk
      int[] count = new int[n]; // count[k] = start of the k-th byte plane
      for (int k = 0; k < n; k++) count[k] = k * m;

      byte[] result = new byte[data.length];
      for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
          result[i * n + j] = data[i + count[j]];
        }
      }

      return result;
    }

    boolean isBitSet(int val, int bitno) {
      return ((val >>> bitno) & 1) != 0;
    }

  } // DataChunk

}
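// Worked example of DataChunk.shuffle() (illustrative, n = 2): a shuffled chunk
// {a0, b0, c0, a1, b1, c1} holds the first bytes of elements a, b, c followed by
// their second bytes (so m = 3 and count = {0, 3}). The unshuffle loop produces
// {a0, a1, b0, b1, c0, c1}, restoring each element's bytes to adjacent positions.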