/* * Copyright (c) 2012 Diamond Light Source Ltd. * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ package uk.ac.diamond.scisoft.analysis.io; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; import java.util.ArrayList; import org.eclipse.dawnsci.analysis.api.io.ScanFileHolderException; import org.eclipse.january.dataset.Dataset; import org.eclipse.january.dataset.DatasetFactory; import org.eclipse.january.dataset.ILazyDataset; import org.eclipse.january.metadata.IMetadata; import org.eclipse.january.metadata.Metadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import uk.ac.diamond.scisoft.analysis.io.NumPyFile.DataTypeInfo; /** * Reads files in npy format as defined here; http://svn.scipy.org/svn/numpy/trunk/doc/neps/npy-format.txt */ public class NumPyFileLoader extends AbstractFileLoader { private static final String NUMPY_NAME = "NumPy file"; private static final Logger logger = LoggerFactory.getLogger(NumPyFileLoader.class); public NumPyFileLoader() { } /** * @param fileName */ public NumPyFileLoader(String fileName) { this.fileName = fileName; } @Override protected void clearMetadata() { } @Override public DataHolder loadFile() throws ScanFileHolderException { DataHolder output = new DataHolder(); File f = null; FileInputStream fi = null; try { f = new File(fileName); fi = new FileInputStream(f); ByteBuffer fBuffer; FileChannel fc = null; // use on Non-windows only if (System.getProperty("os.name").contains("Windows")) { // This is a workaround for bug 4715154, see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4715154 // Can't use fc.map, so load the whole file in a byte array instead // For small files it is likely that this alternative method is faster anyway, the problem is on // big files that temporarily a large amount of extra memory is needed. long fileSizeLong = f.length(); if (fileSizeLong > Integer.MAX_VALUE) throw new IOException("File too big " + f.getName()); int fileSize = (int) fileSizeLong; byte[] bytes = new byte[fileSize]; int offset = 0; int count = 0; while (offset < fileSize) { count = fi.read(bytes, offset, fileSize - offset); if (count >= 0) offset += count; else throw new IOException("Can't read file " + f.getName()); } fBuffer = ByteBuffer.wrap(bytes); } else { fc = fi.getChannel(); fBuffer = fc.map(MapMode.READ_ONLY, 0, fc.size()); } ILazyDataset data = loadDataset(f, fBuffer); if (fc != null) fc.close(); output.addDataset(NUMPY_NAME, data); if (loadMetadata) output.setMetadata(metadata); } catch (Exception ex) { if (ex instanceof ScanFileHolderException) throw (ScanFileHolderException) ex; throw new ScanFileHolderException("There was a problem reading the NumPy file", ex); } finally { if (fi != null) try { fi.close(); } catch (IOException e) { logger.error("Exception when closing file", e); } } return output; } protected ILazyDataset loadDataset(File f, ByteBuffer fBuffer) throws ScanFileHolderException { fBuffer.order(ByteOrder.LITTLE_ENDIAN); DataTypeInfo dataTypeInfo = getDataInfo(fBuffer); int dtype = dataTypeInfo .dType; int isize = dataTypeInfo.iSize; boolean unsigned = dataTypeInfo.unsigned; int[] shape = dataTypeInfo.getShape(); int rank = shape.length; if (loadMetadata) metadata = createMetadata(f.getAbsolutePath(), dataTypeInfo); ILazyDataset data; if (loadLazily) { data = createLazyDataset(NUMPY_NAME, dtype, shape, new NumPyFileLoader(fileName)); } else { int tSize = isize; for (int j = 0; j < rank; j++) { tSize *= shape[j]; } data = RawBinaryLoader.loadRawDataset(fBuffer, dtype, isize, tSize, shape); if (unsigned) data = DatasetFactory.createFromObject(unsigned, data); } return data; } private static DataTypeInfo getDataInfo(ByteBuffer fBuffer) throws ScanFileHolderException { for (int i = 0; i < NumPyFile.magic.length; i++) { byte b = fBuffer.get(); if (NumPyFile.magic[i] != b) { throw new ScanFileHolderException("File does not start npy magic number/version"); } } short header_len = fBuffer.getShort(); byte[] formatBytes = new byte[header_len]; fBuffer.get(formatBytes); String format; try { format = new String(formatBytes, "US-ASCII"); } catch (UnsupportedEncodingException e) { throw new ScanFileHolderException("Impossible error, US-ASCII is always available?", e); } // parse format // format looks like this, and always in this order: // {'descr': '<i4', 'fortran_order': False, 'shape': (100,), } // or: // {'descr': '<i4', 'fortran_order': False, 'shape': (100, 100), } String[] kvs = format.split(", ", 3); String[] descrPair = kvs[0].split(": "); String description = descrPair[1].substring(1, descrPair[1].length() - 1); String[] forOrdPair = kvs[1].split(": "); Boolean fortran_order = Boolean.parseBoolean(forOrdPair[1]); String[] shapePair = kvs[2].split(": "); String shapeTupleStr = shapePair[1].substring(1, shapePair[1].lastIndexOf(')')); String[] shapeTupleStrArray = shapeTupleStr.split(", ?"); ArrayList<Integer> shapeList = new ArrayList<Integer>(); if (shapeTupleStrArray.length == 1 && "".equals(shapeTupleStrArray[0])) { shapeList.add(1); } else { for (int i = 0; i < shapeTupleStrArray.length; i++) { shapeList.add(Integer.parseInt(shapeTupleStrArray[i].replace("L", ""))); } } int rank = shapeList.size(); int[] shape = new int[rank]; for (int j = 0; j < rank; j++) { shape[j] = shapeList.get(j); } if (fortran_order) { throw new ScanFileHolderException("Only Non-fortran order is supported"); } // Figure out the Data Set type from the description string NumPyFile.DataTypeInfo dataTypeInfo = NumPyFile.dataTypeMap.get(description); if (dataTypeInfo == null) { throw new ScanFileHolderException("Unknown/unsupported data type description: " + description); } dataTypeInfo.setShape(shape); return dataTypeInfo; } /** * Load a NumPy file and return its contained DataSet. * <p> * Provided as a convenience method * * @param fileName * @return loaded IDataset * @throws ScanFileHolderException * if the file failed to load as a NumPy file */ public static Dataset loadFileHelper(String fileName) throws ScanFileHolderException { NumPyFileLoader fileLoader = new NumPyFileLoader(fileName); DataHolder dataHolder = fileLoader.loadFile(); Dataset dataset = dataHolder.getDataset(0); return dataset; } private IMetadata createMetadata(String path, DataTypeInfo info) { IMetadata md = new Metadata(); md.setFilePath(path); md.addDataInfo(path, info.getShape()); return md; } }