/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package edu.harvard.iq.dataverse.dataaccess;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.logging.Logger;
/**
 * An {@link InputStream} that serves a subset of the variables (columns) of a
 * tabular data file. The actual byte production is delegated to a
 * {@link TabularSubsetGenerator}; this class adapts the generator's output to
 * caller-supplied buffers and carries over whatever did not fit into the
 * previous buffer.
 *
 * <p>Only the bulk read methods are supported; the single-byte
 * {@link #read()} always throws.
 *
 * @author Leonid Andreev
 */
public class TabularSubsetInputStream extends InputStream {
    private static final Logger logger = Logger.getLogger(TabularSubsetInputStream.class.getCanonicalName());

    private TabularSubsetGenerator subsetGenerator = null;
    private int numberOfSubsetVariables;
    private int numberOfObservations;
    private int numberOfObservationsRead = 0;
    // Tail of the last generated line that did not fit into the caller's
    // buffer; null when nothing is pending.
    private byte[] leftoverBytes = null;

    /**
     * Creates a subset stream over the given variables of a tabular data file.
     *
     * @param datafile  the data file to subset; must be non-null and tabular
     * @param variables the variables to include; must be non-null and non-empty
     * @throws IOException if {@code datafile} is null or not tabular, or if
     *                     {@code variables} is null or empty
     */
    public TabularSubsetInputStream(DataFile datafile, List<DataVariable> variables) throws IOException {
        if (datafile == null) {
            throw new IOException("Null datafile in subset request");
        }
        if (!datafile.isTabularData()) {
            throw new IOException("Subset requested on a non-tabular data file");
        }
        numberOfObservations = datafile.getDataTable().getCaseQuantity().intValue();

        if (variables == null || variables.size() < 1) {
            throw new IOException("Null or empty list of variables in subset request.");
        }
        numberOfSubsetVariables = variables.size();
        subsetGenerator = new TabularSubsetGenerator(datafile, variables);
    }

    /**
     * Single-byte reads are not supported by this stream.
     *
     * @return never returns normally
     * @throws IOException always
     */
    @Override
    public int read() throws IOException {
        throw new IOException("read() method not implemented; do not use.");
    }

    /**
     * Reads up to {@code len} bytes into {@code b} starting at {@code off}.
     *
     * <p>The implementation inherited from {@link InputStream} is built on the
     * single-byte {@link #read()}, which this class does not support, so this
     * override routes the request through {@link #read(byte[])} instead.
     *
     * @param b   destination buffer
     * @param off offset in {@code b} at which the data is written
     * @param len maximum number of bytes to read
     * @return the number of bytes read, {@code 0} if {@code len} is zero, or
     *         {@code -1} at the end of the stream
     * @throws NullPointerException      if {@code b} is null
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
     *                                   negative, or {@code len} exceeds
     *                                   {@code b.length - off}
     * @throws IOException               if the underlying read fails
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("Null buffer in subset read request");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException(
                    "offset " + off + ", length " + len + ", buffer size " + b.length);
        }
        if (len == 0) {
            return 0;
        }
        if (off == 0 && len == b.length) {
            // The request covers the whole array; no staging buffer needed.
            return read(b);
        }

        byte[] staging = new byte[len];
        int count = read(staging);
        if (count > 0) {
            System.arraycopy(staging, 0, b, off, count);
        }
        return count;
    }

    /**
     * Fills {@code b} with subset data. For a single-variable subset the
     * request is handed straight to the generator's single-column reader;
     * otherwise the buffer is filled from any pending leftover followed by as
     * many generated lines as fit, and the part of the last line that does
     * not fit is kept for the next call.
     *
     * @param b destination buffer
     * @return the number of bytes placed into {@code b}, {@code 0} if
     *         {@code b} has zero length, or {@code -1} once all observations
     *         have been read and no leftover remains
     * @throws IOException if the generator fails to produce data
     */
    @Override
    public int read(byte[] b) throws IOException {
        // TODO:
        // Move this code into TabularSubsetGenerator
        logger.fine("subset input stream: read request, on a "+b.length+" byte buffer;");

        // InputStream contract: a zero-length buffer reads 0 bytes; it must
        // not be reported as end-of-stream.
        if (b.length == 0) {
            return 0;
        }

        if (numberOfSubsetVariables == 1) {
            logger.fine("calling the single variable subset read method");
            return subsetGenerator.readSingleColumnSubset(b);
        }

        int bytesread = 0;

        // do we have a leftover?
        if (leftoverBytes != null) {
            if (leftoverBytes.length < b.length) {
                System.arraycopy(leftoverBytes, 0, b, 0, leftoverBytes.length);
                bytesread = leftoverBytes.length;
                leftoverBytes = null;
            } else {
                // shouldn't really happen... unless it's a very large subset,
                // or a very long string, etc.
                System.arraycopy(leftoverBytes, 0, b, 0, b.length);
                int remaining = leftoverBytes.length - b.length;
                if (remaining > 0) {
                    byte[] rest = new byte[remaining];
                    System.arraycopy(leftoverBytes, b.length, rest, 0, remaining);
                    leftoverBytes = rest;
                } else {
                    leftoverBytes = null;
                }
                return b.length;
            }
        }

        while (bytesread < b.length && numberOfObservationsRead < numberOfObservations) {
            byte[] linebuffer = subsetGenerator.readSubsetLineBytes();
            numberOfObservationsRead++;

            int room = b.length - bytesread;
            if (linebuffer.length < room) {
                // the whole line fits, with space to spare:
                System.arraycopy(linebuffer, 0, b, bytesread, linebuffer.length);
                bytesread += linebuffer.length;
            } else {
                // the line fills the buffer; save whatever does not fit:
                System.arraycopy(linebuffer, 0, b, bytesread, room);
                int overflow = linebuffer.length - room;
                if (overflow > 0) {
                    leftoverBytes = new byte[overflow];
                    System.arraycopy(linebuffer, room, leftoverBytes, 0, overflow);
                }
                return b.length;
            }
        }

        // and this means we've reached the end of the tab file!
        return bytesread > 0 ? bytesread : -1;
    }

    /**
     * Closes the underlying subset generator, if one was created.
     */
    @Override
    public void close() {
        if (subsetGenerator != null) {
            subsetGenerator.close();
        }
    }
}