package edu.washington.escience.myria.operator;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import edu.washington.escience.myria.DbException;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.Type;
import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.storage.TupleBatchBuffer;
import edu.washington.escience.myria.storage.TupleUtils;
/**
 * Read and merge a Tipsy bin file, an iOrder ascii file and a group number ascii file.
 *
 * <p>
 * The binary file starts with a {@value #H_SIZE}-byte header followed by the gas, dark and star
 * records, in that order. Each binary record is combined with the next value of the iOrder file
 * and the next value of the group number file to form one output tuple. Columns that a particle
 * type does not carry are filled with {@code 0}.
 *
 * <p>
 * Each of the three file names may be a local path, an {@code hdfs} URI, or any other URL that
 * {@link java.net.URL#openStream()} can open.
 *
 * @author leelee
 *
 */
public class TipsyFileScan extends LeafOperator {
  /** Required for Java serialization. */
  private static final long serialVersionUID = 1L;
  /** The header size in bytes. */
  private static final int H_SIZE = 32;
  /** The gas record size in bytes. */
  private static final int G_SIZE = 48;
  /** The dark record size in bytes. */
  private static final int D_SIZE = 36;
  /** The star record size in bytes. */
  private static final int S_SIZE = 44;

  /** Gas record: floats read before the absent columns (mass, x, y, z, vx, vy, vz, rho, temp, hsmooth, metals). */
  private static final int GAS_LEADING_FLOATS = 11;
  /** Gas record: columns not present in the record (tform, eps). */
  private static final int GAS_ABSENT_FLOATS = 2;
  /** Gas record: floats read after the absent columns (phi). */
  private static final int GAS_TRAILING_FLOATS = 1;
  /** Dark record: floats read before the absent columns (mass, x, y, z, vx, vy, vz). */
  private static final int DARK_LEADING_FLOATS = 7;
  /** Dark record: columns not present in the record (rho, temp, hsmooth, metals, tform). */
  private static final int DARK_ABSENT_FLOATS = 5;
  /** Dark record: floats read after the absent columns (eps, phi). */
  private static final int DARK_TRAILING_FLOATS = 2;
  /** Star record: floats read before the absent columns (mass, x, y, z, vx, vy, vz). */
  private static final int STAR_LEADING_FLOATS = 7;
  /** Star record: columns not present in the record (rho, temp, hsmooth). */
  private static final int STAR_ABSENT_FLOATS = 3;
  /** Star record: floats read after the absent columns (metals, tform, eps, phi). */
  private static final int STAR_TRAILING_FLOATS = 4;

  /** The data input for bin file. Kept as a stream so that it can be closed in {@link #cleanup()}. */
  private transient DataInputStream dataInputForBin;
  /** Scanner used to parse the iOrder file. */
  private transient Scanner iOrderScanner = null;
  /** Scanner used to parse the group number file. */
  private transient Scanner grpScanner = null;
  /** Holds the tuples that are ready for release. */
  private transient TupleBatchBuffer buffer;
  /** The bin file name. */
  private final String binFileName;
  /** The iOrder file name. */
  private final String iOrderFileName;
  /** The group number file name. */
  private final String grpFileName;
  /** The number of gas particle records that still have to be read. */
  private long ngas;
  /** The number of star particle records that still have to be read. */
  private long nstar;
  /** The number of dark particle records that still have to be read. */
  private long ndark;
  /** Which record line of the ascii files the scanners are currently on (used in error messages). */
  private int lineNumber;

  /** Schema for all Tipsy files. */
  private static final Schema TIPSY_SCHEMA =
      new Schema(
          ImmutableList.of(
              Type.LONG_TYPE, // iOrder
              Type.FLOAT_TYPE, // mass
              Type.FLOAT_TYPE, // x
              Type.FLOAT_TYPE, // y
              Type.FLOAT_TYPE, // z
              Type.FLOAT_TYPE, // vx
              Type.FLOAT_TYPE, // vy
              Type.FLOAT_TYPE, // vz
              Type.FLOAT_TYPE, // rho
              Type.FLOAT_TYPE, // temp
              Type.FLOAT_TYPE, // hsmooth
              Type.FLOAT_TYPE, // metals
              Type.FLOAT_TYPE, // tform
              Type.FLOAT_TYPE, // eps
              Type.FLOAT_TYPE, // phi
              Type.INT_TYPE, // grp
              Type.STRING_TYPE // type
              ),
          ImmutableList.of(
              "iOrder", "mass", "x", "y", "z", "vx", "vy", "vz", "rho", "temp", "hsmooth", "metals",
              "tform", "eps", "phi", "grp", "type"));

  /**
   * Construct a new TipsyFileScan object using the given binary filename, iOrder filename and group number filename.
   * By default TipsyFileScan will read the given binary file in big endian format.
   *
   * @param binFileName The binary file that contains the data for gas, dark, star particles.
   * @param iOrderFileName The ascii file that contains the data for iOrder.
   * @param grpFileName The ascii file that contains the data for group number.
   * @throws NullPointerException if any argument is {@code null}.
   */
  public TipsyFileScan(
      final String binFileName, final String iOrderFileName, final String grpFileName) {
    Objects.requireNonNull(binFileName);
    Objects.requireNonNull(iOrderFileName);
    Objects.requireNonNull(grpFileName);
    this.binFileName = binFileName;
    this.iOrderFileName = iOrderFileName;
    this.grpFileName = grpFileName;
  }

  /**
   * Fill the buffer with gas, then dark, then star records until it holds a full batch or the input is exhausted, and
   * hand out whatever {@link TupleBatchBuffer#popAny()} returns.
   *
   * @return the result of {@code buffer.popAny()}.
   * @throws DbException if a record cannot be read or an ascii line is malformed.
   */
  @Override
  protected final TupleBatch fetchNextReady() throws DbException {
    processGasRecords();
    processDarkRecords();
    processStarRecords();
    return buffer.popAny();
  }

  /**
   * Open the three inputs, read and validate the binary header and check that both ascii files announce the same
   * number of records as the header. If anything fails, every input opened so far is closed again before the exception
   * propagates.
   *
   * @param execEnvVars execution environment variables (not used by this operator).
   * @throws DbException if a file cannot be opened or read, or if the headers are inconsistent.
   */
  @Override
  protected final void init(final ImmutableMap<String, Object> execEnvVars) throws DbException {
    buffer = new TupleBatchBuffer(getSchema());
    lineNumber = 0;
    boolean initialized = false;
    try {
      iOrderScanner = openAsciiScanner(iOrderFileName);
      grpScanner = openAsciiScanner(grpFileName);
      final int ntot = openBinFileAndReadHeader();
      final int numIOrder = readCount(iOrderScanner);
      final int numGrp = readCount(grpScanner);
      if (numIOrder != ntot) {
        throw new DbException(
            "number of iOrder "
                + numIOrder
                + " is different from the number of tipsy record "
                + ntot
                + ".");
      }
      if (numGrp != ntot) {
        throw new DbException("number of group is different from the number of tipsy record.");
      }
      initialized = true;
    } finally {
      if (!initialized) {
        try {
          closeInputs();
        } catch (final IOException ignored) {
          // The failure that aborted init is already propagating; a secondary close error must not mask it.
        }
      }
    }
  }

  /**
   * Close all inputs and drop any tuples still held in the buffer.
   *
   * @throws DbException if closing the binary input fails.
   */
  @Override
  protected final void cleanup() throws DbException {
    if (buffer != null) {
      while (buffer.numTuples() > 0) {
        buffer.popAny();
      }
    }
    try {
      closeInputs();
    } catch (final IOException e) {
      throw new DbException(e);
    }
  }

  /**
   * Construct tuples for gas particle records. The expected gas particles schema in the bin file is mass, x, y, z, vx,
   * vy, vz, rho, temp, hsmooth, metals, phi. Merge the record in the binary file with iOrder and group number and fill
   * in each tuple column accordingly.
   *
   * @throws DbException if error reading from file.
   */
  private void processGasRecords() throws DbException {
    while (ngas > 0 && buffer.numTuples() < buffer.getBatchSize()) {
      appendRecord("gas", GAS_LEADING_FLOATS, GAS_ABSENT_FLOATS, GAS_TRAILING_FLOATS);
      ngas--;
    }
  }

  /**
   * Construct tuples for dark particle records. The expected dark particles schema in the bin file is mass, x, y, z,
   * vx, vy, vz, eps, phi. Merge the record in the binary file with iOrder and group number and fill in each tuple
   * column accordingly.
   *
   * @throws DbException if error reading from file.
   */
  private void processDarkRecords() throws DbException {
    while (ndark > 0 && buffer.numTuples() < buffer.getBatchSize()) {
      appendRecord("dark", DARK_LEADING_FLOATS, DARK_ABSENT_FLOATS, DARK_TRAILING_FLOATS);
      ndark--;
    }
  }

  /**
   * Construct tuples for star particle records. The expected star particles schema in the bin file is mass, x, y, z,
   * vx, vy, vz, metals, tform, eps, phi. Merge the record in the binary file with iOrder and group number and fill in
   * each tuple column accordingly.
   *
   * @throws DbException if error reading from file.
   */
  private void processStarRecords() throws DbException {
    while (nstar > 0 && buffer.numTuples() < buffer.getBatchSize()) {
      appendRecord("star", STAR_LEADING_FLOATS, STAR_ABSENT_FLOATS, STAR_TRAILING_FLOATS);
      nstar--;
    }
  }

  /**
   * Append one tuple to the buffer: the next iOrder value, {@code leadingFloats} floats from the binary file,
   * {@code absentFloats} zero placeholders, {@code trailingFloats} more floats from the binary file, the next group
   * number and finally the particle type name.
   *
   * <p>
   * TODO(leelee): The absent columns should be null. Put 0 for now as TupleBatchBuffer does not support null value.
   *
   * @param particleType value of the {@code type} column.
   * @param leadingFloats number of floats to read from the binary file before the absent columns.
   * @param absentFloats number of columns to fill with 0.
   * @param trailingFloats number of floats to read from the binary file after the absent columns.
   * @throws DbException if the binary file cannot be read, an ascii value is missing or not a number, or an ascii line
   *         has trailing content.
   */
  private void appendRecord(
      final String particleType,
      final int leadingFloats,
      final int absentFloats,
      final int trailingFloats)
      throws DbException {
    lineNumber++;
    try {
      int column = 0;
      buffer.putLong(column++, iOrderScanner.nextLong());
      for (int i = 0; i < leadingFloats; i++) {
        buffer.putFloat(column++, dataInputForBin.readFloat());
      }
      for (int i = 0; i < absentFloats; i++) {
        buffer.putFloat(column++, 0f);
      }
      for (int i = 0; i < trailingFloats; i++) {
        buffer.putFloat(column++, dataInputForBin.readFloat());
      }
      buffer.putInt(column++, grpScanner.nextInt());
      buffer.putString(column, particleType);
    } catch (final IOException | NoSuchElementException e) {
      // NoSuchElementException (and its subclass InputMismatchException) is what Scanner throws for a truncated or
      // non-numeric ascii file; report it like every other input error.
      throw new DbException(e);
    }
    requireEndOfLine(iOrderScanner, "iOrderFile");
    requireEndOfLine(grpScanner, "grpFile");
  }

  /**
   * Consume the remainder of the current line and fail if it holds anything but whitespace.
   *
   * @param scanner the scanner positioned right after the value that was just read.
   * @param fileLabel prefix used in the error message to identify the file.
   * @throws DbException if there is non-blank content after the value.
   */
  private void requireEndOfLine(final Scanner scanner, final String fileLabel) throws DbException {
    if (!scanner.hasNextLine()) {
      // The value was the very last token and the file has no trailing newline; nextLine() would throw here.
      return;
    }
    final String rest = scanner.nextLine().trim();
    if (rest.length() > 0) {
      throw new DbException(
          fileLabel + ": Unexpected output at the end of line " + lineNumber + ": " + rest);
    }
  }

  /**
   * Open the binary file, read its header into {@link #ngas}, {@link #ndark} and {@link #nstar} and validate it.
   *
   * @return the total number of records announced by the header.
   * @throws DbException if the file cannot be opened or read, the counts are inconsistent, or (for local files) the
   *         file size does not match the counts.
   */
  private int openBinFileAndReadHeader() throws DbException {
    final InputStream rawBin = openFileOrUrlInputStream(binFileName);
    // Assign the field before reading so that a failure below still lets closeInputs() release the stream.
    dataInputForBin = new DataInputStream(new BufferedInputStream(rawBin));
    try {
      dataInputForBin.readDouble(); // time
      final int ntot = dataInputForBin.readInt();
      dataInputForBin.readInt(); // skipped; presumably the dimension count of the Tipsy header - TODO confirm
      ngas = dataInputForBin.readInt();
      ndark = dataInputForBin.readInt();
      nstar = dataInputForBin.readInt();
      dataInputForBin.readInt(); // skipped; presumably header padding - TODO confirm
      if (ngas < 0 || ndark < 0 || nstar < 0 || ntot != ngas + ndark + nstar) {
        throw new DbException("header info incorrect");
      }
      final long expectedSize = H_SIZE + ngas * G_SIZE + ndark * D_SIZE + nstar * S_SIZE;
      // The size can only be verified cheaply for local files.
      if (rawBin instanceof FileInputStream
          && expectedSize != ((FileInputStream) rawBin).getChannel().size()) {
        throw new DbException("binary file size incorrect");
      }
      return ntot;
    } catch (final IOException e) {
      throw new DbException(e);
    }
  }

  /**
   * Read the record count that starts an ascii file.
   *
   * @param scanner the scanner positioned at the beginning of the file.
   * @return the parsed count.
   * @throws DbException if the file is empty or does not start with an integer.
   */
  private static int readCount(final Scanner scanner) throws DbException {
    try {
      return scanner.nextInt();
    } catch (final NoSuchElementException e) {
      throw new DbException(e);
    }
  }

  /**
   * Close every input that is currently open and reset the corresponding fields. Safe to call repeatedly.
   *
   * @throws IOException if closing the binary input fails.
   */
  private void closeInputs() throws IOException {
    // Scanner.close() also closes the wrapped reader/stream and does not throw.
    if (iOrderScanner != null) {
      iOrderScanner.close();
      iOrderScanner = null;
    }
    if (grpScanner != null) {
      grpScanner.close();
      grpScanner = null;
    }
    final DataInputStream bin = dataInputForBin;
    dataInputForBin = null;
    if (bin != null) {
      bin.close();
    }
  }

  @Override
  protected Schema generateSchema() {
    return TIPSY_SCHEMA;
  }

  /**
   * Open an ascii input and wrap it in a {@link Scanner}.
   *
   * @param filenameOrUrl local path or URL of the ascii file.
   * @return a scanner over the file; closing it closes the underlying stream.
   * @throws DbException if the input cannot be opened.
   */
  private static Scanner openAsciiScanner(final String filenameOrUrl) throws DbException {
    final InputStream in = openFileOrUrlInputStream(filenameOrUrl);
    // Decode explicitly instead of relying on the platform default charset.
    return new Scanner(new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)));
  }

  /**
   * Open the given location for reading. A string without a URI scheme, or one that cannot be parsed as a URI/URL, is
   * treated as a local file name; an {@code hdfs} URI is opened through Hadoop; any other URI is opened as a URL.
   *
   * @param filenameOrUrl local path or URL.
   * @return an open input stream.
   * @throws DbException if the location cannot be opened.
   */
  private static InputStream openFileOrUrlInputStream(String filenameOrUrl) throws DbException {
    try {
      final URI uri = new URI(filenameOrUrl);
      final String scheme = uri.getScheme();
      if (scheme == null) {
        return openFileInputStream(filenameOrUrl);
      }
      if ("hdfs".equals(scheme)) {
        return openHdfsInputStream(uri);
      }
      return uri.toURL().openStream();
    } catch (IllegalArgumentException | URISyntaxException | MalformedURLException e) {
      // Not a usable URI/URL (e.g. a path with characters that are illegal in a URI): fall back to a local file.
      return openFileInputStream(filenameOrUrl);
    } catch (IOException e) {
      throw new DbException(e);
    }
  }

  /**
   * Open a local file for reading.
   *
   * @param filename path of the file.
   * @return an open input stream.
   * @throws DbException if the file does not exist or cannot be opened.
   */
  private static InputStream openFileInputStream(String filename) throws DbException {
    try {
      return new FileInputStream(filename);
    } catch (FileNotFoundException e) {
      throw new DbException(e);
    }
  }

  /**
   * Open a file stored in HDFS, using a default Hadoop {@link Configuration}.
   *
   * @param uri the {@code hdfs} URI of the file.
   * @return an open input stream.
   * @throws DbException if the file system or the file cannot be opened.
   */
  private static InputStream openHdfsInputStream(final URI uri) throws DbException {
    try {
      final FileSystem fs = FileSystem.get(uri, new Configuration());
      return fs.open(new Path(uri));
    } catch (IOException e) {
      throw new DbException(e);
    }
  }
}