package edu.washington.escience.myria;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.joda.time.DateTime;
import edu.washington.escience.myria.storage.ReadableTable;
/**
* PostgresBinaryTupleWriter is a {@link TupleWriter} that serializes tuples to a binary format that can be directly
* imported into PostgreSQL. See http://www.postgresql.org/docs/current/interactive/sql-copy.html.
*
* This requires integer time stamps.
*/
public class PostgresBinaryTupleWriter implements TupleWriter {
  /** Required for Java serialization. */
  static final long serialVersionUID = 1L;
  /** The stream the binary COPY data is written to; set by {@link #open(OutputStream)}. */
  private transient DataOutputStream buffer;

  /**
   * Wraps the given stream and writes the fixed PostgreSQL binary COPY file header.
   *
   * @param out the {@link OutputStream} to which the data will be written.
   * @throws IOException if there is an IO exception
   */
  @Override
  public void open(final OutputStream out) throws IOException {
    buffer = new DataOutputStream(new BufferedOutputStream(out));
    // 11 bytes required header (writeBytes emits the low byte of each char, so \377 becomes 0xFF)
    buffer.writeBytes("PGCOPY\n\377\r\n\0");
    // 32 bit flags field: 0 means no OIDs are included
    buffer.writeInt(0);
    // 32 bit header extension area length
    buffer.writeInt(0);
  }

  /**
   * No-op: this writer emits no column names.
   *
   * @param columnNames ignored.
   * @throws IOException never thrown by this implementation.
   */
  @Override
  public void writeColumnHeaders(final List<String> columnNames) throws IOException {}

  /**
   * Converts the given Java seconds to PostgreSQL seconds. The conversion is valid for any year 100 BC onwards.
   *
   * from /org/postgresql/jdbc2/TimestampUtils.java
   *
   * @param seconds Java seconds (relative to 1970-01-01 00:00:00).
   * @return PostgreSQL seconds (relative to 2000-01-01 00:00:00).
   */
  @SuppressWarnings("checkstyle:magicnumber")
  private static long toPgSecs(final long seconds) {
    long secs = seconds;
    // java epoch to postgres epoch
    secs -= 946684800L;
    // Julian/Gregorian calendar cutoff point
    if (secs < -13165977600L) { // October 15, 1582 -> October 4, 1582
      secs -= 86400 * 10;
      if (secs < -15773356800L) { // 1500-03-01 -> 1500-02-28
        int years = (int) ((secs + 15773356800L) / -3155823050L);
        years++;
        years -= years / 4;
        secs += years * 86400;
      }
    }
    return secs;
  }

  /**
   * Converts a {@link DateTime} to PostgreSQL timestamp microseconds, keeping the wall-clock time of the value's own
   * zone and its millisecond fraction.
   *
   * @param theTime the timestamp to convert.
   * @return microseconds relative to the PostgreSQL epoch.
   */
  private static long toPgMicros(final DateTime theTime) {
    long millis = theTime.getMillis();
    // adjust time zone offset so the local wall-clock time is what gets stored
    millis += theTime.getZone().getOffset(millis);
    // Split into whole seconds and a non-negative millisecond remainder. toSeconds truncates toward zero, so
    // instants before the Java epoch need one more second borrowed to make the remainder positive.
    long javaSecs = TimeUnit.MILLISECONDS.toSeconds(millis);
    long fracMillis = millis - TimeUnit.SECONDS.toMillis(javaSecs);
    if (fracMillis < 0) {
      javaSecs--;
      fracMillis += TimeUnit.SECONDS.toMillis(1);
    }
    // pg time 0 is 2000-01-01 00:00:00
    return TimeUnit.SECONDS.toMicros(toPgSecs(javaSecs)) + TimeUnit.MILLISECONDS.toMicros(fracMillis);
  }

  /**
   * Writes one length-prefixed blob field containing the bytes between the buffer's position and limit.
   *
   * @param blob the blob value; its position and limit are left untouched.
   * @throws IOException if there is an IO exception
   */
  private void writeBlob(final ByteBuffer blob) throws IOException {
    // Work on a duplicate so the caller's position is not moved; copying via get() also works for direct and
    // read-only buffers, where array() would throw, and respects arrayOffset for sliced buffers.
    final ByteBuffer view = blob.duplicate();
    final byte[] bytes = new byte[view.remaining()];
    view.get(bytes);
    buffer.writeInt(bytes.length);
    buffer.write(bytes);
  }

  /**
   * Appends every tuple of the given table to the stream. May be called any number of times between
   * {@link #open(OutputStream)} and {@link #done()}; the file trailer is written by {@link #done()}.
   *
   * @param tuples the tuples to serialize.
   * @throws IOException if there is an IO exception or a column has a type this writer cannot encode.
   */
  @Override
  public void writeTuples(final ReadableTable tuples) throws IOException {
    final List<Type> columnTypes = tuples.getSchema().getColumnTypes();
    final short numColumns = (short) tuples.numColumns();
    for (int i = 0; i < tuples.numTuples(); ++i) {
      // 16 bit integer: number of fields in this tuple
      buffer.writeShort(numColumns);
      for (int j = 0; j < numColumns; ++j) {
        // each field: 32 bit integer for length of value, then n bytes value
        final Type type = columnTypes.get(j);
        switch (type) {
          case BOOLEAN_TYPE:
            // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/bool.c
            buffer.writeInt(1);
            buffer.writeByte(tuples.getBoolean(j, i) ? 1 : 0);
            break;
          case DOUBLE_TYPE:
            // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/float.c
            buffer.writeInt(8);
            buffer.writeDouble(tuples.getDouble(j, i));
            break;
          case FLOAT_TYPE:
            // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/float.c
            buffer.writeInt(4);
            buffer.writeFloat(tuples.getFloat(j, i));
            break;
          case INT_TYPE:
            // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/int.c
            buffer.writeInt(4);
            buffer.writeInt(tuples.getInt(j, i));
            break;
          case LONG_TYPE:
            // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/int8.c
            buffer.writeInt(8);
            buffer.writeLong(tuples.getLong(j, i));
            break;
          case DATETIME_TYPE:
            // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/timestamp.c
            // requires eight-byte integers for time stamps! This should be the default.
            // See http://www.postgresql.org/docs/9.1/static/datatype-datetime.html
            buffer.writeInt(8);
            buffer.writeLong(toPgMicros(tuples.getDateTime(j, i)));
            break;
          case STRING_TYPE:
            // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/varchar.c
            final byte[] utf8Bytes = tuples.getString(j, i).getBytes("UTF-8");
            buffer.writeInt(utf8Bytes.length);
            buffer.write(utf8Bytes);
            break;
          case BLOB_TYPE:
            writeBlob(tuples.getBlob(j, i));
            break;
          default:
            // Skipping a field would silently corrupt the row layout, so fail loudly instead.
            throw new IOException("Unsupported column type " + type + " in column " + j);
        }
      }
    }
  }

  /**
   * Writes the file trailer, then flushes and closes the stream.
   *
   * @throws IOException if there is an IO exception
   */
  @Override
  public void done() throws IOException {
    // 16 bit file trailer; written exactly once here so that several writeTuples calls form one valid file
    buffer.writeShort(-1);
    buffer.flush();
    buffer.close();
  }

  /**
   * Closes the stream without writing a trailer and reports the failure.
   *
   * @throws IOException always.
   */
  @Override
  public void error() throws IOException {
    try {
      throw new IOException("An error ocurred when writing binary data.");
    } finally {
      // buffer is null if open() was never called; don't let an NPE mask the IOException
      if (buffer != null) {
        buffer.close();
      }
    }
  }
}