/*******************************************************************************
* Copyright (c) 2013 Michael Kutschke.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Michael Kutschke - initial API and implementation
******************************************************************************/
package org.eclipse.recommenders.jayes.io.jbif;
import static org.eclipse.recommenders.jayes.io.jbif.Constants.*;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.DoubleBuffer;
import org.apache.commons.io.IOUtils;
import org.eclipse.recommenders.jayes.BayesNet;
import org.eclipse.recommenders.jayes.BayesNode;
import org.eclipse.recommenders.jayes.io.IBayesNetWriter;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Doubles;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Shorts;
/**
* Writer for the Jayes Binary Interchange Format (JBIF).
*
* JBIF conforms to the following grammar:
*
* <dl>
* <dt><var>JBIF</var></dt>
* <dd><var>Header</var> <var>BayesNet</var></dd>
* <dt><var>Header<var></dt>
* <dd>(magicNumber: 0xBA7E5B1F) (formatVersion: 1)</dd>
* <dt><var>BayesNet</var></dt>
* <dd><var>Name</var> (nodeCount: <code>int</code>) <var>NodeDeclaration</var>* <var>NodeDefinition</var>*</dd>
* <dt><var>Name</var></dt>
* <dd>(byteCount: <code>short</code>) (utf8: <code>byte</code>...)</dd>
* <dt><var>NodeDeclaration</var></dt>
* <dd><var>Name</var> (outcomeCount: <code>int</code>) <var>Outcomes</var>*</dd>
* <dt><var>Outcome</var></dt>
* <dd><var>Name</var></dd>
* <dt><var>NodeDefinition</var></dt>
* <dd><var>Parents</var> <var>CPT</var></dd>
* <dt><var>Parents</var></dt>
* <dd>(parentCount: <code>byte</code>) parentIds: (<code>int</code>...)</dd>
* <dt><var>CPT</var></dt>
* <dd>(entryCount: <code>int</code>) (probabilities: <code>double</code>...)</dd>
* </dl>
*
* Multi-byte primitive types are serialized in network byte-order.
*/
public class JayesBifWriter implements IBayesNetWriter {
private static final int HEADER_BYTES = 2 * Ints.BYTES;
private OutputStream out;
public JayesBifWriter(OutputStream out) {
this.out = out;
}
@Override
public void write(BayesNet bayesNet) throws IOException {
IOUtils.write(writeToArray(bayesNet), out);
}
private byte[] writeToArray(BayesNet bayesNet) {
ByteBuffer buffer = ByteBuffer.allocate(estimateBinarySize(bayesNet));
putHeader(buffer);
putBayesNet(bayesNet, buffer);
byte[] out = new byte[buffer.position()];
System.arraycopy(buffer.array(), 0, out, 0, buffer.position());
return out;
}
/**
* Estimate binary size. Due to UTF-8 being a variable-length encoding, this may be over-estimating but is
* guaranteed to never under-estimate.
*/
private int estimateBinarySize(BayesNet bayesNet) {
int size = HEADER_BYTES;
size += estimateBinarySize(bayesNet.getName());
size += Ints.BYTES;
for (BayesNode node : bayesNet.getNodes()) {
size += estimateBinarySize(node);
}
return size;
}
private int estimateBinarySize(String string) {
return Shorts.BYTES + string.length() * 4;
}
private int estimateBinarySize(BayesNode node) {
int size = 0;
size += estimateBinarySize(node.getName());
size += Ints.BYTES;
for (String outcome : node.getOutcomes()) {
size += estimateBinarySize(outcome);
}
size += 1 + Ints.BYTES * node.getParents().size();
size += Ints.BYTES + Doubles.BYTES * node.getProbabilities().length;
return size;
}
private void putHeader(ByteBuffer buffer) {
buffer.putInt(MAGIC_NUMBER);
buffer.putInt(FORMAT_VERSION);
}
private void putBayesNet(BayesNet bayesNet, ByteBuffer buffer) {
putName(bayesNet.getName(), buffer);
buffer.putInt(bayesNet.getNodes().size());
for (BayesNode node : bayesNet.getNodes()) {
putNodeDeclaration(node, buffer);
}
for (BayesNode node : bayesNet.getNodes()) {
putNodeDefinition(node, buffer);
}
}
private void putName(String string, ByteBuffer buffer) {
final byte[] utf8 = string.getBytes(Charsets.UTF_8);
Preconditions.checkArgument(utf8.length < 2 << Short.SIZE);
final short byteCount = (short) utf8.length;
buffer.putShort(byteCount);
buffer.put(utf8);
}
private void putNodeDeclaration(BayesNode node, ByteBuffer buffer) {
putName(node.getName(), buffer);
buffer.putInt(node.getOutcomeCount());
for (String outcome : node.getOutcomes()) {
putName(outcome, buffer);
}
}
private void putNodeDefinition(BayesNode node, ByteBuffer buffer) {
putParents(node, buffer);
putCpt(node, buffer);
}
private void putParents(BayesNode node, ByteBuffer buffer) {
final int parentCount = node.getParents().size();
Preconditions.checkArgument(parentCount < 2 << Byte.SIZE);
buffer.put((byte) parentCount);
for (BayesNode p : node.getParents()) {
buffer.putInt(p.getId());
}
}
private void putCpt(BayesNode node, ByteBuffer buffer) {
buffer.putInt(node.getProbabilities().length);
DoubleBuffer asDoubleBuffer = buffer.asDoubleBuffer();
asDoubleBuffer.put(node.getProbabilities());
buffer.position(buffer.position() + asDoubleBuffer.position() * Doubles.BYTES);
}
@Override
public void close() throws IOException {
out.close();
}
}