/*
* Encog(tm) Core v3.4 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2016 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.ml.data.buffer;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
/**
 * Used to access an Encog Binary Training file (*.EGB).
 *
 * <p>
 * The file starts with a {@link #HEADER_SIZE}-byte header: the eight ASCII
 * bytes {@code ENCOG-00} followed by the input count and the ideal count,
 * each stored as a little-endian double. The header is followed by
 * fixed-size records. Every record holds {@code inputCount + idealCount + 1}
 * little-endian doubles; {@link #addRow(int)} initialises the extra trailing
 * value to 1.0 (presumably the record's significance - confirm against the
 * callers).
 * </p>
 *
 * <p>
 * The relative {@code read}/{@code write} methods operate at the current
 * position of the underlying channel; the variants taking a row operate at
 * an absolute offset and leave the channel position untouched.
 * </p>
 */
public class EncogEGBFile {

    /**
     * The size of a double, in bytes.
     */
    public static final int DOUBLE_SIZE = Double.SIZE / 8;

    /**
     * The size of the file header, in bytes.
     */
    public static final int HEADER_SIZE = EncogEGBFile.DOUBLE_SIZE * 3;

    /**
     * The identifier found at the very start of every EGB file. It is
     * followed by a two character version number.
     */
    private static final String MAGIC = "ENCOG-";

    /**
     * The file that we are working with.
     */
    private final File file;

    /**
     * The number of input values per record.
     */
    private int inputCount;

    /**
     * The number of ideal values per record.
     */
    private int idealCount;

    /**
     * The underlying file.
     */
    private RandomAccessFile raf;

    /**
     * The file channel used.
     */
    private FileChannel fc;

    /**
     * A byte buffer to hold the header.
     */
    private final ByteBuffer headerBuffer;

    /**
     * A byte buffer to hold one record.
     */
    private ByteBuffer recordBuffer;

    /**
     * The number of values in a record: the input and ideal values plus one
     * trailing value.
     */
    private int recordCount;

    /**
     * The size of a record, in bytes.
     */
    private int recordSize;

    /**
     * The number of records in the file.
     */
    private int numberOfRecords;

    /**
     * Construct an EGB file. Nothing is opened until {@link #open()} or
     * {@link #create(int, int)} is called.
     *
     * @param theFile
     *            The file.
     */
    public EncogEGBFile(final File theFile) {
        this.file = theFile;
        this.headerBuffer = ByteBuffer.allocate(EncogEGBFile.HEADER_SIZE);
    }

    /**
     * Add a column, filled with zeros, to every record. The header on disk
     * and the record buffer are updated to match the wider records.
     *
     * @param col
     *            The index the new column will have; the existing values at
     *            and after this index move one place to the right. Must be
     *            between 0 and the current record count, inclusive.
     * @param isInput
     *            Is this an input column?
     */
    public void addColumn(final int col, final boolean isInput) {
        if ((col < 0) || (col > this.recordCount)) {
            throw new BufferedDataError("Can't add column " + col
                    + ", a record holds " + this.recordCount + " values.");
        }

        try {
            final int oldSize = this.recordSize;
            final int newSize = oldSize + EncogEGBFile.DOUBLE_SIZE;
            final int prefix = col * EncogEGBFile.DOUBLE_SIZE;

            // grow the file first, so that every destination offset exists
            this.raf.setLength(EncogEGBFile.HEADER_SIZE
                    + ((long) this.numberOfRecords * newSize));

            final ByteBuffer source = ByteBuffer.allocate(oldSize);
            final ByteBuffer target = ByteBuffer.allocate(newSize);
            target.order(ByteOrder.LITTLE_ENDIAN);

            // Records only ever move towards the end of the file, so walk
            // from the last record to the first. That way a widened record
            // can only overwrite data that has already been relocated.
            for (long row = this.numberOfRecords - 1L; row >= 0; row--) {
                source.clear();
                readFully(source, EncogEGBFile.HEADER_SIZE + (row * oldSize));

                target.clear();
                target.put(source.array(), 0, prefix);
                target.putDouble(0);
                target.put(source.array(), prefix, oldSize - prefix);
                target.flip();
                writeFully(target, EncogEGBFile.HEADER_SIZE + (row * newSize));
            }

            // does it fall inside of input or ideal?
            if (isInput) {
                this.inputCount++;
            } else {
                this.idealCount++;
            }
            updateRecordLayout();

            // keep the counts stored on disk in step with the new layout
            persistHeader();
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Add a row. The new row holds zeros, except for its last value, which
     * is set to 1.0. Existing rows at and after the specified index move
     * down by one.
     *
     * @param row
     *            Where to add the row, between 0 and the current number of
     *            records, inclusive.
     */
    public void addRow(final int row) {
        if ((row < 0) || (row > this.numberOfRecords)) {
            throw new BufferedDataError("Can't add row " + row
                    + ", the file holds " + this.numberOfRecords
                    + " records.");
        }

        try {
            final int existing = this.numberOfRecords;
            this.numberOfRecords = existing + 1;
            this.raf.setLength(calculateIndex(this.numberOfRecords));

            // Shift the existing records down, last one first. Only the
            // records that existed before this call are moved; moving the
            // freshly appended slot would write past the end of the file.
            for (int i = existing - 1; i >= row; i--) {
                clear();
                readFully(this.recordBuffer, calculateIndex(i));
                this.recordBuffer.flip();
                writeFully(this.recordBuffer, calculateIndex(i + 1));
            }

            clear();
            for (int i = 0; i < (this.recordCount - 1); i++) {
                this.recordBuffer.putDouble(0);
            }
            this.recordBuffer.putDouble(1.0);
            this.recordBuffer.flip();
            writeFully(this.recordBuffer, calculateIndex(row));
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Calculate the byte offset of the specified row. The calculation is
     * done with longs so that files beyond 2GB are addressed correctly.
     *
     * @param row
     *            The row to calculate for.
     * @return The byte offset of the start of the row.
     */
    private long calculateIndex(final long row) {
        return (long) EncogEGBFile.HEADER_SIZE
                + (row * (long) this.recordSize);
    }

    /**
     * Calculate the byte offset of the specified row and column.
     *
     * @param row
     *            The row, or record.
     * @param col
     *            The column inside of the record.
     * @return The byte offset of the value.
     */
    private long calculateIndex(final int row, final int col) {
        return calculateIndex(row) + ((long) col * EncogEGBFile.DOUBLE_SIZE);
    }

    /**
     * Clear the record buffer and make sure it is little endian.
     */
    private void clear() {
        this.recordBuffer.clear();
        this.recordBuffer.order(ByteOrder.LITTLE_ENDIAN);
    }

    /**
     * Close the file. Calling this on a file that is not open does nothing
     * apart from the garbage collection request.
     */
    public void close() {
        // Detach the handles up front, so that this object never keeps a
        // reference to a half closed file.
        final FileChannel channel = this.fc;
        final RandomAccessFile handle = this.raf;
        this.fc = null;
        this.raf = null;

        try {
            try {
                if (channel != null) {
                    channel.close();
                }
            } finally {
                if (handle != null) {
                    handle.close();
                }
            }
            // NOTE(review): explicit GC request kept from the original
            // code; presumably it helps the platform release the file
            // handle promptly - confirm before removing.
            System.gc();
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Create a new EGB file, replacing any file that already exists. Once
     * this returns, the channel is positioned directly after the header.
     *
     * @param theInputCount
     *            The input count.
     * @param theIdealCount
     *            The ideal count.
     */
    public void create(final int theInputCount, final int theIdealCount) {
        if ((theInputCount < 0) || (theIdealCount < 0)) {
            throw new BufferedDataError(
                    "Input and ideal counts must not be negative: "
                            + theInputCount + ", " + theIdealCount);
        }

        try {
            this.inputCount = theInputCount;
            this.idealCount = theIdealCount;

            // the result of the delete does not matter, the file is
            // truncated to zero length below either way
            this.file.delete();
            this.raf = new RandomAccessFile(this.file, "rw");
            this.raf.setLength(0);
            this.fc = this.raf.getChannel();

            this.numberOfRecords = 0;
            updateRecordLayout();

            // a relative write, so that the channel ends up positioned at
            // the first record
            fillHeader();
            this.fc.write(this.headerBuffer);
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Delete a column from every record. The header on disk and the record
     * buffer are updated to match the narrower records.
     *
     * @param col
     *            The column to delete, between 0 (inclusive) and the
     *            current record count (exclusive). A column index below the
     *            input count reduces the input count, any other index
     *            reduces the ideal count.
     */
    public void deleteCol(final int col) {
        if ((col < 0) || (col >= this.recordCount)) {
            throw new BufferedDataError("Can't delete column " + col
                    + ", a record holds " + this.recordCount + " values.");
        }

        try {
            final int oldSize = this.recordSize;
            final int newSize = oldSize - EncogEGBFile.DOUBLE_SIZE;
            final int prefix = col * EncogEGBFile.DOUBLE_SIZE;
            final int suffixStart = prefix + EncogEGBFile.DOUBLE_SIZE;

            final ByteBuffer source = ByteBuffer.allocate(oldSize);
            final ByteBuffer target = ByteBuffer.allocate(newSize);

            // Records only ever move towards the start of the file, so
            // walking from the first record to the last never overwrites
            // data that still has to be read.
            for (long row = 0; row < this.numberOfRecords; row++) {
                source.clear();
                readFully(source, EncogEGBFile.HEADER_SIZE + (row * oldSize));

                target.clear();
                target.put(source.array(), 0, prefix);
                target.put(source.array(), suffixStart, oldSize - suffixStart);
                target.flip();
                writeFully(target, EncogEGBFile.HEADER_SIZE + (row * newSize));
            }

            // does it fall inside of input or ideal?
            if (col < this.inputCount) {
                this.inputCount--;
            } else {
                this.idealCount--;
            }
            updateRecordLayout();

            // adjust file size
            this.raf.setLength(calculateIndex(this.numberOfRecords));

            // keep the counts stored on disk in step with the new layout
            persistHeader();
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Delete a row. The rows after it move up by one and the file is
     * shortened by one record.
     *
     * @param row
     *            The row to delete, between 0 (inclusive) and the current
     *            number of records (exclusive).
     */
    public void deleteRow(final int row) {
        if ((row < 0) || (row >= this.numberOfRecords)) {
            throw new BufferedDataError("Can't delete row " + row
                    + ", the file holds " + this.numberOfRecords
                    + " records.");
        }

        try {
            for (int i = row; i < (this.numberOfRecords - 1); i++) {
                clear();
                readFully(this.recordBuffer, calculateIndex(i + 1));
                this.recordBuffer.flip();
                writeFully(this.recordBuffer, calculateIndex(i));
            }

            this.numberOfRecords--;
            this.raf.setLength(calculateIndex(this.numberOfRecords));
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Fill the header buffer with the file identifier, the version and the
     * current input and ideal counts, and make it ready to be written.
     */
    private void fillHeader() {
        this.headerBuffer.clear();
        this.headerBuffer.order(ByteOrder.LITTLE_ENDIAN);
        for (int i = 0; i < EncogEGBFile.MAGIC.length(); i++) {
            this.headerBuffer.put((byte) EncogEGBFile.MAGIC.charAt(i));
        }
        this.headerBuffer.put((byte) '0');
        this.headerBuffer.put((byte) '0');
        this.headerBuffer.putDouble(this.inputCount);
        this.headerBuffer.putDouble(this.idealCount);
        this.headerBuffer.flip();
    }

    /**
     * @return the fc
     */
    public FileChannel getFc() {
        return this.fc;
    }

    /**
     * @return the file
     */
    public File getFile() {
        return this.file;
    }

    /**
     * @return the headerBuffer
     */
    public ByteBuffer getHeaderBuffer() {
        return this.headerBuffer;
    }

    /**
     * @return the idealCount
     */
    public int getIdealCount() {
        return this.idealCount;
    }

    /**
     * @return the inputCount
     */
    public int getInputCount() {
        return this.inputCount;
    }

    /**
     * @return the numberOfRecords
     */
    public int getNumberOfRecords() {
        return this.numberOfRecords;
    }

    /**
     * @return the raf
     */
    public RandomAccessFile getRaf() {
        return this.raf;
    }

    /**
     * @return the recordBuffer
     */
    public ByteBuffer getRecordBuffer() {
        return this.recordBuffer;
    }

    /**
     * @return the recordCount
     */
    public int getRecordCount() {
        return this.recordCount;
    }

    /**
     * @return the recordSize
     */
    public int getRecordSize() {
        return this.recordSize;
    }

    /**
     * Open an existing EGB file and read its header. Once this returns, the
     * channel is positioned directly after the header. If the file is
     * rejected, the underlying file handle is released again.
     */
    public void open() {
        boolean opened = false;
        try {
            this.raf = new RandomAccessFile(this.file, "rw");
            this.fc = this.raf.getChannel();

            this.headerBuffer.clear();
            this.headerBuffer.order(ByteOrder.LITTLE_ENDIAN);

            // a relative read, so that the channel ends up positioned at
            // the first record
            while (this.headerBuffer.hasRemaining()) {
                if (this.fc.read(this.headerBuffer) < 0) {
                    break;
                }
            }

            // A file shorter than the header can not be valid. Without
            // this check, bytes left in the buffer by an earlier call
            // would be inspected instead of the file's own content.
            boolean isEncogFile = !this.headerBuffer.hasRemaining();
            this.headerBuffer.position(0);
            for (int i = 0; isEncogFile
                    && (i < EncogEGBFile.MAGIC.length()); i++) {
                isEncogFile = this.headerBuffer.get()
                        == EncogEGBFile.MAGIC.charAt(i);
            }

            if (!isEncogFile) {
                throw new BufferedDataError(
                        "File is not a valid Encog binary file:"
                                + this.file.toString());
            }

            final char v1 = (char) this.headerBuffer.get();
            final char v2 = (char) this.headerBuffer.get();
            final String versionStr = "" + v1 + v2;

            try {
                final int version = Integer.parseInt(versionStr);
                if (version > 0) {
                    throw new BufferedDataError(
                            "File is from a newer version of Encog than is currently in use.");
                }
            } catch (final NumberFormatException ex) {
                throw new BufferedDataError("File has invalid version number.");
            }

            this.inputCount = (int) this.headerBuffer.getDouble();
            this.idealCount = (int) this.headerBuffer.getDouble();
            this.recordCount = this.inputCount + this.idealCount + 1;
            this.recordSize = this.recordCount * EncogEGBFile.DOUBLE_SIZE;

            if (this.recordSize == 0) {
                this.numberOfRecords = 0;
            } else {
                this.numberOfRecords
                        = (int) ((this.file.length() - EncogEGBFile.HEADER_SIZE)
                        / this.recordSize);
            }

            this.recordBuffer = ByteBuffer.allocate(this.recordSize);
            opened = true;
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        } finally {
            if (!opened) {
                releaseAfterFailedOpen();
            }
        }
    }

    /**
     * Write the header, holding the current input and ideal counts, to the
     * start of the file. The position of the channel is not changed.
     *
     * @throws IOException
     *             If an IO error occurs.
     */
    private void persistHeader() throws IOException {
        fillHeader();
        writeFully(this.headerBuffer, 0);
    }

    /**
     * Read a single double from the current position of the channel.
     *
     * @return The double read.
     */
    public double read() {
        try {
            clear();
            this.recordBuffer.limit(EncogEGBFile.DOUBLE_SIZE);
            this.fc.read(this.recordBuffer);
            this.recordBuffer.position(0);
            return this.recordBuffer.getDouble();
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Read an array of doubles from the current position of the channel.
     *
     * @param d
     *            The array to read into, its length decides how many values
     *            are read. It must not be longer than one record.
     */
    public void read(final double[] d) {
        try {
            clear();
            this.recordBuffer.limit(EncogEGBFile.DOUBLE_SIZE * d.length);
            this.fc.read(this.recordBuffer);
            this.recordBuffer.position(0);
            for (int i = 0; i < d.length; i++) {
                d[i] = this.recordBuffer.getDouble();
            }
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Read a double array at the specified record. The position of the
     * channel is not changed.
     *
     * @param row
     *            The record to read.
     * @param d
     *            The array to read into, its length decides how many values
     *            are read. It must not be longer than one record.
     */
    public void read(final int row, final double[] d) {
        try {
            clear();
            this.recordBuffer.limit(EncogEGBFile.DOUBLE_SIZE * d.length);
            readFully(this.recordBuffer, calculateIndex(row));
            this.recordBuffer.position(0);
            // Only d.length values were requested from the file, so only
            // that many can be taken out of the buffer.
            for (int i = 0; i < d.length; i++) {
                d[i] = this.recordBuffer.getDouble();
            }
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Read a row and column. The position of the channel is not changed.
     *
     * @param row
     *            The row to read.
     * @param col
     *            The column to read.
     * @return The value read.
     */
    public double read(final int row, final int col) {
        try {
            clear();
            this.recordBuffer.limit(EncogEGBFile.DOUBLE_SIZE);
            readFully(this.recordBuffer, calculateIndex(row, col));
            return this.recordBuffer.getDouble(0);
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Read from an absolute position until the buffer is full or the end of
     * the file is reached. The position of the channel is not changed.
     *
     * @param buffer
     *            The buffer to fill.
     * @param position
     *            The byte offset to start reading at.
     * @throws IOException
     *             If an IO error occurs.
     */
    private void readFully(final ByteBuffer buffer, final long position)
            throws IOException {
        long at = position;
        while (buffer.hasRemaining()) {
            final int bytesRead = this.fc.read(buffer, at);
            if (bytesRead < 0) {
                break;
            }
            at += bytesRead;
        }
    }

    /**
     * Release the file handle after {@link #open()} failed part way.
     */
    private void releaseAfterFailedOpen() {
        final RandomAccessFile handle = this.raf;
        this.raf = null;
        this.fc = null;
        if (handle != null) {
            try {
                handle.close();
            } catch (final IOException ignored) {
                // the failure that stopped the open is the one the caller
                // gets to see, a failed clean up would only hide it
            }
        }
    }

    /**
     * Set the current location to the specified row.
     *
     * @param row
     *            The row.
     */
    public void setLocation(final int row) {
        try {
            this.fc.position(calculateIndex(row));
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Recalculate the record count and record size from the input and ideal
     * counts, and allocate a record buffer of exactly one record. The row
     * shifting done by addRow and deleteRow copies one buffer at a time, so
     * the buffer must never be larger or smaller than a record.
     */
    private void updateRecordLayout() {
        this.recordCount = this.inputCount + this.idealCount + 1;
        this.recordSize = this.recordCount * EncogEGBFile.DOUBLE_SIZE;
        this.recordBuffer = ByteBuffer.allocate(this.recordSize);
    }

    /**
     * Write a byte at the current position of the channel.
     *
     * @param b
     *            The byte to write.
     */
    public void write(final byte b) {
        try {
            clear();
            this.recordBuffer.put(b);
            this.recordBuffer.flip();
            this.fc.write(this.recordBuffer);
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Write an array at the current position of the channel.
     *
     * @param v
     *            The array to write. It must not be longer than one record.
     */
    public void write(final double[] v) {
        try {
            clear();
            for (final double element : v) {
                this.recordBuffer.putDouble(element);
            }
            this.recordBuffer.flip();
            this.fc.write(this.recordBuffer);
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Write an array at the specified record. The position of the channel
     * is not changed.
     *
     * @param row
     *            The record to write.
     * @param v
     *            The array to write. It must not be longer than one record.
     */
    public void write(final int row, final double[] v) {
        try {
            clear();
            for (final double element : v) {
                this.recordBuffer.putDouble(element);
            }
            this.recordBuffer.flip();
            // write at the requested record, not wherever the channel
            // happens to be positioned
            writeFully(this.recordBuffer, calculateIndex(row));
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Write the specified row and column. The position of the channel is
     * not changed.
     *
     * @param row
     *            The row.
     * @param col
     *            The column.
     * @param v
     *            The value.
     */
    public void write(final int row, final int col, final double v) {
        try {
            clear();
            this.recordBuffer.putDouble(v);
            this.recordBuffer.flip();
            writeFully(this.recordBuffer, calculateIndex(row, col));
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Write a single double at the current position of the channel.
     *
     * @param d
     *            The value to write.
     */
    public void write(final double d) {
        try {
            clear();
            this.recordBuffer.putDouble(d);
            this.recordBuffer.flip();
            this.fc.write(this.recordBuffer);
        } catch (final IOException ex) {
            throw new BufferedDataError(ex);
        }
    }

    /**
     * Write the remaining content of the buffer to an absolute position.
     * The position of the channel is not changed.
     *
     * @param buffer
     *            The buffer to write.
     * @param position
     *            The byte offset to start writing at.
     * @throws IOException
     *             If an IO error occurs.
     */
    private void writeFully(final ByteBuffer buffer, final long position)
            throws IOException {
        long at = position;
        while (buffer.hasRemaining()) {
            at += this.fc.write(buffer, at);
        }
    }
}