/*
* Encog(tm) Core v2.5 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2010 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.neural.data.folded;
import java.util.Iterator;
import org.encog.engine.data.EngineData;
import org.encog.engine.data.EngineIndexableSet;
import org.encog.neural.data.Indexable;
import org.encog.neural.data.NeuralData;
import org.encog.neural.data.NeuralDataPair;
import org.encog.neural.networks.training.TrainingError;
/**
 * A folded data set allows you to "fold" the data into several equal (or
 * nearly equal) datasets. You then have the ability to select which fold the
 * dataset will process. This is very useful for cross-validation.
 *
 * This dataset works off of an underlying dataset. By default the data is
 * treated as a single fold that spans the whole underlying dataset. Call the
 * fold method to create more folds.
 *
 * Every fold except the last holds {@code recordCount / numFolds} records;
 * the last fold additionally absorbs the remainder of that division, so that
 * together the folds cover every record of the underlying dataset.
 */
public class FoldedDataSet implements Indexable {

    /**
     * Error message: adds are not supported.
     */
    public static final String ADD_NOT_SUPPORTED = "Direct adds to the folded dataset are not supported.";

    /**
     * The underlying dataset.
     */
    private final Indexable underlying;

    /**
     * The fold that we are currently on.
     */
    private int currentFold;

    /**
     * The total number of folds. Always at least 1, because the constructor
     * folds the data into a single fold.
     */
    private int numFolds;

    /**
     * The size of all folds, except the last fold, the last fold may have a
     * different number.
     */
    private int foldSize;

    /**
     * The size of the last fold: the regular fold size plus any records left
     * over when the record count is not evenly divisible by the fold count.
     */
    private int lastFoldSize;

    /**
     * The offset, into the underlying dataset, of the current fold.
     */
    private int currentFoldOffset;

    /**
     * The size of the current fold.
     */
    private int currentFoldSize;

    /**
     * The owner object (assigned by openAdditional on the set it creates).
     * When non-null, the current fold, its offset and its size are read from
     * the owner rather than from this object.
     */
    private FoldedDataSet owner;

    /**
     * Create a folded dataset. The data starts out as a single fold.
     *
     * @param underlying
     *            The underlying dataset that is to be folded.
     */
    public FoldedDataSet(final Indexable underlying) {
        this.underlying = underlying;
        fold(1);
    }

    /**
     * Not supported.
     *
     * @param data1
     *            Not used.
     */
    @Override
    public void add(final NeuralData data1) {
        throw new TrainingError(FoldedDataSet.ADD_NOT_SUPPORTED);
    }

    /**
     * Not supported.
     *
     * @param inputData
     *            Not used.
     * @param idealData
     *            Not used.
     */
    @Override
    public void add(final NeuralData inputData, final NeuralData idealData) {
        throw new TrainingError(FoldedDataSet.ADD_NOT_SUPPORTED);
    }

    /**
     * Not supported.
     *
     * @param inputData
     *            Not used.
     */
    @Override
    public void add(final NeuralDataPair inputData) {
        throw new TrainingError(FoldedDataSet.ADD_NOT_SUPPORTED);
    }

    /**
     * Close the dataset. This closes the underlying dataset.
     */
    @Override
    public void close() {
        this.underlying.close();
    }

    /**
     * Fold the dataset. Must be done before the dataset is used. The current
     * fold is reset to fold 0.
     *
     * The number of folds actually used is capped at the number of records in
     * the underlying dataset, and is never less than one.
     *
     * @param numFolds
     *            The number of folds, must be at least 1.
     */
    public void fold(final int numFolds) {
        if (numFolds < 1) {
            throw new TrainingError(
                    "The number of folds must be at least 1.");
        }

        final long recordCount = this.underlying.getRecordCount();

        // Never create more folds than there are records, but always keep at
        // least one fold so that an empty underlying dataset does not cause
        // a division by zero below.
        this.numFolds = (int) Math.max(1, Math.min(numFolds, recordCount));
        this.foldSize = (int) (recordCount / this.numFolds);

        // The last fold takes whatever is left after the other
        // (numFolds - 1) regular folds, i.e. foldSize plus the remainder.
        // The multiplication is done in long arithmetic to avoid int overflow.
        this.lastFoldSize = (int) (recordCount
                - ((long) this.foldSize * (this.numFolds - 1)));

        setCurrentFold(0);
    }

    /**
     * @return the currentFold, taken from the owner if there is one.
     */
    public int getCurrentFold() {
        if (this.owner != null) {
            return this.owner.getCurrentFold();
        }
        return this.currentFold;
    }

    /**
     * @return the currentFoldOffset, taken from the owner if there is one.
     */
    public int getCurrentFoldOffset() {
        if (this.owner != null) {
            return this.owner.getCurrentFoldOffset();
        }
        return this.currentFoldOffset;
    }

    /**
     * @return the currentFoldSize, taken from the owner if there is one.
     */
    public int getCurrentFoldSize() {
        if (this.owner != null) {
            return this.owner.getCurrentFoldSize();
        }
        return this.currentFoldSize;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int getIdealSize() {
        return this.underlying.getIdealSize();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int getInputSize() {
        return this.underlying.getInputSize();
    }

    /**
     * @return the numFolds
     */
    public int getNumFolds() {
        // NOTE(review): unlike the current-fold getters this does not
        // delegate to the owner - confirm whether that is intended.
        return this.numFolds;
    }

    /**
     * Read a record from the current fold. The index is relative to the start
     * of the current fold; it is translated to an index into the underlying
     * dataset by adding the current fold offset.
     *
     * @param index
     *            The index of the record within the current fold.
     * @param pair
     *            The object that the underlying dataset copies the record to.
     */
    @Override
    public void getRecord(final long index, final EngineData pair) {
        this.underlying.getRecord(this.getCurrentFoldOffset() + index, pair);
    }

    /**
     * @return The number of records in the current fold.
     */
    @Override
    public long getRecordCount() {
        return getCurrentFoldSize();
    }

    /**
     * @return The underlying dataset.
     */
    public Indexable getUnderlying() {
        return this.underlying;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean isSupervised() {
        return this.underlying.isSupervised();
    }

    /**
     * @return The owner.
     */
    public FoldedDataSet getOwner() {
        return this.owner;
    }

    /**
     * @param owner
     *            The owner.
     */
    public void setOwner(final FoldedDataSet owner) {
        this.owner = owner;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterator<NeuralDataPair> iterator() {
        return new FoldedIterator(this);
    }

    /**
     * Open an additional folded dataset on top of an additional instance of
     * the underlying dataset. The returned set has this object as its owner,
     * so its current fold, fold offset and fold size follow this object.
     *
     * @return The additional folded dataset.
     */
    @Override
    public EngineIndexableSet openAdditional() {
        final FoldedDataSet folded = new FoldedDataSet(
                (Indexable) this.underlying.openAdditional());
        folded.setOwner(this);
        return folded;
    }

    /**
     * Set the current fold. This may only be called on a top-level set, that
     * is, one without an owner.
     *
     * @param currentFold
     *            the currentFold to set, from 0 to the number of folds minus
     *            one.
     */
    public void setCurrentFold(final int currentFold) {
        if (this.owner != null) {
            throw new TrainingError(
                    "Can't set the fold on a non-top-level set.");
        }

        if (currentFold < 0) {
            throw new TrainingError(
                    "Can't set the current fold to be less than zero.");
        }

        if (currentFold >= this.numFolds) {
            throw new TrainingError(
                    "Can't set the current fold to be greater than the number of folds.");
        }

        this.currentFold = currentFold;
        this.currentFoldOffset = this.foldSize * this.currentFold;

        // Only the last fold may differ in size from the others.
        if (this.currentFold == (this.numFolds - 1)) {
            this.currentFoldSize = this.lastFoldSize;
        } else {
            this.currentFoldSize = this.foldSize;
        }
    }
}