/*
 * Encog(tm) Core v2.5 - Java Version
 * http://www.heatonresearch.com/encog/
 * http://code.google.com/p/encog-java/
 * Copyright 2008-2010 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * For more information on Heaton Research copyrights, licenses
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */
package org.encog.engine.opencl.kernels;

import java.util.Map;

import org.encog.engine.data.BasicEngineData;
import org.encog.engine.data.EngineData;
import org.encog.engine.data.EngineIndexableSet;
import org.encog.engine.network.activation.ActivationFunction;
import org.encog.engine.network.flat.FlatNetwork;
import org.encog.engine.network.train.prop.OpenCLTrainingProfile;
import org.encog.engine.opencl.EncogCLDevice;
import org.encog.engine.opencl.EncogCLQueue;
import org.encog.engine.opencl.exceptions.OpenCLError;
import org.encog.engine.opencl.exceptions.OutOfOpenCLResources;
import org.encog.engine.util.EngineArray;
import org.encog.engine.util.ResourceLoader;
import org.jocl.CLException;
import org.jocl.cl_mem;

/**
 * An OpenCL kernel that is designed to calculate gradients and help train a
 * neural network.
 *
 * The kernel receives the network topology, weights and the complete training
 * set as OpenCL buffers; each invocation of {@link #calculate(int, int,
 * boolean, int)} uploads the current weights, runs the kernel over a range of
 * training items, and reads back per-workitem errors and gradients.
 */
public class KernelNetworkTrain extends EncogKernel {

	/**
	 * Index into the parameter array: the input neuron count.
	 */
	public static final int PARRAY_INPUT_COUNT = 0;

	/**
	 * Index into the parameter array: the output neuron count.
	 */
	public static final int PARRAY_OUTPUT_COUNT = 1;

	/**
	 * Index into the parameter array: the layer count.
	 */
	public static final int PARRAY_LAYER_COUNT = 2;

	/**
	 * Index into the parameter array: are we learning? 0=no, 1=yes.
	 */
	public static final int PARRAY_LEARN = 3;

	/**
	 * Index into the parameter array: the starting index to train at.
	 */
	public static final int PARRAY_START = 4;

	/**
	 * Index into the parameter array: items to train per call.
	 */
	public static final int PARRAY_ITEMS_PER = 5;

	/**
	 * Index into the parameter array: the number of iterations to execute
	 * per kernel call.
	 */
	public static final int PARRAY_ITERATIONS = 6;

	/**
	 * A buffer to communicate weights to the kernel.
	 */
	private cl_mem weightInArrayBuffer;

	/**
	 * A buffer to communicate weights from the kernel.
	 */
	private cl_mem weightOutArrayBuffer;

	/**
	 * A buffer to hold the layer index.
	 */
	private cl_mem layerIndexBuffer;

	/**
	 * A buffer to hold the layer counts.
	 */
	private cl_mem layerCountBuffer;

	/**
	 * A buffer to hold the layer feed counts.
	 */
	private cl_mem layerFeedCountBuffer;

	/**
	 * A buffer to hold the weight indexes.
	 */
	private cl_mem weightIndexBuffer;

	/**
	 * A buffer to hold the activations for each of the layers.
	 */
	private cl_mem activationTypeBuffer;

	/**
	 * The temp data in buffer. Temp data that is used while training.
	 */
	private cl_mem tempDataInBuffer;

	/**
	 * The temp data out buffer. Temp data that is used while training.
	 */
	private cl_mem tempDataOutBuffer;

	/**
	 * The weight and bias array for the network.
	 */
	private final float[] weightInArray;

	/**
	 * The weight output array.
	 */
	private final float[] weightOutArray;

	/**
	 * The temp data array. Temp data that is used while training.
	 */
	private float[] tempDataArray;

	/**
	 * The size of all layer deltas. Computed in the constructor; retained
	 * for use by kernel sizing logic.
	 */
	private int layerDeltaSize;

	/**
	 * An array to hold the input to the neural network. The entire training
	 * set is flattened into this single array.
	 */
	private final float[] inputArray;

	/**
	 * An array to hold the ideal values expected from the network. The
	 * entire training set is flattened into this single array.
	 */
	private final float[] idealArray;

	/**
	 * The input buffer.
	 */
	private cl_mem inputBuffer;

	/**
	 * The ideal buffer.
	 */
	private cl_mem idealBuffer;

	/**
	 * Holds parameters passed to the kernel; indexed by the PARRAY_*
	 * constants.
	 */
	private final int[] paramArray;

	/**
	 * A buffer to hold the parameters.
	 */
	private cl_mem paramBuffer;

	/**
	 * A buffer to hold the errors.
	 */
	private cl_mem errorBuffer;

	/**
	 * A buffer to hold the gradients.
	 */
	private cl_mem gradientOutBuffer;

	/**
	 * The gradient input buffer.
	 */
	private cl_mem gradientInBuffer;

	/**
	 * The network to train.
	 */
	private final FlatNetwork flat;

	/**
	 * The training errors for this workload, one per global work item.
	 */
	private float[] errors;

	/**
	 * The gradients, one per network weight.
	 */
	private final float[] gradients;

	/**
	 * The training data to use.
	 */
	private final EngineIndexableSet training;

	/**
	 * The device to train with.
	 */
	private final EncogCLDevice device;

	/**
	 * The length of the training data.
	 */
	private final int trainingLength;

	/**
	 * Construct a kernel to train the network. The full training set is
	 * copied into flat float arrays here so it can later be uploaded to the
	 * device in one transfer.
	 *
	 * @param device
	 *            The OpenCL device to use.
	 * @param flat
	 *            The network to train.
	 * @param training
	 *            The training data.
	 * @param tempDataSize
	 *            How much temp data.
	 */
	public KernelNetworkTrain(final EncogCLDevice device,
			final FlatNetwork flat, final EngineIndexableSet training,
			final int tempDataSize) {
		super(device, "org/encog/engine/resources/KernelNetTrain.txt",
				"NetworkTrain");

		this.training = training;
		this.trainingLength = (int) this.training.getRecordCount();
		this.device = device;
		this.flat = flat;
		this.weightInArray = new float[flat.getWeights().length];
		this.weightOutArray = new float[flat.getWeights().length];
		this.tempDataArray = new float[tempDataSize];
		this.gradients = new float[flat.getWeights().length];

		this.layerDeltaSize = 0;
		for (int i = 0; i < flat.getLayerCounts().length; i++) {
			this.layerDeltaSize += flat.getLayerCounts()[i];
		}

		final int inputSize = flat.getInputCount();
		final int idealSize = flat.getOutputCount();

		this.inputArray = new float[inputSize * this.trainingLength];
		this.idealArray = new float[idealSize * this.trainingLength];
		this.paramArray = new int[10];

		// Flatten every training record into the input/ideal arrays,
		// down-casting from double to float for the device.
		final EngineData pair = BasicEngineData.createPair(
				flat.getInputCount(), flat.getOutputCount());

		int inputIndex = 0;
		int idealIndex = 0;

		for (int i = 0; i < this.trainingLength; i++) {
			training.getRecord(i, pair);
			for (int col = 0; col < flat.getInputCount(); col++) {
				this.inputArray[inputIndex++] = (float) pair.getInputArray()[col];
			}

			for (int col = 0; col < flat.getOutputCount(); col++) {
				this.idealArray[idealIndex++] = (float) pair.getIdealArray()[col];
			}
		}
	}

	/**
	 * Assign the workgroup sizes based on the training set size. The global
	 * size is clamped to the training size; the local size is clamped to the
	 * device's maximum workgroup size.
	 *
	 * @param trainingSize
	 *            The training set size.
	 * @param requestedGlobalSize
	 *            The requested global size.
	 */
	public void assignWorkgroupSizes(final int trainingSize,
			final int requestedGlobalSize) {
		// Calculate the work-item dimensions
		final int threads = Math.min(trainingSize, requestedGlobalSize);
		setLocalWork(Math.min(getMaxWorkGroupSize(), threads));
		setGlobalWork(threads);
	}

	/**
	 * Calculate one iteration over the specified range: upload the current
	 * weights, gradients and parameters, execute the kernel, then read back
	 * errors, weights, temp data and gradients.
	 *
	 * @param start
	 *            The starting position to calculate for.
	 * @param size
	 *            The number of training items to process per call.
	 * @param learn
	 *            True, if we should learn.
	 * @param iterations
	 *            The number of iterations to execute.
	 */
	public void calculate(final int start, final int size,
			final boolean learn, final int iterations) {
		prepareKernel();

		this.paramArray[KernelNetworkTrain.PARRAY_LEARN] = learn ? 1 : 0;
		this.paramArray[KernelNetworkTrain.PARRAY_START] = start;
		this.paramArray[KernelNetworkTrain.PARRAY_ITEMS_PER] = size;
		this.paramArray[KernelNetworkTrain.PARRAY_ITERATIONS] = iterations;

		EngineArray.arrayCopy(this.flat.getWeights(), this.weightInArray);

		setArg(0, this.paramBuffer);
		setArg(1, this.errorBuffer);
		setArg(2, this.layerIndexBuffer);
		setArg(3, this.layerCountBuffer);
		setArg(4, this.layerFeedCountBuffer);
		setArg(5, this.weightIndexBuffer);
		setArg(6, this.inputBuffer);
		setArg(7, this.idealBuffer);
		setArg(8, this.weightInArrayBuffer);
		setArg(9, this.weightOutArrayBuffer);
		setArg(10, this.gradientOutBuffer);
		setArg(11, this.activationTypeBuffer);
		setArg(12, this.tempDataInBuffer);
		setArg(13, this.tempDataOutBuffer);
		setArg(14, this.gradientInBuffer);

		try {
			final EncogCLQueue queue = this.device.getQueue();

			EngineArray.fill(this.gradients, 0);

			// NOTE: the learn flag and start index were already written to
			// paramArray[PARRAY_LEARN] and paramArray[PARRAY_START] above;
			// the original duplicated those stores here with raw indexes
			// 3 and 4, which has been removed as dead code.

			queue.array2Buffer(this.weightInArray, this.weightInArrayBuffer);
			queue.array2Buffer(this.tempDataArray, this.tempDataInBuffer);
			queue.array2Buffer(this.gradients, this.gradientInBuffer);
			queue.array2Buffer(this.paramArray, this.paramBuffer);

			// Execute the kernel
			queue.execute(this);
			queue.waitFinish();

			// Read the results
			queue.buffer2Array(this.errorBuffer, this.errors);
			queue.buffer2Array(this.weightOutArrayBuffer, this.weightOutArray);
			queue.buffer2Array(this.tempDataOutBuffer, this.tempDataArray);
			queue.buffer2Array(this.gradientOutBuffer, this.gradients);

		} catch (final CLException e) {
			// Constant-first equals: CLException messages can be null, and
			// the original e.getMessage().equals(...) would then NPE.
			if ("CL_OUT_OF_RESOURCES".equals(e.getMessage())) {
				throw new OutOfOpenCLResources(e);
			} else {
				throw new OpenCLError(e);
			}
		} catch (final Exception e) {
			throw new OpenCLError(e);
		}
	}

	/**
	 * Compile the kernel. The network's activation function is injected into
	 * the OpenCL source as ACTIVATION/DERIVATIVE macros before compilation.
	 *
	 * @param options
	 *            The options.
	 * @param profile
	 *            The OpenCL training profile.
	 * @param network
	 *            The network to compile for.
	 */
	public void compile(final Map<String, String> options,
			final OpenCLTrainingProfile profile, final FlatNetwork network) {

		// Only the first layer's activation function is used for the whole
		// kernel; mixed-activation networks are not supported here.
		final ActivationFunction activation = network
				.getActivationFunctions()[0];

		final StringBuilder source = new StringBuilder();
		source.append("#define ACTIVATION(x,slope)");
		source.append(activation.getOpenCLExpression(false));
		source.append("\r\n");
		source.append("#define DERIVATIVE(x,slope)");
		source.append(activation.getOpenCLExpression(true));
		source.append("\r\n");
		source.append(ResourceLoader.loadString(getSourceName()));

		setCLSource(source.toString());

		compile(options);
		profile.calculateKernelParams(this, this.training);
		// setup
		init(profile);
	}

	/**
	 * @return the errors
	 */
	public float[] getErrors() {
		return this.errors;
	}

	/**
	 * @return the tempDataArray
	 */
	public float[] getTempDataArray() {
		return this.tempDataArray;
	}

	/**
	 * @return the weightOutArray
	 */
	public float[] getWeightOutArray() {
		return this.weightOutArray;
	}

	/**
	 * Setup the kernel: size the error/gradient arrays from the profile and
	 * create every OpenCL buffer the kernel arguments reference.
	 *
	 * @param profile
	 *            The OpenCL training profile.
	 */
	public void init(final OpenCLTrainingProfile profile) {

		final int errorSize = profile.getKernelGlobalWorkgroup();
		final int gradientSize = profile.getKernelGlobalWorkgroup()
				* this.flat.getWeights().length;

		this.errors = new float[errorSize];

		this.paramArray[PARRAY_INPUT_COUNT] = this.flat.getInputCount();
		this.paramArray[PARRAY_OUTPUT_COUNT] = this.flat.getOutputCount();
		this.paramArray[PARRAY_LAYER_COUNT] = this.flat.getLayerCounts().length;

		// create the buffers
		this.inputBuffer = createArrayReadOnly(this.inputArray);
		this.idealBuffer = createArrayReadOnly(this.idealArray);
		this.errorBuffer = createFloatArrayWriteOnly(errorSize);
		this.gradientOutBuffer = createFloatArrayWriteOnly(gradientSize);
		this.gradientInBuffer = createArrayReadOnly(this.gradients);
		this.paramBuffer = createArrayReadOnly(this.paramArray);
		this.layerIndexBuffer = createArrayReadOnly(this.flat.getLayerIndex());
		this.layerCountBuffer = createArrayReadOnly(this.flat.getLayerCounts());
		this.layerFeedCountBuffer = createArrayReadOnly(this.flat
				.getLayerFeedCounts());
		this.weightInArrayBuffer = createArrayReadOnly(this.weightInArray);
		this.weightOutArrayBuffer = createFloatArrayWriteOnly(
				this.weightInArray.length);
		this.weightIndexBuffer = createArrayReadOnly(this.flat.getWeightIndex());
		// NOTE(review): this fills the "activation type" buffer from
		// getLayerCounts(), not from an activation-type array — looks like a
		// copy-paste slip, but FlatNetwork's API is not visible here, so the
		// original behavior is preserved. Confirm against FlatNetwork.
		this.activationTypeBuffer = createArrayReadOnly(this.flat
				.getLayerCounts());
		this.tempDataInBuffer = createArrayReadOnly(this.tempDataArray);
		this.tempDataOutBuffer = createFloatArrayWriteOnly(
				this.tempDataArray.length);
	}

	/**
	 * Release the kernel and all buffers.
	 */
	@Override
	public void release() {
		super.release();
		releaseBuffer(this.activationTypeBuffer);
		releaseBuffer(this.errorBuffer);
		releaseBuffer(this.gradientOutBuffer);
		releaseBuffer(this.gradientInBuffer);
		releaseBuffer(this.idealBuffer);
		releaseBuffer(this.inputBuffer);
		releaseBuffer(this.layerCountBuffer);
		releaseBuffer(this.layerFeedCountBuffer);
		releaseBuffer(this.layerIndexBuffer);
		releaseBuffer(this.paramBuffer);
		releaseBuffer(this.tempDataInBuffer);
		releaseBuffer(this.tempDataOutBuffer);
		releaseBuffer(this.weightInArrayBuffer);
		releaseBuffer(this.weightIndexBuffer);
		releaseBuffer(this.weightOutArrayBuffer);
	}

	/**
	 * @param tempDataArray
	 *            the tempDataArray to set
	 */
	public void setTempDataArray(final float[] tempDataArray) {
		this.tempDataArray = tempDataArray;
	}
}