// OpenCLTrainingProfile.java example
//
// Explorer
// VAFusion2-master
/*
 * Encog(tm) Core v2.5 - Java Version
 * http://www.heatonresearch.com/encog/
 * http://code.google.com/p/encog-java/
 
 * Copyright 2008-2010 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *   
 * For more information on Heaton Research copyrights, licenses 
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */
package org.encog.engine.network.train.prop;

import org.encog.engine.EncogEngine;
import org.encog.engine.data.EngineIndexableSet;
import org.encog.engine.opencl.EncogCLDevice;
import org.encog.engine.opencl.exceptions.OpenCLError;
import org.encog.engine.opencl.kernels.EncogKernel;

/**
 * Specifies a training profile for an OpenCL training session. Includes the
 * following information.
 * 
 * device: The OpenCL device to use.
 * 
 * local ratio: The local workgroup is an OpenCL concept where the global work
 * group is broken into several local work groups. The bigger the local work
 * group the faster things will run. However, your OpenCL device will impose a
 * maximum local work group size. This ratio allows you to use a smaller local
 * work group, for example 0.5 would be half of the max size of the local work
 * group. You will almost always want to leave this value at the max 1.0. It is
 * rare that you might need to decrease it because of the GPU being overtaxed.
 * 
 * global ratio: The global work group must be a multiple of the local work
 * group. The default value is 1, which means local and global workgroups are
 * the same size. Do not set this value lower than 1. Values higher than 1 can
 * result in higher performance. For example, 2 would specify a global
 * workgroup twice the size of the local. Higher values will increase resource
 * load on the GPU and may crash.
 * 
 * segmentation ratio: The main purpose of this ratio is to allow you to scale
 * back on how long the kernels take to execute. For maximum performance leave
 * this value at the default 1.0 value. However, if your GPU is crashing,
 * setting it to a lower value can help. If you are running Encog on the same
 * GPU as your display uses, you may run into timeout issues if your kernel
 * takes too long to execute. Setting this ratio lower can help.
 * 
 * The ratios are fixed at construction time; the kernel* values are only
 * meaningful after {@link #calculateKernelParams(EncogKernel, EngineIndexableSet)}
 * has been called.
 */
public class OpenCLTrainingProfile {

	/**
	 * The OpenCL device to use. May be replaced through
	 * {@link #setDevice(EncogCLDevice)}.
	 */
	private EncogCLDevice device;

	/**
	 * The local ratio, validated by the constructor to lie in [0, 1].
	 */
	private final double localRatio;

	/**
	 * The global ratio, validated by the constructor to be at least 1.
	 */
	private final int globalRatio;

	/**
	 * The segmentation ratio, validated by the constructor to lie in [0, 1].
	 */
	private final double segmentationRatio;

	/**
	 * The calculated size of the global workgroup.
	 */
	private int kernelGlobalWorkgroup;

	/**
	 * The calculated size of the local workgroup.
	 */
	private int kernelLocalWorkgroup;

	/**
	 * The number of training items processed per call.
	 */
	private int kernelWorkPerCall;

	/**
	 * The number of calls to the kernel that will be made. The number of
	 * segments.
	 */
	private int kernelNumberOfCalls;

	/**
	 * The number of items in the remainder.
	 */
	private int kernelRemainder;

	/**
	 * The size of the global and local workgroups for the remainder.
	 */
	private int kernelRemainderGlobal;

	/**
	 * The number of training items processed per call in the remainder.
	 */
	private int kernelRemainderPer;

	/**
	 * Construct a training profile with the specified device and the value of
	 * one for all ratios.
	 * 
	 * @param device
	 *            The device to use.
	 */
	public OpenCLTrainingProfile(final EncogCLDevice device) {
		this(device, 1.0, 1, 1.0);
	}

	/**
	 * Construct a training profile, validating the supplied ratios.
	 * 
	 * @param device
	 *            The device to use.
	 * @param localRatio
	 *            The local ratio, must be between 0 and 1.0 inclusive.
	 * @param globalRatio
	 *            The global ratio, must be at least 1.
	 * @param segmentationRatio
	 *            The segmentation ratio, must be between 0 and 1.0 inclusive.
	 * @throws OpenCLError
	 *             If any ratio is NaN or outside of its permitted range.
	 */
	public OpenCLTrainingProfile(final EncogCLDevice device,
			final double localRatio, final int globalRatio,
			final double segmentationRatio) {
		super();
		this.device = device;

		// NaN compares false against every bound below, so it has to be
		// rejected explicitly or it would slip through validation.
		if (Double.isNaN(localRatio) || Double.isNaN(segmentationRatio)) {
			throw new OpenCLError(
					"The local and segmentation ratios must be numbers, NaN is not allowed.");
		}

		if ((localRatio < 0) || (globalRatio < 0) || (segmentationRatio < 0)) {
			throw new OpenCLError("None of the ratios can be below zero.");
		}

		if (localRatio > 1.0) {
			throw new OpenCLError(
					"The local ratio cannot be greater than 1.0.  That would cause the OpenCL device to have more local items than it can handle.");
		}

		if (globalRatio < 1) {
			throw new OpenCLError(
					"The global ratio cannot be less than 1.0.  That would cause the global work area to be less than a local work area.");
		}

		if (segmentationRatio > 1.0) {
			throw new OpenCLError(
					"The segmentation ratio cannot be greater than 1.0.  That would cause the trainer to require more training elements per iteration than exist.");
		}

		this.localRatio = localRatio;
		this.globalRatio = globalRatio;
		this.segmentationRatio = segmentationRatio;
	}

	/**
	 * Calculate the kernel values: workgroup sizes, work per call, number of
	 * calls and the remainder values. The results are stored in this object
	 * and exposed through the getKernel* methods.
	 * 
	 * @param kernel
	 *            The kernel to calculate for; its maximum workgroup size caps
	 *            the local workgroup.
	 * @param training
	 *            The training set; its record count is the amount of work to
	 *            divide up.
	 * @throws OpenCLError
	 *             If the training set contains no records.
	 */
	public void calculateKernelParams(final EncogKernel kernel,
			final EngineIndexableSet training) {
		final long recordCount = training.getRecordCount();

		// With no records every division below would be by zero.
		if (recordCount < 1) {
			throw new OpenCLError(
					"Cannot calculate kernel parameters for an empty training set.");
		}

		// The local workgroup can be no larger than the device allows, and
		// there is no point making it larger than the training set.
		final int maxLocalSize = (int) Math.min(kernel.getMaxWorkGroupSize(),
				recordCount);

		boolean globalValuesAssigned = false;
		int workPerIteration;

		// there are two special cases

		// first, if the ratio is 1.0
		if (Math.abs(this.segmentationRatio - 1.0) < EncogEngine.DEFAULT_ZERO_TOLERANCE) {
			// if the segmentation ratio is 1, then we want NO SEGMENTATION
			// we will have to find a workgroup size that divides the training
			// set evenly.

			// start one too high so the first decrement has no net effect.
			int trialLocalSize = maxLocalSize + 1;

			// loop and try to find a local size small enough to be even.
			do {
				trialLocalSize--;
				assignWorkgroupSizes(trialLocalSize);
				this.kernelWorkPerCall = (int) ((recordCount / this.kernelGlobalWorkgroup) * this.segmentationRatio);
				workPerIteration = this.kernelGlobalWorkgroup
						* this.kernelWorkPerCall;
			} while ((workPerIteration != recordCount)
					&& (trialLocalSize > 1));

			// only a size that covers the training set exactly counts as
			// success; running out of trial sizes does not.
			globalValuesAssigned = (workPerIteration == recordCount);
		}

		// if we either wanted to segment, or the attempt to find an even group
		// size above failed
		if (!globalValuesAssigned) {
			// otherwise divide into segments
			assignWorkgroupSizes(maxLocalSize);

			// second special case, if the segmentation ratio is zero, then just
			// do one item per OpenCL call
			if (this.segmentationRatio < EncogEngine.DEFAULT_ZERO_TOLERANCE) {
				this.kernelWorkPerCall = 1;
			} else {
				// never less than one item per call, otherwise no work would
				// be done and the divisions below would be by zero.
				this.kernelWorkPerCall = Math
						.max(1,
								(int) ((recordCount / this.kernelGlobalWorkgroup) * this.segmentationRatio));
			}
		}

		// both factors are at least one here, so this is a safe divisor.
		workPerIteration = this.kernelGlobalWorkgroup * this.kernelWorkPerCall;

		this.kernelNumberOfCalls = (int) (recordCount / workPerIteration);
		this.kernelRemainder = (int) (recordCount % workPerIteration);

		this.kernelRemainderGlobal = this.kernelGlobalWorkgroup;

		// if there is no "final training set", because it lined up evenly,
		// still create one.
		// the final training set is where learning happens.
		if (this.kernelRemainder == 0) {
			this.kernelRemainder = this.kernelGlobalWorkgroup;
			this.kernelRemainderPer = this.kernelWorkPerCall;
			this.kernelNumberOfCalls--;
		} else {
			this.kernelRemainderPer = this.kernelRemainder
					/ this.kernelGlobalWorkgroup;
		}

		// does the remainder not have enough to fill the global workgroup?
		if (this.kernelRemainderPer == 0) {
			this.kernelRemainderPer = 1;
			this.kernelRemainderGlobal = this.kernelRemainder;
		}
	}

	/**
	 * Derive the local and global workgroup sizes from a candidate local size
	 * by applying the local and global ratios.
	 * 
	 * @param localSize
	 *            The candidate local workgroup size, before the local ratio is
	 *            applied.
	 */
	private void assignWorkgroupSizes(final int localSize) {
		// scaling by a small local ratio truncates to zero; keep at least one
		// work item so the global workgroup is never an invalid divisor.
		this.kernelLocalWorkgroup = Math.max(1,
				(int) (localSize * this.localRatio));
		this.kernelGlobalWorkgroup = this.kernelLocalWorkgroup
				* this.globalRatio;
	}

	/**
	 * @return The device to use.
	 */
	public EncogCLDevice getDevice() {
		return this.device;
	}

	/**
	 * @return The global ratio.
	 */
	public int getGlobalRatio() {
		return this.globalRatio;
	}

	/**
	 * @return The calculated size of the global workgroup.
	 */
	public int getKernelGlobalWorkgroup() {
		return this.kernelGlobalWorkgroup;
	}

	/**
	 * @return The calculated size of the local workgroup.
	 */
	public int getKernelLocalWorkgroup() {
		return this.kernelLocalWorkgroup;
	}

	/**
	 * @return The number of calls to the kernel that will be made. The number
	 *         of segments.
	 */
	public int getKernelNumberOfCalls() {
		return this.kernelNumberOfCalls;
	}

	/**
	 * @return The number of items in the remainder.
	 */
	public int getKernelRemainder() {
		return this.kernelRemainder;
	}

	/**
	 * @return The size of the global and local workgroups for the remainder.
	 */
	public int getKernelRemainderGlobal() {
		return this.kernelRemainderGlobal;
	}

	/**
	 * @return The number of training items processed per call in the remainder.
	 */
	public int getKernelRemainderPer() {
		return this.kernelRemainderPer;
	}

	/**
	 * @return The number of training items processed per call.
	 */
	public int getKernelWorkPerCall() {
		return this.kernelWorkPerCall;
	}

	/**
	 * @return The local ratio.
	 */
	public double getLocalRatio() {
		return this.localRatio;
	}

	/**
	 * @return The segmentation ratio.
	 */
	public double getSegmentationRatio() {
		return this.segmentationRatio;
	}

	/**
	 * Set the device to use.
	 * 
	 * @param device
	 *            The device to use.
	 */
	public void setDevice(final EncogCLDevice device) {
		this.device = device;
	}

	/**
	 * @return All internal values as a string, one "label: value" pair per
	 *         line. A missing device is reported as "null".
	 */
	@Override
	public String toString() {
		final StringBuilder result = new StringBuilder();
		result.append("OpenCL Profile:\n");
		result.append("Local Ratio: ").append(this.localRatio).append("\n");
		result.append("Number of global work items: ")
				.append(this.globalRatio).append("\n");
		result.append("Segmentation Ratio: ").append(this.segmentationRatio)
				.append("\n");
		// append(Object) tolerates a null device instead of throwing.
		result.append("Device: ").append(this.device).append("\n");

		result.append("kernelGlobalWorkgroup: ")
				.append(this.kernelGlobalWorkgroup).append("\n");
		result.append("kernelLocalWorkgroup: ")
				.append(this.kernelLocalWorkgroup).append("\n");
		result.append("kernelWorkPerCall: ").append(this.kernelWorkPerCall)
				.append("\n");
		result.append("kernelNumberOfCalls: ")
				.append(this.kernelNumberOfCalls).append("\n");
		result.append("kernelRemainder: ").append(this.kernelRemainder)
				.append("\n");
		result.append("kernelRemainderGlobal: ")
				.append(this.kernelRemainderGlobal).append("\n");
		result.append("kernelRemainderPer: ").append(this.kernelRemainderPer)
				.append("\n");

		return result.toString();
	}

}