/*
* Encog(tm) Core v2.5 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2010 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.engine.network.train.prop;
import org.encog.engine.EncogEngine;
import org.encog.engine.data.EngineIndexableSet;
import org.encog.engine.opencl.EncogCLDevice;
import org.encog.engine.opencl.exceptions.OpenCLError;
import org.encog.engine.opencl.kernels.EncogKernel;
/**
 * Specifies a training profile for an OpenCL training session. Includes the
 * following information.
 *
 * device: The device to use.
 *
 * local ratio: The local workgroup is an OpenCL concept where the global
 * workgroup is broken into several local workgroups. The bigger the local
 * workgroup the faster things will run. However, your OpenCL device will
 * impose a maximum local workgroup size. This ratio allows you to use a
 * smaller local workgroup, for example 0.5 would be half of the max size of
 * the local workgroup. You will almost always want to leave this value at the
 * max 1.0. It is rare that you might need to decrease it because of the GPU
 * being overtaxed.
 *
 * global ratio: The global workgroup must be a multiple of the local
 * workgroup. The default value is 1, which means local and global workgroups
 * are the same size. Do not set this value lower than 1. Values higher than 1
 * can result in higher performance. For example, 2 would specify a global
 * workgroup twice the size of the local. Higher values will increase resource
 * load on the GPU and may crash.
 *
 * segmentation ratio: The main purpose of this ratio is to allow you to scale
 * back on how long the kernels take to execute. For maximum performance leave
 * this value at the default 1.0 value. However, if your GPU is crashing,
 * setting it to a lower value can help. If you are running Encog on the same
 * GPU as your display uses, you may run into timeout issues if your kernel
 * takes too long to execute. Setting this ratio lower can help.
 *
 * The ratios are fixed at construction time; the kernel* values are derived
 * from them by {@link #calculateKernelParams(EncogKernel, EngineIndexableSet)}
 * and are all zero until that method has been called.
 */
public class OpenCLTrainingProfile {

    /**
     * The OpenCL device to use.
     */
    private EncogCLDevice device;

    /**
     * The local ratio.
     */
    private final double localRatio;

    /**
     * The global ratio.
     */
    private final int globalRatio;

    /**
     * The segmentation ratio.
     */
    private final double segmentationRatio;

    /**
     * The calculated size of the global workgroup.
     */
    private int kernelGlobalWorkgroup;

    /**
     * The calculated size of the local workgroup.
     */
    private int kernelLocalWorkgroup;

    /**
     * The number of training items processed per call.
     */
    private int kernelWorkPerCall;

    /**
     * The number of calls to the kernel that will be made. The number of
     * segments.
     */
    private int kernelNumberOfCalls;

    /**
     * The number of items in the remainder.
     */
    private int kernelRemainder;

    /**
     * The size of the global and local workgroups for the remainder.
     */
    private int kernelRemainderGlobal;

    /**
     * The number of training items processed per call in the remainder.
     */
    private int kernelRemainderPer;

    /**
     * Construct a training profile with the specified device and the value of
     * one for all ratios.
     *
     * @param device
     *            The device to use.
     */
    public OpenCLTrainingProfile(final EncogCLDevice device) {
        this(device, 1.0, 1, 1.0);
    }

    /**
     * Construct a training profile.
     *
     * @param device
     *            The device to use.
     * @param localRatio
     *            The local ratio, from 0.0 to 1.0.
     * @param globalRatio
     *            The global ratio, 1 or greater.
     * @param segmentationRatio
     *            The segmentation ratio, from 0.0 to 1.0.
     * @throws OpenCLError
     *             If any ratio is outside its allowed range.
     */
    public OpenCLTrainingProfile(final EncogCLDevice device,
            final double localRatio, final int globalRatio,
            final double segmentationRatio) {
        this.device = device;

        if ((localRatio < 0) || (globalRatio < 0) || (segmentationRatio < 0)) {
            throw new OpenCLError("None of the ratios can be below zero.");
        }

        if (localRatio > 1.0) {
            throw new OpenCLError(
                    "The local ratio cannot be greater than 1.0. That would cause the OpenCL device to have more local items than it can handle.");
        }

        if (globalRatio < 1) {
            throw new OpenCLError(
                    "The global ratio cannot be less than 1.0. That would cause the global work area to be less than a local work area.");
        }

        if (segmentationRatio > 1.0) {
            throw new OpenCLError(
                    "The segmentation ratio cannot be greater than 1.0. That would cause the trainer to require more training elements per iteration than exist.");
        }

        this.localRatio = localRatio;
        this.globalRatio = globalRatio;
        this.segmentationRatio = segmentationRatio;
    }

    /**
     * Calculate the kernel values (workgroup sizes, work per call, number of
     * calls and the remainder values) for the given kernel and training set.
     *
     * @param kernel
     *            The kernel to calculate for.
     * @param training
     *            The training params to use.
     * @throws OpenCLError
     *             If the training set has no records or the kernel reports a
     *             maximum workgroup size below one, since no workgroup could
     *             be sized in that case.
     */
    public void calculateKernelParams(final EncogKernel kernel,
            final EngineIndexableSet training) {
        final long recordCount = training.getRecordCount();
        final int maxLocalSize = (int) Math.min(kernel.getMaxWorkGroupSize(),
                recordCount);

        // Without at least one record and one work item every size below
        // would be zero and the divisions would fail with an unexplained
        // ArithmeticException; report the real cause instead.
        if (maxLocalSize < 1) {
            throw new OpenCLError(
                    "Can't calculate kernel parameters, the training set is empty or the kernel's maximum workgroup size is less than one.");
        }

        // there are two special cases
        // first, if the ratio is 1.0
        if (Math.abs(this.segmentationRatio - 1.0) < EncogEngine.DEFAULT_ZERO_TOLERANCE) {
            // if the segmentation ratio is 1, then we want NO SEGMENTATION
            calculateUnsegmented(recordCount, maxLocalSize);
        } else {
            // otherwise divide into segments
            calculateSegmented(recordCount, maxLocalSize);
        }

        final int workPerIteration = this.kernelGlobalWorkgroup
                * this.kernelWorkPerCall;
        this.kernelNumberOfCalls = (int) (recordCount / workPerIteration);
        this.kernelRemainder = (int) (recordCount % workPerIteration);
        this.kernelRemainderGlobal = this.kernelGlobalWorkgroup;

        // if there is no "final training set", because it lined up evenly,
        // still create one.
        // the final training set is where learning happens.
        if (this.kernelRemainder == 0) {
            this.kernelRemainder = this.kernelGlobalWorkgroup;
            this.kernelRemainderPer = this.kernelWorkPerCall;
            this.kernelNumberOfCalls--;
        } else {
            this.kernelRemainderPer = this.kernelRemainder
                    / this.kernelGlobalWorkgroup;
        }

        // does the remainder not have enough to fill the global tasks?
        if (this.kernelRemainderPer == 0) {
            this.kernelRemainderPer = 1;
            this.kernelRemainderGlobal = this.kernelRemainder;
        }
    }

    /**
     * Search downward from the largest allowed local size for workgroup
     * sizes that cover the training set evenly in a single iteration. If no
     * such size exists the values from the last trial (local size one) are
     * kept and the leftover records are handled as the remainder.
     *
     * @param recordCount
     *            The number of training records, at least one.
     * @param maxLocalSize
     *            The largest local size to try, at least one.
     */
    private void calculateUnsegmented(final long recordCount,
            final int maxLocalSize) {
        // start one too high so the first decrement lands on maxLocalSize
        int trialLocalSize = maxLocalSize + 1;
        int workPerIteration;

        // loop and try to find a local size small enough to be even.
        do {
            trialLocalSize--;
            assignWorkgroupSizes(trialLocalSize);
            this.kernelWorkPerCall = (int) ((recordCount / this.kernelGlobalWorkgroup) * this.segmentationRatio);
            workPerIteration = this.kernelGlobalWorkgroup
                    * this.kernelWorkPerCall;
        } while ((workPerIteration != recordCount) && (trialLocalSize > 1));

        // The global workgroup can be larger than the whole training set
        // (global ratio above the record count). Process one item per call
        // rather than dividing by a zero work size later on.
        if (this.kernelWorkPerCall == 0) {
            this.kernelWorkPerCall = 1;
        }
    }

    /**
     * Size the workgroups from the largest allowed local size and scale the
     * work per call by the segmentation ratio.
     *
     * @param recordCount
     *            The number of training records, at least one.
     * @param maxLocalSize
     *            The local size to scale by the local ratio, at least one.
     */
    private void calculateSegmented(final long recordCount,
            final int maxLocalSize) {
        assignWorkgroupSizes(maxLocalSize);

        // second special case, if the segmentation ratio is zero, then just
        // do one item per OpenCL call
        if (this.segmentationRatio < EncogEngine.DEFAULT_ZERO_TOLERANCE) {
            this.kernelWorkPerCall = 1;
        } else {
            this.kernelWorkPerCall = (int) ((recordCount / this.kernelGlobalWorkgroup) * this.segmentationRatio);
            if (this.kernelWorkPerCall == 0) {
                this.kernelWorkPerCall = 1;
            }
        }
    }

    /**
     * Set the local and global workgroup sizes from a base local size.
     *
     * @param baseLocalSize
     *            The local size before the local ratio is applied.
     */
    private void assignWorkgroupSizes(final int baseLocalSize) {
        // The cast truncates, so a small local ratio (including 0.0) would
        // give a local size of zero and later a division by zero. Never go
        // below a single work item.
        this.kernelLocalWorkgroup = Math.max(1,
                (int) (baseLocalSize * this.localRatio));
        this.kernelGlobalWorkgroup = this.kernelLocalWorkgroup
                * this.globalRatio;
    }

    /**
     * @return The device to use.
     */
    public EncogCLDevice getDevice() {
        return this.device;
    }

    /**
     * @return The global ratio.
     */
    public int getGlobalRatio() {
        return this.globalRatio;
    }

    /**
     * @return The calculated size of the global workgroup.
     */
    public int getKernelGlobalWorkgroup() {
        return this.kernelGlobalWorkgroup;
    }

    /**
     * @return The calculated size of the local workgroup.
     */
    public int getKernelLocalWorkgroup() {
        return this.kernelLocalWorkgroup;
    }

    /**
     * @return The number of calls to the kernel that will be made. The number
     *         of segments.
     */
    public int getKernelNumberOfCalls() {
        return this.kernelNumberOfCalls;
    }

    /**
     * @return The number of items in the remainder.
     */
    public int getKernelRemainder() {
        return this.kernelRemainder;
    }

    /**
     * @return The size of the global and local workgroups for the remainder.
     */
    public int getKernelRemainderGlobal() {
        return this.kernelRemainderGlobal;
    }

    /**
     * @return The number of training items processed per call in the remainder.
     */
    public int getKernelRemainderPer() {
        return this.kernelRemainderPer;
    }

    /**
     * @return The number of training items processed per call.
     */
    public int getKernelWorkPerCall() {
        return this.kernelWorkPerCall;
    }

    /**
     * @return The local ratio.
     */
    public double getLocalRatio() {
        return this.localRatio;
    }

    /**
     * @return The segmentation ratio.
     */
    public double getSegmentationRatio() {
        return this.segmentationRatio;
    }

    /**
     * Set the device to use.
     *
     * @param device
     *            The device to use.
     */
    public void setDevice(final EncogCLDevice device) {
        this.device = device;
    }

    /**
     * @return All internal values as a string.
     */
    @Override
    public String toString() {
        final StringBuilder result = new StringBuilder();
        result.append("OpenCL Profile:\n");
        result.append("Local Ratio: ").append(this.localRatio).append("\n");
        result.append("Number of global work items: ")
                .append(this.globalRatio).append("\n");
        result.append("Segmentation Ratio: ").append(this.segmentationRatio)
                .append("\n");
        // append the object itself so a null device prints as "null"
        // instead of throwing a NullPointerException
        result.append("Device: ").append(this.device).append("\n");
        result.append("kernelGlobalWorkgroup: ")
                .append(this.kernelGlobalWorkgroup).append("\n");
        result.append("kernelLocalWorkgroup: ")
                .append(this.kernelLocalWorkgroup).append("\n");
        result.append("kernelWorkPerCall: ").append(this.kernelWorkPerCall)
                .append("\n");
        result.append("kernelNumberOfCalls: ")
                .append(this.kernelNumberOfCalls).append("\n");
        result.append("kernelRemainder: ").append(this.kernelRemainder)
                .append("\n");
        result.append("kernelRemainderGlobal: ")
                .append(this.kernelRemainderGlobal).append("\n");
        result.append("kernelRemainderPer: ").append(this.kernelRemainderPer)
                .append("\n");
        return result.toString();
    }
}