package org.encog.examples.neural.opencl;

import org.encog.ConsoleStatusReportable;
import org.encog.Encog;
import org.encog.engine.util.Stopwatch;
import org.encog.neural.data.NeuralDataSet;
import org.encog.neural.networks.BasicNetwork;
import org.encog.neural.networks.training.concurrent.ConcurrentTrainingManager;
import org.encog.neural.networks.training.concurrent.jobs.RPROPJob;
import org.encog.neural.networks.training.concurrent.jobs.TrainingJob;
import org.encog.neural.networks.training.strategy.end.EndIterationsStrategy;
import org.encog.util.benchmark.RandomTrainingFactory;
import org.encog.util.logging.Logging;
import org.encog.util.simple.EncogUtility;

/**
 * Demonstrates concurrent training. Makes use of multiple OpenCL devices, as
 * well as your CPU.
 */
public class BenchmarkConcurrent {

	public static final int OUTPUT_SIZE = 2;
	public static final int INPUT_SIZE = 10;
	public static final int HIDDEN1 = 6;
	public static final int HIDDEN2 = 0;
	public static final int TRAINING_SIZE = 1000;
	public static final int ITERATIONS = 1000;
	public static final int JOBS = 50;

	/**
	 * Iterations per cycle. Higher numbers load up OpenCL more, but setting
	 * this too high may time out your GPU, if your OS enforces a timeout. Do
	 * not set this higher than 1 unless you are using the maximum OpenCL
	 * ratio of 1.0; otherwise it is pointless and will throw an error.
	 */
	public static final int ITERATIONS_PER = 1;

	public static final double LOCAL_RATIO = 1.0;
	public static final int GLOBAL_RATIO = 1;
	public static final double SEGMENTATION_RATIO = 1.0;

	/**
	 * Max CPU cores to use: 0 = autodetect, -1 = no CPU cores, any other
	 * number is the count of cores to use.
	 */
	public static final int MAX_CORES = 0;

	/**
	 * Create one RPROP training job over a randomly generated training set
	 * and add it to the manager's queue.
	 */
	public TrainingJob generateTrainingJob(ConcurrentTrainingManager manager) {
		// Generate a random training set; the first argument is the seed, so
		// every job trains on identical data.
		NeuralDataSet training = RandomTrainingFactory.generate(1000,
				TRAINING_SIZE, INPUT_SIZE, OUTPUT_SIZE, -1, 1);
		BasicNetwork network = EncogUtility.simpleFeedForward(
				training.getInputSize(), HIDDEN1, HIDDEN2,
				training.getIdealSize(), true);
		network.reset();

		RPROPJob job = new RPROPJob(network, training, true, LOCAL_RATIO,
				GLOBAL_RATIO, SEGMENTATION_RATIO, ITERATIONS_PER);
		job.getStrategies().add(new EndIterationsStrategy(ITERATIONS));
		manager.addTrainingJob(job);
		return job;
	}

	/**
	 * Queue up the jobs, run them on the detected performers, and return the
	 * total elapsed time in seconds.
	 */
	public int benchmark(boolean splitCores) {
		Stopwatch stopWatch = new Stopwatch();
		stopWatch.start();

		ConcurrentTrainingManager manager = ConcurrentTrainingManager
				.getInstance();

		manager.setReport(new ConsoleStatusReportable());
		manager.detectPerformers(splitCores, MAX_CORES);

		System.out.println("Device(s) in use:");
		System.out.println(manager.toString());

		manager.clearQueue();
		for (int i = 0; i < JOBS; i++) {
			generateTrainingJob(manager);
		}

		manager.start();
		System.out.println("Manager has started.");
		manager.join();
		System.out.println("Manager has stopped.");

		stopWatch.stop();
		return (int) (stopWatch.getElapsedMilliseconds() / 1000);
	}

	/**
	 * Benchmark three configurations: CPU only, CPU with split cores, and
	 * CPU combined with OpenCL.
	 */
	public void run() {
		Logging.stopConsoleLogging();

		System.out.println("* * * Performing CPU-Only Test * * *");
		int cpu = benchmark(false);
		System.out.println("CPU-only took: " + cpu + " seconds.");
		System.out.println();

		System.out.println("* * * Performing CPU-Only (split cores) Test * * *");
		int cpuSplit = benchmark(true);
		System.out.println("CPU-only (split cores) took: " + cpuSplit
				+ " seconds.");
		System.out.println();

		System.out.println("* * * Performing OpenCL Test * * *");
		// Initialize OpenCL once, just before the OpenCL benchmark.
		Encog.getInstance().initCL();
		int gpu = benchmark(true);
		System.out.println("OpenCL took: " + gpu + " seconds.");
		System.out.println();
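		// Summary. Note that benchmark() returns whole seconds (elapsed
		// milliseconds divided by 1000), so the totals below are in seconds.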
System.out.println("Final times:"); System.out.println("CPU-Only : " + cpu + "ms"); System.out.println("CPU-Split Cores: " + cpuSplit + "ms"); System.out.println("CPU and OpenCL : " + gpu + "ms"); } public static void main(String[] args) { BenchmarkConcurrent program = new BenchmarkConcurrent(); program.run(); Encog.getInstance().shutdown(); } }