package org.deeplearning4j.examples.multigpu.w2vsentiment;

import org.apache.commons.io.FilenameUtils;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.PerformanceListener;
import org.deeplearning4j.parallelism.ParallelWrapper;
import org.nd4j.jita.conf.CudaEnvironment;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.ExistingMiniBatchDataSetIterator;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.slf4j.Logger;

import java.io.File;

/**
 * Example: Given a movie review (raw text), classify that movie review as either positive or negative based on the
 * words it contains. This is done by combining Word2Vec vectors and a recurrent neural network model. Each word in a
 * review is vectorized (using the Word2Vec model) and fed into a recurrent neural network.
 * Training data is the "Large Movie Review Dataset" from http://ai.stanford.edu/~amaas/data/sentiment/
 * This data set contains 25,000 training reviews + 25,000 testing reviews
 *
 * Process:
 * 1. Automatic on first run of example: Download data (movie reviews) + extract
 * 2. Load existing Word2Vec model (for example: Google News word vectors. You will have to download this MANUALLY)
 * 3. Load each review. Convert words to vectors + reviews to sequences of vectors
 * 4. Train network
 *
 * With the current configuration, gives approx. 83% accuracy after 1 epoch. Better performance may be possible with
 * additional tuning.
 *
 * NOTE / INSTRUCTIONS:
 * You will have to download the Google News word vector model manually (~1.5GB).
 * The Google News vector model is available here: https://code.google.com/p/word2vec/
 * Download the GoogleNews-vectors-negative300.bin.gz file.
 * Then: set the WORD_VECTORS_PATH field to point to this location.
 *
 * @author Alex Black
 */
public class Word2VecSentimentRNN {

    private static final Logger log = org.slf4j.LoggerFactory.getLogger(Word2VecSentimentRNN.class);

    public static final String TRAIN_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_w2vSentiment_train/");
    public static final String TEST_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_w2vSentiment_test/");

    public static void main(String[] args) throws Exception {
        int vectorSize = 300;   //Size of the word vectors. 300 in the Google News model
        int nEpochs = 1;        //Number of epochs (full passes of training data) to train on

        // Nd4j.setDataType(DataBuffer.Type.DOUBLE);

        CudaEnvironment.getInstance().getConfiguration()
            // key option enabled
            .allowMultiGPU(true)
            // we're allowing larger memory caches
            .setMaximumDeviceCache(2L * 1024L * 1024L * 1024L)
            // cross-device access is used for faster model averaging over pcie
            .allowCrossDeviceAccess(true);

        //Set up network configuration
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(Updater.ADAM).adamMeanDecay(0.9).adamVarDecay(0.999)
            .regularization(true).l2(1e-5)
            .weightInit(WeightInit.XAVIER)
            .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
            .learningRate(2e-2)
            .list()
            .layer(0, new GravesLSTM.Builder().nIn(vectorSize).nOut(256)
                .activation(Activation.TANH).build())
            .layer(1, new RnnOutputLayer.Builder().activation(Activation.SOFTMAX)
                .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(256).nOut(2).build())
            .pretrain(false).backprop(true).build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        net.setListeners(new PerformanceListener(10, true));

        //DataSetIterators for training and testing respectively
        DataSetIterator train = new ExistingMiniBatchDataSetIterator(new File(TRAIN_PATH));
        DataSetIterator test = new ExistingMiniBatchDataSetIterator(new File(TEST_PATH));
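
        // ParallelWrapper handles data-parallel training: it keeps one copy of the model per
        // device, each worker fits on its own GPU, and parameters are averaged across workers
        // every `averagingFrequency` iterations (10 here, with scores reported after averaging).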
        ParallelWrapper pw = new ParallelWrapper.Builder<>(net)
            .prefetchBuffer(16 * Nd4j.getAffinityManager().getNumberOfDevices())
            .reportScoreAfterAveraging(true)
            .averagingFrequency(10)
            .useLegacyAveraging(false)
            .useMQ(true)
            .workers(Nd4j.getAffinityManager().getNumberOfDevices())
            .build();

        log.info("Starting training...");
        for (int i = 0; i < nEpochs; i++) {
            pw.fit(train);
            train.reset();
        }

        log.info("Starting evaluation...");

        //Run evaluation. This is on 25k reviews, so can take some time
        Evaluation evaluation = new Evaluation();
        while (test.hasNext()) {
            DataSet t = test.next();
            INDArray features = t.getFeatureMatrix();
            INDArray labels = t.getLabels();
            INDArray inMask = t.getFeaturesMaskArray();
            INDArray outMask = t.getLabelsMaskArray();
            INDArray predicted = net.output(features, false, inMask, outMask);

            evaluation.evalTimeSeries(labels, predicted, outMask);
        }
        System.out.println(evaluation.stats());
    }
}
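
/*
 * The iterators above expect mini-batches that were serialized to disk beforehand, one
 * DataSet per file under TRAIN_PATH / TEST_PATH. A minimal sketch of how such files could
 * be produced is below. It assumes some existing DataSetIterator "source" that yields
 * featurized review batches, and it assumes ExistingMiniBatchDataSetIterator's default
 * "dataset-%d.bin" file naming; both the source iterator and the naming convention are
 * illustrative assumptions, not part of this example.
 */
class MiniBatchSaverSketch {

    static void saveBatches(DataSetIterator source, File dir) {
        dir.mkdirs();
        int idx = 0;
        while (source.hasNext()) {
            // DataSet.save(File) writes features, labels and mask arrays to a single binary file
            source.next().save(new File(dir, "dataset-" + (idx++) + ".bin"));
        }
    }
}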