package org.deeplearning4j.examples.multigpu.w2vsentiment;

import org.apache.commons.io.FilenameUtils;
import org.deeplearning4j.eval.Evaluation;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.PerformanceListener;
import org.deeplearning4j.parallelism.ParallelWrapper;
import org.nd4j.jita.conf.CudaEnvironment;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.ExistingMiniBatchDataSetIterator;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;

/**
 * Example: Given a movie review (raw text), classify that movie review as either positive or negative based on
 * the words it contains. This is done by combining Word2Vec vectors and a recurrent neural network model: each
 * word in a review is vectorized (using the Word2Vec model) and fed into a recurrent neural network.
 * Training data is the "Large Movie Review Dataset" from http://ai.stanford.edu/~amaas/data/sentiment/
 * This data set contains 25,000 training reviews + 25,000 testing reviews.
 *
 * Process:
 * 1. Automatic on first run of example: Download data (movie reviews) + extract
 * 2. Load existing Word2Vec model (for example: Google News word vectors. You will have to download this MANUALLY)
 * 3. Load each review. Convert words to vectors + reviews to sequences of vectors
 * 4. Train network
 *
 * With the current configuration, gives approx. 83% accuracy after 1 epoch. Better performance may be possible with
 * additional tuning.
 *
 * NOTE / INSTRUCTIONS:
 * You will have to download the Google News word vector model manually (~1.5GB).
 * The Google News vector model is available here: https://code.google.com/p/word2vec/
 * Download the GoogleNews-vectors-negative300.bin.gz file, then set the WORD_VECTORS_PATH field (defined in the
 * accompanying data-preparation step) to point to this location, and run that step first: this class expects
 * pre-saved DataSets under TRAIN_PATH and TEST_PATH.
 *
 * @author Alex Black
 */
public class Word2VecSentimentRNN {
    private static final Logger log = LoggerFactory.getLogger(Word2VecSentimentRNN.class);

    /** Locations of the DataSets pre-saved by the data-preparation step */
    public static final String TRAIN_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_w2vSentiment_train/");
    public static final String TEST_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_w2vSentiment_test/");

    public static void main(String[] args) throws Exception {
        int vectorSize = 300;   //Size of the word vectors. 300 in the Google News model
        int nEpochs = 1;        //Number of epochs (full passes of training data) to train on

        // Nd4j.setDataType(DataBuffer.Type.DOUBLE);
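
        // Note: the minibatch size is not set here. It was fixed when the DataSets were pre-saved;
        // ExistingMiniBatchDataSetIterator returns the serialized batches as-is.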
        CudaEnvironment.getInstance().getConfiguration()
            // key option enabled
            .allowMultiGPU(true)
            // we're allowing larger memory caches
            .setMaximumDeviceCache(2L * 1024L * 1024L * 1024L)
            // cross-device access is used for faster model averaging over PCIe
            .allowCrossDeviceAccess(true);
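
        // The cache limit above is given in bytes (2GB here). This block assumes the CUDA backend
        // (nd4j-cuda-*) is on the classpath; CudaEnvironment is not available on the CPU backend.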

        //Set up network configuration
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(Updater.ADAM).adamMeanDecay(0.9).adamVarDecay(0.999)
            .regularization(true).l2(1e-5)
            .weightInit(WeightInit.XAVIER)
            .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
            .gradientNormalizationThreshold(1.0)
            .learningRate(2e-2)
            .list()
            .layer(0, new GravesLSTM.Builder().nIn(vectorSize).nOut(256)
                .activation(Activation.TANH).build())
            .layer(1, new RnnOutputLayer.Builder().activation(Activation.SOFTMAX)
                .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(256).nOut(2).build())
            .pretrain(false).backprop(true).build();
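
        // DL4J's recurrent layers expect input of shape [miniBatchSize, vectorSize, sequenceLength],
        // with mask arrays (used below) marking real vs. padded timesteps in variable-length reviews.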
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        net.setListeners(new PerformanceListener(10, true));

        //DataSetIterators for training and testing respectively.
        //These load the DataSets that were pre-saved to TRAIN_PATH and TEST_PATH by the data-preparation step.
        DataSetIterator train = new ExistingMiniBatchDataSetIterator(new File(TRAIN_PATH));
        DataSetIterator test = new ExistingMiniBatchDataSetIterator(new File(TEST_PATH));
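
        // For reference, a preparation step would save batches in the file layout this iterator expects,
        // roughly as follows (hypothetical sketch; "dataset-%d.bin" is assumed to be the iterator's default file pattern):
        //   trainData.save(new File(TRAIN_PATH, String.format("dataset-%d.bin", batchIdx)));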

        ParallelWrapper pw = new ParallelWrapper.Builder<>(net)
            // DataSet prefetching: scale the buffer with the number of available devices
            .prefetchBuffer(16 * Nd4j.getAffinityManager().getNumberOfDevices())
            // if set to true, the model score is reported after each averaging step
            .reportScoreAfterAveraging(true)
            // rare averaging improves throughput, but averaging too rarely may reduce accuracy
            .averagingFrequency(10)
            .useLegacyAveraging(false)
            .useMQ(true)
            // one training worker per available device
            .workers(Nd4j.getAffinityManager().getNumberOfDevices())
            .build();
log.info("Starting training...");
for (int i = 0; i < nEpochs; i++) {
pw.fit(train);
train.reset();
}
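
        // Assumption: this ParallelWrapper version exposes shutdown(). Stopping the workers here releases
        // their resources; evaluation below runs on the single underlying network.
        pw.shutdown();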
log.info("Starting evaluation...");
//Run evaluation. This is on 25k reviews, so can take some time
Evaluation evaluation = new Evaluation();
while (test.hasNext()) {
DataSet t = test.next();
INDArray features = t.getFeatureMatrix();
INDArray lables = t.getLabels();
INDArray inMask = t.getFeaturesMaskArray();
INDArray outMask = t.getLabelsMaskArray();
INDArray predicted = net.output(features, false, inMask, outMask);
evaluation.evalTimeSeries(lables, predicted, outMask);
}
System.out.println(evaluation.stats());
    }
}