package edu.stanford.nlp.benchmarks;
import edu.stanford.nlp.classify.Dataset;
import edu.stanford.nlp.classify.LinearClassifierFactory;
import edu.stanford.nlp.classify.RVFDataset;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.optimization.DiffFunction;
import edu.stanford.nlp.optimization.Minimizer;
import edu.stanford.nlp.optimization.SGDMinimizer;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.Factory;
import java.util.*;
/**
 * Created by keenon on 6/19/15.
 *
 * Quick-and-dirty (and not entirely representative) benchmarks for judging improvement as we optimize things.
 */
public class Benchmarks {
    /**
     * 67% of time spent in LogConditionalObjectiveFunction.rvfcalculate()
     * 29% of time spent in dataset construction (11% in RVFDataset.addFeatures(), 7% in RVF incrementCount(), 11% rest)
     *
     * Single threaded: 4700 ms
     * Multi threaded: 700 ms
     *
     * With the same data for every datum (seed 42): 245 ms
     * With accesses reordered for caching (see testAdjacency() below): 195 ms
     * Down to 80% of the time -- not huge, but a win nonetheless
     *
     * With 8 CPUs, a 6.7x speedup -- almost, but not quite, linear; pretty good
     */
public static void benchmarkRVFLogisticRegression() {
RVFDataset<String, String> data = new RVFDataset<>();
for (int i = 0; i < 10000; i++) {
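            // NOTE: re-seeding inside the loop is deliberate -- every datum draws the
            // identical random sequence (so cl is the same for every i), which is the
            // "same data, seed 42" case described above.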
Random r = new Random(42);
Counter<String> features = new ClassicCounter<>();
boolean cl = r.nextBoolean();
for (int j = 0; j < 1000; j++) {
double value;
if (cl && i % 2 == 0) {
value = (r.nextDouble()*2.0)-0.6;
}
else {
value = (r.nextDouble()*2.0)-1.4;
}
features.incrementCount("f" + j, value);
}
data.add(new RVFDatum<>(features, "target:" + cl));
}
LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();
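        // Wall-clock timing with currentTimeMillis(); for the sub-100 ms runs noted
        // in the javadoc, System.nanoTime() would be a finer-grained clock.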
long msStart = System.currentTimeMillis();
factory.trainClassifier(data);
long delay = System.currentTimeMillis() - msStart;
System.out.println("Training took "+delay+" ms");
}
    /**
     * 57% of time spent in LogConditionalObjectiveFunction.calculateCLBatch()
     * 22% of time spent constructing datums (expensive)
     *
     * Single threaded: 4100 ms
     * Multi threaded: 600 ms
     *
     * With the same data for every datum (seed 42): 52 ms
     * With accesses reordered for caching: 38 ms
     * Down to 73% of the time
     *
     * With 8 CPUs, a 6.8x speedup -- basically the same as with RVFDatum
     */
public static void benchmarkLogisticRegression() {
Dataset<String, String> data = new Dataset<>();
for (int i = 0; i < 10000; i++) {
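            // Deliberately re-seeded per datum, as in benchmarkRVFLogisticRegression().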
Random r = new Random(42);
Set<String> features = new HashSet<>();
boolean cl = r.nextBoolean();
for (int j = 0; j < 1000; j++) {
if (cl && i % 2 == 0) {
if (r.nextDouble() > 0.3) {
features.add("f:"+j+":true");
}
else {
features.add("f:"+j+":false");
}
}
else {
if (r.nextDouble() > 0.3) {
features.add("f:" + j + ":false");
}
else {
features.add("f:"+j+":false");
}
}
}
data.add(new BasicDatum<String, String>(features, "target:" + cl));
}
LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();
long msStart = System.currentTimeMillis();
factory.trainClassifier(data);
long delay = System.currentTimeMillis() - msStart;
System.out.println("Training took "+delay+" ms");
}
    /**
     * 29% in FactorTable.getValue()
     * 28% in CRFCliqueTree.getCalibratedCliqueTree()
     * 12.6% waiting for threads
     *
     * Single threaded: 15000 ms - 26000 ms
     * Multi threaded: 4500 ms - 7000 ms
     *
     * With 8 CPUs, a 3.3x - 3.7x speedup at around 800% utilization
     */
public static void benchmarkCRF() {
Properties props = new Properties();
props.setProperty("macro", "true"); // use a generic CRF configuration
props.setProperty("useIfInteger", "true");
props.setProperty("featureFactory", "edu.stanford.nlp.benchmarks.BenchmarkFeatureFactory");
props.setProperty("saveFeatureIndexToDisk", "false");
        CRFClassifier<CoreLabel> crf = new CRFClassifier<>(props);
Random r = new Random(42);
List<List<CoreLabel>> data = new ArrayList<>();
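        // Build 100 synthetic sentences of 20 tokens each; tags alternate by token
        // position and are flipped ~30% of the time as label noise.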
for (int i = 0; i < 100; i++) {
List<CoreLabel> sentence = new ArrayList<>();
for (int j = 0; j < 20; j++) {
CoreLabel l = new CoreLabel();
l.setWord("j:"+j);
                boolean tag = (j % 2 == 0) ^ (r.nextDouble() > 0.7);
l.set(CoreAnnotations.AnswerAnnotation.class, "target:"+tag);
sentence.add(l);
}
data.add(sentence);
}
long msStart = System.currentTimeMillis();
crf.train(data);
long delay = System.currentTimeMillis() - msStart;
System.out.println("Training took "+delay+" ms");
}
public static void benchmarkSGD() {
Dataset<String, String> data = new Dataset<>();
for (int i = 0; i < 10000; i++) {
Random r = new Random(42);
Set<String> features = new HashSet<>();
boolean cl = r.nextBoolean();
for (int j = 0; j < 1000; j++) {
if (cl && i % 2 == 0) {
if (r.nextDouble() > 0.3) {
features.add("f:"+j+":true");
}
else {
features.add("f:"+j+":false");
}
}
else {
if (r.nextDouble() > 0.3) {
features.add("f:" + j + ":false");
}
else {
features.add("f:"+j+":false");
}
}
}
data.add(new BasicDatum<String, String>(features, "target:" + cl));
}
LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();
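        // Swap the factory's default minimizer for plain stochastic gradient descent.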
factory.setMinimizerCreator(new Factory<Minimizer<DiffFunction>>() {
@Override
public Minimizer<DiffFunction> create() {
return new SGDMinimizer<DiffFunction>(0.1, 100, 0, 1000);
}
});
long msStart = System.currentTimeMillis();
factory.trainClassifier(data);
long delay = System.currentTimeMillis() - msStart;
System.out.println("Training took "+delay+" ms");
}
public static void benchmarkDatum() {
long msStart = System.currentTimeMillis();
Dataset<String, String> data = new Dataset<>();
for (int i = 0; i < 10000; i++) {
Random r = new Random(42);
Set<String> features = new HashSet<>();
boolean cl = r.nextBoolean();
for (int j = 0; j < 1000; j++) {
if (cl && i % 2 == 0) {
if (r.nextDouble() > 0.3) {
features.add("f:"+j+":true");
}
else {
features.add("f:"+j+":false");
}
}
else {
if (r.nextDouble() > 0.3) {
features.add("f:" + j + ":false");
}
else {
features.add("f:"+j+":false");
}
}
}
data.add(new BasicDatum<String, String>(features, "target:" + cl));
}
long delay = System.currentTimeMillis() - msStart;
System.out.println("Dataset construction took "+delay+" ms");
msStart = System.currentTimeMillis();
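        // Re-run the same loops without building feature strings or datums, so
        // comparing against the timing above isolates the construction cost.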
for (int i = 0; i < 10000; i++) {
Random r = new Random(42);
Set<String> features = new HashSet<>();
boolean cl = r.nextBoolean();
            for (int j = 0; j < 1000; j++) {
                r.nextDouble(); // same single draw per feature as above; result discarded
            }
}
delay = System.currentTimeMillis() - msStart;
System.out.println("MultiVector took "+delay+" ms");
}
    /**
     * On my machine this results in roughly a factor-of-two gain. A Java
     * double[10000][1000] is an array of row arrays, so scanning j within a
     * fixed row walks contiguous memory, while scanning rows for a fixed j
     * jumps between separately allocated rows and defeats the cache.
     */
public static void testAdjacency() {
double[][] sqar = new double[10000][1000];
Random r = new Random();
        double k = 0; // sink for the reads, so the JIT cannot eliminate the scans
long msStart = System.currentTimeMillis();
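        // Cache-friendly: for each randomly chosen row, the inner loop reads
        // sqar[loc][0..999] sequentially, making full use of each cache line.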
for (int i = 0; i < 10000; i++) {
int loc = r.nextInt(10000);
for (int j = 0; j < 1000; j++) {
                k += sqar[loc][j];
}
}
long delay = System.currentTimeMillis() - msStart;
System.out.println("Scanning with cache friendly lookups took "+delay+" ms");
int[] randLocs = new int[10000];
for (int i = 0; i < 10000; i++) {
randLocs[i] = r.nextInt(10000);
}
k = 0;
msStart = System.currentTimeMillis();
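        // Cache-unfriendly: for a fixed column j, each read touches a different
        // randomly chosen row array, so nearly every access misses the cache.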
for (int j = 0; j < 1000; j++) {
for (int i = 0; i < 10000; i++) {
                k += sqar[randLocs[i]][j];
}
}
delay = System.currentTimeMillis() - msStart;
System.out.println("Scanning with cache UNfriendly lookups took "+delay+" ms");
}
public static void main(String[] args) {
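        // Loop so that later iterations measure warmed-up (JIT-compiled) code.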
for (int i = 0; i < 100; i++) {
// benchmarkRVFLogisticRegression();
// benchmarkLogisticRegression();
benchmarkSGD();
// benchmarkCRF();
// testAdjacency();
}
}
}