package org.wikibrain.core.dao;
import com.jolbox.bonecp.BoneCPDataSource;
import org.apache.commons.io.FileUtils;
import org.jooq.TableField;
import org.wikibrain.core.dao.sql.FastLoader;
import org.wikibrain.core.dao.sql.WpDataSource;
import org.wikibrain.core.jooq.Tables;
import java.io.File;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
* @author Shilad Sen
*
* Results on Shilad's Macbook Pro:
* csv load, no url options: 277K records per second
* csv load, JDB_URL_OPTS options setSchema 344K records per second
* csv load, JDB_URL_OPTS options no setSchema 190K records per second (removed setSchema as a result of awesome batch performance)
* non csv load, non-batch: 43K per second
* non csv load, batch: 363K per second
*
*/
public class BenchmarkFastLoader {
public static int NUM_ENTRIES = 10000000;
private static final String JDBC_URL_OPTS = ";LOG=0;CACHE_SIZE=65536;LOCK_MODE=0;UNDO_LOG=0";
private static final TableField [] INSERT_FIELDS = new TableField[] {
Tables.LOCAL_LINK.LANG_ID,
Tables.LOCAL_LINK.ANCHOR_TEXT,
Tables.LOCAL_LINK.SOURCE_ID,
Tables.LOCAL_LINK.DEST_ID,
Tables.LOCAL_LINK.LOCATION,
Tables.LOCAL_LINK.IS_PARSEABLE,
Tables.LOCAL_LINK.LOCATION_TYPE,
};
public static void main(String args[]) throws IOException, DaoException, SQLException {
File dbPath = new File("tmp/benchmark-loader-db");
if (dbPath.exists()) FileUtils.forceDelete(dbPath);
dbPath.mkdirs();
FileUtils.forceDeleteOnExit(dbPath);
BoneCPDataSource ds = new BoneCPDataSource();
System.out.println("Establishing new data source");
ds.setJdbcUrl("jdbc:h2:" + dbPath.getAbsolutePath() + JDBC_URL_OPTS);
ds.setUsername("sa");
ds.setPassword("");
ds.getConnection().createStatement().execute("DROP TABLE IF EXISTS local_link");
String schema = "CREATE TABLE local_link (\n" +
" lang_id SMALLINT NOT NULL,\n" +
" anchor_text TEXT NOT NULL,\n" +
" source_id INT NOT NULL,\n" +
" dest_id INT NOT NULL,\n" +
" location INT NOT NULL,\n" +
" is_parseable BOOLEAN NOT NULL,\n" +
" location_type SMALLINT NOT NULL\n" +
");\n";
ds.getConnection().createStatement().execute(schema);
FastLoader loader = new FastLoader(new WpDataSource(ds), INSERT_FIELDS);
long t1 = System.currentTimeMillis();
for (int i = 0; i < NUM_ENTRIES; i++) {
loader.load(new Object[] {
10,
"Foo bar baz",
324234,
3219,
313,
true,
99
});
}
long t2 = System.currentTimeMillis();
System.err.println("insert time was " + (t2-t1) / 1000.0 + " seconds");
System.err.println("inserted " + 1000.0 * NUM_ENTRIES / (t2-t1) + " entries per second");
loader.endLoad();
long t3 = System.currentTimeMillis();
System.err.println("load time was " + (t3-t2) / 1000.0 + " seconds");
System.err.println("loaded " + 1000.0 * NUM_ENTRIES / (t3-t2) + " entries per second");
ResultSet rs = ds.getConnection().createStatement()
.executeQuery("select count(*) from local_link");
rs.next();
System.err.println("inserted " + rs.getInt(1) + " records");
rs = ds.getConnection().createStatement()
.executeQuery("select * from local_link limit 100000");
while (rs.next()) {
assert(rs.getInt(1) == 10);
assert(rs.getString(2).equals("Foo bar baz"));
assert(rs.getInt(3) == 324234);
assert(rs.getInt(4) == 3219);
assert(rs.getInt(5) == 313);
assert(rs.getBoolean(6));
assert(rs.getInt(7) == 99);
}
}
}