/**
* $Id: StatisticsDataGenerator.java 4413 2009-10-07 16:25:08Z benbosman $
* $URL: https://scm.dspace.org/svn/repo/dspace/tags/dspace-1.6.2/dspace-stats/src/main/java/org/dspace/statistics/util/StatisticsDataGenerator.java $
* *************************************************************************
* Copyright (c) 2002-2009, DuraSpace. All rights reserved
* Licensed under the DuraSpace Foundation License.
*
* A copy of the DuraSpace License has been included in this
* distribution and is available at: http://scm.dspace.org/svn/repo/licenses/LICENSE.txt
*/
package org.dspace.statistics.util;
import org.apache.commons.cli.*;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.dspace.core.Context;
import org.dspace.core.Constants;
import org.dspace.core.ConfigurationManager;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Bitstream;
import org.dspace.content.DCValue;
import org.dspace.content.Item;
import org.dspace.eperson.EPerson;
import org.dspace.statistics.SolrLogger;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Iterator;
import java.text.SimpleDateFormat;
import com.maxmind.geoip.LookupService;
import com.maxmind.geoip.Location;
/**
* Test class to generate random statistics data.
* Used for load testing of searches. Inputs are slow
* due to inefficient randomizer.
*
* @author kevinvandevelde at atmire.com
* @author ben at atmire.com
*/
public class StatisticsDataGenerator {
public static void main(String[] args) throws Exception {
CommandLineParser parser = new PosixParser();
Options options = new Options();
options.addOption("n", "nrlogs", true,
"type: nr of logs to be generated");
options.addOption("s", "startDate", true,
"type: the start date from which we start generating our logs");
options.addOption("e", "endDate", true,
"type: the end date from which we start generating our logs");
options.addOption("a", "cms", true, "The starting id of our community");
options.addOption("b", "cme", true, "The end id of our community");
options
.addOption("c", "cls", true,
"The starting id of our collection");
options.addOption("d", "cle", true, "The end if of our collection");
options.addOption("f", "is", true, "The starting id of our item");
options.addOption("g", "ie", true, "The end id of our item");
options.addOption("h", "bs", true, "The starting id of our bitstream");
options.addOption("i", "be", true, "The end id of our bitstream");
options.addOption("j", "ps", true, "The starting id of our epersons");
options.addOption("k", "pe", true, "The end id of our epersons");
CommandLine line = parser.parse(options, args);
int nrLogs;
long startDate;
long endDate;
long commStartId;
long commEndId;
long collStartId;
long collEndId;
long itemStartId;
long itemEndId;
long bitStartId;
long bitEndId;
long epersonStartId;
long epersonEndId;
if (line.hasOption("n"))
nrLogs = Integer.parseInt(line.getOptionValue("n"));
else {
System.out
.println("We need to know how many logs we need to create");
return;
}
if (line.hasOption("s")) {
startDate = getDateInMiliseconds(line.getOptionValue("s"));
} else
startDate = getDateInMiliseconds("01/01/2006");
if (line.hasOption("e")) {
endDate = getDateInMiliseconds(line.getOptionValue("e"));
} else
endDate = new Date().getTime();
if (line.hasOption("a"))
commStartId = Long.parseLong(line.getOptionValue("a"));
else
return;
if (line.hasOption("b"))
commEndId = Long.parseLong(line.getOptionValue("b"));
else
return;
if (line.hasOption("c"))
collStartId = Long.parseLong(line.getOptionValue("c"));
else
return;
if (line.hasOption("d"))
collEndId = Long.parseLong(line.getOptionValue("d"));
else
return;
if (line.hasOption("f"))
itemStartId = Long.parseLong(line.getOptionValue("f"));
else
return;
if (line.hasOption("g"))
itemEndId = Long.parseLong(line.getOptionValue("g"));
else
return;
if (line.hasOption("h"))
bitStartId = Long.parseLong(line.getOptionValue("h"));
else
return;
if (line.hasOption("i"))
bitEndId = Long.parseLong(line.getOptionValue("i"));
else
return;
if (line.hasOption("j"))
epersonStartId = Long.parseLong(line.getOptionValue("j"));
else
return;
if (line.hasOption("k"))
epersonEndId = Long.parseLong(line.getOptionValue("k"));
else
return;
// Get the max id range
long maxIdTotal = Math.max(commEndId, collEndId);
maxIdTotal = Math.max(maxIdTotal, itemEndId);
maxIdTotal = Math.max(maxIdTotal, bitEndId);
// We got 3/4 chance the person visting the dso is not logged in
epersonEndId *= 4;
// We got all our parameters now get the rest
Context context = new Context();
// Find our solrserver
CommonsHttpSolrServer solr = new CommonsHttpSolrServer(
ConfigurationManager.getProperty("solr.log.server"));
solr.deleteByQuery("*:*");
solr.commit();
Map metadataStorageInfo = SolrLogger.getMetadataStorageInfo();
String prevIp = null;
String dbfile = ConfigurationManager.getProperty("solr.dbfile");
LookupService cl = new LookupService(dbfile,
LookupService.GEOIP_STANDARD);
int countryErrors = 0;
for (int i = 0; i < nrLogs; i++) {
String ip = "";
Date time;
String continent;
String country = "";
String countryCode;
float longitude;
float latitude;
String city;
// 1. Generate an ip for our user
for (int j = 0; j < 4; j++) {
ip += getRandomNumberInRange(0, 254);
if (j != 3)
ip += ".";
}
// 2 Depending on our ip get all the location info
Location location;
try {
location = cl.getLocation(ip);
} catch (Exception e) {
location = null;
}
if (location == null) {
// If we haven't got a prev ip this is pretty useless so move on
// to the next one
if (prevIp == null)
continue;
ip = prevIp;
location = cl.getLocation(ip);
}
city = location.city;
country = location.countryName;
countryCode = location.countryCode;
longitude = location.longitude;
latitude = location.latitude;
try {
continent = LocationUtils.getContinentCode(countryCode);
} catch (Exception e) {
// We could get an error if our country == Europa this doesn't
// matter for generating statistics so ignore it
System.out.println("COUNTRY ERROR: " + countryCode);
countryErrors++;
continue;
}
// 3. Generate a date that the object was visited
time = new Date(getRandomNumberInRange(startDate, endDate));
// 4. Get our dspaceobject we are supposed to be working on
// We got mostly item views so lets say we got 1/2 chance that we
// got an item view
// What type have we got (PS: I know we haven't got 5 as a dso type
// we can log but it is used so our item gets move traffic)
int type = (int) getRandomNumberInRange(0, 8);
if (type == Constants.BUNDLE || type >= 5)
type = Constants.ITEM;
int dsoId = -1;
// Now we need to find a valid id
switch (type) {
case Constants.COMMUNITY:
dsoId = (int) getRandomNumberInRange(commStartId, commEndId);
break;
case Constants.COLLECTION:
dsoId = (int) getRandomNumberInRange(collStartId, collEndId);
break;
case Constants.ITEM:
dsoId = (int) getRandomNumberInRange(itemStartId, itemEndId);
break;
case Constants.BITSTREAM:
dsoId = (int) getRandomNumberInRange(bitStartId, bitEndId);
break;
}
// Now find our dso
DSpaceObject dso = DSpaceObject.find(context, type, dsoId);
if (dso instanceof Bitstream) {
Bitstream bit = (Bitstream) dso;
if (bit.getFormat().isInternal()) {
dso = null;
}
}
// Make sure we got a dso
boolean substract = false;
while (dso == null) {
// If our dsoId gets higher then our maxIdtotal we need to lower
// to find a valid id
if (dsoId == maxIdTotal)
substract = true;
if (substract)
dsoId--;
else
dsoId++;
dso = DSpaceObject.find(context, type, dsoId);
if (dso instanceof Bitstream) {
Bitstream bit = (Bitstream) dso;
if (bit.getFormat().isInternal()) {
dso = null;
}
}
// System.out.println("REFIND");
}
// Find the person who is visting us
int epersonId = (int) getRandomNumberInRange(epersonStartId,
epersonEndId);
EPerson eperson = EPerson.find(context, epersonId);
if (eperson == null)
epersonId = -1;
// System.out.println(ip);
// System.out.println(country + " " +
// LocationUtils.getCountryName(countryCode));
// Resolve the dns
String dns = null;
try {
dns = DnsLookup.reverseDns(ip);
} catch (Exception e) {
}
System.out.println(ip);
System.out.println(dns);
// Save it in our server
SolrInputDocument doc1 = new SolrInputDocument();
doc1.addField("ip", ip);
doc1.addField("type", dso.getType());
doc1.addField("id", dso.getID());
doc1.addField("time", DateFormatUtils.format(time,
SolrLogger.DATE_FORMAT_8601));
doc1.addField("continent", continent);
// doc1.addField("country", country);
doc1.addField("countryCode", countryCode);
doc1.addField("city", city);
doc1.addField("latitude", latitude);
doc1.addField("longitude", longitude);
if (epersonId > 0)
doc1.addField("epersonid", epersonId);
if (dns != null)
doc1.addField("dns", dns.toLowerCase());
if (dso instanceof Item) {
Item item = (Item) dso;
// Store the metadata
for (Object storedField : metadataStorageInfo.keySet()) {
String dcField = (String) metadataStorageInfo
.get(storedField);
DCValue[] vals = item.getMetadata(dcField.split("\\.")[0],
dcField.split("\\.")[1], dcField.split("\\.")[2],
Item.ANY);
for (DCValue val1 : vals) {
String val = val1.value;
doc1.addField(String.valueOf(storedField), val);
doc1.addField(String.valueOf(storedField + "_search"),
val.toLowerCase());
}
}
}
SolrLogger.storeParents(doc1, dso);
solr.add(doc1);
// Make sure we have a previous ip
prevIp = ip;
}
System.out.println("Nr of countryErrors: " + countryErrors);
// Commit at the end cause it takes a while
solr.commit();
}
/**
* Method returns a random integer between the given int
*
* @param min
* the random number must be greater or equal to this
* @param max
* the random number must be smaller or equal to this
* @return a random in
*/
private static long getRandomNumberInRange(long min, long max) {
return min + (long) (Math.random() * ((max - min) + 1));
}
/**
* Method to get the miliseconds from a datestring
*
* @param dateString
* the string containing our date in a string
* @return the nr of miliseconds in the given datestring
* @throws java.text.ParseException
* should not happen
*/
private static long getDateInMiliseconds(String dateString)
throws java.text.ParseException {
SimpleDateFormat formatter = new SimpleDateFormat("dd/MM/yyyy");
return formatter.parse(dateString).getTime();
}
}