package uk.bl.monitrix.util;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import uk.bl.monitrix.heritrix.LogFileEntry;
import uk.bl.monitrix.heritrix.SimpleLogfileReader;
/**
* The Heritrix Dummy 'simulates' a crawl in progress by replaying an existing Heritrix log ("source log")
* into a dummy output log ("dummy log"). Timestamps from the source log will be replaced by current wall
* clock timestamps.
* @author Rainer Simon <rainer.simon@ait.ac.at>
*/
public class HeritrixDummy {

    // Date format used for the rewritten timestamps. SimpleDateFormat is not thread-safe;
    // that is fine here because it is only ever used from the single thread running main().
    private static final DateFormat ISO_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");

    // Path to the source log file to replay
    private static final String PATH_TO_SOURCE_LOG = "/home/simonr/Downloads/crawl.log.20120914182409";

    // Config: how many dummy crawler logs should be generated?
    private static final int NUMBER_OF_CRAWLERS = 4;

    // Dummy log path template; "@@number@@" is replaced by the crawler index (0-based)
    private static final String PATH_TO_DUMMY_LOG = "/home/simonr/dummy@@number@@.log";

    // The approx. amount of time between two log events in the dummy log, in millis (we will add a bit of random jitter)
    private static final int APPROX_LOG_INTERVAL_MS = 10;

    /**
     * Replays the source log into {@link #NUMBER_OF_CRAWLERS} dummy logs.
     * <p>
     * Source entries are dealt out round-robin: each pass over the dummy logs takes one source
     * entry per log (as long as entries remain), replaces its timestamp with the current wall
     * clock time, appends it to that log and flushes immediately. After each pass the thread
     * sleeps for a randomized interval. The dummy logs are opened in append mode, so existing
     * content is preserved. All opened writers are closed when the replay ends, whether it
     * finishes normally or fails.
     *
     * @param args ignored
     * @throws IOException if a dummy log cannot be opened, written, flushed or closed
     * @throws InterruptedException if the thread is interrupted while waiting between passes
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        List<BufferedWriter> writers = new ArrayList<BufferedWriter>();
        try {
            // Opening happens inside the try block so that writers opened before a failing
            // FileWriter constructor are still closed by the finally clause.
            for (int i = 0; i < NUMBER_OF_CRAWLERS; i++) {
                File log = new File(PATH_TO_DUMMY_LOG.replace("@@number@@", Integer.toString(i)));
                writers.add(new BufferedWriter(new FileWriter(log, true)));
            }

            Iterator<LogFileEntry> sourceLog = new SimpleLogfileReader(PATH_TO_SOURCE_LOG).iterator();
            Random rnd = new Random();

            while (sourceLog.hasNext()) {
                for (BufferedWriter writer : writers) {
                    // The source may run dry in the middle of a pass
                    if (sourceLog.hasNext()) {
                        String entry = sourceLog.next().toString();
                        writer.append(rewriteTimestamp(entry, new Date()) + "\n");
                        // Flush per entry so the line reaches the file right away
                        writer.flush();
                    }
                }

                // Wait: jitter is in [-APPROX_LOG_INTERVAL_MS, APPROX_LOG_INTERVAL_MS), so the
                // resulting delay lies between APPROX_LOG_INTERVAL_MS and 2 * APPROX_LOG_INTERVAL_MS
                int jitter = rnd.nextInt(2 * APPROX_LOG_INTERVAL_MS) - APPROX_LOG_INTERVAL_MS;
                Thread.sleep(APPROX_LOG_INTERVAL_MS + Math.abs(jitter));
            }
        } finally {
            closeAll(writers);
        }
    }

    /**
     * Replaces the leading timestamp of a log entry (everything before the first space) with
     * the given time, formatted using {@link #ISO_FORMAT}.
     * <p>
     * An entry that contains no space at all is treated as consisting of the timestamp token
     * only, so the result is just the new timestamp. NOTE(review): this replaces a
     * StringIndexOutOfBoundsException; confirm that such entries should not be skipped instead.
     *
     * @param entry the log entry as a string
     * @param now the time to stamp the entry with
     * @return the entry with its first space-delimited token replaced by the formatted time
     */
    private static String rewriteTimestamp(String entry, Date now) {
        int firstSpace = entry.indexOf(' ');
        String remainder = (firstSpace < 0) ? "" : entry.substring(firstSpace);
        return ISO_FORMAT.format(now) + remainder;
    }

    /**
     * Closes every writer in the list, continuing past individual failures so that one failing
     * close does not leave the remaining writers open.
     *
     * @param writers the writers to close
     * @throws IOException the first failure encountered while closing, if any
     */
    private static void closeAll(List<BufferedWriter> writers) throws IOException {
        IOException firstFailure = null;
        for (BufferedWriter writer : writers) {
            try {
                writer.close();
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }

        if (firstFailure != null) {
            throw firstFailure;
        }
    }

}