package org.curriki.tools.loganalyzer;
import org.apache.commons.io.input.Tailer;
import org.apache.commons.io.input.TailerListener;
import java.io.File;
import java.io.FileInputStream;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class LogCollector {
public LogCollector(String baseDir, String logFilePattern, boolean doResume, boolean debug, LogAnalysisCursor cursor) throws Exception {
this.logFilePattern = Pattern.compile(logFilePattern);
this.baseDir = new File(baseDir);
this.currentFile = findLatestFile();
tailer = createTailer(currentFile, doResume);
fileRotator.start();
queueProcessor.start();
this.debug = debug;
if(cursor==null)
cursor = new LogAnalysisCursor("","", true);
this.logAnalysisCursor = cursor;
}
private Tailer tailer;
private Pattern logFilePattern;
private File baseDir, currentFile;
private boolean running = true;
private boolean doResume;
private long lastMeasuredLatest = 0;
private long lastSeenLine = 0;
private boolean debug;
private static final long ONLY_CHANGE_EVERY_X_MS = 5000;
private static final int LARGEST_QUEUE_SIZE = 65536;
private Tailer createTailer(final File f, boolean resume) {
Tailer t = new Tailer(f, tailerListener, 20, !resume) {
@Override
public void run() {
if(debug) System.err.println("Starting to tail: " + f.getName());
try {
super.run();
} catch (Exception e) {
e.printStackTrace();
}
if(debug) System.err.println("Finished to tail: " + f.getName());
}
};
Thread thread = new Thread(t, "Tailer-" + f.getName());
thread.setDaemon(true);
thread.start();
return t;
}
private synchronized File findLatestFile() {
File[] files = baseDir.listFiles();
long lastModifMax = 0;
File latestOne = null;
for(File f: files) {
if(logFilePattern.matcher(f.getName()).matches()) {
long l = f.lastModified();
if(l>lastModifMax) {
latestOne = f; lastModifMax = l;
}
}
}
lastMeasuredLatest = System.currentTimeMillis();
if(debug) System.err.println("Pattern (" + logFilePattern + "): file: " + latestOne.getName());
return latestOne;
}
private Thread fileRotator = new Thread("File Rotator") {
@Override
public void run() {
long tick = System.currentTimeMillis();
FileInputStream readFile = null;
// implement the watchdog that looks to change the file once every 10s.
while(running) {
try {
long time = System.currentTimeMillis();
// run every 100 ms
if(time-tick+100>0)
Thread.sleep(time-tick+100);
tick = time;
// has there been input in the last second?
if(time-lastSeenLine<1000)
continue; // then no need to scan for a changed file
// has there been a file-change recently? then maybe wait a bit
if(System.currentTimeMillis()-lastMeasuredLatest< ONLY_CHANGE_EVERY_X_MS)
continue;
// detect changed file
File elected = findLatestFile();
if(elected.equals(currentFile)) continue;
// create tailer
tailer.stop();
tailer = createTailer(elected, true);
currentFile = elected;
// swallow existing content first
// TODO: lock here
if(elected!=null) {
readFile = new FileInputStream(elected);
List<String> firstLines = org.apache.commons.io.IOUtils.readLines(readFile);
for(String s : firstLines) {
queueLine(s);
}
}
} catch(Exception ex) {
ex.printStackTrace();
} finally {
if(readFile!=null) {
try {
readFile.close();
} catch(Exception ex) {
// ignore
}
}
}
}
try {
if(tailer!=null) tailer.stop();
if(queueProcessor!=null && queueProcessor.isAlive()) queueProcessor.stop();
if(fileRotator!=null && fileRotator.isAlive()) fileRotator.stop();
} catch(Throwable x) {
x.printStackTrace();
}
}
};
private TailerListener tailerListener = new TailerListener() {
public void init(Tailer tailer) {
}
public void fileNotFound() {
}
public void fileRotated() {
}
public void handle(String line) {
lastSeenLine = System.currentTimeMillis();
queueLine(line);
}
public void handle(Exception ex) {
}
};
// ================== QUEUE ===================================================
private ArrayBlockingQueue<String> queue = new ArrayBlockingQueue<String>(LARGEST_QUEUE_SIZE+1);
private void queueLine(String line) {
if(debug) System.err.println("Queuing line: " + line);
if(queue.size()>=LARGEST_QUEUE_SIZE) {
// Pull alarm trigger
new RuntimeException("Queue overflow.").printStackTrace();
System.exit(0);
}
queue.add(line);
}
private Thread queueProcessor = new Thread("Queue Processor") {
public void run() {
while(running) {
try {
String line = queue.poll();
if(line!=null) {
processLine(line);
}
synchronized(queue) {
if(queue.isEmpty()) queue.wait(1000);
}
} catch (InterruptedException e) {
// no big issue here
} catch (Exception e) {
e.printStackTrace();
}
}
}
};
// =================== LINE PROCESSING ============================================
/*
LogFormat "%h %l %u %t %D User:%{username}C \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
(remote host, remote user, remote user, time the request was received, first line of request,
status, size of response, refererer, user agent)
Examples:
69.145.82.248 - - [03/Apr/2014:14:37:30 -0700] 686 User:- "GET /wp-content/uploads/2012/05/read-more-small.png HTTP/1.1" 302 226 "http://www.curriki.org/?gclid=CPbp_pKjxb0CFU1bfgod2xcAUA" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)"
69.144.182.1 - - [03/Apr/2014:14:37:32 -0700] 222424 User:- "GET /xwiki/bin/view/Search/ HTTP/1.1" 200 6435 "http://www.curriki.org/welcome/resources-curricula/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36"
93.222.192.249 - - [03/Apr/2014:14:40:29 -0700] 869976 User:\"ChslXwq0Ofm9IQXsF6SbRQ__\" "GET /xwiki/skins/curriki20/extjs/ext-all-debug.js HTTP/1.1" 200 212596 "http://www.curriki.org/xwiki/bin/view/Coll_adminPolx/clustercheck" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36"
63.241.109.8 - demo [04/Apr/2014:10:36:24 -0700] 10500470 User:- "GET / HTTP/1.0" 200 50200 "-" "Wget/1.11.4"
93.222.235.200 - demo [04/Apr/2014:10:38:02 -0700] 190835 User:- "GET /xwiki/bin/view/Registration/MemberIFrame?xpage=plain HTTP/1.1" 200 633 "http://beta.curriki.org/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36"
180.76.5.169 - - [04/Apr/2014:10:48:46 -0700] 1139972 User:- "GET /xwiki/gen/js/72b9/TBD HTTP/1.1" 404 6303 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
93.222.235.200 - demo [04/Apr/2014:11:11:28 -0700] 25620 User:- "GET /curricki/js//layerslider/skins/lightskin/skin.css HTTP/1.1" 404 207 "http://beta.curriki.org/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36"
*/
Pattern pattern = Pattern.compile("([0-9\\.]+) (-|demo) (-|demo) \\[([^\\]]+)\\] ([0-9]+) User:([^ ]+) \"([A-Z]+) ([^ ]+) ([^ ]*)\" ([0-9]*) ([0-9-]*) \"([^\"]*)\" \"([^\"]*)\" (.*)");
final LogAnalysisCursor logAnalysisCursor;
public void processLine(String line) {
Matcher m = pattern.matcher(line);
if(!m.matches()) {
System.err.println("Non-matching-line:");
System.err.println(line);
return;
}
if(debug) {
System.err.println("IP: " + m.group(1));
System.err.println("Time: " + m.group(4));
System.err.println("Duration: " + m.group(5));
System.err.println("User: " + m.group(6));
System.err.println("Method: " + m.group(7));
System.err.println("Path: " + m.group(8));
System.err.println("Protocol: " + m.group(9));
System.err.println("Status: " + m.group(10));
System.err.println("Size: " + m.group(11));
System.err.println("Referer: " + m.group(12));
System.err.println("User Agent: " + m.group(13));
System.err.println("Route: " + m.group(14));
}
logAnalysisCursor.handleLine(m.group(1), m.group(4), m.group(5), m.group(6), m.group(7), m.group(8), m.group(9), m.group(10), m.group(11), m.group(12), m.group(13), m.group(14) );
}
}