package org.commoncrawl.service.statscollector;
import java.io.File;
import java.io.IOException;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.commoncrawl.crawl.common.internal.CrawlEnvironment;
import org.commoncrawl.rpc.base.internal.AsyncClientChannel;
import org.commoncrawl.rpc.base.internal.AsyncContext;
import org.commoncrawl.rpc.base.internal.AsyncServerChannel;
import org.commoncrawl.rpc.base.internal.NullMessage;
import org.commoncrawl.rpc.base.internal.AsyncRequest.Status;
import org.commoncrawl.rpc.base.shared.RPCException;
import org.commoncrawl.server.CommonCrawlServer;
import org.commoncrawl.util.CCStringUtils;
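
/**
 * RPC service that aggregates crawler statistics. Crawler nodes push their
 * stats via the CrawlerStatsService RPC interface; incoming samples are routed
 * into per-crawler CrawlerStatsCollection instances backed by the
 * StatsLogManager. Previously persisted collections are reloaded from disk on
 * startup.
 */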
public class CrawlStatsCollectorService extends CommonCrawlServer
    implements CrawlerStatsService, AsyncServerChannel.ConnectionCallback {
  /** default Hadoop FileSystem, initialized in initServer() */
  private FileSystem _fileSystem = null;
  /** local data directory root */
  private File _localDataDir = null;

  private static final Log LOG = LogFactory.getLog(CrawlStatsCollectorService.class);

  /** manages the on-disk stats log files backing each collection */
  private StatsLogManager _logManager;
  /** collection name (group key + unique key) to stats collection map */
  public static TreeMap<String,StatsCollection> _statsCollectionMap = new TreeMap<String,StatsCollection>();

  private static CrawlStatsCollectorService _singleton;

  public static CrawlStatsCollectorService getSingleton() { return _singleton; }
  public FileSystem getFileSystem() {
    return _fileSystem;
  }

  private final File getLocalRoot() {
    return _localDataDir;
  }
  @Override
  protected String getDefaultHttpInterface() {
    return CrawlEnvironment.DEFAULT_HTTP_INTERFACE;
  }

  @Override
  protected int getDefaultHttpPort() {
    return CrawlEnvironment.CRAWLSTATSCOLLECTOR_SERVICE_HTTP_PORT;
  }

  @Override
  protected String getDefaultLogFileName() {
    return "crawlstats_service.log";
  }

  @Override
  protected String getDefaultRPCInterface() {
    return CrawlEnvironment.DEFAULT_RPC_INTERFACE;
  }

  @Override
  protected int getDefaultRPCPort() {
    return CrawlEnvironment.CRAWLSTATSCOLLECTOR_SERVICE_RPC_PORT;
  }

  @Override
  protected String getWebAppName() {
    return CrawlEnvironment.CRAWLSTATSCOLLECTOR_SERVICE_WEBAPP_NAME;
  }
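
  /**
   * Server initialization: resolves the default file system, creates the local
   * working directory, brings up the StatsLogManager, reloads any previously
   * persisted collections, and registers the CrawlerStatsService RPC endpoint
   * plus the stats query servlet.
   */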
  @Override
  protected boolean initServer() {
    _singleton = this;
    try {
      _fileSystem = CrawlEnvironment.getDefaultFileSystem();

      File workingDirectory = new File(getDataDirectory(), "stats_server_data");
      workingDirectory.mkdirs();

      // initialize log manager
      _logManager = new StatsLogManager(getEventLoop(), workingDirectory);
      // load collections
      loadCollections(workingDirectory);
      // create server channel ...
      AsyncServerChannel channel = new AsyncServerChannel(this, this.getEventLoop(), this.getServerAddress(), this);
      // register RPC services it supports ...
      registerService(channel, CrawlerStatsService.spec);
      // expose the stats query servlet via the embedded web server
      getWebServer().addServlet("tq", "/tq", CrawlerStatsQuery.class);

      return true;
    }
    catch (IOException e) {
      LOG.error(CCStringUtils.stringifyException(e));
    }
    return false;
  }
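
  /**
   * Scans the working directory for previously persisted stats log files and
   * re-registers their collections. File names are expected to follow the
   * pattern groupKey-uniqueKey.events; only files whose group key matches
   * CrawlerStatsCollection.GROUP_KEY are loaded here.
   */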
  private void loadCollections(File workingDirectory) throws IOException {
    // TODO: THIS IS ONE BIG HACK :-(
    File[] files = workingDirectory.listFiles();
    // listFiles can return null if the directory cannot be read ...
    if (files == null) {
      return;
    }
    for (File file : files) {
      LOG.info("Loader Found File:" + file.getName());
      if (file.getName().endsWith(".events")) {
        int indexOfFirstDash = file.getName().indexOf('-');
        // skip files that don't follow the groupKey-uniqueKey.events naming convention
        if (indexOfFirstDash == -1) {
          continue;
        }
        String groupKey = file.getName().substring(0, indexOfFirstDash);
        String uniqueKey = file.getName().substring(indexOfFirstDash + 1, file.getName().length() - ".events".length());
        if (groupKey.equalsIgnoreCase(CrawlerStatsCollection.GROUP_KEY)) {
          LOG.info("Found CrawlStatsCollection Prefix:" + groupKey + " UniqueKey:" + uniqueKey);
          _statsCollectionMap.put(StatsLogManager.makeCollectionName(groupKey, uniqueKey), new CrawlerStatsCollection(_logManager, uniqueKey));
        }
      }
    }
  }
  @Override
  protected boolean parseArguements(String[] argv) {
    return true;
  }

  @Override
  protected void overrideConfig(Configuration conf) {
  }

  @Override
  protected void printUsage() {
  }

  @Override
  protected boolean startDaemons() {
    return true;
  }

  @Override
  protected void stopDaemons() {
  }

  @Override
  protected String getDefaultDataDir() {
    // TODO Auto-generated method stub
    return null;
  }
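
  /**
   * AsyncServerChannel.ConnectionCallback - incoming client connect/disconnect
   * notifications. No per-connection state is maintained, so both are no-ops.
   */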
  @Override
  public void IncomingClientConnected(AsyncClientChannel channel) {
  }

  @Override
  public void IncomingClientDisconnected(AsyncClientChannel channel) {
  }
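
  /**
   * CrawlerStatsService RPC entry point. Looks up (or lazily creates) the stats
   * collection for the reporting crawler and appends the incoming sample at the
   * crawler-supplied timestamp. The request is completed with an error status
   * if persisting the sample fails.
   */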
  @Override
  public void logCrawlerStats(AsyncContext<LogCrawlStatsRequest, NullMessage> rpcContext) throws RPCException {
    LOG.info("Received Stats From Crawler:" + rpcContext.getInput().getCrawlerName());
    synchronized (_statsCollectionMap) {
      try {
        String collectionName = StatsLogManager.makeCollectionName(CrawlerStatsCollection.GROUP_KEY, rpcContext.getInput().getCrawlerName());
        CrawlerStatsCollection statsCollection = (CrawlerStatsCollection) _statsCollectionMap.get(collectionName);
        // lazily create the collection on first report from this crawler
        if (statsCollection == null) {
          statsCollection = new CrawlerStatsCollection(_logManager, rpcContext.getInput().getCrawlerName());
          _statsCollectionMap.put(collectionName, statsCollection);
        }
        statsCollection.addValue(rpcContext.getInput().getCrawlerStats().getTimestamp(), rpcContext.getInput().getCrawlerStats());
      }
      catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        rpcContext.setStatus(Status.Error_RequestFailed);
        rpcContext.setErrorDesc(CCStringUtils.stringifyException(e));
      }
      rpcContext.completeRequest();
    }
  }
}