/**
* Copyright 2008 - CommonCrawl Foundation
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
**/
package org.commoncrawl.service.dns;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Vector;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.log4j.DailyRollingFileAppender;
import org.apache.log4j.Layout;
import org.apache.log4j.spi.LoggingEvent;
import org.commoncrawl.async.Timer;
import org.commoncrawl.crawl.common.internal.CrawlEnvironment;
import org.commoncrawl.io.NIODNSAsyncResolver;
import org.commoncrawl.io.NIODNSQueryLogger;
import org.commoncrawl.io.NIODNSQueryResult;
import org.commoncrawl.io.NIODNSCache;
import org.commoncrawl.io.NIODNSQueryClient;
import org.commoncrawl.io.NIODNSResolver;
import org.commoncrawl.io.NIODNSCache.Node;
import org.commoncrawl.rpc.base.internal.AsyncClientChannel;
import org.commoncrawl.rpc.base.internal.AsyncContext;
import org.commoncrawl.rpc.base.internal.AsyncRequest;
import org.commoncrawl.rpc.base.internal.AsyncServerChannel;
import org.commoncrawl.rpc.base.internal.NullMessage;
import org.commoncrawl.rpc.base.shared.RPCException;
import org.commoncrawl.server.CommonCrawlServer;
import org.commoncrawl.service.crawler.filters.FilterResults;
import org.commoncrawl.service.crawler.filters.Filter.FilterResult;
import org.commoncrawl.service.directory.DirectoryServiceCallback;
import org.commoncrawl.service.directory.DirectoryServiceItemList;
import org.commoncrawl.service.directory.DirectoryServiceRegistrationInfo;
import org.commoncrawl.service.directory.DirectoryServiceServer;
import org.commoncrawl.service.directory.DirectoryServiceSubscriptionInfo;
import org.commoncrawl.util.CCStringUtils;
import org.commoncrawl.util.CustomLogger;
import org.commoncrawl.util.IPAddressUtils;
import org.commoncrawl.util.IntrusiveList;
import org.commoncrawl.util.JVMStats;
import org.commoncrawl.util.URLUtils;
import com.google.common.collect.Lists;
/**
*
* @author rana
*
*/
public class DNSServiceServer
extends CommonCrawlServer
implements DNSService,
DirectoryServiceCallback,
AsyncClientChannel.ConnectionCallback,
AsyncServerChannel.ConnectionCallback,
NIODNSQueryLogger {
/**
 * Bare-bones log4j layout: every event is written as its rendered message
 * followed by a line separator, with no level, timestamp or logger name.
 */
private static class CustomLoggerLayout extends Layout {

  /** scratch buffer reused across format calls **/
  StringBuffer sbuf = new StringBuffer(1024);

  /** This layout has no options, so there is nothing to activate. */
  public void activateOptions() {
    // intentionally empty
  }

  /** Reports that this layout does not render throwables itself. */
  @Override
  public boolean ignoresThrowable() {
    return true;
  }

  /** Returns the rendered message of the event plus LINE_SEP. */
  @Override
  public String format(LoggingEvent event) {
    // reset the shared buffer before building the next line
    sbuf.setLength(0);
    return sbuf.append(event.getRenderedMessage()).append(LINE_SEP).toString();
  }
}
/** timeout value handed to every NIODNSResolver.resolve call made by doQuery (presumably milliseconds - TODO confirm against the resolver) **/
static final int DEFAULT_DNS_TIMEOUT = 30000;
/** period of the cache checkpoint timer created in initServer **/
static final int CACHE_CHECKPOINT_INTERVAL = 60 * 60 * 1000; // checkpoint the caches every 60 minutes or so
/** minimum number of milliseconds between two full stat dumps in publishStats **/
static final int FULL_STATS_DUMP_INTERVAL = 60 * 60 * 1000;
/** the max age for a checkpointed terminal node **/
static final int DEFAULT_MAX_CHECKPOINT_ITEM_AGE = 30 * 60 * 1000; // 30 minutes
/** default number of resolver instances placed in the resolver queue (overridable via --queueSize) **/
static final int DEFAULT_RESOLVER_QUEUE_SIZE = 20;
/** checkpoint file names, both resolved relative to the log directory **/
static final String GOOD_NAMES_CACHE_CHECKPOINT_FILE = "dnsServiceGoodNamesCheckpoint.log";
static final String BAD_NAMES_CACHE_CHECKPOINT_FILE = "dnsServiceBadNamesCheckpoint.log";
/** daily rolling logs written through the NIODNSQueryLogger callbacks (logDNSQuery / logDNSFailure / logDNSException) **/
CustomLogger _DNSSuccessLog;
CustomLogger _DNSFailureLog;
CustomLogger _DNSFailureDetailLog;
/** value of the --servers argument: resource name or path of the DNS servers list **/
String _serversFile;
//IntrusiveList<NIODNSResolver> _resolvers = new IntrusiveList<NIODNSResolver>();
/** resolver instances, allocated and populated by parseServersFile and handed out by getNextResolver **/
PriorityQueue<NIODNSResolver> _resolverQueue;
//NIODNSLocalResolver _nextResolver = null;
/** repeating timer (1000 interval) that drives publishStats **/
Timer _statsTimer = null;
/** repeating timer (CACHE_CHECKPOINT_INTERVAL) that drives the cache checkpoints **/
Timer _checkpointTimer = null;
/** number of doQuery requests answered straight from the cache **/
long _directCacheHits=0;
/** value of the --directoryserver argument **/
InetAddress _directoryServiceAddress;
/** filters (re)created by reloadDNSFilters; every reader null-checks them **/
DNSNoCacheFilter _noCacheFilter;
DNSRewriteFilter _rewriteFilter;
/** cookie sent with the directory service registration and compared again in initialize **/
long _directoryServiceCallbackCookie = 0;
/** time of the last full stats dump; -1 until publishStats has run once **/
long _lastFullStatsDumpTime = -1;
/** number of resolver instances to create (see --queueSize) **/
int _resolverQueueSize = DEFAULT_RESOLVER_QUEUE_SIZE;
/** outgoing channel to the directory service and the stub bound to it (both created in initServer) **/
AsyncClientChannel _directoryServiceChannel;
DirectoryServiceServer.AsyncStub _directoryServiceStub;
/** Supplies the HTTP interface default taken from the shared crawl environment. */
@Override
protected String getDefaultHttpInterface() {
  return CrawlEnvironment.DEFAULT_HTTP_INTERFACE;
}
/** Supplies the HTTP port reserved for the DNS service in the crawl environment. */
@Override
protected int getDefaultHttpPort() {
  return CrawlEnvironment.DNS_SERVICE_HTTP_PORT;
}
/** Supplies the name of this service's main log file. */
@Override
protected String getDefaultLogFileName() {
  return "dnsservice.log";
}
/** Supplies the RPC interface default taken from the shared crawl environment. */
@Override
protected String getDefaultRPCInterface() {
  return CrawlEnvironment.DEFAULT_RPC_INTERFACE;
}
/** Supplies the RPC port reserved for the DNS service in the crawl environment. */
@Override
protected int getDefaultRPCPort() {
  return CrawlEnvironment.DNS_SERVICE_RPC_PORT;
}
/** Supplies the web application name registered for the DNS service. */
@Override
protected String getWebAppName() {
  return CrawlEnvironment.DNS_SERVICE_WEBAPP_NAME;
}
/**
 * Supplies the default data directory from the crawl environment.
 * The Override annotation below was disabled by the original author.
 */
//@Override
protected String getDefaultDataDir() {
  return CrawlEnvironment.DEFAULT_DATA_DIR;
}
/** Location of the good-names cache checkpoint inside the log directory. */
private File getGoodNamesCheckpointFileName() {
  return new File(
      getLogDirectory(),
      GOOD_NAMES_CACHE_CHECKPOINT_FILE);
}
/** Location of the bad-names cache checkpoint inside the log directory. */
private File getBadNamesCheckpointFileName() {
  return new File(
      getLogDirectory(),
      BAD_NAMES_CACHE_CHECKPOINT_FILE);
}
/**
 * Brings the service up: creates the three DNS log files, loads the servers
 * list, opens the RPC server channel, loads the DNS filters, opens the
 * channel to the directory service, preloads both DNS caches and finally
 * installs the stats and cache checkpoint timers.
 *
 * @return true when every step completed; false when no servers file was
 *         supplied or an IOException was raised along the way
 */
@Override
protected boolean initServer() {
try {
// one logger + daily rolling appender per log category
_DNSSuccessLog = new CustomLogger("DNSSuccessLog");
_DNSFailureLog = new CustomLogger("DNSFailureLog");
_DNSFailureDetailLog = new CustomLogger("DNSFailureDetailLog");
_DNSSuccessLog.addAppender(new DailyRollingFileAppender(new CustomLoggerLayout(),getLogDirectory() + "/dnsServiceDNSSuccess.log","yyyy-MM-dd"));
_DNSFailureLog.addAppender(new DailyRollingFileAppender(new CustomLoggerLayout(),getLogDirectory() + "/dnsServiceDNSFailures.log","yyyy-MM-dd"));
_DNSFailureDetailLog.addAppender(new DailyRollingFileAppender(new CustomLoggerLayout(),getLogDirectory() + "/dnsServiceDNSFailuresDetail.log","yyyy-MM-dd"));
// this server receives the resolver's query / failure / exception log callbacks
NIODNSResolver.setLogger(this);
if (_serversFile == null) {
LOG.fatal("Servers file (--servers) is NULL!");
return false;
}
LOG.info("Loading Servers File");
// parse server file
parseServersFile();
// create server channel ...
AsyncServerChannel channel = new AsyncServerChannel(this, this.getEventLoop(),this.getServerAddress(),this);
// register RPC services it supports ...
registerService(channel,DNSService.spec);
registerService(channel,DirectoryServiceCallback.spec);
channel.open();
// load filters ...
reloadDNSFilters();
// now register with the directory service ...
_directoryServiceChannel = new AsyncClientChannel(_eventLoop,new InetSocketAddress(0),new InetSocketAddress(_directoryServiceAddress,CrawlEnvironment.DIRECTORY_SERVICE_RPC_PORT),this);
_directoryServiceChannel.open();
_directoryServiceStub = new DirectoryServiceServer.AsyncStub(_directoryServiceChannel);
// load cache
LOG.info("Loading Good Item Cache");
preloadGoodHostDNSCache();
LOG.info("Loading Bad Item Cache");
preloadBadHostDNSCache();
// repeating stats timer (interval value 1000)
_statsTimer = new Timer(1000,true, new Timer.Callback() {
@Override
public void timerFired(Timer timer) {
publishStats();
}
});
// setup cache checkpoint timer ...
_checkpointTimer = new Timer(CACHE_CHECKPOINT_INTERVAL,true,new Timer.Callback() {
@Override
public void timerFired(Timer timer) {
LOG.info("Cache Checkpoint Timer Fired");
// good names cache: the filter decides which dumped items are loaded back
checkpointDNSCache(NIODNSResolver.getDNSCache(),getGoodNamesCheckpointFileName(),
new NIODNSCache.LoadFilter() {
// keep an item only when it has not expired, was touched within
// DEFAULT_MAX_CHECKPOINT_ITEM_AGE (or has lastTouchedTime == -1), has a
// root domain, and the no-cache filter (if any) reports Filter_NoAction
@Override
public boolean loadItem(String hostName, String ipAddress,String name, long expireTime,long lastTouchedTime) {
FilterResults filterResults = new FilterResults();
if (expireTime >= System.currentTimeMillis()) {
if (lastTouchedTime == -1 || (System.currentTimeMillis() - lastTouchedTime) < DEFAULT_MAX_CHECKPOINT_ITEM_AGE ) {
String rootDomainName = URLUtils.extractRootDomainName(hostName);
if (rootDomainName != null) {
return (_noCacheFilter == null || _noCacheFilter.filterItem(rootDomainName,hostName, null, null,filterResults) == FilterResult.Filter_NoAction);
}
}
}
return false;
}
// returns the rewritten name when the rewrite filter modifies the host,
// otherwise the host name unchanged
@Override
public String validateName(String hostName) {
FilterResults filterResults = new FilterResults();
String rootDomain = URLUtils.extractRootDomainName(hostName);
if (rootDomain != null) {
// rewrite if necessary
if (_rewriteFilter != null && _rewriteFilter.filterItem(rootDomain,hostName, null, null, filterResults) == FilterResult.Filter_Modified) {
// LOG.info("Rewrote:" + hostName + " to:" + filterResults.getRewrittenDomainName());
return filterResults.getRewrittenDomainName();
}
}
return hostName;
}
});
// bad names cache is checkpointed without a load filter
checkpointDNSCache(NIODNSResolver.getBadHostCache(),getBadNamesCheckpointFileName(),null);
}
});
// hand both timers to the event loop
_eventLoop.setTimer(_statsTimer);
_eventLoop.setTimer(_checkpointTimer);
return true;
}
catch (IOException e) {
LOG.error(CCStringUtils.stringifyException(e));
}
// only reached after an IOException
return false;
}
/**
 * Parses the command line options specific to this service:
 * <ul>
 * <li>{@code --servers <file>} - resource name or path of the DNS servers list</li>
 * <li>{@code --directoryserver <host>} - address of the directory service</li>
 * <li>{@code --queueSize <n>} - number of resolver instances to create (must be >= 1)</li>
 * </ul>
 * Unknown options are ignored. A missing, non numeric or non positive
 * {@code --queueSize} value is reported and rejected instead of surfacing as
 * an unchecked exception.
 *
 * @param argv the raw command line arguments
 * @return true only when both a servers file and a resolvable directory
 *         server address were supplied and every option value was valid
 */
@Override
protected boolean parseArguements(String[] argv) {
  for (int i = 0; i < argv.length; ++i) {
    String option = argv[i];
    boolean hasValue = (i + 1 < argv.length);

    if (option.equalsIgnoreCase("--servers")) {
      if (hasValue) {
        _serversFile = argv[++i];
      }
    }
    else if (option.equalsIgnoreCase("--directoryserver")) {
      if (hasValue) {
        try {
          _directoryServiceAddress = InetAddress.getByName(argv[++i]);
        } catch (UnknownHostException e) {
          LOG.error(CCStringUtils.stringifyException(e));
        }
      }
    }
    else if (option.equalsIgnoreCase("--queueSize")) {
      if (!hasValue) {
        LOG.error("Missing value for --queueSize");
        return false;
      }
      String value = argv[++i];
      try {
        int queueSize = Integer.parseInt(value);
        // the resolver queue cannot be allocated with a capacity below one
        if (queueSize < 1) {
          LOG.error("Invalid value for --queueSize (must be >= 1):" + value);
          return false;
        }
        _resolverQueueSize = queueSize;
      } catch (NumberFormatException e) {
        LOG.error("Invalid value for --queueSize (not a number):" + value);
        return false;
      }
    }
  }
  return (_serversFile != null && _directoryServiceAddress != null);
}
/**
 * Prints the command line options understood by parseArguements of this
 * service. Options handled by the base server class are not listed here.
 */
@Override
protected void printUsage() {
  System.out.println("DNS Service options:");
  System.out.println("  --servers <file>          (required) resource name or path of a file listing one DNS server per line ('#' starts a comment line)");
  System.out.println("  --directoryserver <host>  (required) host name or address of the directory service");
  System.out.println("  --queueSize <n>           (optional) number of resolver instances to create, default " + DEFAULT_RESOLVER_QUEUE_SIZE);
}
/**
 * This service starts no daemons of its own.
 *
 * @return always true
 */
@Override
protected boolean startDaemons() {
  return true;
}
/** Counterpart of startDaemons; there is nothing to stop. */
@Override
protected void stopDaemons() {
  // intentionally empty
}
/**
 * Query client that additionally carries the state doQuery needs to re-issue
 * a request after a first SERVER_FAILURE.
 */
private static abstract class NIODNSQueryClientWithHopCount implements NIODNSQueryClient {
  /** host name submitted to the resolver; left null when the request was answered from the cache **/
  protected String _hostName;
  /** whether the resolver was asked to bypass the cache for this request **/
  protected boolean _skipCache = false;
  /** incremented every time a SERVER_FAILURE callback is evaluated **/
  protected int _hopCount = 0;
}
/** number of doQuery requests accepted but not yet completed: incremented on entry to doQuery, decremented right before each completeRequest call **/
AtomicInteger _activeAsyncRequests = new AtomicInteger();
/**
 * RPC entry point for a DNS lookup.
 * <p>
 * The requested host name is first passed through the rewrite filter, then
 * the no-cache filter decides whether the cache may be consulted. A cache
 * hit is delivered through a zero-delay timer on the event loop; otherwise
 * the request is handed to the next resolver and the returned future is
 * tracked in the client's session so it can be cancelled on disconnect.
 * <p>
 * A SERVER_FAILURE on the first attempt is retried once on the next resolver.
 * <p>
 * Changes versus the previous revision:
 * <ul>
 * <li>a request arriving on a channel without a registered session is now
 * completed with Error_RequestFailed instead of raising a
 * NullPointerException (which also left the active request counter
 * incremented);</li>
 * <li>when a first-hop SERVER_FAILURE cannot be retried because the session
 * is gone, the response status is now set to SERVER_FAILURE instead of being
 * left unset.</li>
 * </ul>
 *
 * @param rpcContext the request/response context of the query
 * @throws RPCException if completing the request fails on the error paths
 *         of this method
 */
@Override
public void doQuery(final AsyncContext<DNSQueryInfo, DNSQueryResponse> rpcContext) throws RPCException {
  _activeAsyncRequests.incrementAndGet();

  NIODNSQueryClientWithHopCount queryClient = new NIODNSQueryClientWithHopCount() {

    @Override
    public void AddressResolutionFailure(NIODNSResolver eventSource, String hostName, Status status, String errorDesc) {
      // a null event source means the answer came from the cache
      rpcContext.getOutput().setSourceServer((eventSource != null) ? eventSource.toString() : "CACHE");

      if (status == Status.RESOLVER_FAILURE) {
        rpcContext.getOutput().setStatus(DNSQueryResponse.Status.RESOLVER_FAILURE);
      }
      else if (status == Status.SERVER_FAILURE) {
        // if first hop on server fail .. try a different resolver ...
        if (this._hopCount++ == 0 && this._hostName != null) {
          LOG.info("Rescheduling Resolution of Host on first hop with SERVFAIL result:" + rpcContext.getInput().getHostName());
          DNSServiceSession session = getSessionForClient(rpcContext.getClientChannel());
          if (session != null) {
            try {
              session.queuedWorkItems.add(getNextResolver().resolve(this, _hostName, _skipCache, rpcContext.getInput().getIsHighPriorityRequest(), DEFAULT_DNS_TIMEOUT));
            } catch (IOException e) {
              LOG.error(CCStringUtils.stringifyException(e));
              rpcContext.setErrorDesc(CCStringUtils.stringifyException(e));
              rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed);
              try {
                _activeAsyncRequests.decrementAndGet();
                rpcContext.completeRequest();
              } catch (RPCException e1) {
                LOG.error(CCStringUtils.stringifyException(e1));
              }
            }
            // first SERVER_FAILURE triggers a second try
            return;
          }
          LOG.error("Session Object NULL when Servicing AddressResolutionFailure for Host:" + hostName);
        }
        // no retry was scheduled (second hop, cache answer or missing session):
        // report the server failure to the caller
        rpcContext.getOutput().setStatus(DNSQueryResponse.Status.SERVER_FAILURE);
      }

      // the RPC itself succeeded; the DNS outcome travels in the response status
      rpcContext.getOutput().setErrorDesc(errorDesc);
      rpcContext.setStatus(AsyncRequest.Status.Success);
      try {
        _activeAsyncRequests.decrementAndGet();
        rpcContext.completeRequest();
      } catch (RPCException e) {
        LOG.error(CCStringUtils.stringifyException(e));
      }
    }

    @Override
    public void AddressResolutionSuccess(NIODNSResolver eventSource, String hostName, String cName, InetAddress address, long addressTTL) {
      rpcContext.getOutput().setSourceServer((eventSource != null) ? eventSource.toString() : "CACHE");
      rpcContext.getOutput().setStatus(DNSQueryResponse.Status.SUCCESS);
      rpcContext.getOutput().setAddress(IPAddressUtils.IPV4AddressToInteger(address.getAddress()));
      rpcContext.getOutput().setTtl(addressTTL);
      if (cName != null) {
        rpcContext.getOutput().setCname(cName);
      }
      rpcContext.setStatus(AsyncRequest.Status.Success);
      try {
        _activeAsyncRequests.decrementAndGet();
        rpcContext.completeRequest();
      } catch (RPCException e) {
        LOG.error(CCStringUtils.stringifyException(e));
      }
    }

    @Override
    public void DNSResultsAvailable() {
    }

    @Override
    public void done(NIODNSResolver eventSource, Future<NIODNSQueryResult> future) {
      rpcContext.getOutput().setSourceServer((eventSource != null) ? eventSource.toString() : "CACHE");
      // the work item is finished, stop tracking it for cancellation
      DNSServiceSession session = getSessionForClient(rpcContext.getClientChannel());
      if (session != null) {
        session.queuedWorkItems.remove(future);
      }
    }
  };

  try {
    String dnsName = rpcContext.getInput().getHostName();
    FilterResults filterResults = new FilterResults();
    String rootName = URLUtils.extractRootDomainName(dnsName);

    if (rootName != null) {
      // rewrite if necessary
      if (_rewriteFilter != null && _rewriteFilter.filterItem(rootName, dnsName, null, null, filterResults) == FilterResult.Filter_Modified) {
        dnsName = filterResults.getRewrittenDomainName();
      }
    }

    boolean skipCache = (_noCacheFilter != null && rootName != null && _noCacheFilter.filterItem(rootName, dnsName, null, null, filterResults) == FilterResult.Filter_Accept);
    if (skipCache) {
      LOG.info("Skiiping Cache check for name:" + dnsName);
    }

    final NIODNSQueryResult cachedResult = (!skipCache) ? NIODNSResolver.checkCache(queryClient, dnsName) : null;

    if (cachedResult != null) {
      // deliver the cached answer from a zero-delay timer callback
      _eventLoop.setTimer(new Timer(0, false, new Timer.Callback() {
        @Override
        public void timerFired(Timer timer) {
          _directCacheHits++;
          cachedResult.fireCallback();
        }
      }));
    }
    else {
      DNSServiceSession session = getSessionForClient(rpcContext.getClientChannel());
      if (session == null) {
        // without a session the work item could not be tracked or cancelled;
        // fail the request cleanly instead of dereferencing null
        String errorDesc = "No active session for client channel. Rejecting query for host:" + dnsName;
        LOG.error(errorDesc);
        rpcContext.setErrorDesc(errorDesc);
        rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed);
        _activeAsyncRequests.decrementAndGet();
        rpcContext.completeRequest();
        return;
      }
      // remember what was asked so a first-hop SERVER_FAILURE can be retried
      queryClient._hostName = dnsName;
      queryClient._skipCache = skipCache;
      session.queuedWorkItems.add(getNextResolver().resolve(queryClient, dnsName, skipCache, rpcContext.getInput().getIsHighPriorityRequest(), DEFAULT_DNS_TIMEOUT));
    }
  }
  catch (IOException e) {
    LOG.error(CCStringUtils.stringifyException(e));
    rpcContext.setErrorDesc(CCStringUtils.stringifyException(e));
    rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed);
    _activeAsyncRequests.decrementAndGet();
    rpcContext.completeRequest();
  }
}
/** Registers a new session object for the client channel that just connected. */
@Override
public void IncomingClientConnected(AsyncClientChannel channel) {
  LOG.info("Incoming Client Connected");
  DNSServiceSession newSession = new DNSServiceSession(channel);
  _sessions.add(newSession);
}
/**
 * Cancels whatever work is still queued for the disconnected client and
 * drops its session. Channels without a session are ignored.
 */
@Override
public void IncomingClientDisconnected(AsyncClientChannel channel) {
  DNSServiceSession departing = getSessionForClient(channel);
  if (departing == null) {
    return;
  }
  departing.cancelWorkItems();
  _sessions.remove(departing);
}
/***************************************************************/
/* Internal Implementation */
/***************************************************************/
/**
 * Stats timer callback. At most once per FULL_STATS_DUMP_INTERVAL it locks
 * the good-names cache, collects its terminal IP nodes, sorts them by
 * descending time-to-live value and logs the leading entries.
 * <p>
 * The first invocation only records the current time, so the first dump
 * happens one full interval after startup.
 * <p>
 * The heading now states the number of nodes actually logged; it previously
 * claimed "Top 100" while up to 1000 nodes were written.
 */
private void publishStats() {
  // upper bound on the number of nodes written per dump
  final int maxHotNodesToLog = 1000;

  long now = System.currentTimeMillis();
  if (_lastFullStatsDumpTime == -1) {
    _lastFullStatsDumpTime = now;
    return;
  }
  if (now - _lastFullStatsDumpTime < FULL_STATS_DUMP_INTERVAL) {
    return;
  }

  long timeStart = System.currentTimeMillis();
  LOG.info("Dumping Hot IP nodes - Locking Cache");
  NIODNSCache cache = NIODNSResolver.getDNSCache();
  synchronized (cache) {
    List<Node> terminalIPNodes = new ArrayList<Node>();
    cache.collectTerminalIPNodes(terminalIPNodes);
    LOG.info("Sorting nodes");
    // descending order of getTimeToLive()
    Collections.sort(terminalIPNodes, new Comparator<NIODNSCache.Node>() {
      @Override
      public int compare(Node o1, Node o2) {
        return o1.getTimeToLive() > o2.getTimeToLive() ? -1 : o1.getTimeToLive() < o2.getTimeToLive() ? 1 : 0;
      }
    });
    int maxNodes = Math.min(maxHotNodesToLog, terminalIPNodes.size());
    LOG.info("Top " + maxNodes + " Hot Nodes");
    for (int i = 0; i < maxNodes; ++i) {
      Node node = terminalIPNodes.get(i);
      // NOTE(review): the value labelled "HitCount" is getTimeToLive() - confirm the intended metric
      LOG.info("Node:" + node.getFullName() + " CName:" + node.getCannonicalName() + " HitCount:" + node.getTimeToLive());
    }
  }
  long timeEnd = System.currentTimeMillis();
  LOG.info("Full Stat Dump took:" + (timeEnd - timeStart) + "MS");
  _lastFullStatsDumpTime = System.currentTimeMillis();
}
/**
 * Looks up the session bound to the given channel (identity comparison).
 *
 * @param channel the client channel to look for
 * @return the matching session, or null when the channel has none
 */
private DNSServiceSession getSessionForClient(AsyncClientChannel channel) {
  DNSServiceSession match = null;
  for (DNSServiceSession candidate : _sessions) {
    if (candidate.channel == channel) {
      match = candidate;
      break;
    }
  }
  return match;
}
/** one session per connected client channel: added in IncomingClientConnected, removed in IncomingClientDisconnected **/
private LinkedList<DNSServiceSession> _sessions = new LinkedList<DNSServiceSession>();
/**
 * Per-client bookkeeping: the client's channel plus the resolver futures
 * that are still outstanding on its behalf.
 */
private class DNSServiceSession {

  /** channel this session belongs to **/
  public AsyncClientChannel channel;
  /** futures returned by the resolvers for requests of this client **/
  public LinkedList<Future<NIODNSQueryResult>> queuedWorkItems = new LinkedList<Future<org.commoncrawl.io.NIODNSQueryResult>>();

  public DNSServiceSession(AsyncClientChannel channel) {
    this.channel = channel;
  }

  /** Cancels (without interrupting) every outstanding future and forgets them. */
  public void cancelWorkItems() {
    LOG.info("Cancelling Work Items for Client");
    for (Future<NIODNSQueryResult> pending : queuedWorkItems) {
      pending.cancel(false);
    }
    queuedWorkItems.clear();
    LOG.info("DONE -Cancelling Work Items for Client");
  }
}
/**
 * Picks the resolver at the head of the resolver queue, bumps its queued
 * count and re-inserts it so the queue can re-position it (presumably the
 * queue is ordered by queued count - confirm against NIODNSResolver).
 *
 * @return the resolver that should service the next request
 */
NIODNSResolver getNextResolver() {
  NIODNSResolver head = _resolverQueue.remove();
  head.incQueuedCount();
  _resolverQueue.add(head);
  return head;
}
/** Appends a "host,errorDescription" line to the DNS failure log. */
@Override
public void logDNSFailure(String hostName, String errorDescription) {
  String logLine = hostName + "," + errorDescription;
  synchronized (_DNSFailureLog) {
    _DNSFailureLog.error(logLine);
  }
}
/**
 * Appends a "host,/ip,ttl,cname" line to the DNS success log.
 * <p>
 * The address is written as "/" followed by its literal IP. The previous
 * code used InetAddress.toString(), which yields "hostname/ip" whenever the
 * address object carries a host name; such lines do not contain the ",/"
 * marker that preloadGoodHostDNSCache searches for and would be skipped
 * when the cache is rebuilt from this log.
 *
 * @param hostName the name that was resolved
 * @param address  the resolved address
 * @param ttl      the time-to-live value reported with the answer
 * @param opCName  optional canonical name (written as "null" when absent)
 */
@Override
public void logDNSQuery(String hostName, InetAddress address, long ttl,String opCName) {
  String logLine = hostName + ",/" + address.getHostAddress() + "," + ttl + "," + opCName;
  synchronized(_DNSSuccessLog) {
    _DNSSuccessLog.info(logLine);
  }
}
/** Appends a "host,exceptionDescription" line to the DNS failure detail log. */
@Override
public void logDNSException(String hostName, String exceptionDesc) {
  String logLine = hostName + "," + exceptionDesc;
  synchronized (_DNSFailureDetailLog) {
    _DNSFailureDetailLog.info(logLine);
  }
}
/**
 * ByteArrayOutputStream that hands out its internal backing array directly,
 * avoiding the copy made by toByteArray().
 */
private static class DirectByteBufferAccessStream extends ByteArrayOutputStream {

  /** @param initialSize initial capacity of the backing array */
  public DirectByteBufferAccessStream(int initialSize) {
    super(initialSize);
  }

  /**
   * @return the live backing array; only the first size() bytes hold
   *         written data
   */
  public byte[] getBuffer() {
    return this.buf;
  }
}
/**
 * Checkpoints a DNS cache: while holding the cache lock, the name tree is
 * dumped into memory (terminal nodes whose time-to-live value is in the past
 * are left out), the cache is cleared and reloaded from that dump through
 * the supplied filter, and a background thread is started to write the dump
 * to the checkpoint file.
 * <p>
 * Changes versus the previous revision:
 * <ul>
 * <li>a failure to open the checkpoint file no longer triggers a
 * NullPointerException while closing the stream;</li>
 * <li>the logged dump size is the number of bytes written rather than the
 * capacity of the backing array;</li>
 * <li>when writing fails the previous checkpoint (kept as ".OLD") is
 * restored;</li>
 * <li>the failure message names the checkpoint file instead of always
 * saying "Good Cache".</li>
 * </ul>
 *
 * @param cache              the cache to checkpoint
 * @param checkpointFileName destination file of the checkpoint
 * @param filter             load filter applied when the cache is reloaded
 *                           from the dump (callers in this class pass null
 *                           for the bad host cache)
 */
private void checkpointDNSCache(final NIODNSCache cache, final File checkpointFileName, NIODNSCache.LoadFilter filter) {
  LOG.info("Starting Cache Checkpoint");
  final DirectByteBufferAccessStream streamOut = new DirectByteBufferAccessStream(1024 * 1024);
  try {
    // lock the cache for the duration of the atomic operation ...
    synchronized (cache) {
      long timeStart = System.currentTimeMillis();
      cache.dumpNameTree(streamOut, new NIODNSCache.NodeDumpFilter() {
        @Override
        public boolean dumpTerminalNode(Node node) {
          return node.getTimeToLive() >= System.currentTimeMillis();
        }
      });
      long timeEnd = System.currentTimeMillis();

      // the backing array may be larger than the data; dataLength is authoritative
      final byte dataBuffer[] = streamOut.getBuffer();
      final int dataLength = streamOut.size();
      LOG.info("Name Tree Dump took:" + (timeEnd - timeStart) + "MS and produced DataBuffer of size:" + dataLength);

      LOG.info("Reloading cache from data buffer ");
      timeStart = System.currentTimeMillis();
      cache.clear();
      // explicit collection request kept from the original implementation
      System.gc();
      cache.loadTree(new ByteArrayInputStream(dataBuffer, 0, dataLength), filter);
      timeEnd = System.currentTimeMillis();
      LOG.info("Reload took:" + (timeEnd - timeStart) + "MS");

      LOG.info("Starting cache writer thread");
      new Thread(new Runnable() {
        @Override
        public void run() {
          writeCheckpointFile(checkpointFileName, dataBuffer, dataLength);
        }
      }).start();
    }
  } catch (IOException e) {
    LOG.error("Cache Checkpoint to:" + checkpointFileName + " Failed with Exception:" + CCStringUtils.stringifyException(e));
  }
}

/**
 * Writes the first {@code length} bytes of {@code data} to the checkpoint
 * file. An existing checkpoint is first renamed to "&lt;name&gt;.OLD"; if the
 * write fails the partial file is deleted and the old checkpoint is moved
 * back into place.
 */
private void writeCheckpointFile(File checkpointFile, byte[] data, int length) {
  LOG.info("Cache writer thread writing new checkpoint file to path:" + checkpointFile);
  File oldCheckpointFile = new File(checkpointFile.getParentFile(), checkpointFile.getName() + ".OLD");

  boolean backedUp = false;
  if (checkpointFile.exists()) {
    oldCheckpointFile.delete();
    backedUp = checkpointFile.renameTo(oldCheckpointFile);
    if (!backedUp) {
      LOG.error("Failed to rename existing checkpoint file to:" + oldCheckpointFile);
    }
  }

  boolean written = false;
  FileOutputStream outputStream = null;
  try {
    outputStream = new FileOutputStream(checkpointFile);
    outputStream.write(data, 0, length);
    outputStream.flush();
    written = true;
  }
  catch (IOException e) {
    LOG.error(CCStringUtils.stringifyException(e));
  }
  finally {
    // outputStream is null when the file could not be opened
    if (outputStream != null) {
      try {
        outputStream.close();
      } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
      }
    }
  }

  if (written) {
    LOG.info("Cache Writer Thread Succesfully wrote Checkpoint File:" + checkpointFile);
  }
  else {
    // drop the partial file and fall back to the previous checkpoint
    checkpointFile.delete();
    if (backedUp && !oldCheckpointFile.renameTo(checkpointFile)) {
      LOG.error("Failed to restore previous checkpoint file from:" + oldCheckpointFile);
    }
  }
  LOG.info("Checkpoint File Writer Thread Exiting");
}
/**
 * Loads a DNS cache from a checkpoint file.
 * <p>
 * I/O failures are now propagated, as the signature always declared. They
 * used to be logged and swallowed here, which made the contextual error
 * handling in the callers (both catch IOException) unreachable.
 *
 * @param cache          the cache to populate
 * @param checkpointFile the checkpoint file to read
 * @param filter         load filter handed to the cache's loadTree
 * @throws IOException if the file cannot be opened or read
 */
private void loadCacheFromCheckpointFile(NIODNSCache cache, File checkpointFile, NIODNSCache.LoadFilter filter) throws IOException {
  LOG.info("Pre-Loading DNS Cache from Checkpoint File:" + checkpointFile);
  InputStream stream = new BufferedInputStream(new FileInputStream(checkpointFile));
  try {
    long timeStart = System.currentTimeMillis();
    LOG.info("Starting Cache Load");
    cache.loadTree(stream, filter);
    long timeEnd = System.currentTimeMillis();
    LOG.info("Load Took:" + (timeEnd - timeStart) + "MS");
  }
  finally {
    // a close failure must not mask the outcome of the load itself
    try {
      stream.close();
    } catch (IOException e) {
      LOG.error(CCStringUtils.stringifyException(e));
    }
  }
}
/**
 * Warms the good-names cache at startup. When a checkpoint file exists it
 * is loaded (dropping expired items, items without a root domain and items
 * the no-cache filter acts on, and applying the rewrite filter to names);
 * otherwise the cache is rebuilt from the DNS success log.
 */
private void preloadGoodHostDNSCache() {
  File checkpointFile = getGoodNamesCheckpointFileName();
  if (!checkpointFile.exists()) {
    preloadGoodHostCacheFromLogFile(new File(getLogDirectory() + "/dnsServiceDNSSuccess.log"));
    return;
  }
  try {
    loadCacheFromCheckpointFile(NIODNSResolver.getDNSCache(), checkpointFile, new NIODNSCache.LoadFilter() {
      @Override
      public boolean loadItem(String hostName, String ipAddress, String name, long expireTime, long lastTouchedTime) {
        FilterResults filterResults = new FilterResults();
        if (expireTime >= System.currentTimeMillis()) {
          String rootName = URLUtils.extractRootDomainName(hostName);
          if (rootName != null) {
            return (_noCacheFilter == null || _noCacheFilter.filterItem(rootName, hostName, null, null, filterResults) == FilterResult.Filter_NoAction);
          }
        }
        return false;
      }

      @Override
      public String validateName(String hostName) {
        FilterResults filterResults = new FilterResults();
        String rootName = URLUtils.extractRootDomainName(hostName);
        if (rootName != null) {
          // rewrite if necessary
          if (_rewriteFilter != null && _rewriteFilter.filterItem(rootName, hostName, null, null, filterResults) == FilterResult.Filter_Modified) {
            return filterResults.getRewrittenDomainName();
          }
        }
        return hostName;
      }
    });
  }
  catch (IOException e) {
    LOG.error("Good Host Cache Load from Checkpoint File failed with:" + CCStringUtils.stringifyException(e));
  }
}

/**
 * Rebuilds the good-names cache from a DNS success log whose lines have the
 * form "host,/ip,ttl[,cname]".
 * <p>
 * Compared to the previous inline version: a log file that cannot be opened
 * no longer causes a NullPointerException while closing the reader, lines
 * with a non numeric ttl are skipped instead of aborting startup, and a
 * cname of "null" (what logDNSQuery writes for a missing cname) or "" is
 * treated as absent.
 */
private void preloadGoodHostCacheFromLogFile(File logFilePath) {
  LOG.info("Pre-loading DNS Cache from Log File:" + logFilePath);
  JVMStats.dumpMemoryStats();
  NIODNSCache cache = NIODNSResolver.getDNSCache();
  BufferedReader lineReader = null;
  try {
    lineReader = new BufferedReader(new FileReader(logFilePath));
    int lineCount = 0;
    String line = null;
    while ((line = lineReader.readLine()) != null) {
      // ",/" separates the host name from the address
      int firstDelimiterIdx = line.indexOf(",/");
      int secondDelimiterIdx = (firstDelimiterIdx != -1) ? line.indexOf(',', firstDelimiterIdx + 2) : -1;

      if (secondDelimiterIdx != -1) {
        int thirdDelimiterIdx = line.indexOf(',', secondDelimiterIdx + 1);
        String hostName = line.substring(0, firstDelimiterIdx);
        String ipAddress = line.substring(firstDelimiterIdx + 2, secondDelimiterIdx);
        String cname = null;
        String ttlText;
        if (thirdDelimiterIdx != -1) {
          ttlText = line.substring(secondDelimiterIdx + 1, thirdDelimiterIdx);
          cname = line.substring(thirdDelimiterIdx + 1);
          if (cname.length() == 0 || cname.equals("null")) {
            cname = null;
          }
        }
        else {
          ttlText = line.substring(secondDelimiterIdx + 1);
        }

        try {
          long ttl = Long.parseLong(ttlText);
          int ipAddressInteger = IPAddressUtils.IPV4AddressStrToInteger(ipAddress);

          FilterResults filterResults = new FilterResults();
          String rootName = URLUtils.extractRootDomainName(hostName);
          if (rootName != null) {
            // rewrite if necessary
            if (_rewriteFilter != null && _rewriteFilter.filterItem(rootName, hostName, null, null, filterResults) == FilterResult.Filter_Modified) {
              hostName = filterResults.getRewrittenDomainName();
            }
          }
          boolean skipCache = (_noCacheFilter != null && rootName != null && _noCacheFilter.filterItem(rootName, hostName, null, null, filterResults) == FilterResult.Filter_Accept);
          if (skipCache) {
            LOG.info("Skiiping Cache check for name:" + hostName);
          }
          else {
            cache.cacheIPAddressForHost(hostName, ipAddressInteger, ttl, cname);
          }
        }
        catch (NumberFormatException e) {
          // one corrupt line must not abort the whole preload
          LOG.error("Skipping malformed DNS log line:" + line);
        }
      }
      if (++lineCount % 1000 == 0) {
        LOG.info("Processed " + lineCount + " lines");
      }
    }
    LOG.info("Done Processing Cache Log. Total Lines Processed:" + lineCount + " DNSCacheNode Count:" + cache.getActiveNodeCount());
    JVMStats.dumpMemoryStats();
    cache.dumpIPAddressTree(System.out);
  }
  catch (IOException e) {
    LOG.error(CCStringUtils.stringifyException(e));
  }
  finally {
    // lineReader is null when the log file could not be opened
    if (lineReader != null) {
      try {
        lineReader.close();
      } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
      }
    }
  }
}
/**
 * Warms the bad-host cache at startup. When a checkpoint file exists its
 * unexpired items are loaded; otherwise the cache is rebuilt from the DNS
 * failure log, whose lines have the form "host,errorCode". Hosts are cached
 * with address 0 and an expiry of now plus the NXDOMAIN or server-failure
 * lifetime, depending on the error code.
 * <p>
 * A failure log that cannot be opened no longer causes a
 * NullPointerException while closing the reader.
 */
private void preloadBadHostDNSCache() {
  File checkpointFile = getBadNamesCheckpointFileName();
  if (checkpointFile.exists()) {
    try {
      loadCacheFromCheckpointFile(NIODNSResolver.getBadHostCache(), checkpointFile, new NIODNSCache.LoadFilter() {
        @Override
        public boolean loadItem(String hostName, String ipAddress, String name, long expireTime, long lastTouchedTime) {
          return (expireTime >= System.currentTimeMillis());
        }

        @Override
        public String validateName(String hostName) {
          return hostName;
        }
      });
    }
    catch (IOException e) {
      LOG.error("Bad Host Cache Load from Checkpoint File failed with:" + CCStringUtils.stringifyException(e));
    }
    return;
  }

  File logFilePath = new File(getLogDirectory() + "/dnsServiceDNSFailures.log");
  LOG.info("Pre-loading Bad Host DNS Cache from Log File:" + logFilePath);
  JVMStats.dumpMemoryStats();
  NIODNSCache cache = NIODNSResolver.getBadHostCache();
  BufferedReader lineReader = null;
  try {
    lineReader = new BufferedReader(new FileReader(logFilePath));
    int lineCount = 0;
    String line = null;
    while ((line = lineReader.readLine()) != null) {
      // everything after the last comma is the error code
      int lastDelimiterIdx = line.lastIndexOf(",");
      if (lastDelimiterIdx != -1) {
        String hostName = line.substring(0, lastDelimiterIdx);
        String errorCode = line.substring(lastDelimiterIdx + 1);
        if (errorCode.length() != 0) {
          long lifetime = errorCode.equals("NXDOMAIN")
              ? NIODNSResolver.NXDOMAIN_FAIL_BAD_HOST_LIFETIME
              : NIODNSResolver.SERVER_FAIL_BAD_HOST_LIFETIME;
          cache.cacheIPAddressForHost(hostName, 0, System.currentTimeMillis() + lifetime, null);
        }
      }
      if (++lineCount % 1000 == 0) {
        LOG.info("Processed " + lineCount + " lines");
      }
    }
    LOG.info("Done Processing Bad Host Cache Log. Total Lines Processed:" + lineCount + " DNSCacheNode Count:" + cache.getActiveNodeCount());
    JVMStats.dumpMemoryStats();
  }
  catch (IOException e) {
    LOG.error(CCStringUtils.stringifyException(e));
  }
  finally {
    // lineReader is null when the log file could not be opened
    if (lineReader != null) {
      try {
        lineReader.close();
      } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
      }
    }
  }
}
/**
 * Reads the DNS servers list named by the --servers argument and fills the
 * resolver queue with _resolverQueueSize resolvers, assigning servers round
 * robin.
 * <p>
 * The list is looked up as a resource of the Hadoop configuration first and
 * as a file path second. One server (name or address) is expected per line;
 * lines starting with '#' are comments.
 * <p>
 * Changes versus the previous revision: the input is closed after reading;
 * lines are trimmed and blank lines skipped (an empty name passed to
 * InetAddress.getByName resolves to the loopback address, so a blank line
 * silently added localhost as a DNS server); a resolver queue size below
 * one is reported as an IOException instead of an IllegalArgumentException
 * from the queue constructor.
 *
 * @throws IOException if the list cannot be read, a server name cannot be
 *         resolved, the list is empty or the queue size is invalid
 */
void parseServersFile() throws IOException {
  LOG.info("Loading Servers File from:" + _serversFile);

  InputStream stream = null;
  URL resourceURL = CrawlEnvironment.getHadoopConfig().getResource(_serversFile);
  if (resourceURL != null) {
    stream = resourceURL.openStream();
  }
  // try as filename
  else {
    LOG.info("Could not load resource as an URL. Trying as an absolute pathname");
    stream = new FileInputStream(new File(_serversFile));
  }

  ArrayList<String> servers = Lists.newArrayList();
  BufferedReader reader = new BufferedReader(new InputStreamReader(new BufferedInputStream(stream)));
  try {
    LOG.info("Loading servers file");
    String line = null;
    while ((line = reader.readLine()) != null) {
      String dnsServerAddress = line.trim();
      if (dnsServerAddress.length() == 0 || dnsServerAddress.startsWith("#")) {
        continue;
      }
      servers.add(InetAddress.getByName(dnsServerAddress).getHostAddress());
    }
  }
  finally {
    // closing the reader also closes the underlying stream
    try {
      reader.close();
    } catch (IOException e) {
      LOG.error(CCStringUtils.stringifyException(e));
    }
  }

  LOG.info("Servers list is:" + servers);
  if (servers.size() == 0) {
    throw new IOException("Empty Servers List!");
  }
  if (_resolverQueueSize < 1) {
    throw new IOException("Invalid resolver queue size:" + _resolverQueueSize);
  }

  // allocate resolver queue
  _resolverQueue = new PriorityQueue<NIODNSResolver>(_resolverQueueSize);
  // populate resolvers queue
  for (int i = 0; i < _resolverQueueSize; ++i) {
    _resolverQueue.add(new NIODNSAsyncResolver(_eventLoop, servers.get(i % servers.size())));
  }
}
/**
 * Directory service callback: invoked on the callback channel after this
 * server registered. When the cookie in the request matches the one sent
 * with the registration, the request is completed and a subscription to
 * "/lists/dns_.*" is issued.
 * <p>
 * A request carrying a different cookie is now completed with
 * Error_RequestFailed; it previously was never completed at all.
 *
 * @param rpcContext the request context carrying the registration info
 * @throws RPCException if completing the request or subscribing fails
 */
@Override
public void initialize(AsyncContext<DirectoryServiceRegistrationInfo, NullMessage> rpcContext) throws RPCException {
  LOG.info("Received Initialization Request on Callback Channel");

  if (rpcContext.getInput().getRegistrationCookie() != _directoryServiceCallbackCookie) {
    String errorDesc = "Registration Cookie Mismatch! Ignoring Initialization Request";
    LOG.error(errorDesc);
    rpcContext.setErrorDesc(errorDesc);
    rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed);
    rpcContext.completeRequest();
    return;
  }

  LOG.info("Cookies Match! Sending Subscription information");
  rpcContext.completeRequest();

  DirectoryServiceSubscriptionInfo subscription = new DirectoryServiceSubscriptionInfo();
  subscription.setSubscriptionPath("/lists/dns_.*");
  LOG.info("Subscribing to /lists/dns_.*");
  _directoryServiceStub.subscribe(subscription, new AsyncRequest.Callback<DirectoryServiceSubscriptionInfo, DirectoryServiceItemList>() {
    @Override
    public void requestComplete(AsyncRequest<DirectoryServiceSubscriptionInfo, DirectoryServiceItemList> request) {
      if (request.getStatus() == AsyncRequest.Status.Success) {
        LOG.info("Subscription Successfull!");
      }
      else {
        LOG.info("Subscription Failed!");
      }
    }
  });
}
/**
 * Directory service callback fired when a subscribed item changed: the DNS
 * filters are reloaded before the request is acknowledged.
 */
@Override
public void itemChanged(AsyncContext<DirectoryServiceItemList, NullMessage> rpcContext) throws RPCException {
  LOG.info("Received item changed from directory service");
  // reload first, acknowledge afterwards
  reloadDNSFilters();
  rpcContext.completeRequest();
}
/**
 * Called once the channel to the directory service is up: generates a fresh
 * callback cookie and registers this server ("host:port" of its RPC address)
 * with the directory service.
 */
@Override
public void OutgoingChannelConnected(AsyncClientChannel channel) {
  LOG.info("Connected to Directory Server. Registering for Callbacks");

  DirectoryServiceRegistrationInfo info = new DirectoryServiceRegistrationInfo();
  // the cookie is echoed back in initialize() and compared there
  _directoryServiceCallbackCookie = System.currentTimeMillis();
  String connectionString = getServerAddress().getAddress().getHostAddress() + ":" + getServerAddress().getPort();
  info.setConnectionString(connectionString);
  info.setRegistrationCookie(_directoryServiceCallbackCookie);
  info.setConnectionName("DNS Service");

  AsyncRequest.Callback<DirectoryServiceRegistrationInfo, NullMessage> onRegistered =
      new AsyncRequest.Callback<DirectoryServiceRegistrationInfo, NullMessage>() {
        @Override
        public void requestComplete(AsyncRequest<DirectoryServiceRegistrationInfo, NullMessage> request) {
          LOG.info("Received Registration Compelte Callback from Directory Server with Status:" + request.getStatus());
        }
      };

  try {
    _directoryServiceStub.register(info, onRegistered);
  } catch (RPCException e) {
    LOG.error(CCStringUtils.stringifyException(e));
  }
}
/**
 * Called when the channel to the directory service drops. Nothing is done
 * here.
 *
 * @return always false
 */
@Override
public boolean OutgoingChannelDisconnected(AsyncClientChannel channel) {
  return false;
}
/**
 * (Re)loads the no-cache and rewrite filters from the directory service.
 * <p>
 * The new filters are loaded into local instances and only published to the
 * fields once both loads succeeded. Previously the fields were overwritten
 * with empty filters before loading, so a failed reload discarded the
 * working rule set. After a failed initial load the fields stay null, which
 * every reader in this class treats as "no filter".
 */
void reloadDNSFilters() {
  LOG.info("Loading DNS Filters");
  DNSNoCacheFilter noCacheFilter = new DNSNoCacheFilter();
  DNSRewriteFilter rewriteFilter = new DNSRewriteFilter();
  try {
    noCacheFilter.loadFromPath(_directoryServiceAddress, CrawlEnvironment.DNS_NOCACHE_RULES, false);
    rewriteFilter.loadFromPath(_directoryServiceAddress, CrawlEnvironment.DNS_REWRITE_RULES, false);
    // publish only fully loaded filters
    _noCacheFilter = noCacheFilter;
    _rewriteFilter = rewriteFilter;
  }
  catch (IOException e) {
    LOG.error("Failed to load DNS Filters. Keeping previously loaded filters. " + CCStringUtils.stringifyException(e));
  }
}
}