/**
 * Copyright 2008 - CommonCrawl Foundation
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 **/

package org.commoncrawl.service.queryserver.slave;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Vector;
import java.util.concurrent.Callable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.hadoop.compression.lzo.LzoCodec;
import org.commoncrawl.async.Callback;
import org.commoncrawl.async.ConcurrentTask;
import org.commoncrawl.async.Timer;
import org.commoncrawl.crawl.common.internal.CrawlEnvironment;
import org.commoncrawl.rpc.base.internal.AsyncClientChannel;
import org.commoncrawl.rpc.base.internal.AsyncContext;
import org.commoncrawl.rpc.base.internal.AsyncRequest;
import org.commoncrawl.rpc.base.internal.AsyncServerChannel;
import org.commoncrawl.rpc.base.internal.NullMessage;
import org.commoncrawl.rpc.base.shared.BinaryProtocol;
import org.commoncrawl.rpc.base.shared.RPCException;
import org.commoncrawl.rpc.base.shared.RPCStruct;
import org.commoncrawl.server.CommonCrawlServer;
import org.commoncrawl.service.queryserver.BaseConfig;
import org.commoncrawl.service.queryserver.Common;
import org.commoncrawl.service.queryserver.QueryCommon;
import org.commoncrawl.service.queryserver.QueryServerSlave;
import org.commoncrawl.service.queryserver.QueryStatus;
import org.commoncrawl.service.queryserver.RemoteQueryInfo;
import org.commoncrawl.service.queryserver.SlaveStatus;
import org.commoncrawl.service.queryserver.index.DatabaseIndexV2;
import org.commoncrawl.service.queryserver.query.Query;
import org.commoncrawl.service.queryserver.query.QueryProgressCallback;
import org.commoncrawl.service.queryserver.query.RemoteQueryCompletionCallback;
import org.commoncrawl.util.CCStringUtils;
import org.commoncrawl.util.FileUtils;

/**
 * Slave side of the query server. It exposes the {@link QueryServerSlave} RPC
 * service (initialize / doQuery / heartbeat / cancelQuery), keeps a queue of
 * pending queries plus a map of active ones, and reports per-query status back
 * to the caller through {@link SlaveStatus}.
 *
 * <p>Threading: the query bookkeeping ({@code _pendingQueries},
 * {@code _activeQueries}, {@code _slaveStatus}) is not synchronized and is
 * presumably only touched from the server's event loop thread (NOTE(review):
 * confirm against the RPC framework). {@code _cancelledQueries} is guarded by
 * its own monitor because {@link #updateProgress(Query, float)} locks on it.
 *
 * @author rana
 */
@SuppressWarnings("unchecked")
public class SlaveServer extends CommonCrawlServer implements QueryServerSlave,
    AsyncServerChannel.ConnectionCallback, RemoteQueryCompletionCallback, QueryProgressCallback {

  static final String QUERY_THREAD_POOL_ID = "query.thread.pool";

  private static final int MIN_INSTANCE_ID = 0;
  private static final int MAX_INSTANCE_ID = 9;
  private static final int DEFAULT_THREAD_POOL_SIZE = 8 * 4;

  private int _instanceId = -1;
  private int _threadPoolSize = DEFAULT_THREAD_POOL_SIZE;
  // volatile: written by the initialize / terminate paths and read by
  // updateProgress, which appears to run on a query worker thread.
  private volatile boolean _cancelling = false;
  private BaseConfig _baseConfig;
  private SlaveStatus _slaveStatus = new SlaveStatus();
  private FileSystem _fileSystem = null;
  // NumberFormat instances are not thread-safe; always lock on this object.
  private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
  private File _tempFileDir = null;
  LzoCodec codec;
  private LinkedList<Query> _pendingQueries = new LinkedList<Query>();
  private Map<Long, Query> _activeQueries = new HashMap<Long, Query>();
  // guarded by synchronized (_cancelledQueries)
  private HashSet<Long> _cancelledQueries = new HashSet<Long>();
  DatabaseIndexV2.SlaveDatabaseIndex _index;
  SlaveState _slaveState;

  static {
    NUMBER_FORMAT.setMinimumIntegerDigits(5);
    NUMBER_FORMAT.setGroupingUsed(false);
  }

  /** @return the file system set up by the last initialize call, or null before that. */
  public FileSystem getFileSystem() {
    return _fileSystem;
  }

  /** @return the configuration cloned from the last initialize request, or null before that. */
  public BaseConfig getBaseConfig() {
    return _baseConfig;
  }

  /**
   * Builds the part name for a shard: {@code "part-"} followed by the shard
   * index formatted with at least five integer digits and no grouping.
   *
   * @param shardIndex index of the shard
   * @return the part name, e.g. {@code part-00012}
   */
  public static String getPartId(int shardIndex) {
    // the shared formatter is not thread-safe and this method is public static
    synchronized (NUMBER_FORMAT) {
      return "part-" + NUMBER_FORMAT.format(shardIndex);
    }
  }

  /** @return the {@code jobLocal} directory underneath the server's data directory. */
  public File getJobLocalPath() {
    return new File(getDataDirectory(), "jobLocal");
  }

  @Override
  protected String getDefaultHttpInterface() {
    return CrawlEnvironment.DEFAULT_HTTP_INTERFACE;
  }

  /** Each instance id gets its own port: base port plus twice the instance id. */
  @Override
  protected int getDefaultHttpPort() {
    return CrawlEnvironment.DEFAULT_QUERY_SLAVE_HTTP_PORT + (_instanceId * 2);
  }

  @Override
  protected String getDefaultLogFileName() {
    return "prslave.log";
  }

  @Override
  protected String getDefaultRPCInterface() {
    return CrawlEnvironment.DEFAULT_RPC_INTERFACE;
  }

  /** Each instance id gets its own port: base port plus twice the instance id. */
  @Override
  protected int getDefaultRPCPort() {
    return CrawlEnvironment.DEFAULT_QUERY_SLAVE_RPC_PORT + (_instanceId * 2);
  }

  @Override
  protected String getWebAppName() {
    return CrawlEnvironment.QUERY_SLAVE_WEBAPP_NAME;
  }

  /**
   * Creates the codec, picks a default temp directory when none was given on
   * the command line, registers the slave RPC service and creates the job
   * local directory.
   *
   * @return always true
   */
  @SuppressWarnings("deprecation")
  @Override
  protected boolean initServer() {
    codec = new LzoCodec();

    if (_tempFileDir == null) {
      _tempFileDir = new File(getDataDirectory(), "qslave_temp");
      LOG.info("Temp File Dir does not existing. Defaulting to:" + _tempFileDir.getAbsolutePath());
    }

    // create server channel ...
    AsyncServerChannel channel = new AsyncServerChannel(this, this.getEventLoop(), this.getServerAddress(), this);

    // register RPC services it supports ...
    registerService(channel, QueryServerSlave.spec);

    // make job local directory
    getJobLocalPath().mkdirs();

    return true;
  }

  /**
   * Parses {@code --instance <id>}, {@code --tempFileDir <dir>} and
   * {@code --threadPoolSize <n>} (option names are case-insensitive).
   *
   * @param argv command line arguments
   * @return false when the instance id is missing or out of range, when a
   *         numeric value cannot be parsed, or when the temp directory is
   *         missing or cannot be created; true otherwise
   */
  @Override
  protected boolean parseArguements(String[] argv) {

    for (int i = 0; i < argv.length; ++i) {
      if (argv[i].equalsIgnoreCase("--instance")) {
        if (i + 1 < argv.length) {
          try {
            _instanceId = Integer.parseInt(argv[++i]);
          } catch (NumberFormatException e) {
            System.err.println("Invalid Instance Id specified:" + argv[i]);
            return false;
          }
          if (_instanceId < MIN_INSTANCE_ID || _instanceId > MAX_INSTANCE_ID) {
            System.err.println("Invalid Instance Id specified. Instance Id must be between "
                + MIN_INSTANCE_ID + " and " + MAX_INSTANCE_ID);
            return false;
          }
        }
      } else if (argv[i].equalsIgnoreCase("--tempFileDir")) {
        // guard against the option being the last argument
        if (i + 1 >= argv.length) {
          LOG.error("No value specified for --tempFileDir");
          return false;
        }
        _tempFileDir = new File(argv[++i]);
        _tempFileDir.mkdirs();
        if (!_tempFileDir.isDirectory()) {
          LOG.error("Invalid Temp Directory Specified:" + _tempFileDir.getAbsolutePath());
          return false;
        }
      } else if (argv[i].equalsIgnoreCase("--threadPoolSize")) {
        if (i + 1 < argv.length) {
          try {
            _threadPoolSize = Integer.parseInt(argv[++i]);
          } catch (NumberFormatException e) {
            System.err.println("Invalid Thread Pool Size specified:" + argv[i]);
            return false;
          }
        }
      }
    }

    if (_instanceId == -1) {
      System.err.println("Instance Id (--instance) and (optional) Thread Pool Size (--threadPoolSize) are required parameters.");
      return false;
    }
    return true;
  }

  @Override
  protected void overrideConfig(Configuration conf) {
    conf.setInt("org.commoncrawl.threadpool.max.threads", _threadPoolSize);
  }

  /** Prints the options understood by {@link #parseArguements(String[])}. */
  @Override
  protected void printUsage() {
    System.err.println("Options:");
    System.err.println("  --instance <id>        (required) instance id between "
        + MIN_INSTANCE_ID + " and " + MAX_INSTANCE_ID);
    System.err.println("  --tempFileDir <dir>    (optional) directory for per-query temp files");
    System.err.println("  --threadPoolSize <n>   (optional) thread pool size, default "
        + DEFAULT_THREAD_POOL_SIZE);
  }

  @Override
  protected boolean startDaemons() {
    return true;
  }

  @Override
  protected void stopDaemons() {
  }

  /**
   * RPC: (re)initializes the slave. Cancels all active queries, resets the
   * query bookkeeping and status, stores a clone of the supplied
   * {@link BaseConfig}, obtains the default file system and then loads the
   * slave database index on a separate thread. The request is completed with
   * the slave status on success and failed otherwise.
   *
   * @param rpcContext the request context; its input is the new base config
   */
  @Override
  public void initialize(final AsyncContext<BaseConfig, SlaveStatus> rpcContext) throws RPCException {

    // terminate all active queries ...
    terminateAndFlushAllQueries(new Callback() {

      @Override
      public void execute() {
        // we are still in the async thread here ... all existing queries have been cancelled at this point ...

        // clear query info
        _activeQueries.clear();
        _pendingQueries.clear();

        // clear out state ...
        _slaveStatus.clear();
        _slaveStatus.setState(SlaveStatus.State.INITIALIZING);
        // reset cancel flag
        _cancelling = false;

        // set up base config ...
        try {
          _baseConfig = (BaseConfig) rpcContext.getInput().clone();
        } catch (CloneNotSupportedException e) {
          // without a config we cannot continue (and _baseConfig may be null or stale)
          LOG.error(CCStringUtils.stringifyException(e));
          failRequest(rpcContext, "Unable to clone BaseConfig.\n" + CCStringUtils.stringifyException(e));
          return;
        }

        // initialize the file system ...
        try {
          _fileSystem = CrawlEnvironment.getDefaultFileSystem();
        } catch (Exception e) {
          // log the error
          LOG.error(CCStringUtils.stringifyException(e));
          // and fail the request ...
          failRequest(rpcContext, "Unable to Initialize FileSystem.\n" + CCStringUtils.stringifyException(e));
          return;
        }

        if (!_baseConfig.isFieldDirty(BaseConfig.Field_QUERYDBPATH)) {
          LOG.error("No QueryDB Path Specified in BaseConfig");
          failRequest(rpcContext, "No QueryDB Path Specified in BaseConfig");
          // the request has been completed; do not start the loader, which
          // would complete it a second time
          return;
        }

        // index loading is done off the event loop thread
        new Thread(new Runnable() {

          @Override
          public void run() {

            boolean loaded = false;
            try {
              LOG.info("Loading SlaveDatabase Index");
              _index = new DatabaseIndexV2.SlaveDatabaseIndex(_configuration, _fileSystem,
                  _baseConfig.getDatabaseTimestamp());
              LOG.info("Loaded Database Index");
              loaded = true;
            } catch (IOException e) {
              LOG.error("Data File Load Failed with exception:" + CCStringUtils.stringifyException(e));
            }

            final boolean loadedStatus = loaded;

            // hand the result back via an event loop timer
            getEventLoop().setTimer(new Timer(1, false, new Timer.Callback() {

              @Override
              public void timerFired(Timer timer) {
                if (loadedStatus) {
                  LOG.info("All Data Files successfully loaded. finishing initialization");
                  finishInitialize(rpcContext);
                } else {
                  failRequest(rpcContext, "Failed to load Data Files");
                }
              }
            }));
          }
        }).start();
      }
    });
  }

  /**
   * Copies a remote file into {@code <jobLocal>/<remote parent dir name>/}
   * unless a local file of the same length already exists there.
   *
   * @param remotePath path of the file on the default file system
   * @return the local file
   * @throws IOException if the remote status lookup or the copy fails
   */
  private File copyAcrossQueryDBFile(Path remotePath) throws IOException {
    FileSystem fileSystem = CrawlEnvironment.getDefaultFileSystem();
    // get the status of the specified file
    FileStatus fileStatus = fileSystem.getFileStatus(remotePath);

    File localDirectory = new File(getJobLocalPath(), remotePath.getParent().getName());
    if (!localDirectory.exists()) {
      localDirectory.mkdirs();
    }

    File localFile = new File(localDirectory, remotePath.getName());

    // a length mismatch is treated as a stale or partial copy
    if (!localFile.exists() || localFile.length() != fileStatus.getLen()) {
      localFile.delete();
      LOG.info("Copying Remote File:" + remotePath + " to " + localFile);
      fileSystem.copyToLocalFile(remotePath, new Path(localFile.getAbsolutePath()));
    } else {
      LOG.info("Skipping Copy of Remote File:" + remotePath + " to " + localFile);
    }
    return localFile;
  }

  /** @return the per-query work directory: {@code <tempFileDir>/<queryId>}. */
  private File getTempDirForQuery(long queryId) {
    return new File(_tempFileDir, Long.toString(queryId));
  }

  /** Marks the slave READY, creates the slave state and answers the initialize request. */
  private void finishInitialize(AsyncContext<BaseConfig, SlaveStatus> rpcContext) {
    // and update slave status state
    _slaveStatus.setState(SlaveStatus.State.READY);
    // create a slave state object ...
    _slaveState = new SlaveState(getHostName(), _index);

    sendStatusResponse(rpcContext);
  }

  /**
   * Completes the request with a clone of the current slave status and then
   * clears the accumulated per-query status list. If completing the request
   * fails the client channel is closed.
   */
  private void sendStatusResponse(AsyncContext<? extends RPCStruct, SlaveStatus> context) {

    try {
      // get base status
      context.setOutput((SlaveStatus) _slaveStatus.clone());
      // log it ...
      if (context.getOutput().getQueryStatus().size() != 0) {
        LOG.info("Sending a non-zero query status list in heartbeat response");
      }
      // clear query status in slave status ...
      _slaveStatus.getQueryStatus().clear();
    } catch (CloneNotSupportedException e) {
      // the response is still sent below, just without the cloned status
      LOG.error(CCStringUtils.stringifyException(e));
    }

    try {
      context.completeRequest();
    } catch (RPCException e) {
      LOG.error("fail to send StatusResponse to incoming RPC. CLOSING RPC Channel");
      try {
        context.getClientChannel().close();
      } catch (IOException e1) {
        LOG.error(CCStringUtils.stringifyException(e1));
      }
    }
  }

  /** Activates pending queries (FIFO) while fewer than MAX_CONCURRENT_QUERIES are active. */
  private void potentiallyStartNextQuery() {
    while (_activeQueries.size() < Common.MAX_CONCURRENT_QUERIES && _pendingQueries.size() != 0) {
      // remove next from queue
      Query queryObject = _pendingQueries.removeFirst();
      // and activate
      activateQuery(queryObject);
    }
  }

  /**
   * Registers the query as active, recreates its temp directory and starts
   * it. On an IOException the query is removed from the active map, marked
   * ERROR with the stack trace as the reason, and its temp directory is
   * deleted. Either way the slave status is updated.
   */
  private void activateQuery(Query queryObject) {

    LOG.info("Activating Query:" + queryObject.getQueryId());

    _activeQueries.put(queryObject.getQueryId(), queryObject);

    // create temporary work directory
    File queryTempDir = getTempDirForQuery(queryObject.getQueryId());

    LOG.info("Query TempDir for Query:" + queryObject.getQueryId() + " is:" + queryTempDir.getAbsolutePath());

    try {
      LOG.info("Deleting Query TempDir");
      FileUtils.recursivelyDeleteFile(queryTempDir);
      LOG.info("Re-creating TempDir");
      queryTempDir.mkdirs();

      LOG.info("Starting Slave Query for Query:" + queryObject.getQueryId());
      // start the query thread ...
      queryObject.startSlaveQuery(this._fileSystem, this._configuration, getEventLoop(), _index, queryTempDir,
          this, this);
      // and update the status.
      updateSlaveStatusForQueryObject(queryObject);
    } catch (IOException e) {
      LOG.error("Query Activation for Query:" + queryObject.getQueryId() + " Failed with Exception:"
          + CCStringUtils.stringifyException(e));
      // remove from active list ...
      _activeQueries.remove(queryObject.getQueryId());
      // mark as failed ...
      queryObject.getQueryStatus().setStatus(QueryStatus.Status.ERROR);
      queryObject.getQueryStatus().setOptErrorReason(CCStringUtils.stringifyException(e));

      FileUtils.recursivelyDeleteFile(queryTempDir);

      updateSlaveStatusForQueryObject(queryObject);
    }
  }

  /**
   * Records the query's current status in the slave status: merged into the
   * existing entry with the same query id, or added as a cloned new entry.
   */
  private void updateSlaveStatusForQueryObject(Query theQueryObject) {

    boolean found = false;

    LOG.info("updateSlaveStatusForQueryObject called for Query:" + theQueryObject.getQueryId());
    LOG.info("Updating Query Status for Query:" + theQueryObject.getQueryId() + " Status:"
        + QueryStatus.Status.toString(theQueryObject.getQueryStatus().getStatus()));

    for (QueryStatus status : _slaveStatus.getQueryStatus()) {
      // if query ids match
      if (status.getQueryId() == theQueryObject.getQueryId()) {
        try {
          LOG.info("Merging into Existing Query Status");
          status.merge(theQueryObject.getQueryStatus());
          found = true;
        } catch (CloneNotSupportedException e) {
          LOG.error(CCStringUtils.stringifyException(e));
        }
        break;
      }
    }

    if (!found) {
      try {
        LOG.info("Cloning a NEW Query Status");
        QueryStatus queryStatus = (QueryStatus) theQueryObject.getQueryStatus().clone();
        _slaveStatus.getQueryStatus().add(queryStatus);
      } catch (CloneNotSupportedException e) {
        // never add a null entry to the status list
        LOG.error(CCStringUtils.stringifyException(e));
      }
    }
  }

  /**
   * RPC: instantiates the query data and query objects named in the request,
   * initializes the query and either activates it immediately (high priority)
   * or queues it. The query's status is merged into the response. On any
   * exception the request is completed with Error_RequestFailed.
   *
   * @param rpcContext the request context carrying the {@link RemoteQueryInfo}
   */
  @Override
  public void doQuery(AsyncContext<RemoteQueryInfo, QueryStatus> rpcContext) throws RPCException {

    final RemoteQueryInfo queryInfo = rpcContext.getInput();

    LOG.info("Adding Query Type:" + queryInfo.getQueryClassType() + "Id:"
        + queryInfo.getCommonInfo().getQueryId() + " to Queue.");

    try {
      // extract object type
      String queryObjectType = queryInfo.getQueryClassType();
      LOG.info("QueryId:" + queryInfo.getCommonInfo().getQueryId() + " ObjectType:" + queryObjectType);
      // and data type
      String queryDataType = queryInfo.getQueryDataClassType();
      LOG.info("QueryId:" + queryInfo.getCommonInfo().getQueryId() + " QueryDataType:" + queryDataType);
      // NOTE(review): both class names come straight from the RPC payload and
      // are instantiated reflectively; this assumes the caller is trusted.
      // allocate the object data type ..
      RPCStruct queryData = (RPCStruct) Class.forName(queryDataType).newInstance();
      LOG.info("QueryId:" + queryInfo.getCommonInfo().getQueryId() + " DeSerializing Query Data");
      // allocate an input stream
      DataInputStream inputStream = new DataInputStream(
          new ByteArrayInputStream(queryInfo.getQueryDataBuffer().getReadOnlyBytes()));
      // and deserialize into the structure
      queryData.deserialize(inputStream, new BinaryProtocol());
      LOG.info("QueryId:" + queryInfo.getCommonInfo().getQueryId() + " Allocating Query Object");
      // now allocate query object
      Query queryObject = (Query) Class.forName(queryObjectType).newInstance();
      LOG.info("QueryId:" + queryInfo.getCommonInfo().getQueryId() + " Initializing QueryObject");
      // initialize query
      queryObject.initializeRemoteQuery(queryInfo.getClientQueryData(), _slaveState,
          queryInfo.getShardMapping(), queryInfo.getCommonInfo(), queryData);
      LOG.info("QueryId:" + queryInfo.getCommonInfo().getQueryId() + " Adding to Pending Queue");

      //TODO: SEE IF WE CAN IMMEDIATELY EXECUTE QUERY ...
      if (queryObject.isHighPriorityQuery()) {
        // high priority query ... dispatch immediately ...
        activateQuery(queryObject);
      } else {
        // add to pending set ...
        _pendingQueries.add(queryObject);
      }

      // add query to query status structure ...
      updateSlaveStatusForQueryObject(queryObject);
      // now potentially start next query ...
      potentiallyStartNextQuery();
      // now send the query's current status back to caller
      rpcContext.getOutput().merge(queryObject.getQueryStatus());
    } catch (Exception e) {
      LOG.error(CCStringUtils.stringifyException(e));
      LOG.error("Query Dispatch for Query Id:" + queryInfo.getCommonInfo().getQueryId()
          + " Failed with Exception:" + CCStringUtils.stringifyException(e));
      rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed);
      rpcContext.setErrorDesc(CCStringUtils.stringifyException(e));
    }

    // complete request ...
    rpcContext.completeRequest();
  }

  /** RPC: answers with the current slave status (which also flushes the query status list). */
  @Override
  public void heartbeat(AsyncContext<NullMessage, SlaveStatus> rpcContext) throws RPCException {
    //LOG.info("Got Heartbeat from Master - Sending Status to Master");
    sendStatusResponse(rpcContext);
  }

  /**
   * Completes the request with Error_RequestFailed and the given reason. If
   * completing the request fails the client channel is closed.
   */
  private final void failRequest(AsyncContext<? extends RPCStruct, ? extends RPCStruct> rpcContext, String reason) {
    LOG.info("failRequest called");
    // not good... time to fail the request ...
    rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed);
    rpcContext.setErrorDesc(reason);
    try {
      rpcContext.completeRequest();
    } catch (RPCException e) {
      LOG.error(CCStringUtils.stringifyException(e));
      try {
        rpcContext.getClientChannel().close();
      } catch (IOException e2) {
        LOG.error(CCStringUtils.stringifyException(e2));
      }
    }
  }

  @Override
  protected String getDefaultDataDir() {
    return "data";
  }

  @Override
  public void IncomingClientConnected(AsyncClientChannel channel) {
    LOG.info("Incoming Channel Connected");
  }

  @Override
  public void IncomingClientDisconnected(AsyncClientChannel channel) {
    LOG.info("Channel Disconnected");
  }

  /**
   * Cancels every active query and then runs the callback. With no active
   * queries the callback runs immediately on the calling thread; otherwise
   * the cancellation runs as a task on the default thread pool and the
   * callback runs from the task's completion callback.
   *
   * <p>The cancelling flag is raised for the duration. In the immediate case
   * it is left raised and the callback is expected to reset it (the one
   * passed by initialize does).
   */
  private void terminateAndFlushAllQueries(final Callback callback) {

    _cancelling = true;

    if (_activeQueries.size() == 0) {
      // execute callback immediately
      callback.execute();
      return;
    }

    // otherwise terminate queries in a background thread ...
    // (snapshot, so the worker does not iterate the live map)
    final Vector<Query> activeQueries = new Vector<Query>(_activeQueries.values());

    getDefaultThreadPool().submit(new ConcurrentTask<Boolean>(_eventLoop, new Callable<Boolean>() {

      @Override
      public Boolean call() throws Exception {
        LOG.info("Starting Cancel Thread");
        for (Query query : activeQueries) {
          LOG.info("Cancelling Query:" + query.getQueryId());
          try {
            query.cancelSlaveQuery();
          } catch (Exception e) {
            // one failing cancel must not stop the remaining ones
            LOG.error("Error Cancelling Query:" + query.getQueryId() + " Error:"
                + CCStringUtils.stringifyException(e));
          }
          LOG.info("Cancelled Query:" + query.getQueryId());
        }
        return true;
      }

    }, new ConcurrentTask.CompletionCallback<Boolean>() {

      @Override
      public void taskComplete(Boolean loadResult) {
        _cancelling = false;
        callback.execute();
      }

      @Override
      public void taskFailed(Exception e) {
        _cancelling = false;
        LOG.error(CCStringUtils.stringifyException(e));
        callback.execute();
      }
    }));
  }

  /**
   * Removes the query id from the cancelled set.
   *
   * @return true if the id was in the set, i.e. a cancel was requested
   */
  private boolean consumeCancellation(long queryId) {
    synchronized (_cancelledQueries) {
      return _cancelledQueries.remove(queryId);
    }
  }

  /**
   * Publishes the query's final status, drops it from the active map, deletes
   * its temp directory and starts pending queries in the freed slot.
   */
  private void retireQuery(Query theQueryObject) {
    // update the slave status according to the query status
    updateSlaveStatusForQueryObject(theQueryObject);
    // remove the query from the active queue ...
    _activeQueries.remove(theQueryObject.getQueryId());

    FileUtils.recursivelyDeleteFile(getTempDirForQuery(theQueryObject.getQueryId()));

    // a slot just became free; without this, pending queries would wait
    // until the next doQuery request arrives
    potentiallyStartNextQuery();
  }

  /**
   * Completion callback. Ignored while a bulk cancel is in progress. If a
   * cancel was requested for this query its status is overridden to
   * CANCELLED before the query is retired.
   */
  @Override
  public void queryComplete(Query theQueryObject, long resultCount) {
    LOG.info("QueyComplete received for Query:" + theQueryObject.getQueryId() + " resultCount:" + resultCount);
    // this callback occurs in the context of the async thread ...
    if (_cancelling) {
      return;
    }

    if (consumeCancellation(theQueryObject.getQueryId())) {
      LOG.info("Query Seems to have been cancelled. Explicitly cancelling Query:" + theQueryObject.getQueryId());
      // override status
      theQueryObject.getQueryStatus().setStatus(QueryStatus.Status.CANCELLED);
    }
    retireQuery(theQueryObject);
  }

  /**
   * Failure callback. Ignored while a bulk cancel is in progress. If a cancel
   * was requested for this query its status is overridden to CANCELLED before
   * the query is retired.
   */
  @Override
  public void queryFailed(Query theQueryObject, String reason) {
    LOG.info("QueryFailed received for Query:" + theQueryObject.getQueryId() + " reason:" + reason);
    if (_cancelling) {
      return;
    }

    if (consumeCancellation(theQueryObject.getQueryId())) {
      // override status
      theQueryObject.getQueryStatus().setStatus(QueryStatus.Status.CANCELLED);
    }
    retireQuery(theQueryObject);
  }

  /**
   * Progress callback.
   *
   * @return false when the query should stop early (a bulk cancel is in
   *         progress, or a cancel was requested for this query; the request
   *         is consumed), true when it should continue
   */
  @Override
  public boolean updateProgress(final Query theQueryObject, float percentComplete) {
    LOG.info("Update Progress Received for Query:" + theQueryObject.getQueryId() + "pctComplete:"
        + percentComplete);
    //TODO: WE NEED TO UPDATE slave status for this query here .
    if (_cancelling) {
      // return false to indicate that query execution should terminate prematurely
      return false;
    }
    // false if the query was in the cancelled set, true to keep going
    return !consumeCancellation(theQueryObject.getQueryId());
  }

  /**
   * RPC: requests cancellation of an active query. The id is recorded and
   * acted upon at the query's next progress / completion / failure callback.
   * Ids of queries that are not active are ignored.
   */
  @Override
  public void cancelQuery(AsyncContext<QueryCommon, NullMessage> rpcContext) throws RPCException {
    if (_activeQueries.containsKey(rpcContext.getInput().getQueryId())) {
      //TODO: WE WILL NEED TO PERIODICALLY FLUSH THIS SET ...
      // same lock as every other access to the set
      synchronized (_cancelledQueries) {
        _cancelledQueries.add(rpcContext.getInput().getQueryId());
      }
    }
    // complete the request like the other RPC handlers do, so the caller is
    // not left waiting for a response
    rpcContext.completeRequest();
  }
}