/** * Copyright 2008 - CommonCrawl Foundation * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * **/ package org.commoncrawl.service.directory; import java.io.BufferedReader; import java.io.CharArrayReader; import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; import java.net.URI; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.util.Map; import java.util.TreeMap; import java.util.Vector; import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.commoncrawl.crawl.common.internal.CrawlEnvironment; import org.commoncrawl.rpc.base.internal.AsyncClientChannel; import org.commoncrawl.rpc.base.internal.AsyncContext; import org.commoncrawl.rpc.base.internal.AsyncRequest; import org.commoncrawl.rpc.base.internal.AsyncServerChannel; import org.commoncrawl.rpc.base.internal.NullMessage; import org.commoncrawl.rpc.base.shared.BinaryProtocol; import org.commoncrawl.rpc.base.shared.RPCException; import org.commoncrawl.server.CommonCrawlServer; import org.commoncrawl.service.directory.DirectoryService; import org.commoncrawl.service.directory.DirectoryServiceItem; import org.commoncrawl.service.directory.DirectoryServiceItemList; import org.commoncrawl.service.directory.DirectoryServiceQuery; import org.commoncrawl.service.directory.DirectoryServiceRegistrationInfo; import org.commoncrawl.service.directory.DirectoryServiceSubscriptionInfo; import org.commoncrawl.util.CCStringUtils; /** * * @author rana * */ public class DirectoryServiceServer extends CommonCrawlServer implements DirectoryService, AsyncServerChannel.ConnectionCallback { private static final String SYSTEM_DATA_ROOT = "sys"; private static final String SYSTEM_DATA_ROOT_PATH = CrawlEnvironment.DIRECTORY_SERVICE_HDFS_ROOT + "/sys"; private static final String USER_DATA_ROOT = "user"; private static final String USER_DATA_ROOT_PATH = CrawlEnvironment.DIRECTORY_SERVICE_HDFS_ROOT + "/user"; private static final String IN_MEMORYPATHS_FILE = "/InMemoryPaths.txt"; private FileSystem _fileSystem = null; private File _tempFileDir = null; private Path _baseStoragePath; private Vector<Pattern> memoryOnlyPaths = new Vector<Pattern>(); private static class ClientConnection { } private Map<String,DirectoryServiceItem> _userItems = new TreeMap<String,DirectoryServiceItem>(); private Map<String,DirectoryServiceItem> _systemItems = new TreeMap<String,DirectoryServiceItem>(); private Map<AsyncClientChannel,DirectoryServiceListener> _listeners = new TreeMap<AsyncClientChannel,DirectoryServiceListener>(); private Map<AsyncClientChannel,DirectoryServiceListener> _pendingListeners = new TreeMap<AsyncClientChannel,DirectoryServiceListener>(); public FileSystem getFileSystem() { return _fileSystem; } @Override protected String getDefaultHttpInterface() { return CrawlEnvironment.DEFAULT_HTTP_INTERFACE; } @Override protected int getDefaultHttpPort() { return CrawlEnvironment.DIRECTORY_SERVICE_HTTP_PORT; } @Override protected String getDefaultLogFileName() { return "dservice.log"; } @Override protected String getDefaultRPCInterface() { return CrawlEnvironment.DEFAULT_RPC_INTERFACE; } @Override protected int getDefaultRPCPort() { return CrawlEnvironment.DIRECTORY_SERVICE_RPC_PORT; } @Override protected String getWebAppName() { return CrawlEnvironment.DIRECTORY_SERVICE_WEBAPP_NAME; } @Override protected boolean initServer() { try { // load system files loadSystemPaths(); LOG.info("Processing System Files"); // process system files processSystemFiles(); LOG.info("Loading User Items"); // load user files loadUserItems(); // create server channel ... AsyncServerChannel channel = new AsyncServerChannel(this, this.getEventLoop(), this.getServerAddress(),this); // register RPC services it supports ... registerService(channel,DirectoryService.spec); return true; } catch (IOException e) { LOG.error(CCStringUtils.stringifyException(e)); } return false; } @Override protected boolean parseArguements(String[] argv) { for(int i=0; i < argv.length;++i) { if (argv[i].equalsIgnoreCase("--fileSystem")) { try { _fileSystem = FileSystem.get(new URI(argv[++i]),getConfig()); } catch (Exception e) { LOG.error(CCStringUtils.stringifyException(e)); } } else if (argv[i].equalsIgnoreCase("--storageBase")) { _baseStoragePath = new Path(argv[++i]); } } if (_fileSystem != null && _baseStoragePath != null) { return true; } else { System.out.println("--fileSystem and --storageBase are required parameters!"); return false; } } @Override protected void overrideConfig(Configuration conf) { } @Override protected void printUsage() { } @Override protected boolean startDaemons() { return true; } @Override protected void stopDaemons() { } @Override protected String getDefaultDataDir() { return "./data"; } final boolean isItemPersistent(DirectoryServiceItem item) { for (Pattern pattern : memoryOnlyPaths) { if (pattern.matcher(item.getItemPath()).matches()) { return false; } } return true; } @Override public void publish(AsyncContext<DirectoryServiceItem, DirectoryServiceItem> rpcContext) throws RPCException { DirectoryServiceItem item = rpcContext.getInput(); LOG.info("Received publish request for item:" + item.getItemPath()); if (!item.isFieldDirty(DirectoryServiceItem.Field_ITEMPATH) || !item.getItemPath().startsWith("/") || item.getItemPath().endsWith("/")) { rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed); rpcContext.setErrorDesc("Invalid Path"); LOG.error("Request Failed:Invalid Path(" + item.getItemPath()+")"); } else if (!item.isFieldDirty(DirectoryServiceItem.Field_ITEMDATA) || item.getItemData().getCount() == 0) { rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed); rpcContext.setErrorDesc("Invalid Data Buffer"); LOG.error("Request Failed:Invalid Data Buffer(" + item.getItemPath()+")"); } else { DirectoryServiceItem existingItem = _userItems.get(item.getItemPath()); if (existingItem != null) { item.setVersionNumber(existingItem.getVersionNumber() + 1); LOG.info("Incrementing Item:" + item.getItemPath() + " Version Number to:" + item.getVersionNumber()); } try { if (isItemPersistent(item)) { Path fullPath = buildFullUserItemPath(item.getItemPath(),item.getVersionNumber()); LOG.info("Publish Reuest. Item:" + item.getItemPath()+" is persistent. Persisting to disk. Path is:" + fullPath); FSDataOutputStream outputStream = _fileSystem.create(fullPath); try { item.serialize(outputStream, new BinaryProtocol()); } catch (IOException e) { LOG.error("Error writing item:" + item.getItemPath() + " to disk"); LOG.error(CCStringUtils.stringifyException(e)); _fileSystem.delete(fullPath,false); throw e; } finally { outputStream.flush(); outputStream.close(); } } _userItems.put(item.getItemPath(),item); rpcContext.setStatus(AsyncRequest.Status.Success); try { rpcContext.getOutput().merge(item); } catch (CloneNotSupportedException e) { } rpcContext.getOutput().setFieldClean(DirectoryServiceItem.Field_ITEMDATA); rpcContext.setStatus(AsyncRequest.Status.Success); try { broadcastToSubscribers((DirectoryServiceItem)item.clone()); } catch (CloneNotSupportedException e) { } } catch (IOException e) { LOG.error(CCStringUtils.stringifyException(e)); rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed); rpcContext.setErrorDesc(CCStringUtils.stringifyException(e)); } } rpcContext.completeRequest(); } private void broadcastToSubscribers(DirectoryServiceItem item) { DirectoryServiceItemList list = new DirectoryServiceItemList(); list.getItems().add(item); LOG.info("Searching Listener List for match for changed path:" + item.getItemPath()); for (DirectoryServiceListener listener: _listeners.values()) { Map<String,Pattern> patternMap = listener.getSubscriptions(); for (Pattern pattern : patternMap.values()) { LOG.info("Comparing Against Pattern:" + pattern.toString() + " for Listener:" + listener.getName()); if (pattern.matcher(item.getItemPath()).matches()) { LOG.info("Pattern Matched. Dispatching Request to Listener"); listener.dispatchItemsChangedMessage(list); } } } } private final Path buildFullUserItemPath(String itemName,long versionNumber) { return buildAbsolutePath(new Path(USER_DATA_ROOT_PATH + itemName + "$" + Long.toString(versionNumber))); } @Override public void register(AsyncContext<DirectoryServiceRegistrationInfo, NullMessage> rpcContext)throws RPCException { LOG.info("Received Register Request from:" + rpcContext.getInput().getConnectionString()); DirectoryServiceListener existingListener = _listeners.get(rpcContext.getClientChannel()); // remove existing listener if any if (existingListener != null) { _listeners.remove(rpcContext.getClientChannel()); existingListener.disconnect(); } // allocte new listener object InetSocketAddress address = CCStringUtils.parseSocketAddress(rpcContext.getInput().getConnectionString()); if (address == null) { RPCException e = new RPCException("Invalid Connection String in Client Registration Request: " + rpcContext.getInput().getConnectionString()); LOG.error(CCStringUtils.stringifyException(e)); throw e; } DirectoryServiceListener listener = new DirectoryServiceListener(this,rpcContext.getClientChannel(),address,rpcContext.getInput().getRegistrationCookie()); try { listener.connect(); _pendingListeners.put(rpcContext.getClientChannel(),listener); rpcContext.completeRequest(); } catch (IOException e) { LOG.error(CCStringUtils.stringifyException(e)); } } @Override public void query(AsyncContext<DirectoryServiceQuery, DirectoryServiceItemList> rpcContext) throws RPCException { LOG.info("Received Query Request from: " + rpcContext.getClientChannel()); queryItems(rpcContext.getInput().getItemPath(),rpcContext.getOutput()); rpcContext.setStatus(AsyncRequest.Status.Success); rpcContext.completeRequest(); } @Override public void subscribe(AsyncContext<DirectoryServiceSubscriptionInfo, DirectoryServiceItemList> rpcContext)throws RPCException { LOG.info("Received Subscription Request from: " + rpcContext.getClientChannel()); DirectoryServiceListener listener = _listeners.get(rpcContext.getClientChannel()); if (listener != null) { listener.addSubscription(rpcContext.getInput().getSubscriptionPath()); } queryItems(rpcContext.getInput().getSubscriptionPath(),rpcContext.getOutput()); rpcContext.setStatus(AsyncRequest.Status.Success); rpcContext.completeRequest(); } @Override public void unscubscribe(AsyncContext<DirectoryServiceSubscriptionInfo, NullMessage> rpcContext) throws RPCException { LOG.info("Received Unsubscribe Request from: " + rpcContext.getClientChannel()); DirectoryServiceListener listener = _listeners.get(rpcContext.getClientChannel()); if (listener != null) { listener.removeSubscription(rpcContext.getInput().getSubscriptionPath()); } rpcContext.setStatus(AsyncRequest.Status.Success); rpcContext.completeRequest(); } @Override public void IncomingClientConnected(AsyncClientChannel channel) { } @Override public void IncomingClientDisconnected(AsyncClientChannel channel) { removeListeners(channel); } private void queryItems(String queryString,DirectoryServiceItemList listOut) { Pattern pattern = Pattern.compile(queryString); for (DirectoryServiceItem item : _userItems.values()) { if (pattern.matcher(item.getItemPath()).matches()) { listOut.getItems().add(item); } } } private Path buildAbsolutePath(Path relativeRootPath){ return new Path(_baseStoragePath,relativeRootPath); } private void loadSystemPaths() throws IOException { loadItems(buildAbsolutePath(new Path(SYSTEM_DATA_ROOT_PATH)),buildAbsolutePath(new Path(SYSTEM_DATA_ROOT_PATH)),_systemItems,false); } private void loadUserItems() throws IOException { loadItems(buildAbsolutePath(new Path(USER_DATA_ROOT_PATH)),buildAbsolutePath(new Path(USER_DATA_ROOT_PATH)),_userItems,true); } private void loadItems(Path itemRootPath,Path currentPath,Map<String,DirectoryServiceItem> map,boolean hasVersioning) throws IOException { FileStatus paths[] = _fileSystem.globStatus(new Path(currentPath,"*")); LOG.info("Pre-Loading Items from FileSystem at root path:" + currentPath); for (FileStatus itemPath : paths) { if (!itemPath.isDir()) { DirectoryServiceItem item = preLoadItemInfoFromPath(itemRootPath,itemPath.getPath(),hasVersioning); if (item != null) { LOG.info("Found Item:" + item.getItemPath() + " Version:" + item.getVersionNumber()); DirectoryServiceItem existingItem = map.get(item.getItemPath()); if (existingItem == null){ map.put(item.getItemPath(), item); } else { if (existingItem.getVersionNumber() < item.getVersionNumber()) { map.put(item.getItemPath(), item); } } } } else { loadItems(itemRootPath,itemPath.getPath(),map,hasVersioning); } } // now load the actual versions for (DirectoryServiceItem item : map.values()) { try { Path path = new Path(itemRootPath + item.getItemPath()+ "$" + item.getVersionNumber()); LOG.info("Loading Item:" + item.getItemPath() + " Verison:" + item.getVersionNumber() + " from Path:" + path); FSDataInputStream inputStream = _fileSystem.open(path); try { item.deserialize(inputStream,new BinaryProtocol()); LOG.info("Loaded Item:" + item.getItemPath() + " Version:" + item.getVersionNumber() + " BufferSize:" + item.getItemData().getCount()); } finally { inputStream.close(); } } catch (IOException e) { LOG.error(CCStringUtils.stringifyException(e)); item.setFlags(DirectoryServiceItem.Flags.ItemLoadFailed); } } } public final String normalizeOutputPath(Path rootPath,String inputPath) { String rootPathStr = rootPath.toString(); int indexOfRoot = inputPath.indexOf(rootPathStr); return inputPath.substring(indexOfRoot+rootPathStr.length()); } public DirectoryServiceItem preLoadItemInfoFromPath(Path rootPath,Path path,boolean hasVersioning) throws IOException { // extract version information String strPath = path.toString(); LOG.info("preloading item:" + strPath); long versionNumber = -1; if (hasVersioning) { int versionDelimiter = strPath.lastIndexOf('$'); String versionStr = strPath.substring(versionDelimiter + 1); strPath = strPath.substring(0,versionDelimiter); try { versionNumber = Long.parseLong(versionStr); } catch (NumberFormatException e) { LOG.error(CCStringUtils.stringifyException(e)); return null; } } DirectoryServiceItem itemOut = new DirectoryServiceItem(); itemOut.setItemPath(normalizeOutputPath(rootPath,strPath)); itemOut.setVersionNumber(versionNumber); return itemOut; } private final void processSystemFiles() throws IOException { for (DirectoryServiceItem item : _systemItems.values()) { if (item.getItemPath().equals(IN_MEMORYPATHS_FILE)) { processInMemoryPathsFile(item.getItemData().getReadOnlyBytes()); } } _systemItems.clear(); } private final void processInMemoryPathsFile(byte[] inMemoryFileData)throws IOException { CharBuffer charBuf = Charset.forName("UTF8").decode(ByteBuffer.wrap(inMemoryFileData)); BufferedReader reader = new BufferedReader(new CharArrayReader(charBuf.array(),0,charBuf.limit())); String nextLine = null; while ((nextLine = reader.readLine()) != null) { LOG.info("Compiling in memory path pattern:" + nextLine); memoryOnlyPaths.add(Pattern.compile(nextLine)); } } void activateListener(AsyncClientChannel sourceChannel,DirectoryServiceListener listener) { if (_pendingListeners.get(sourceChannel) == listener) _pendingListeners.remove(sourceChannel); _listeners.put(sourceChannel, listener); } void removeListeners(AsyncClientChannel sourceChannel) { DirectoryServiceListener pendingListener = _pendingListeners.get(sourceChannel); _pendingListeners.remove(sourceChannel); DirectoryServiceListener registeredListener = _listeners.get(sourceChannel); _listeners.remove(sourceChannel); if (pendingListener != null) { pendingListener.disconnect(); } if (registeredListener != null) { registeredListener.disconnect(); } } void removeListener(AsyncClientChannel sourceChannel,DirectoryServiceListener listener) { if (_pendingListeners.get(sourceChannel) == listener) { _pendingListeners.remove(sourceChannel); } if (_listeners.get(sourceChannel) == listener) { _listeners.remove(sourceChannel); } } }