/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.zookeeper_voltpatches.server; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.jute_voltpatches.Index; import org.apache.jute_voltpatches.InputArchive; import org.apache.jute_voltpatches.OutputArchive; import org.apache.jute_voltpatches.Record; import org.apache.log4j.Logger; import org.apache.zookeeper_voltpatches.server.DataNode; import org.apache.zookeeper_voltpatches.server.DataTree; import org.apache.zookeeper_voltpatches.server.WatchManager; import org.apache.zookeeper_voltpatches.server.ZooTrace; import org.apache.zookeeper_voltpatches.KeeperException; import org.apache.zookeeper_voltpatches.Quotas; import org.apache.zookeeper_voltpatches.StatsTrack; import org.apache.zookeeper_voltpatches.WatchedEvent; import org.apache.zookeeper_voltpatches.Watcher; import org.apache.zookeeper_voltpatches.KeeperException.Code; import org.apache.zookeeper_voltpatches.KeeperException.NoNodeException; import org.apache.zookeeper_voltpatches.Watcher.Event; import org.apache.zookeeper_voltpatches.Watcher.Event.EventType; import org.apache.zookeeper_voltpatches.Watcher.Event.KeeperState; import org.apache.zookeeper_voltpatches.ZooDefs.Ids; import org.apache.zookeeper_voltpatches.ZooDefs.OpCode; import org.apache.zookeeper_voltpatches.common.PathTrie; import org.apache.zookeeper_voltpatches.data.ACL; import org.apache.zookeeper_voltpatches.data.Stat; import org.apache.zookeeper_voltpatches.data.StatPersisted; import org.apache.zookeeper_voltpatches.txn.CreateTxn; import org.apache.zookeeper_voltpatches.txn.DeleteTxn; import org.apache.zookeeper_voltpatches.txn.ErrorTxn; import org.apache.zookeeper_voltpatches.txn.SetACLTxn; import org.apache.zookeeper_voltpatches.txn.SetDataTxn; import org.apache.zookeeper_voltpatches.txn.TxnHeader; import org.voltcore.logging.VoltLogger; /** * This class maintains the tree data structure. It doesn't have any networking * or client connection code in it so that it can be tested in a stand alone * way. * <p> * The tree maintains two parallel data structures: a hashtable that maps from * full paths to DataNodes and a tree of DataNodes. All accesses to a path is * through the hashtable. The tree is traversed only when serializing to disk. */ public class DataTree { private static final VoltLogger LOG = new VoltLogger(DataTree.class.getSimpleName()); /** * This hashtable provides a fast lookup to the datanodes. The tree is the * source of truth and is where all the locking occurs */ private final ConcurrentHashMap<String, DataNode> nodes = new ConcurrentHashMap<String, DataNode>(); private final WatchManager dataWatches = new WatchManager(); private final WatchManager childWatches = new WatchManager(); /** the root of zookeeper tree */ private static final String rootZookeeper = "/"; /** the zookeeper nodes that acts as the management and status node **/ private static final String procZookeeper = Quotas.procZookeeper; /** this will be the string thats stored as a child of root */ private static final String procChildZookeeper = procZookeeper.substring(1); /** * the zookeeper quota node that acts as the quota management node for * zookeeper */ private static final String quotaZookeeper = Quotas.quotaZookeeper; /** this will be the string thats stored as a child of /zookeeper */ private static final String quotaChildZookeeper = quotaZookeeper .substring(procZookeeper.length() + 1); /** * the path trie that keeps track fo the quota nodes in this datatree */ private final PathTrie pTrie = new PathTrie(); /** * This hashtable lists the paths of the ephemeral nodes of a session. */ private final Map<Long, HashSet<String>> ephemerals = new ConcurrentHashMap<Long, HashSet<String>>(); /** * this is map from longs to acl's. It saves acl's being stored for each * datanode. */ public final Map<Long, List<ACL>> longKeyMap = new HashMap<Long, List<ACL>>(); /** * this a map from acls to long. */ public final Map<List<ACL>, Long> aclKeyMap = new HashMap<List<ACL>, Long>(); /** * these are the number of acls that we have in the datatree */ protected long aclIndex = 0; @SuppressWarnings("unchecked") public HashSet<String> getEphemerals(long sessionId) { HashSet<String> retv = ephemerals.get(sessionId); if (retv == null) { return new HashSet<String>(); } HashSet<String> cloned = null; synchronized (retv) { cloned = (HashSet<String>) retv.clone(); } return cloned; } public Map<Long, HashSet<String>> getEphemeralsMap() { return ephemerals; } private long incrementIndex() { return ++aclIndex; } /** * compare two list of acls. if there elements are in the same order and the * same size then return true else return false * * @param lista * the list to be compared * @param listb * the list to be compared * @return true if and only if the lists are of the same size and the * elements are in the same order in lista and listb */ private boolean listACLEquals(List<ACL> lista, List<ACL> listb) { if (lista.size() != listb.size()) { return false; } for (int i = 0; i < lista.size(); i++) { ACL a = lista.get(i); ACL b = listb.get(i); if (!a.equals(b)) { return false; } } return true; } /** * converts the list of acls to a list of longs. * * @param acls * @return a list of longs that map to the acls */ public synchronized Long convertAcls(List<ACL> acls) { if (acls == null) return -1L; // get the value from the map Long ret = aclKeyMap.get(acls); // could not find the map if (ret != null) return ret; long val = incrementIndex(); longKeyMap.put(val, acls); aclKeyMap.put(acls, val); return val; } /** * converts a list of longs to a list of acls. * * @param longs * the list of longs * @return a list of ACLs that map to longs */ public synchronized List<ACL> convertLong(Long longVal) { if (longVal == null) return null; if (longVal == -1L) return Ids.OPEN_ACL_UNSAFE; List<ACL> acls = longKeyMap.get(longVal); if (acls == null) { LOG.error("ERROR: ACL not available for long " + longVal); throw new RuntimeException("Failed to fetch acls for " + longVal); } return acls; } public Collection<Long> getSessions() { return ephemerals.keySet(); } /** * just an accessor method to allow raw creation of datatree's from a bunch * of datanodes * * @param path * the path of the datanode * @param node * the datanode corresponding to this path */ public void addDataNode(String path, DataNode node) { nodes.put(path, node); } public DataNode getNode(String path) { return nodes.get(path); } public int getNodeCount() { return nodes.size(); } public int getWatchCount() { return dataWatches.size() + childWatches.size(); } /** * Get the size of the nodes based on path and data length. * * @return size of the data */ public long approximateDataSize() { long result = 0; for (Map.Entry<String, DataNode> entry : nodes.entrySet()) { DataNode value = entry.getValue(); synchronized (value) { result += entry.getKey().length(); result += (value.data == null ? 0 : value.data.length); } } return result; } /** * This is a pointer to the root of the DataTree. It is the source of truth, * but we usually use the nodes hashmap to find nodes in the tree. */ private DataNode root = new DataNode(null, new byte[0], -1L, new StatPersisted()); /** * create a /zookeeper filesystem that is the proc filesystem of zookeeper */ private final DataNode procDataNode = new DataNode(root, new byte[0], -1L, new StatPersisted()); /** * create a /zookeeper/quota node for maintaining quota properties for * zookeeper */ private final DataNode quotaDataNode = new DataNode(procDataNode, new byte[0], -1L, new StatPersisted()); public DataTree() { /* Rather than fight it, let root have an alias */ nodes.put("", root); nodes.put(rootZookeeper, root); /** add the proc node and quota node */ root.addChild(procChildZookeeper); nodes.put(procZookeeper, procDataNode); procDataNode.addChild(quotaChildZookeeper); nodes.put(quotaZookeeper, quotaDataNode); } /** * is the path one of the special paths owned by zookeeper. * * @param path * the path to be checked * @return true if a special path. false if not. */ boolean isSpecialPath(String path) { if (rootZookeeper.equals(path) || procZookeeper.equals(path) || quotaZookeeper.equals(path)) { return true; } return false; } static public void copyStatPersisted(StatPersisted from, StatPersisted to) { to.setAversion(from.getAversion()); to.setCtime(from.getCtime()); to.setCversion(from.getCversion()); to.setCzxid(from.getCzxid()); to.setMtime(from.getMtime()); to.setMzxid(from.getMzxid()); to.setPzxid(from.getPzxid()); to.setVersion(from.getVersion()); to.setEphemeralOwner(from.getEphemeralOwner()); } static public void copyStat(Stat from, Stat to) { to.setAversion(from.getAversion()); to.setCtime(from.getCtime()); to.setCversion(from.getCversion()); to.setCzxid(from.getCzxid()); to.setMtime(from.getMtime()); to.setMzxid(from.getMzxid()); to.setPzxid(from.getPzxid()); to.setVersion(from.getVersion()); to.setEphemeralOwner(from.getEphemeralOwner()); to.setDataLength(from.getDataLength()); to.setNumChildren(from.getNumChildren()); } /** * update the count of this stat datanode * * @param lastPrefix * the path of the node that is quotaed. * @param diff * the diff to be added to the count */ public void updateCount(String lastPrefix, int diff) { String statNode = Quotas.statPath(lastPrefix); DataNode node = nodes.get(statNode); StatsTrack updatedStat = null; if (node == null) { // should not happen LOG.error("Missing count node for stat " + statNode); return; } synchronized (node) { updatedStat = new StatsTrack(new String(node.data)); updatedStat.setCount(updatedStat.getCount() + diff); node.data = updatedStat.toString().getBytes(); } // now check if the counts match the quota String quotaNode = Quotas.quotaPath(lastPrefix); node = nodes.get(quotaNode); StatsTrack thisStats = null; if (node == null) { // should not happen LOG.error("Missing count node for quota " + quotaNode); return; } synchronized (node) { thisStats = new StatsTrack(new String(node.data)); } if (thisStats.getCount() < updatedStat.getCount()) { LOG .warn("Quota exceeded: " + lastPrefix + " count=" + updatedStat.getCount() + " limit=" + thisStats.getCount()); } } /** * update the count of bytes of this stat datanode * * @param lastPrefix * the path of the node that is quotaed * @param diff * the diff to added to number of bytes * @throws IOException * if path is not found */ public void updateBytes(String lastPrefix, long diff) { String statNode = Quotas.statPath(lastPrefix); DataNode node = nodes.get(statNode); if (node == null) { // should never be null but just to make // findbugs happy LOG.error("Missing stat node for bytes " + statNode); return; } StatsTrack updatedStat = null; synchronized (node) { updatedStat = new StatsTrack(new String(node.data)); updatedStat.setBytes(updatedStat.getBytes() + diff); node.data = updatedStat.toString().getBytes(); } // now check if the bytes match the quota String quotaNode = Quotas.quotaPath(lastPrefix); node = nodes.get(quotaNode); if (node == null) { // should never be null but just to make // findbugs happy LOG.error("Missing quota node for bytes " + quotaNode); return; } StatsTrack thisStats = null; synchronized (node) { thisStats = new StatsTrack(new String(node.data)); } if (thisStats.getBytes() < updatedStat.getBytes()) { LOG .warn("Quota exceeded: " + lastPrefix + " bytes=" + updatedStat.getBytes() + " limit=" + thisStats.getBytes()); } } /** * @param path * @param data * @param acl * @param ephemeralOwner * the session id that owns this node. -1 indicates this is not * an ephemeral node. * @param zxid * @param time * @return the patch of the created node * @throws KeeperException */ public String createNode(String path, byte data[], List<ACL> acl, long ephemeralOwner, long zxid, long time) throws KeeperException.NoNodeException, KeeperException.NodeExistsException { int lastSlash = path.lastIndexOf('/'); String parentName = path.substring(0, lastSlash); String childName = path.substring(lastSlash + 1); StatPersisted stat = new StatPersisted(); stat.setCtime(time); stat.setMtime(time); stat.setCzxid(zxid); stat.setMzxid(zxid); stat.setPzxid(zxid); stat.setVersion(0); stat.setAversion(0); stat.setEphemeralOwner(ephemeralOwner); DataNode parent = nodes.get(parentName); if (parent == null) { throw new KeeperException.NoNodeException(); } synchronized (parent) { Set<String> children = parent.getChildren(); if (children != null) { if (children.contains(childName)) { throw new KeeperException.NodeExistsException(); } } int cver = parent.stat.getCversion(); cver++; parent.stat.setCversion(cver); parent.stat.setPzxid(zxid); Long longval = convertAcls(acl); DataNode child = new DataNode(parent, data, longval, stat); parent.addChild(childName); nodes.put(path, child); if (ephemeralOwner != 0) { HashSet<String> list = ephemerals.get(ephemeralOwner); if (list == null) { list = new HashSet<String>(); ephemerals.put(ephemeralOwner, list); } synchronized (list) { list.add(path); } } } // now check if its one of the zookeeper node child if (parentName.startsWith(quotaZookeeper)) { // now check if its the limit node if (Quotas.limitNode.equals(childName)) { // this is the limit node // get the parent and add it to the trie pTrie.addPath(parentName.substring(quotaZookeeper.length())); } if (Quotas.statNode.equals(childName)) { updateQuotaForPath(parentName .substring(quotaZookeeper.length())); } } // also check to update the quotas for this node String lastPrefix = pTrie.findMaxPrefix(path); if (!rootZookeeper.equals(lastPrefix) && !("".equals(lastPrefix))) { // ok we have some match and need to update updateCount(lastPrefix, 1); updateBytes(lastPrefix, data == null ? 0 : data.length); } dataWatches.triggerWatch(path, Event.EventType.NodeCreated); childWatches.triggerWatch(parentName.equals("") ? "/" : parentName, Event.EventType.NodeChildrenChanged); return path; } /** * remove the path from the datatree * * @param path * the path to of the node to be deleted * @param zxid * the current zxid * @throws KeeperException.NoNodeException */ public void deleteNode(String path, long zxid) throws KeeperException.NoNodeException { int lastSlash = path.lastIndexOf('/'); String parentName = path.substring(0, lastSlash); String childName = path.substring(lastSlash + 1); DataNode node = nodes.get(path); if (node == null) { throw new KeeperException.NoNodeException(); } nodes.remove(path); DataNode parent = nodes.get(parentName); if (parent == null) { throw new KeeperException.NoNodeException(); } synchronized (parent) { parent.removeChild(childName); parent.stat.setCversion(parent.stat.getCversion() + 1); parent.stat.setPzxid(zxid); long eowner = node.stat.getEphemeralOwner(); if (eowner != 0) { HashSet<String> nodes = ephemerals.get(eowner); if (nodes != null) { synchronized (nodes) { nodes.remove(path); } } } node.parent = null; } if (parentName.startsWith(procZookeeper)) { // delete the node in the trie. if (Quotas.limitNode.equals(childName)) { // we need to update the trie // as well pTrie.deletePath(parentName.substring(quotaZookeeper.length())); } } // also check to update the quotas for this node String lastPrefix = pTrie.findMaxPrefix(path); if (!rootZookeeper.equals(lastPrefix) && !("".equals(lastPrefix))) { // ok we have some match and need to update updateCount(lastPrefix, -1); int bytes = 0; synchronized (node) { bytes = (node.data == null ? 0 : -(node.data.length)); } updateBytes(lastPrefix, bytes); } if (LOG.isTraceEnabled()) { ZooTrace.logTraceMessage(LOG, ZooTrace.EVENT_DELIVERY_TRACE_MASK, "dataWatches.triggerWatch " + path); ZooTrace.logTraceMessage(LOG, ZooTrace.EVENT_DELIVERY_TRACE_MASK, "childWatches.triggerWatch " + parentName); } Set<Watcher> processed = dataWatches.triggerWatch(path, EventType.NodeDeleted); childWatches.triggerWatch(path, EventType.NodeDeleted, processed); childWatches.triggerWatch(parentName.equals("") ? "/" : parentName, EventType.NodeChildrenChanged); } public Stat setData(String path, byte data[], int version, long zxid, long time) throws KeeperException.NoNodeException { Stat s = new Stat(); DataNode n = nodes.get(path); if (n == null) { throw new KeeperException.NoNodeException(); } byte lastdata[] = null; synchronized (n) { lastdata = n.data; n.data = data; n.stat.setMtime(time); n.stat.setMzxid(zxid); n.stat.setVersion(version); n.copyStat(s); } // now update if the path is in a quota subtree. String lastPrefix = pTrie.findMaxPrefix(path); // do nothing for the root. // we are not keeping a quota on the zookeeper // root node for now. if (!rootZookeeper.equals(lastPrefix) && !("".equals(lastPrefix))) { this.updateBytes(lastPrefix, (data == null ? 0 : data.length) - (lastdata == null ? 0 : lastdata.length)); } dataWatches.triggerWatch(path, EventType.NodeDataChanged); return s; } public byte[] getData(String path, Stat stat, Watcher watcher) throws KeeperException.NoNodeException { DataNode n = nodes.get(path); if (n == null) { throw new KeeperException.NoNodeException(); } synchronized (n) { n.copyStat(stat); if (watcher != null) { dataWatches.addWatch(path, watcher); } return n.data; } } public Stat statNode(String path, Watcher watcher) throws KeeperException.NoNodeException { Stat stat = new Stat(); DataNode n = nodes.get(path); if (watcher != null) { dataWatches.addWatch(path, watcher); } if (n == null) { throw new KeeperException.NoNodeException(); } synchronized (n) { n.copyStat(stat); return stat; } } public List<String> getChildren(String path, Stat stat, Watcher watcher) throws KeeperException.NoNodeException { DataNode n = nodes.get(path); if (n == null) { throw new KeeperException.NoNodeException(); } synchronized (n) { if (stat != null) { n.copyStat(stat); } ArrayList<String> children; Set<String> childs = n.getChildren(); if (childs != null) { children = new ArrayList<String>(childs.size()); children.addAll(childs); } else { children = new ArrayList<String>(0); } if (watcher != null) { childWatches.addWatch(path, watcher); } return children; } } public Stat setACL(String path, List<ACL> acl, int version) throws KeeperException.NoNodeException { Stat stat = new Stat(); DataNode n = nodes.get(path); if (n == null) { throw new KeeperException.NoNodeException(); } synchronized (n) { n.stat.setAversion(version); n.acl = convertAcls(acl); n.copyStat(stat); return stat; } } @SuppressWarnings("unchecked") public List<ACL> getACL(String path, Stat stat) throws KeeperException.NoNodeException { DataNode n = nodes.get(path); if (n == null) { throw new KeeperException.NoNodeException(); } synchronized (n) { n.copyStat(stat); return new ArrayList<ACL>(convertLong(n.acl)); } } static public class ProcessTxnResult { public long clientId; public int cxid; public long zxid; public int err; public int type; public String path; public Stat stat; /** * Equality is defined as the clientId and the cxid being the same. This * allows us to use hash tables to track completion of transactions. * * @see java.lang.Object#equals(java.lang.Object) */ @Override public boolean equals(Object o) { if (o instanceof ProcessTxnResult) { ProcessTxnResult other = (ProcessTxnResult) o; return other.clientId == clientId && other.cxid == cxid; } return false; } /** * See equals() to find the rational for how this hashcode is generated. * * @see ProcessTxnResult#equals(Object) * @see java.lang.Object#hashCode() */ @Override public int hashCode() { return (int) ((clientId ^ cxid) % Integer.MAX_VALUE); } } public volatile long lastProcessedZxid = 0; public ProcessTxnResult processTxn(TxnHeader header, Record txn) { ProcessTxnResult rc = new ProcessTxnResult(); String debug = ""; try { rc.clientId = header.getClientId(); rc.cxid = header.getCxid(); rc.zxid = header.getZxid(); rc.type = header.getType(); rc.err = 0; if (rc.zxid > lastProcessedZxid) { lastProcessedZxid = rc.zxid; } switch (header.getType()) { case OpCode.create: CreateTxn createTxn = (CreateTxn) txn; debug = "Create transaction for " + createTxn.getPath(); createNode( createTxn.getPath(), createTxn.getData(), createTxn.getAcl(), createTxn.getEphemeral() ? header.getClientId() : 0, header.getZxid(), header.getTime()); rc.path = createTxn.getPath(); break; case OpCode.delete: DeleteTxn deleteTxn = (DeleteTxn) txn; debug = "Delete transaction for " + deleteTxn.getPath(); deleteNode(deleteTxn.getPath(), header.getZxid()); break; case OpCode.setData: SetDataTxn setDataTxn = (SetDataTxn) txn; debug = "Set data for transaction for " + setDataTxn.getPath(); rc.stat = setData(setDataTxn.getPath(), setDataTxn .getData(), setDataTxn.getVersion(), header .getZxid(), header.getTime()); break; case OpCode.setACL: SetACLTxn setACLTxn = (SetACLTxn) txn; debug = "Set ACL for transaction for " + setACLTxn.getPath(); rc.stat = setACL(setACLTxn.getPath(), setACLTxn.getAcl(), setACLTxn.getVersion()); break; case OpCode.closeSession: killSession(header.getClientId(), header.getZxid()); break; case OpCode.error: ErrorTxn errTxn = (ErrorTxn) txn; rc.err = errTxn.getErr(); break; } } catch (KeeperException e) { // These are expected errors since we take a lazy snapshot if (initialized || (e.code() != Code.NONODE && e.code() != Code.NODEEXISTS)) { LOG.warn("Failed:" + debug, e); } } return rc; } void killSession(long session, long zxid) { // the list is already removed from the ephemerals // so we do not have to worry about synchronizing on // the list. This is only called from FinalRequestProcessor // so there is no need for synchronization. The list is not // changed here. Only create and delete change the list which // are again called from FinalRequestProcessor in sequence. HashSet<String> list = ephemerals.remove(session); if (list != null) { for (String path : list) { try { deleteNode(path, zxid); if (LOG.isDebugEnabled()) { LOG .debug("Deleting ephemeral node " + path + " for session 0x" + Long.toHexString(session)); } } catch (NoNodeException e) { LOG.warn("Ignoring NoNodeException for path " + path + " while removing ephemeral for dead session 0x" + Long.toHexString(session)); } } } } /** * a encapsultaing class for return value */ private static class Counts { long bytes; int count; } /** * this method gets the count of nodes and the bytes under a subtree * * @param path * the path to be used * @param bytes * the long bytes * @param count * the int count */ private void getCounts(String path, Counts counts) { DataNode node = getNode(path); if (node == null) { return; } String[] children = null; int len = 0; synchronized (node) { Set<String> childs = node.getChildren(); if (childs != null) { children = childs.toArray(new String[childs.size()]); } len = (node.data == null ? 0 : node.data.length); } // add itself counts.count += 1; counts.bytes += len; if (children == null || children.length == 0) { return; } for (String child : children) { getCounts(path + "/" + child, counts); } } /** * update the quota for the given path * * @param path * the path to be used */ private void updateQuotaForPath(String path) { Counts c = new Counts(); getCounts(path, c); StatsTrack strack = new StatsTrack(); strack.setBytes(c.bytes); strack.setCount(c.count); String statPath = Quotas.quotaZookeeper + path + "/" + Quotas.statNode; DataNode node = getNode(statPath); // it should exist if (node == null) { LOG.warn("Missing quota stat node " + statPath); return; } synchronized (node) { node.data = strack.toString().getBytes(); } } /** * this method traverses the quota path and update the path trie and sets * * @param path */ private void traverseNode(String path) { DataNode node = getNode(path); String children[] = null; synchronized (node) { Set<String> childs = node.getChildren(); if (childs != null) { children = childs.toArray(new String[childs.size()]); } } if (children != null) { if (children.length == 0) { // this node does not have a child // is the leaf node // check if its the leaf node String endString = "/" + Quotas.limitNode; if (path.endsWith(endString)) { // ok this is the limit node // get the real node and update // the count and the bytes String realPath = path.substring(Quotas.quotaZookeeper .length(), path.indexOf(endString)); updateQuotaForPath(realPath); this.pTrie.addPath(realPath); } return; } for (String child : children) { traverseNode(path + "/" + child); } } } /** * this method sets up the path trie and sets up stats for quota nodes */ private void setupQuota() { String quotaPath = Quotas.quotaZookeeper; DataNode node = getNode(quotaPath); if (node == null) { return; } traverseNode(quotaPath); } /** * this method uses a stringbuilder to create a new path for children. This * is faster than string appends ( str1 + str2). * * @param oa * OutputArchive to write to. * @param path * a string builder. * @throws IOException * @throws InterruptedException */ void serializeNode(OutputArchive oa, StringBuilder path) throws IOException { String pathString = path.toString(); DataNode node = getNode(pathString); if (node == null) { return; } String children[] = null; synchronized (node) { scount++; oa.writeString(pathString, "path"); oa.writeRecord(node, "node"); Set<String> childs = node.getChildren(); if (childs != null) { children = childs.toArray(new String[childs.size()]); } } path.append('/'); int off = path.length(); if (children != null) { for (String child : children) { // since this is single buffer being resused // we need // to truncate the previous bytes of string. path.delete(off, Integer.MAX_VALUE); path.append(child); serializeNode(oa, path); } } } int scount; public boolean initialized = false; private void deserializeList(Map<Long, List<ACL>> longKeyMap, InputArchive ia) throws IOException { int i = ia.readInt("map"); while (i > 0) { Long val = ia.readLong("long"); if (aclIndex < val) { aclIndex = val; } List<ACL> aclList = new ArrayList<ACL>(); Index j = ia.startVector("acls"); while (!j.done()) { ACL acl = new ACL(); acl.deserialize(ia, "acl"); aclList.add(acl); j.incr(); } longKeyMap.put(val, aclList); aclKeyMap.put(aclList, val); i--; } } private synchronized void serializeList(Map<Long, List<ACL>> longKeyMap, OutputArchive oa) throws IOException { oa.writeInt(longKeyMap.size(), "map"); Set<Map.Entry<Long, List<ACL>>> set = longKeyMap.entrySet(); for (Map.Entry<Long, List<ACL>> val : set) { oa.writeLong(val.getKey(), "long"); List<ACL> aclList = val.getValue(); oa.startVector(aclList, "acls"); for (ACL acl : aclList) { acl.serialize(oa, "acl"); } oa.endVector(aclList, "acls"); } } public void serialize(OutputArchive oa, String tag) throws IOException { scount = 0; serializeList(longKeyMap, oa); serializeNode(oa, new StringBuilder("")); // / marks end of stream // we need to check if clear had been called in between the snapshot. if (root != null) { oa.writeString("/", "path"); } } public void deserialize(InputArchive ia, String tag) throws IOException { deserializeList(longKeyMap, ia); nodes.clear(); String path = ia.readString("path"); while (!path.equals("/")) { DataNode node = new DataNode(); ia.readRecord(node, "node"); nodes.put(path, node); int lastSlash = path.lastIndexOf('/'); if (lastSlash == -1) { root = node; } else { String parentPath = path.substring(0, lastSlash); node.parent = nodes.get(parentPath); if (node.parent == null) { throw new IOException("Invalid Datatree, unable to find " + "parent " + parentPath + " of path " + path); } node.parent.addChild(path.substring(lastSlash + 1)); long eowner = node.stat.getEphemeralOwner(); if (eowner != 0) { HashSet<String> list = ephemerals.get(eowner); if (list == null) { list = new HashSet<String>(); ephemerals.put(eowner, list); } list.add(path); } } path = ia.readString("path"); } nodes.put("/", root); // we are done with deserializing the // the datatree // update the quotas - create path trie // and also update the stat nodes setupQuota(); } /** * Summary of the watches on the datatree. * @param pwriter the output to write to */ public synchronized void dumpWatchesSummary(PrintWriter pwriter) { pwriter.print(dataWatches.toString()); } /** * Write a text dump of all the watches on the datatree. * Warning, this is expensive, use sparingly! * @param pwriter the output to write to */ public synchronized void dumpWatches(PrintWriter pwriter, boolean byPath) { dataWatches.dumpWatches(pwriter, byPath); } /** * Write a text dump of all the ephemerals in the datatree. * @param pwriter the output to write to */ public void dumpEphemerals(PrintWriter pwriter) { Set<Long> keys = ephemerals.keySet(); pwriter.println("Sessions with Ephemerals (" + keys.size() + "):"); for (long k : keys) { pwriter.print("0x" + Long.toHexString(k)); pwriter.println(":"); HashSet<String> tmp = ephemerals.get(k); synchronized (tmp) { for (String path : tmp) { pwriter.println("\t" + path); } } } } public void removeCnxn(Watcher watcher) { dataWatches.removeWatcher(watcher); childWatches.removeWatcher(watcher); } public void clear() { root = null; nodes.clear(); ephemerals.clear(); // dataWatches = null; // childWatches = null; } public void setWatches(long relativeZxid, List<String> dataWatches, List<String> existWatches, List<String> childWatches, Watcher watcher) { for (String path : dataWatches) { DataNode node = getNode(path); WatchedEvent e = null; if (node == null) { e = new WatchedEvent(EventType.NodeDeleted, KeeperState.SyncConnected, path); } else if (node.stat.getCzxid() > relativeZxid) { e = new WatchedEvent(EventType.NodeCreated, KeeperState.SyncConnected, path); } else if (node.stat.getMzxid() > relativeZxid) { e = new WatchedEvent(EventType.NodeDataChanged, KeeperState.SyncConnected, path); } if (e != null) { watcher.process(e); } else { this.dataWatches.addWatch(path, watcher); } } for (String path : existWatches) { DataNode node = getNode(path); WatchedEvent e = null; if (node == null) { // This is the case when the watch was registered } else if (node.stat.getMzxid() > relativeZxid) { e = new WatchedEvent(EventType.NodeDataChanged, KeeperState.SyncConnected, path); } else { e = new WatchedEvent(EventType.NodeCreated, KeeperState.SyncConnected, path); } if (e != null) { watcher.process(e); } else { this.dataWatches.addWatch(path, watcher); } } for (String path : childWatches) { DataNode node = getNode(path); WatchedEvent e = null; if (node == null) { e = new WatchedEvent(EventType.NodeDeleted, KeeperState.SyncConnected, path); } else if (node.stat.getPzxid() > relativeZxid) { e = new WatchedEvent(EventType.NodeChildrenChanged, KeeperState.SyncConnected, path); } if (e != null) { watcher.process(e); } else { this.childWatches.addWatch(path, watcher); } } } }