/* * Copyright (C) 2015 hops.io. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package io.hops.transaction.lock; import com.google.common.base.Joiner; import io.hops.common.INodeResolver; import io.hops.exception.StorageException; import io.hops.exception.TransactionContextException; import io.hops.leader_election.node.ActiveNode; import io.hops.metadata.hdfs.dal.INodeDataAccess; import io.hops.resolvingcache.Cache; import io.hops.resolvingcache.OptimalMemcache; import io.hops.resolvingcache.PathMemcache; import io.hops.security.Users; import io.hops.transaction.EntityManager; import org.apache.commons.math3.stat.StatUtils; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.UnresolvedPathException; import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.LinkedList; import java.util.List; class INodeLock extends BaseINodeLock { private final TransactionLockTypes.INodeLockType lockType; private final TransactionLockTypes.INodeResolveType resolveType; private final boolean resolveLink; protected final String[] paths; private final Collection<ActiveNode> activeNamenodes; private final boolean ignoreLocalSubtreeLocks; private final long namenodeId; protected final boolean skipReadingQuotaAttr; INodeLock(TransactionLockTypes.INodeLockType lockType, TransactionLockTypes.INodeResolveType resolveType, boolean resolveLink, boolean ignoreLocalSubtreeLocks, boolean skipReadingQuotaAttr, long namenodeId, Collection<ActiveNode> activeNamenodes, String... paths) { super(); this.lockType = lockType; this.resolveType = resolveType; this.resolveLink = resolveLink; this.activeNamenodes = activeNamenodes; this.ignoreLocalSubtreeLocks = ignoreLocalSubtreeLocks; this.namenodeId = namenodeId; this.paths = paths; this.skipReadingQuotaAttr = skipReadingQuotaAttr; } INodeLock(boolean skipReadingQuotaAttr, TransactionLockTypes.INodeLockType lockType, TransactionLockTypes.INodeResolveType resolveType, boolean resolveLink, Collection<ActiveNode> activeNamenodes, String... paths) { this(lockType, resolveType, resolveLink, false, skipReadingQuotaAttr, -1, activeNamenodes, paths); } INodeLock(TransactionLockTypes.INodeLockType lockType, TransactionLockTypes.INodeResolveType resolveType, Collection<ActiveNode> activeNamenodes, String... paths) { this(lockType, resolveType, true, false, false, -1, activeNamenodes, paths); } private CacheResolver instance = null; private CacheResolver getCacheResolver(){ if(instance == null){ if(Cache.getInstance() instanceof OptimalMemcache){ instance = new OptimalPathResolver(); } else if(Cache.getInstance() instanceof PathMemcache){ instance = new FullPathResolver(); }else { instance = new PartialPathResolver(); } } return instance; } private abstract class CacheResolver { abstract List<INode> fetchINodes(String path) throws IOException; protected int verifyINodesFull(final List<INode> inodes, final String[] names, final int[] parentIds, final int[] inodeIds) throws IOException { int index = -1; if (names.length == parentIds.length) { if (inodes.size() == names.length) { index = verifyINodesPartial(inodes, names, parentIds, inodeIds); } } return index; } protected int verifyINodesPartial(final List<INode> inodes, final String[] names, final int[] parentIds, final int[] inodeIds) throws IOException { int index = (int)StatUtils.min(new double[]{inodes.size(), inodeIds .length, parentIds.length, names.length}); for (int i = 0; i < index; i++) { INode inode = inodes.get(i); boolean noChangeInInodes = inode != null && inode.getLocalName().equals(names[i]) && inode.getParentId() == parentIds[i] && inode.getId() == inodeIds[i]; if (!noChangeInInodes) { index = i; break; } } return index; } protected int[] getParentIds(int[] inodeIds) { return getParentIds(inodeIds, false); } protected int[] getParentIds(int[] inodeIds, boolean partial) { int[] parentIds = new int[partial ? inodeIds.length + 1 : inodeIds.length]; parentIds[0] = INodeDirectory.ROOT_PARENT_ID; System.arraycopy(inodeIds, 0, parentIds, 1, (partial ? inodeIds.length : inodeIds.length - 1)); return parentIds; } protected void setPartitionKey(int[] inodeIds, int parentIds[], int partitionIds[], boolean partial) throws TransactionContextException, StorageException { Integer partId = null; if(partial){ if (setRandomParitionKeyEnabled && partId == null) { LOG.debug("Setting Random PartitionKey"); partId = Math.abs(rand.nextInt()); } }else{ partId = inodeIds[inodeIds.length - 1]; } setPartitioningKey(partId); } } private class FullPathResolver extends CacheResolver { @Override List<INode> fetchINodes(String path) throws IOException { int[] inodeIds = Cache.getInstance().get(path); if (inodeIds != null) { final String[] names = INode.getPathNames(path); final boolean partial = names.length > inodeIds.length; final int[] parentIds = getParentIds(inodeIds); final int[] partitionIds = new int[parentIds.length]; short depth = INodeDirectory.ROOT_DIR_DEPTH; partitionIds[0] = INodeDirectory.getRootDirPartitionKey(); for(int i = 1; i < partitionIds.length; i++){ depth++; partitionIds[i] = INode.calculatePartitionId(parentIds[i], names[i], depth); } setPartitionKey(inodeIds,parentIds,partitionIds,partial); List<INode> inodes = readINodesWhileRespectingLocks(path,names, parentIds,partitionIds); if (inodes != null) { if (verifyINodes(inodes, names, parentIds, inodeIds)) { addPathINodes(path, inodes); return inodes; } else { Cache.getInstance().delete(path); } } } return null; } private boolean verifyINodes(List<INode> inodes, String[] names, int[] parentIds, int[] inodeIds) throws IOException { return verifyINodesFull(inodes, names, parentIds, inodeIds) == inodes .size(); } protected List<INode> readINodesWhileRespectingLocks(final String path, final String[] names, final int[] parentIds, final int[] partitionIds) throws TransactionContextException, StorageException, UnresolvedPathException { int rowsToReadWithDefaultLock = names.length; if (!lockType.equals(DEFAULT_INODE_LOCK_TYPE)) { if (lockType.equals( TransactionLockTypes.INodeLockType.WRITE_ON_TARGET_AND_PARENT)) { rowsToReadWithDefaultLock -= 2; } else { rowsToReadWithDefaultLock -= 1; } } rowsToReadWithDefaultLock = Math.min(rowsToReadWithDefaultLock, parentIds.length); List<INode> inodes = null; if (rowsToReadWithDefaultLock > 0) { inodes = find(DEFAULT_INODE_LOCK_TYPE, Arrays.copyOf(names, rowsToReadWithDefaultLock), Arrays.copyOf(parentIds, rowsToReadWithDefaultLock), Arrays.copyOf(partitionIds, rowsToReadWithDefaultLock), true); } if(inodes != null) { for (INode inode : inodes) { addLockedINodes(inode, DEFAULT_INODE_LOCK_TYPE); } } if(rowsToReadWithDefaultLock == names.length){ return inodes; } boolean partialPath = parentIds.length < names.length; if (inodes != null && !partialPath) { resolveRestOfThePath(path, inodes); } return inodes; } protected void resolveRestOfThePath(String path, List<INode> inodes) throws StorageException, TransactionContextException, UnresolvedPathException { byte[][] components = INode.getPathComponents(path); INode currentINode = inodes.get(inodes.size() - 1); INodeResolver resolver = new INodeResolver(components, currentINode, resolveLink, true, inodes.size() - 1); while (resolver.hasNext()) { TransactionLockTypes.INodeLockType currentINodeLock = identifyLockType(resolver.getCount() + 1, components); setINodeLockType(currentINodeLock); currentINode = resolver.next(); if (currentINode != null) { addLockedINodes(currentINode, currentINodeLock); inodes.add(currentINode); } } } } private class PartialPathResolver extends FullPathResolver { @Override List<INode> fetchINodes(String path) throws IOException { int[] inodeIds = Cache.getInstance().get(path); if (inodeIds != null) { final String[] names = INode.getPathNames(path); final boolean partial = names.length > inodeIds.length; final int[] parentIds = getParentIds(inodeIds, partial); final int[] partitionIds = new int[parentIds.length]; short depth = INodeDirectory.ROOT_DIR_DEPTH; partitionIds[0] = INodeDirectory.getRootDirPartitionKey(); for(int i = 1; i < partitionIds.length;i++){ depth++; partitionIds[i] = INode.calculatePartitionId(parentIds[i], names[i], depth); } setPartitionKey(inodeIds, parentIds, partitionIds, partial); List<INode> inodes = readINodesWhileRespectingLocks(path, names, parentIds, partitionIds); if (inodes != null && !inodes.isEmpty()) { final int unverifiedInode = verifyINodesPartial(inodes, names, parentIds, inodeIds); int diff = inodes.size() - unverifiedInode; while (diff > 0){ INode node = inodes.remove(inodes.size() - 1); Cache.getInstance().delete(node); diff--; } if(unverifiedInode <= 1) return null; tryResolvingTheRest(path, inodes, inodes.size() - unverifiedInode); return inodes; } } return null; } protected void tryResolvingTheRest(String path, List<INode> inodes, int diff) throws TransactionContextException, UnresolvedPathException, StorageException { int offset = inodes.size(); resolveRestOfThePath(path, inodes); addPathINodesWithOffset(path, inodes, offset); } private void addPathINodesWithOffset(String path, List<INode> inodes, int offset){ addPathINodes(path, inodes); if(offset == 0){ updateResolvingCache(path, inodes); }else { if(offset == inodes.size()){ return; } List<INode> newInodes = inodes.subList(offset, inodes.size()); String[] newPath = Arrays.copyOfRange(INode.getPathNames(path), offset, inodes.size()); updateResolvingCache( Joiner.on(Path.SEPARATOR_CHAR).join(newPath), newInodes); } } } private class OptimalPathResolver extends PartialPathResolver{ @Override protected void tryResolvingTheRest(String path, List<INode> inodes, int diff) throws TransactionContextException, UnresolvedPathException, StorageException { resolveRestOfThePath(path, inodes); addPathINodes(path, inodes); if(diff > 1){ Cache.getInstance().delete(path); updateResolvingCache(path, inodes); }else{ updateResolvingCache(inodes.get(inodes.size() - 1)); } } } @Override protected void acquire(TransactionLocks locks) throws IOException { /* * Needs to be sorted in order to avoid deadlocks. Otherwise one transaction * could acquire path0 and path1 in the given order while another one does * it in the opposite order, more precisely path1, path0, what could cause * a dealock situation. */ Arrays.sort(paths); acquireINodeLocks(); if(!skipReadingQuotaAttr){ acquireINodeAttributes(); } } protected void acquireINodeLocks() throws IOException { if (!resolveType.equals(TransactionLockTypes.INodeResolveType.PATH) && !resolveType.equals( TransactionLockTypes.INodeResolveType.PATH_AND_IMMEDIATE_CHILDREN) && !resolveType.equals( TransactionLockTypes.INodeResolveType.PATH_AND_ALL_CHILDREN_RECURSIVELY)) { throw new IllegalArgumentException("Unknown type " + resolveType.name()); } for (int i = 0; i < paths.length; i++) { String path = paths[i]; List<INode> resolvedINodes = resolveUsingMemcache(path); if (resolvedINodes == null) { // path not found in the cache // set random partition key if enabled if(setRandomParitionKeyEnabled){ setPartitioningKey(rand.nextInt()); } resolvedINodes = acquireINodeLockByPath(path); addPathINodesAndUpdateResolvingCache(path, resolvedINodes); } if (resolvedINodes.size() > 0) { INode lastINode = resolvedINodes.get(resolvedINodes.size() - 1); if (resolveType == TransactionLockTypes.INodeResolveType.PATH_AND_IMMEDIATE_CHILDREN) { List<INode> children = findImmediateChildren(lastINode); addChildINodes(path, children); } else if (resolveType == TransactionLockTypes.INodeResolveType.PATH_AND_ALL_CHILDREN_RECURSIVELY) { List<INode> children = findChildrenRecursively(lastINode); addChildINodes(path, children); } } } } private List<INode> resolveUsingMemcache(String path) throws IOException { CacheResolver memcacheResolver = getCacheResolver(); if(memcacheResolver == null) return null; List<INode> resolvedINodes = memcacheResolver.fetchINodes(path); if (resolvedINodes != null) { for (INode iNode : resolvedINodes) { checkSubtreeLock(iNode); } handleLockUpgrade(resolvedINodes, INode.getPathComponents(path), path); } return resolvedINodes; } private List<INode> acquireINodeLockByPath(String path) throws UnresolvedPathException, StorageException, SubtreeLockedException, TransactionContextException { List<INode> resolvedINodes = new ArrayList<INode>(); byte[][] components = INode.getPathComponents(path); INode currentINode; if (isRootTarget(components)) { resolvedINodes.add(acquireLockOnRoot(lockType)); return resolvedINodes; } else if (isRootParent(components) && TransactionLockTypes.impliesParentWriteLock(this.lockType)) { currentINode = acquireLockOnRoot(lockType); } else { currentINode = acquireLockOnRoot(DEFAULT_INODE_LOCK_TYPE); } resolvedINodes.add(currentINode); INodeResolver resolver = new INodeResolver(components, currentINode, resolveLink, true); while (resolver.hasNext()) { TransactionLockTypes.INodeLockType currentINodeLock = identifyLockType(resolver.getCount() + 1, components); setINodeLockType(currentINodeLock); currentINode = resolver.next(); if (currentINode != null) { addLockedINodes(currentINode, currentINodeLock); checkSubtreeLock(currentINode); resolvedINodes.add(currentINode); } } handleLockUpgrade(resolvedINodes, components, path); return resolvedINodes; } private boolean isRootTarget(byte[][] components) { return isTarget(0, components); } private boolean isRootParent(byte[][] components) { return isParent(0, components); } private TransactionLockTypes.INodeLockType identifyLockType(int count, byte[][] components) throws StorageException { TransactionLockTypes.INodeLockType lkType; if (isTarget(count, components)) { lkType = this.lockType; } else if (isParent(count, components) && TransactionLockTypes.impliesParentWriteLock(this.lockType)) { lkType = TransactionLockTypes.INodeLockType.WRITE; } else { lkType = DEFAULT_INODE_LOCK_TYPE; } return lkType; } private boolean isTarget(int count, byte[][] components) { return count == components.length - 1; } private boolean isParent(int count, byte[][] components) { return count == components.length - 2; } private void checkSubtreeLock(INode iNode) throws SubtreeLockedException { if (SubtreeLockHelper .isSubtreeLocked(iNode.isSubtreeLocked(), iNode.getSubtreeLockOwner(), activeNamenodes)) { if (!ignoreLocalSubtreeLocks // && namenodeId != iNode.getSubtreeLockOwner() ) { throw new SubtreeLockedException(iNode.getLocalName(), activeNamenodes); } } } private void handleLockUpgrade(List<INode> resolvedINodes, byte[][] components, String path) throws StorageException, UnresolvedPathException, TransactionContextException { // TODO Handle the case that predecessing nodes get deleted before locking // lock upgrade if the path was not fully resolved if (resolvedINodes.size() != components.length) { // path was not fully resolved INode inodeToReread = null; if (lockType == TransactionLockTypes.INodeLockType.WRITE_ON_TARGET_AND_PARENT) { if (resolvedINodes.size() <= components.length - 2) { inodeToReread = resolvedINodes.get(resolvedINodes.size() - 1); } } else if (lockType == TransactionLockTypes.INodeLockType.WRITE) { inodeToReread = resolvedINodes.get(resolvedINodes.size() - 1); } if (inodeToReread != null) { int partitionIdOfINodeToBeReRead = INode.calculatePartitionId(inodeToReread.getParentId(), inodeToReread .getLocalName(), inodeToReread.myDepth()); INode inode = find(lockType, inodeToReread.getLocalName(), inodeToReread.getParentId(), partitionIdOfINodeToBeReRead); if (inode != null) { // re-read after taking write lock to make sure that no one has created the same inode. addLockedINodes(inode, lockType); String existingPath = buildPath(path, resolvedINodes.size()); List<INode> rest = acquireLockOnRestOfPath(lockType, inode, path, existingPath, false); resolvedINodes.addAll(rest); } } } } private List<INode> acquireLockOnRestOfPath( TransactionLockTypes.INodeLockType lock, INode baseInode, String fullPath, String prefix, boolean resolveLink) throws StorageException, UnresolvedPathException, TransactionContextException { List<INode> resolved = new ArrayList<INode>(); byte[][] fullComps = INode.getPathComponents(fullPath); byte[][] prefixComps = INode.getPathComponents(prefix); INodeResolver resolver = new INodeResolver(fullComps, baseInode, resolveLink, true, prefixComps.length - 1); while (resolver.hasNext()) { setINodeLockType(lock); INode current = resolver.next(); if (current != null) { addLockedINodes(current, lock); resolved.add(current); } } return resolved; } private List<INode> findImmediateChildren(INode lastINode) throws StorageException, TransactionContextException { List<INode> children = new ArrayList<INode>(); if (lastINode != null) { if (lastINode instanceof INodeDirectory) { setINodeLockType(TransactionLockTypes.INodeLockType.READ_COMMITTED); //if the parent is locked then taking lock on all children is not necessary children.addAll(((INodeDirectory) lastINode).getChildren()); } } return children; } private List<INode> findChildrenRecursively(INode lastINode) throws StorageException, TransactionContextException { LinkedList<INode> children = new LinkedList<INode>(); LinkedList<INode> unCheckedDirs = new LinkedList<INode>(); if (lastINode != null) { if (lastINode instanceof INodeDirectory) { unCheckedDirs.add(lastINode); } } // Find all the children in the sub-directories. while (!unCheckedDirs.isEmpty()) { INode next = unCheckedDirs.poll(); if (next instanceof INodeDirectory) { setINodeLockType(TransactionLockTypes.INodeLockType.READ_COMMITTED); //locking the parent is sufficient List<INode> clist = ((INodeDirectory) next).getChildren(); unCheckedDirs.addAll(clist); children.addAll(clist); } } LOG.debug("Added " + children.size() + " children."); return children; } private INode acquireLockOnRoot(TransactionLockTypes.INodeLockType lock) throws StorageException, TransactionContextException { LOG.debug("Acquiring " + lock + " on the root node"); return find(lock, INodeDirectory.ROOT_NAME, INodeDirectory.ROOT_PARENT_ID, INodeDirectory.getRootDirPartitionKey()); } private String buildPath(String path, int size) { StringBuilder builder = new StringBuilder(); byte[][] components = INode.getPathComponents(path); for (int i = 0; i < Math.min(components.length, size); i++) { if (i == 0) { builder.append("/"); } else { if (i != 1) { builder.append("/"); } builder.append(DFSUtil.bytes2String(components[i])); } } return builder.toString(); } protected INode find(String name, int parentId, int partitionId) throws StorageException, TransactionContextException { return find(lockType, name, parentId, partitionId); } }