/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.dfs;

import java.io.FileNotFoundException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Arrays;
import java.util.List;
import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.permission.*;
import org.apache.hadoop.dfs.BlocksMap.BlockInfo;

/**
 * We keep an in-memory representation of the file/block hierarchy.
 * This is a base INode class containing common fields for file and
 * directory inodes.
 *
 * <p>Inodes are compared against raw {@code byte[]} names (see
 * {@link #compareTo(byte[])}) so that a directory's sorted child list can be
 * binary-searched directly by name.
 */
abstract class INode implements Comparable<byte[]> {
  /** Local (last path component) name of this inode, UTF8 encoded. */
  protected byte[] name;
  /** Directory that contains this inode; null when detached. */
  protected INodeDirectory parent;
  protected long modificationTime;

  //Only updated by updatePermissionStatus(...).
  //Other codes should not modify it.
  private long permission;

  /**
   * Bit layout of the packed {@link #permission} field. Each constant
   * describes one bit field: MODE occupies the lowest 16 bits, GROUP the
   * next 25 bits and USER the 23 bits above that.
   */
  private static enum PermissionStatusFormat {
    MODE(0, 16),
    GROUP(MODE.OFFSET + MODE.LENGTH, 25),
    USER(GROUP.OFFSET + GROUP.LENGTH, 23);

    final int OFFSET;
    final int LENGTH; //bit length
    final long MASK;

    PermissionStatusFormat(int offset, int length) {
      OFFSET = offset;
      LENGTH = length;
      MASK = ((-1L) >>> (64 - LENGTH)) << OFFSET;
    }

    /** Extract this field's value from a packed record. */
    long retrieve(long record) {
      return (record & MASK) >>> OFFSET;
    }

    /**
     * Return a copy of {@code record} with this field replaced by
     * {@code bits}. The shifted value is masked so that a value wider than
     * {@link #LENGTH} bits (or a negative one) cannot spill into the
     * neighbouring fields.
     */
    long combine(long bits, long record) {
      return (record & ~MASK) | ((bits << OFFSET) & MASK);
    }
  }

  protected INode() {
    name = null;
    parent = null;
    modificationTime = 0;
  }

  INode(PermissionStatus permissions, long mTime) {
    this.name = null;
    this.parent = null;
    this.modificationTime = mTime;
    setPermissionStatus(permissions);
  }

  protected INode(String name, PermissionStatus permissions) {
    this(permissions, 0L);
    setLocalName(name);
  }

  /**
   * Copy constructor.
   *
   * <p>Note: the modification time is copied through
   * {@link #setModificationTime(long)}, which asserts that this inode is a
   * directory.
   *
   * @param other Other node to be copied
   */
  INode(INode other) {
    setLocalName(other.getLocalName());
    this.parent = other.getParent();
    setPermissionStatus(other.getPermissionStatus());
    setModificationTime(other.getModificationTime());
  }

  /**
   * Check whether this is the root inode.
   * The root is identified by a zero-length name.
   */
  boolean isRoot() {
    return name.length == 0;
  }

  /** Set the {@link PermissionStatus} */
  protected void setPermissionStatus(PermissionStatus ps) {
    setUser(ps.getUserName());
    setGroup(ps.getGroupName());
    setPermission(ps.getPermission());
  }

  /** Get the {@link PermissionStatus} */
  protected PermissionStatus getPermissionStatus() {
    return new PermissionStatus(getUserName(), getGroupName(), getFsPermission());
  }

  /** Replace a single field of the packed permission record. */
  private synchronized void updatePermissionStatus(
      PermissionStatusFormat f, long n) {
    permission = f.combine(n, permission);
  }

  /** Get user name */
  protected String getUserName() {
    int n = (int) PermissionStatusFormat.USER.retrieve(permission);
    return SerialNumberManager.INSTANCE.getUser(n);
  }

  /** Set user */
  protected void setUser(String user) {
    int n = SerialNumberManager.INSTANCE.getUserSerialNumber(user);
    updatePermissionStatus(PermissionStatusFormat.USER, n);
  }

  /** Get group name */
  protected String getGroupName() {
    int n = (int) PermissionStatusFormat.GROUP.retrieve(permission);
    return SerialNumberManager.INSTANCE.getGroup(n);
  }

  /** Set group */
  protected void setGroup(String group) {
    int n = SerialNumberManager.INSTANCE.getGroupSerialNumber(group);
    updatePermissionStatus(PermissionStatusFormat.GROUP, n);
  }

  /** Get the {@link FsPermission} */
  protected FsPermission getFsPermission() {
    return new FsPermission(
        (short) PermissionStatusFormat.MODE.retrieve(permission));
  }

  /** Get the permission bits as a short, without allocating an object. */
  protected short getFsPermissionShort() {
    return (short) PermissionStatusFormat.MODE.retrieve(permission);
  }

  /** Set the {@link FsPermission} of this {@link INode} */
  protected void setPermission(FsPermission permission) {
    updatePermissionStatus(PermissionStatusFormat.MODE, permission.toShort());
  }

  /**
   * Check whether it's a directory
   */
  abstract boolean isDirectory();

  /**
   * Collect all the blocks in all children of this INode.
   * Count and return the number of files in the sub tree.
   * Also clears references since this INode is deleted.
   */
  abstract int collectSubtreeBlocksAndClear(List<Block> v);

  /** Compute {@link ContentSummary}. */
  final ContentSummary computeContentSummary() {
    long[] a = computeContentSummary(new long[]{0, 0, 0});
    return new ContentSummary(a[0], a[1], a[2], getQuota());
  }

  /**
   * @return an array of three longs.
   * 0: length, 1: file count, 2: directory count
   */
  abstract long[] computeContentSummary(long[] summary);

  /**
   * Get the quota set for this inode
   * @return the quota if it is set; -1 otherwise
   */
  long getQuota() {
    return -1;
  }

  /**
   * Get the total number of names in the tree
   * rooted at this inode including the root
   * @return The total number of names in this tree
   */
  long numItemsInTree() {
    return 1;
  }

  /**
   * Get local file name
   * @return local file name
   */
  String getLocalName() {
    return bytes2String(name);
  }

  /**
   * Get local file name
   * @return local file name as the internal byte array (not a copy)
   */
  byte[] getLocalNameBytes() {
    return name;
  }

  /**
   * Set local file name
   */
  void setLocalName(String name) {
    this.name = string2Bytes(name);
  }

  /**
   * Set local file name
   */
  void setLocalName(byte[] name) {
    this.name = name;
  }

  /** {@inheritDoc} */
  @Override
  public String toString() {
    return "\"" + getLocalName() + "\":" + getPermissionStatus();
  }

  /**
   * Get parent directory
   * @return parent INode
   */
  INodeDirectory getParent() {
    return this.parent;
  }

  /**
   * Get last modification time of inode.
   * @return modification time
   */
  long getModificationTime() {
    return this.modificationTime;
  }

  /**
   * Set last modification time of inode.
   * The stored time only moves forward: an older {@code modtime} is ignored.
   */
  void setModificationTime(long modtime) {
    assert isDirectory();
    if (this.modificationTime <= modtime) {
      this.modificationTime = modtime;
    }
  }

  /**
   * Is this inode being constructed?
   */
  boolean isUnderConstruction() {
    return false;
  }

  /**
   * Breaks file path into components.
   * @param path
   * @return array of byte arrays each of which represents
   * a single path component.
   */
  static byte[][] getPathComponents(String path) {
    return getPathComponents(getPathNames(path));
  }

  /** Convert strings to byte arrays for path components. */
  static byte[][] getPathComponents(String[] strings) {
    if (strings.length == 0) {
      return new byte[][]{null};
    }
    byte[][] bytes = new byte[strings.length][];
    for (int i = 0; i < strings.length; i++) {
      bytes[i] = string2Bytes(strings[i]);
    }
    return bytes;
  }

  /**
   * Breaks file path into names.
   * @param path
   * @return array of names, or null if the path is null or does not start
   *         with {@link Path#SEPARATOR}
   */
  static String[] getPathNames(String path) {
    if (path == null || !path.startsWith(Path.SEPARATOR)) {
      return null;
    }
    return path.split(Path.SEPARATOR);
  }

  /**
   * Detach this inode from its parent directory.
   * @return false if this inode has no parent; true otherwise
   */
  boolean removeNode() {
    if (parent == null) {
      return false;
    } else {
      parent.removeChild(this);
      parent = null;
      return true;
    }
  }

  //
  // Comparable interface
  //
  public int compareTo(byte[] o) {
    return compareBytes(name, o);
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof INode)) {
      return false;
    }
    return Arrays.equals(this.name, ((INode) o).name);
  }

  @Override
  public int hashCode() {
    return Arrays.hashCode(this.name);
  }

  //
  // static methods
  //
  /**
   * Compare two byte arrays.
   * A null array is treated like an empty one; bytes are compared as signed
   * values and a shorter array that is a prefix of the other sorts first.
   *
   * @return a negative integer, zero, or a positive integer
   * as defined by {@link #compareTo(byte[])}.
   */
  static int compareBytes(byte[] a1, byte[] a2) {
    if (a1 == a2) {
      return 0;
    }
    int len1 = (a1 == null ? 0 : a1.length);
    int len2 = (a2 == null ? 0 : a2.length);
    int n = Math.min(len1, len2);
    byte b1, b2;
    for (int i = 0; i < n; i++) {
      b1 = a1[i];
      b2 = a2[i];
      if (b1 != b2) {
        return b1 - b2;
      }
    }
    return len1 - len2;
  }

  /**
   * Converts a byte array to a string using UTF8 encoding.
   */
  static String bytes2String(byte[] bytes) {
    try {
      return new String(bytes, "UTF8");
    } catch (UnsupportedEncodingException e) {
      assert false : "UTF8 encoding is not supported ";
    }
    return null;
  }

  /**
   * Converts a string to a byte array using UTF8 encoding.
   */
  static byte[] string2Bytes(String str) {
    try {
      return str.getBytes("UTF8");
    } catch (UnsupportedEncodingException e) {
      assert false : "UTF8 encoding is not supported ";
    }
    return null;
  }
}

/**
 * Directory INode class.
 *
 * <p>Children are kept in a list sorted by name (see
 * {@link INode#compareBytes(byte[], byte[])}) and located by binary search.
 * The list is allocated lazily on the first {@link #addChild}.
 */
class INodeDirectory extends INode {
  /** Initial capacity of the lazily created children list. */
  protected static final int DEFAULT_FILES_PER_DIRECTORY = 5;
  final static String ROOT_NAME = "";

  private List<INode> children;

  INodeDirectory(String name, PermissionStatus permissions) {
    super(name, permissions);
    this.children = null;
  }

  INodeDirectory(PermissionStatus permissions, long mTime) {
    super(permissions, mTime);
    this.children = null;
  }

  /** constructor */
  INodeDirectory(byte[] localName, PermissionStatus permissions, long mTime) {
    this(permissions, mTime);
    this.name = localName;
  }

  /**
   * Copy constructor. The children list of {@code other} is shared, not
   * copied.
   *
   * @param other
   */
  INodeDirectory(INodeDirectory other) {
    super(other);
    this.children = other.getChildren();
  }

  /**
   * Check whether it's a directory
   */
  @Override
  boolean isDirectory() {
    return true;
  }

  /**
   * Remove the child that has the same name as {@code node}.
   *
   * @param node inode whose name identifies the child to remove
   * @return the removed child, or null if no such child exists
   */
  INode removeChild(INode node) {
    assert children != null;
    if (children == null) {
      // With assertions disabled, report "not found" instead of failing
      // with a NullPointerException inside binarySearch.
      return null;
    }
    int low = Collections.binarySearch(children, node.name);
    if (low >= 0) {
      return children.remove(low);
    } else {
      return null;
    }
  }

  /**
   * Replace a child that has the same name as newChild by newChild.
   *
   * @param newChild Child node to be added
   * @throws IllegalArgumentException if this directory has no children or
   *         no child with that name exists
   */
  void replaceChild(INode newChild) {
    if (children == null) {
      throw new IllegalArgumentException("The directory is empty");
    }
    int low = Collections.binarySearch(children, newChild.name);
    if (low >= 0) { // an old child exists so replace by the newChild
      children.set(low, newChild);
    } else {
      throw new IllegalArgumentException("No child exists to be replaced");
    }
  }

  /** Look up a direct child by name; returns null if it does not exist. */
  INode getChild(String name) {
    return getChildINode(string2Bytes(name));
  }

  private INode getChildINode(byte[] name) {
    if (children == null) {
      return null;
    }
    int low = Collections.binarySearch(children, name);
    if (low >= 0) {
      return children.get(low);
    }
    return null;
  }

  /**
   * Resolve the inode addressed by the given components, or null if the
   * path does not fully exist.
   */
  private INode getNode(byte[][] components) {
    INode[] inode = new INode[1];
    getExistingPathINodes(components, inode);
    return inode[0];
  }

  /**
   * This is the external interface
   */
  INode getNode(String path) {
    return getNode(getPathComponents(path));
  }

  /**
   * Retrieve existing INodes from a path. If existing is big enough to store
   * all path components (existing and non-existing), then existing INodes
   * will be stored starting from the root INode into existing[0]; if
   * existing is not big enough to store all path components, then only the
   * last existing and non existing INodes will be stored so that
   * existing[existing.length-1] refers to the target INode.
   *
   * <p>
   * Example: <br>
   * Given the path /c1/c2/c3 where only /c1/c2 exists, resulting in the
   * following path components: ["","c1","c2","c3"],
   *
   * <p>
   * <code>getExistingPathINodes(["","c1","c2"], [?])</code> should fill the
   * array with [c2] <br>
   * <code>getExistingPathINodes(["","c1","c2","c3"], [?])</code> should fill the
   * array with [null]
   *
   * <p>
   * <code>getExistingPathINodes(["","c1","c2"], [?,?])</code> should fill the
   * array with [c1,c2] <br>
   * <code>getExistingPathINodes(["","c1","c2","c3"], [?,?])</code> should fill
   * the array with [c2,null]
   *
   * <p>
   * <code>getExistingPathINodes(["","c1","c2"], [?,?,?,?])</code> should fill
   * the array with [rootINode,c1,c2,null], <br>
   * <code>getExistingPathINodes(["","c1","c2","c3"], [?,?,?,?])</code> should
   * fill the array with [rootINode,c1,c2,null]
   *
   * @param components array of path component name
   * @param existing INode array to fill with existing INodes
   * @return the number of child lookups performed before the walk stopped
   *         (the callers in this file ignore this value)
   */
  int getExistingPathINodes(byte[][] components, INode[] existing) {
    // Decode the expected component for the message; concatenating the raw
    // byte[] would only print its identity hash.
    assert compareBytes(this.name, components[0]) == 0 :
      "Incorrect name " + getLocalName() + " expected "
      + (components[0] == null ? null : bytes2String(components[0]));

    INode curNode = this;
    int count = 0;
    // A negative index means the leading components do not fit in
    // 'existing' and are skipped until the index reaches zero.
    int index = existing.length - components.length;
    if (index > 0) {
      index = 0;
    }
    while ((count < components.length) && (curNode != null)) {
      if (index >= 0) {
        existing[index] = curNode;
      }
      if (!curNode.isDirectory() || (count == components.length - 1)) {
        break; // no more child, stop here
      }
      INodeDirectory parentDir = (INodeDirectory) curNode;
      curNode = parentDir.getChildINode(components[count + 1]);
      count += 1;
      index += 1;
    }
    return count;
  }

  /**
   * Retrieve the existing INodes along the given path. The first INode
   * always exist and is this INode.
   *
   * @param path the path to explore
   * @return INodes array containing the existing INodes in the order they
   *         appear when following the path from the root INode to the
   *         deepest INodes. The array size will be the number of expected
   *         components in the path, and non existing components will be
   *         filled with null
   */
  INode[] getExistingPathINodes(String path) {
    byte[][] components = getPathComponents(path);
    INode[] inodes = new INode[components.length];

    this.getExistingPathINodes(components, inodes);

    return inodes;
  }

  /**
   * Add a child inode to the directory.
   *
   * @param node INode to insert
   * @param inheritPermission inherit permission from parent?
   * @return  null if the child with this name already exists;
   *          inserted INode, otherwise
   */
  <T extends INode> T addChild(final T node, boolean inheritPermission) {
    if (inheritPermission) {
      FsPermission p = getFsPermission();
      //make sure the  permission has wx for the user
      if (!p.getUserAction().implies(FsAction.WRITE_EXECUTE)) {
        p = new FsPermission(p.getUserAction().or(FsAction.WRITE_EXECUTE),
            p.getGroupAction(), p.getOtherAction());
      }
      node.setPermission(p);
    }

    if (children == null) {
      children = new ArrayList<INode>(DEFAULT_FILES_PER_DIRECTORY);
    }
    int low = Collections.binarySearch(children, node.name);
    if (low >= 0) {
      return null;
    }
    node.parent = this;
    // binarySearch returns (-(insertion point) - 1) when the key is absent
    children.add(-low - 1, node);
    // update modification time of the parent directory
    setModificationTime(node.getModificationTime());
    if (node.getGroupName() == null) {
      node.setGroup(getGroupName());
    }
    return node;
  }

  /**
   * Equivalent to addNode(path, newNode, false).
   * @see #addNode(String, INode, boolean)
   */
  <T extends INode> T addNode(String path, T newNode) throws FileNotFoundException {
    return addNode(path, newNode, false);
  }

  /**
   * Add new INode to the file tree.
   * Find the parent and insert
   *
   * @param path file path
   * @param newNode INode to be added
   * @param inheritPermission If true, copy the parent's permission to newNode.
   * @return null if the node already exists; inserted INode, otherwise
   * @throws FileNotFoundException if parent does not exist or
   * is not a directory.
   */
  <T extends INode> T addNode(String path, T newNode, boolean inheritPermission
      ) throws FileNotFoundException {
    if (addToParent(path, newNode, null, inheritPermission) == null) {
      return null;
    }
    return newNode;
  }

  /**
   * Add new inode to the parent if specified.
   * Optimized version of addNode() if parent is not null.
   *
   * @return  parent INode if new inode is inserted
   *          or null if it already exists.
   * @throws  FileNotFoundException if parent does not exist or
   *          is not a directory.
   */
  <T extends INode> INodeDirectory addToParent(
                                      String path,
                                      T newNode,
                                      INodeDirectory parent,
                                      boolean inheritPermission
                                    ) throws FileNotFoundException {
    byte[][] pathComponents = getPathComponents(path);
    assert pathComponents != null : "Incorrect path " + path;
    int pathLen = pathComponents.length;
    if (pathLen < 2) { // add root
      return null;
    }
    if (parent == null) {
      // Gets the parent INode
      // A two-slot array makes getExistingPathINodes store the last two
      // components, so slot 0 holds the parent of the target.
      INode[] inodes = new INode[2];
      getExistingPathINodes(pathComponents, inodes);
      INode inode = inodes[0];
      if (inode == null) {
        throw new FileNotFoundException("Parent path does not exist: " + path);
      }
      if (!inode.isDirectory()) {
        throw new FileNotFoundException("Parent path is not a directory: " + path);
      }
      parent = (INodeDirectory) inode;
    }
    // insert into the parent children list
    newNode.name = pathComponents[pathLen - 1];
    if (parent.addChild(newNode, inheritPermission) == null) {
      return null;
    }
    return parent;
  }

  /**
   * Count this directory plus everything below it by walking the subtree.
   */
  @Override
  long numItemsInTree() {
    long total = 1L;
    if (children == null) {
      return total;
    }
    for (INode child : children) {
      total += child.numItemsInTree();
    }
    return total;
  }

  /** {@inheritDoc} */
  @Override
  long[] computeContentSummary(long[] summary) {
    if (children != null) {
      for (INode child : children) {
        child.computeContentSummary(summary);
      }
    }
    summary[2]++;
    return summary;
  }

  /**
   * @return the children list, or a new empty list if this directory has
   *         never had a child
   */
  List<INode> getChildren() {
    return children == null ? new ArrayList<INode>() : children;
  }

  /** @return the internal children list, which may be null */
  List<INode> getChildrenRaw() {
    return children;
  }

  /**
   * Collect the blocks of every file below this directory into {@code v}
   * and drop this directory's parent and children references.
   *
   * @return the number of inodes visited, including this directory
   */
  @Override
  int collectSubtreeBlocksAndClear(List<Block> v) {
    int total = 1;
    if (children != null) {
      for (INode child : children) {
        total += child.collectSubtreeBlocksAndClear(v);
      }
    }
    // Clear references even for a directory that never had children, so
    // the behaviour matches the documented contract and INodeFile.
    parent = null;
    children = null;
    return total;
  }
}

/**
 * Directory INode class that has a quota restriction
 */
class INodeDirectoryWithQuota extends INodeDirectory {
  private long quota;
  /** Cached number of names in the subtree, including this directory. */
  private long count;

  /** Convert an existing directory inode to one with the given quota
   *
   * @param quota Quota to be assigned to this inode
   * @param other The other inode from which all other properties are copied
   * @throws QuotaExceededException if the subtree of {@code other} is
   *         already larger than {@code quota}
   */
  INodeDirectoryWithQuota(long quota, INodeDirectory other)
  throws QuotaExceededException {
    super(other);
    this.count = other.numItemsInTree();
    setQuota(quota);
  }

  /** constructor with no quota verification */
  INodeDirectoryWithQuota(
      PermissionStatus permissions, long modificationTime, long quota) {
    super(permissions, modificationTime);
    this.quota = quota;
    // A new directory already contains one name: itself.
    this.count = 1;
  }

  /** constructor with no quota verification */
  INodeDirectoryWithQuota(String name, PermissionStatus permissions, long quota) {
    super(name, permissions);
    this.quota = quota;
    // A new directory already contains one name: itself.
    this.count = 1;
  }

  /** Get this directory's quota
   * @return this directory's quota
   */
  @Override
  long getQuota() {
    return quota;
  }

  /** Set this directory's quota
   *
   * @param quota Quota to be set
   * @throws QuotaExceededException if the given quota is less than
   *                                the size of the tree
   */
  void setQuota(long quota) throws QuotaExceededException {
    verifyQuota(quota, this.count);
    this.quota = quota;
  }

  /** Get the number of names in the subtree rooted at this directory
   * @return the size of the subtree rooted at this directory
   */
  @Override
  long numItemsInTree() {
    return count;
  }

  /** Update the size of the tree
   *
   * @param delta the change of the tree size
   * @throws QuotaExceededException if the changed size is greater
   *                                than the quota
   */
  void updateNumItemsInTree(long delta) throws QuotaExceededException {
    long newCount = this.count + delta;
    // Shrinking the tree can never violate the quota, so only verify growth.
    if (delta > 0) {
      verifyQuota(this.quota, newCount);
    }
    this.count = newCount;
  }

  /** Set the size of the tree rooted at this directory
   *
   * @param count size of the directory to be set
   * @throws QuotaExceededException if the given count is greater than quota
   */
  void setCount(long count) throws QuotaExceededException {
    verifyQuota(this.quota, count);
    this.count = count;
  }

  /** Verify if the count satisfies the quota restriction
   * @throws QuotaExceededException if the given quota is less than the count
   */
  private static void verifyQuota(long quota, long count)
  throws QuotaExceededException {
    if (quota < count) {
      throw new QuotaExceededException(quota, count);
    }
  }
}

/**
 * File INode class: an inode that owns a list of blocks.
 */
class INodeFile extends INode {
  /** Mask applied to every permission set on a file (octal 0111). */
  static final FsPermission UMASK = FsPermission.createImmutable((short)0111);

  protected BlockInfo[] blocks = null;
  protected short blockReplication;
  protected long preferredBlockSize;

  INodeFile(PermissionStatus permissions,
            int nrBlocks, short replication, long modificationTime,
            long preferredBlockSize) {
    this(permissions, new BlockInfo[nrBlocks], replication,
        modificationTime, preferredBlockSize);
  }

  protected INodeFile() {
    blocks = null;
    blockReplication = 0;
    preferredBlockSize = 0;
  }

  protected INodeFile(PermissionStatus permissions, BlockInfo[] blklist,
                      short replication, long modificationTime,
                      long preferredBlockSize) {
    super(permissions, modificationTime);
    this.blockReplication = replication;
    this.preferredBlockSize = preferredBlockSize;
    blocks = blklist;
  }

  /**
   * Set the {@link FsPermission} of this {@link INodeFile}.
   * Since this is a file,
   * the {@link FsAction#EXECUTE} action, if any, is ignored.
   */
  @Override
  protected void setPermission(FsPermission permission) {
    super.setPermission(permission.applyUMask(UMASK));
  }

  @Override
  boolean isDirectory() {
    return false;
  }

  /**
   * Get block replication for the file
   * @return block replication
   */
  short getReplication() {
    return this.blockReplication;
  }

  void setReplication(short replication) {
    this.blockReplication = replication;
  }

  /**
   * Get file blocks
   * @return file blocks (the internal array, possibly null)
   */
  BlockInfo[] getBlocks() {
    return this.blocks;
  }

  /**
   * add a block to the block list
   */
  void addBlock(BlockInfo newblock) {
    if (this.blocks == null) {
      this.blocks = new BlockInfo[1];
      this.blocks[0] = newblock;
    } else {
      int size = this.blocks.length;
      BlockInfo[] newlist = new BlockInfo[size + 1];
      System.arraycopy(this.blocks, 0, newlist, 0, size);
      newlist[size] = newblock;
      this.blocks = newlist;
    }
  }

  /**
   * Set file block
   */
  void setBlock(int idx, BlockInfo blk) {
    this.blocks[idx] = blk;
  }

  /**
   * Append this file's blocks to {@code v} and drop the parent and block
   * references.
   *
   * @return 1, the number of inodes visited
   */
  @Override
  int collectSubtreeBlocksAndClear(List<Block> v) {
    parent = null;
    // blocks is null for a default-constructed or already cleared file
    if (blocks != null) {
      for (Block blk : blocks) {
        v.add(blk);
      }
    }
    blocks = null;
    return 1;
  }

  /** {@inheritDoc} */
  @Override
  long[] computeContentSummary(long[] summary) {
    long bytes = 0;
    // a file without a block list contributes zero bytes
    if (blocks != null) {
      for (Block blk : blocks) {
        bytes += blk.getNumBytes();
      }
    }
    summary[0] += bytes;
    summary[1]++;
    return summary;
  }

  /**
   * Get the preferred block size of the file.
   * @return the number of bytes
   */
  long getPreferredBlockSize() {
    return preferredBlockSize;
  }

  /**
   * Return the penultimate allocated block for this file.
   * @return the second-to-last block, or null if the file has fewer than
   *         two blocks
   */
  Block getPenultimateBlock() {
    if (blocks == null || blocks.length <= 1) {
      return null;
    }
    return blocks[blocks.length - 2];
  }

  /**
   * Return an under-construction view of this file: this object itself if
   * it is already under construction, otherwise a new
   * {@link INodeFileUnderConstruction} built from this file's fields.
   */
  INodeFileUnderConstruction toINodeFileUnderConstruction(
      String clientName, String clientMachine, DatanodeDescriptor clientNode
      ) throws IOException {
    if (isUnderConstruction()) {
      return (INodeFileUnderConstruction) this;
    }
    return new INodeFileUnderConstruction(name,
        blockReplication, modificationTime, preferredBlockSize,
        blocks, getPermissionStatus(),
        clientName, clientMachine, clientNode);
  }
}

/**
 * File INode that is still being written; it additionally records the
 * writing client and the state used for lease recovery.
 */
class INodeFileUnderConstruction extends INodeFile {
  StringBytesWritable clientName = null;         // lease holder
  StringBytesWritable clientMachine = null;
  DatanodeDescriptor clientNode = null; // if client is a cluster node too.

  private int primaryNodeIndex = -1; //the node working on lease recovery
  private DatanodeDescriptor[] targets = null;   //locations for last block
  private long lastRecoveryTime = 0;

  INodeFileUnderConstruction() {}

  INodeFileUnderConstruction(PermissionStatus permissions,
                             short replication,
                             long preferredBlockSize,
                             long modTime,
                             String clientName,
                             String clientMachine,
                             DatanodeDescriptor clientNode)
                             throws IOException {
    super(permissions.applyUMask(UMASK), 0, replication, modTime,
        preferredBlockSize);
    this.clientName = new StringBytesWritable(clientName);
    this.clientMachine = new StringBytesWritable(clientMachine);
    this.clientNode = clientNode;
  }

  INodeFileUnderConstruction(byte[] name,
                             short blockReplication,
                             long modificationTime,
                             long preferredBlockSize,
                             BlockInfo[] blocks,
                             PermissionStatus perm,
                             String clientName,
                             String clientMachine,
                             DatanodeDescriptor clientNode)
                             throws IOException {
    super(perm, blocks, blockReplication, modificationTime,
          preferredBlockSize);
    setLocalName(name);
    this.clientName = new StringBytesWritable(clientName);
    this.clientMachine = new StringBytesWritable(clientMachine);
    this.clientNode = clientNode;
  }

  String getClientName() throws IOException {
    return clientName.getString();
  }

  String getClientMachine() throws IOException {
    return clientMachine.getString();
  }

  DatanodeDescriptor getClientNode() {
    return clientNode;
  }

  /**
   * Is this inode being constructed?
   */
  @Override
  boolean isUnderConstruction() {
    return true;
  }

  DatanodeDescriptor[] getTargets() {
    return targets;
  }

  /**
   * Set the locations of the last block and forget the previously chosen
   * primary datanode.
   */
  void setTargets(DatanodeDescriptor[] targets) {
    this.targets = targets;
    this.primaryNodeIndex = -1;
  }

  //
  // converts a INodeFileUnderConstruction into a INodeFile
  //
  INodeFile convertToInodeFile() {
    INodeFile obj = new INodeFile(getPermissionStatus(),
                                  getBlocks(),
                                  getReplication(),
                                  getModificationTime(),
                                  getPreferredBlockSize());
    return obj;
  }

  /**
   * remove a block from the block list. This block should be
   * the last one on the list.
   *
   * @throws IOException if the file has no blocks or {@code oldblock} is
   *         not the last block
   */
  void removeBlock(Block oldblock) throws IOException {
    // An empty array (as created by the no-block constructor) has no last
    // block either; report it like a missing block list.
    if (blocks == null || blocks.length == 0) {
      throw new IOException("Trying to delete non-existant block " + oldblock);
    }
    int size_1 = blocks.length - 1;
    if (!blocks[size_1].equals(oldblock)) {
      throw new IOException("Trying to delete non-last block " + oldblock);
    }

    //copy to a new list
    BlockInfo[] newlist = new BlockInfo[size_1];
    System.arraycopy(blocks, 0, newlist, 0, size_1);
    blocks = newlist;

    // Remove the block locations for the last block.
    targets = null;
  }

  /**
   * Replace the last block of the file and its locations, and reset the
   * lease recovery timer.
   *
   * @throws IOException if the file has no blocks
   */
  synchronized void setLastBlock(BlockInfo newblock, DatanodeDescriptor[] newtargets
      ) throws IOException {
    if (blocks == null || blocks.length == 0) {
      throw new IOException("Trying to update non-existant block (newblock="
          + newblock + ")");
    }
    blocks[blocks.length - 1] = newblock;
    setTargets(newtargets);
    lastRecoveryTime = 0;
  }

  /**
   * Initialize lease recovery for this object
   */
  void assignPrimaryDatanode() {
    //assign the first alive datanode as the primary datanode

    // targets is null until setTargets/setLastBlock has been called
    if (targets == null || targets.length == 0) {
      NameNode.stateChangeLog.warn("BLOCK*"
        + " INodeFileUnderConstruction.initLeaseRecovery:"
        + " No blocks found, lease removed.");
      return;
    }

    int previous = primaryNodeIndex;
    //find an alive datanode beginning from previous
    for (int i = 1; i <= targets.length; i++) {
      int j = (previous + i) % targets.length;
      if (targets[j].isAlive) {
        primaryNodeIndex = j;
        DatanodeDescriptor primary = targets[j];
        primary.addBlockToBeRecovered(blocks[blocks.length - 1], targets);
        NameNode.stateChangeLog.info("BLOCK* " + blocks[blocks.length - 1]
          + " recovery started, primary=" + primary);
        return;
      }
    }
  }

  /**
   * Update lastRecoveryTime if expired.
   * @return true if lastRecoveryTime is updated.
   */
  synchronized boolean setLastRecoveryTime(long now) {
    boolean expired = now - lastRecoveryTime > NameNode.LEASE_RECOVER_PERIOD;
    if (expired) {
      lastRecoveryTime = now;
    }
    return expired;
  }
}