/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import com.google.common.primitives.SignedBytes;
import io.hops.erasure_coding.ErasureCodingManager;
import io.hops.exception.StorageException;
import io.hops.exception.TransactionContextException;
import io.hops.metadata.HdfsStorageFactory;
import io.hops.metadata.common.FinderType;
import io.hops.metadata.hdfs.dal.AccessTimeLogDataAccess;
import io.hops.metadata.hdfs.entity.AccessTimeLogEntry;
import io.hops.metadata.hdfs.entity.EncodingStatus;
import io.hops.metadata.hdfs.entity.MetadataLogEntry;
import io.hops.security.UsersGroups;
import io.hops.transaction.EntityManager;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.util.StringUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * We keep an in-memory representation of the file/block hierarchy.
 * This is a base INode class containing common fields for file and
 * directory inodes.
 */
@InterfaceAudience.Private
public abstract class INode implements Comparable<byte[]> {
  static final List<INode> EMPTY_LIST =
      Collections.unmodifiableList(new ArrayList<INode>());

  public static enum Finder implements FinderType<INode> {
    ByINodeIdFTIS, // FTIS: full table index scan
    ByParentIdFTIS,
    ByParentIdAndPartitionId,
    ByNameParentIdAndPartitionId,
    ByNamesParentIdsAndPartitionIdsCheckLocal,
    ByNamesParentIdsAndPartitionIds;

    @Override
    public Class getType() {
      return INode.class;
    }

    @Override
    public Annotation getAnnotated() {
      switch (this) {
        case ByINodeIdFTIS:
          return Annotation.IndexScan;
        case ByParentIdFTIS:
          return Annotation.IndexScan;
        case ByParentIdAndPartitionId:
          return Annotation.PrunedIndexScan;
        case ByNameParentIdAndPartitionId:
          return Annotation.PrimaryKey;
        case ByNamesParentIdsAndPartitionIds:
          return Annotation.Batched;
        case ByNamesParentIdsAndPartitionIdsCheckLocal:
          return Annotation.Batched;
        default:
          throw new IllegalStateException();
      }
    }
  }

  public static enum Order implements Comparator<INode> {
    ByName() {
      @Override
      public int compare(INode o1, INode o2) {
        // TODO - JIM why not compare by ID - more efficient?
        return o1.compareTo(o2.getLocalNameBytes());
      }
    };

    @Override
    public abstract int compare(INode o1, INode o2);

    public Comparator acsending() {
      return this;
    }

    public Comparator descending() {
      return Collections.reverseOrder(this);
    }
  }

  /**
   * The inode name is in java UTF8 encoding;
   * the name in HdfsFileStatus should keep the same encoding as this.
   * If this encoding is changed, getFileInfo and listStatus in
   * ClientProtocol are implicitly changed; the decoding at the client
   * side should change accordingly.
   */
  protected byte[] name;
  protected INodeDirectory parent;
  protected long modificationTime;
  protected long accessTime;

  public static final int NON_EXISTING_ID = 0;
  protected int id = NON_EXISTING_ID;
  protected int parentId = NON_EXISTING_ID;

  public static int RANDOM_PARTITIONING_MAX_LEVEL = 1;
  protected Integer partitionId;

  protected boolean subtreeLocked;
  protected long subtreeLockOwner;

  // Number of bits for block size
  final static short BLOCK_BITS = 48;
  final static short REPLICATION_BITS = 8;
  final static short BOOLEAN_BITS = 8;
  final static short HAS_BLKS_BITS = 1; // one of the 8 bits used for storing boolean flags

  // Header masks for the 64-bit representation.
  // Format: [8 bits for flags][8 bits for replication degree][48 bits for preferred block size]
  final static long BLOCK_SIZE_MASK = 0x0000FFFFFFFFFFFFL;
  final static long REPLICATION_MASK = 0x00FF000000000000L;
  final static long FLAGS_MASK = 0xFF00000000000000L;
  final static long HAS_BLKS_MASK = 0x0100000000000000L;

  // [8 bits for flags]
  // bit 0: 1 if the file has blocks, 0 otherwise
  // remaining bits are not yet used
  long header;

  /**
   * Simple wrapper for two counters:
   * nsCount (namespace consumed) and dsCount (diskspace consumed).
   */
  static class DirCounts {
    long nsCount = 0;
    long dsCount = 0;

    /**
     * returns namespace count
     */
    long getNsCount() {
      return nsCount;
    }

    /**
     * returns diskspace count
     */
    long getDsCount() {
      return dsCount;
    }
  }

  private String userName;
  private String groupName;
  private int userId;
  private int groupId;
  private FsPermission permission;

  INode(PermissionStatus permissions, long mTime, long atime)
      throws IOException {
    this.setLocalNameNoPersistance((byte[]) null);
    this.parent = null;
    this.modificationTime = mTime;
    setAccessTimeNoPersistance(atime);
    setPermissionStatusNoPersistance(permissions);
  }

  protected INode(String name, PermissionStatus permissions)
      throws IOException {
    this(permissions, 0L, 0L);
    setLocalNameNoPersistance(name);
  }

  /**
   * copy constructor
   *
   * @param other
   *     Other node to be copied
   */
  INode(INode other) throws IOException {
    setLocalNameNoPersistance(other.getLocalName());
    this.parent = other.getParent();
    setPermissionStatusNoPersistance(other.getPermissionStatus());
    setModificationTimeNoPersistance(other.getModificationTime());
    setAccessTimeNoPersistance(other.getAccessTime());
    this.parentId = other.getParentId();
    this.id = other.getId();
  }
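  /*
   * Worked example of the 64-bit header layout defined above. The concrete
   * value is illustrative only and not taken from the codebase:
   *
   *   long h = (1L << (BLOCK_BITS + REPLICATION_BITS)) // flag bit 0: has blocks
   *          | (3L << BLOCK_BITS)                      // replication factor 3
   *          | (128L * 1024 * 1024);                   // preferred block size 128 MB
   *   // h == 0x0103000008000000L
   *   // getBlockReplication(h)   -> 3
   *   // getPreferredBlockSize(h) -> 134217728
   *   // hasBlocks(h)             -> true
   */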
  /**
   * Check whether this is the root inode.
   */
  boolean isRoot() {
    return name.length == 0;
  }

  /**
   * Set the {@link PermissionStatus}
   */
  private void setPermissionStatusNoPersistance(PermissionStatus ps)
      throws IOException {
    setUserNoPersistance(ps.getUserName());
    setGroupNoPersistance(ps.getGroupName());
    setPermissionNoPersistance(ps.getPermission());
  }

  /**
   * Get the {@link PermissionStatus}
   */
  public PermissionStatus getPermissionStatus() throws IOException {
    return new PermissionStatus(getUserName(), getGroupName(), getFsPermission());
  }

  /**
   * Get user name
   */
  public String getUserName() throws IOException {
    if (userName == null || userName.isEmpty()) {
      userName = UsersGroups.getUser(userId);
    }
    return userName;
  }

  public int getUserID() {
    return userId;
  }

  public void setUserIDNoPersistance(int userId) {
    this.userId = userId;
  }

  /**
   * Set user
   */
  public void setUserNoPersistance(String user) throws IOException {
    this.userName = user;
    this.userId = UsersGroups.getUserID(user);
  }

  /**
   * Get group name
   */
  public String getGroupName() throws IOException {
    if (groupName == null || groupName.isEmpty()) {
      groupName = UsersGroups.getGroup(groupId);
    }
    return groupName;
  }

  public int getGroupID() {
    return groupId;
  }

  public void setGroupIDNoPersistance(int groupId) {
    this.groupId = groupId;
  }

  /**
   * Set group
   */
  public void setGroupNoPersistance(String group) throws IOException {
    this.groupName = group;
    this.groupId = UsersGroups.getGroupID(group);
  }

  /**
   * Get the {@link FsPermission}
   */
  public FsPermission getFsPermission() {
    return permission;
  }

  protected short getFsPermissionShort() {
    return permission.toShort();
  }

  /**
   * Set the {@link FsPermission} of this {@link INode}
   */
  private void setPermissionNoPersistance(FsPermission permission) {
    this.permission = permission;
  }

  /**
   * Check whether it's a directory
   */
  public boolean isDirectory() {
    return false;
  }

  /**
   * Collect all the blocks in all children of this INode.
   * Count and return the number of files in the sub tree.
   * Also clears references since this INode is deleted.
   */
  abstract int collectSubtreeBlocksAndClear(List<Block> v)
      throws StorageException, TransactionContextException;

  /**
   * Compute {@link ContentSummary}.
   */
  public final ContentSummary computeContentSummary()
      throws StorageException, TransactionContextException {
    long[] a = computeContentSummary(new long[]{0, 0, 0, 0});
    return new ContentSummary(a[0], a[1], a[2], getNsQuota(), a[3], getDsQuota());
  }

  /**
   * @return an array of four longs:
   * 0: length, 1: file count, 2: directory count, 3: disk space
   */
  abstract long[] computeContentSummary(long[] summary)
      throws StorageException, TransactionContextException;

  /**
   * Get the quota set for this inode
   *
   * @return the quota if it is set; -1 otherwise
   */
  public long getNsQuota() throws StorageException, TransactionContextException {
    return -1;
  }

  public long getDsQuota() throws StorageException, TransactionContextException {
    return -1;
  }

  boolean isQuotaSet() throws StorageException, TransactionContextException {
    return getNsQuota() >= 0 || getDsQuota() >= 0;
  }

  /**
   * Adds total number of names and total disk space taken under
   * this tree to counts.
   * Returns updated counts object.
   */
  abstract DirCounts spaceConsumedInTree(DirCounts counts)
      throws StorageException, TransactionContextException;

  /**
   * Get local file name
   *
   * @return local file name
   */
  public String getLocalName() {
    return DFSUtil.bytes2String(name);
  }
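  /*
   * Sketch of how the four summary values map onto the ContentSummary built
   * by computeContentSummary() above ("dir" is a hypothetical inode):
   *
   *   ContentSummary cs = dir.computeContentSummary();
   *   // cs.getLength()         <- a[0] (total length)
   *   // cs.getFileCount()      <- a[1]
   *   // cs.getDirectoryCount() <- a[2]
   *   // cs.getSpaceConsumed()  <- a[3] (disk space)
   *   // cs.getQuota() / cs.getSpaceQuota() come from getNsQuota() and
   *   // getDsQuota(), which both return -1 when no quota is set.
   */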
  String getLocalParentDir()
      throws StorageException, TransactionContextException {
    INode inode = isRoot() ? this : getParent();
    String parentDir = "";
    if (inode != null) {
      parentDir = inode.getFullPathName();
    }
    return (parentDir != null) ? parentDir : "";
  }

  /**
   * Get local file name in bytes
   *
   * @return local file name bytes
   */
  byte[] getLocalNameBytes() {
    return name;
  }

  /**
   * Set local file name
   */
  public void setLocalNameNoPersistance(String name) {
    this.name = DFSUtil.string2Bytes(name);
  }

  /**
   * Set local file name
   */
  public void setLocalNameNoPersistance(byte[] name) {
    this.name = name;
  }

  public String getFullPathName()
      throws StorageException, TransactionContextException {
    // Get the full path name of this inode.
    return FSDirectory.getFullPathName(this);
  }

  @Override
  public String toString() {
    try {
      return "\"" + getFullPathName() + "\":" + getUserName() + ":" +
          getGroupName() + ":" + (isDirectory() ? "d" : "-") + getFsPermission();
    } catch (IOException ex) {
      Logger.getLogger(INode.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
  }

  /**
   * Get parent directory
   *
   * @return parent INode
   */
  INodeDirectory getParent()
      throws StorageException, TransactionContextException {
    if (isRoot()) {
      return null;
    }
    if (parent == null) {
      parent = (INodeDirectory) EntityManager
          .find(INode.Finder.ByINodeIdFTIS, getParentId());
    }
    return this.parent;
  }

  /**
   * Get last modification time of inode.
   *
   * @return modification time
   */
  public long getModificationTime() {
    return this.modificationTime;
  }

  /**
   * Set last modification time of inode (only moves the timestamp forward).
   */
  public void setModificationTimeNoPersistance(long modtime) {
    if (this.modificationTime <= modtime) {
      this.modificationTime = modtime;
    }
  }

  /**
   * Always set the last modification time of inode.
   */
  protected void setModificationTimeForceNoPersistance(long modtime) {
    this.modificationTime = modtime;
  }

  /**
   * Get access time of inode.
   *
   * @return access time
   */
  public long getAccessTime() {
    return accessTime;
  }

  /**
   * Set last access time of inode.
   */
  public void setAccessTimeNoPersistance(long atime) {
    accessTime = atime;
  }

  /**
   * Is this inode being constructed?
   */
  public boolean isUnderConstruction() {
    return false;
  }

  /**
   * Check whether it's a symlink
   */
  public boolean isSymlink() {
    return false;
  }

  /**
   * Breaks file path into components.
   *
   * @param path
   * @return array of byte arrays each of which represents
   * a single path component.
   */
  public static byte[][] getPathComponents(String path) {
    return getPathComponents(getPathNames(path));
  }

  /**
   * Convert strings to byte arrays for path components.
   */
  public static byte[][] getPathComponents(String[] strings) {
    if (strings.length == 0) {
      return new byte[][]{null};
    }
    byte[][] bytes = new byte[strings.length][];
    for (int i = 0; i < strings.length; i++) {
      bytes[i] = DFSUtil.string2Bytes(strings[i]);
    }
    return bytes;
  }

  /**
   * Splits an absolute path into an array of path components.
   *
   * @param path
   * @return array of path components.
   * @throws AssertionError
   *     if the given path is invalid.
   */
  public static String[] getPathNames(String path) {
    if (path == null || !path.startsWith(Path.SEPARATOR)) {
      throw new AssertionError("Absolute path required");
    }
    return StringUtils.split(path, Path.SEPARATOR_CHAR);
  }
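  /*
   * Illustrative example of the path helpers above (the path is hypothetical):
   *
   *   String[] names = INode.getPathNames("/user/alice/data.txt");
   *   // names == {"", "user", "alice", "data.txt"}; the leading empty string
   *   // is the root inode, whose name has length 0 (see isRoot()).
   *   byte[][] components = INode.getPathComponents(names);
   *   // components[i] == DFSUtil.string2Bytes(names[i])
   *   // A relative path such as "user/alice" throws AssertionError.
   */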
  /**
   * Given some components, create a path name.
   *
   * @param components
   *     The path components
   * @param start
   *     start index
   * @param end
   *     end index
   * @return concatenated path
   */
  static String constructPath(byte[][] components, int start, int end) {
    StringBuilder buf = new StringBuilder();
    for (int i = start; i < end; i++) {
      buf.append(DFSUtil.bytes2String(components[i]));
      if (i < end - 1) {
        buf.append(Path.SEPARATOR);
      }
    }
    return buf.toString();
  }

  boolean removeNode() throws StorageException, TransactionContextException {
    if (parent == null) {
      return false;
    } else {
      parent.removeChild(this);
      parent = null;
      return true;
    }
  }

  private static final byte[] EMPTY_BYTES = {};

  @Override
  public final int compareTo(byte[] bytes) {
    final byte[] left = name == null ? EMPTY_BYTES : name;
    final byte[] right = bytes == null ? EMPTY_BYTES : bytes;
    return SignedBytes.lexicographicalComparator().compare(left, right);
  }

  @Override
  public final boolean equals(Object that) {
    if (this == that) {
      return true;
    }
    if (that == null || !(that instanceof INode)) {
      return false;
    }
    if (Arrays.equals(this.name, ((INode) that).name) &&
        this.id == ((INode) that).id &&
        this.parentId == ((INode) that).parentId) {
      return true;
    }
    return false;
  }

  @Override
  public final int hashCode() {
    return Arrays.hashCode(this.name);
  }

  public final void setIdNoPersistance(int id) {
    this.id = id;
  }

  public int getId() {
    return this.id;
  }

  public void setParent(INodeDirectory p)
      throws StorageException, TransactionContextException {
    setParentNoPersistance(p);
    save();
  }

  public void setParentNoPersistance(INodeDirectory p) {
    this.parent = p;
    this.parentId = p.getId();
  }

  public void setParentIdNoPersistance(int pid) {
    this.parentId = pid;
  }

  public int getParentId() {
    return this.parentId;
  }

  public static String nameParentKey(Integer parentId, String name) {
    return parentId + name;
  }

  public String nameParentKey() {
    return nameParentKey(parentId, getLocalName());
  }

  /**
   * Set user
   */
  protected void setUser(String user) throws IOException {
    setUserNoPersistance(user);
    save();
  }

  protected void setGroup(String group) throws IOException {
    setGroupNoPersistance(group);
    save();
  }

  void setPermission(FsPermission permission)
      throws StorageException, TransactionContextException {
    setPermissionNoPersistance(permission);
    save();
  }

  protected void setPermissionStatus(PermissionStatus ps) throws IOException {
    setUser(ps.getUserName());
    setGroup(ps.getGroupName());
    setPermission(ps.getPermission());
  }

  public void setLocalName(String name)
      throws StorageException, TransactionContextException {
    setLocalNameNoPersistance(name);
    save();
  }

  public void setLocalName(byte[] name)
      throws StorageException, TransactionContextException {
    setLocalNameNoPersistance(name);
    save();
  }

  public void setModificationTime(long modtime)
      throws StorageException, TransactionContextException {
    setModificationTimeNoPersistance(modtime);
    save();
  }

  public void setAccessTime(long atime)
      throws TransactionContextException, StorageException {
    setAccessTimeNoPersistance(atime);
    if (isPathMetaEnabled()) {
      // log the operation for epipe
      AccessTimeLogDataAccess da = (AccessTimeLogDataAccess)
          HdfsStorageFactory.getDataAccess(AccessTimeLogDataAccess.class);
      int userId = -1; // TODO get userId
      da.add(new AccessTimeLogEntry(getId(), userId, atime));
    }
    save();
  }

  void setModificationTimeForce(long modtime)
      throws StorageException, TransactionContextException {
    setModificationTimeForceNoPersistance(modtime);
    save();
  }

  public boolean exists() {
    return id != NON_EXISTING_ID;
  }
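  /*
   * Note on the convention used by the setters above: the *NoPersistance
   * variants only update the in-memory field, while the plain setters also
   * persist the change via save(), which delegates to EntityManager.update().
   * For example, setLocalName("foo") is setLocalNameNoPersistance("foo")
   * followed by save(). Likewise, nameParentKey(5, "foo") simply returns the
   * string "5foo" (values are hypothetical).
   */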
  protected void save() throws StorageException, TransactionContextException {
    save(this);
  }

  protected void save(INode node)
      throws StorageException, TransactionContextException {
    EntityManager.update(node);
  }

  protected void remove() throws StorageException, TransactionContextException {
    remove(this);
  }

  protected void remove(INode node)
      throws StorageException, TransactionContextException {
    EntityManager.remove(node);
    // If this inode is of type INodeDirectoryWithQuota, also delete its entry
    // in the INode attributes table.
    if (node instanceof INodeDirectoryWithQuota) {
      ((INodeDirectoryWithQuota) node).removeAttributes();
    }
    cleanParity(node);
  }

  private void cleanParity(INode node)
      throws StorageException, TransactionContextException {
    if (ErasureCodingManager.isEnabled()) {
      EncodingStatus status =
          EntityManager.find(EncodingStatus.Finder.ByInodeId, node.getId());
      if (status != null) {
        status.setStatus(EncodingStatus.Status.DELETED);
        EntityManager.update(status);
        return;
      }
    }
  }

  public boolean isSubtreeLocked() {
    return subtreeLocked;
  }

  public void setSubtreeLocked(boolean subtreeLocked) {
    this.subtreeLocked = subtreeLocked;
  }

  public long getSubtreeLockOwner() {
    return subtreeLockOwner;
  }

  public void setSubtreeLockOwner(long subtreeLockOwner) {
    this.subtreeLockOwner = subtreeLockOwner;
  }

  public void lockSubtree(long subtreeLockOwner) {
    setSubtreeLocked(true);
    setSubtreeLockOwner(subtreeLockOwner);
  }

  public void unlockSubtree() {
    setSubtreeLocked(false);
  }

  public boolean isFile() {
    return !isDirectory() && !isSymlink();
  }

  public void logMetadataEvent(MetadataLogEntry.Operation operation)
      throws StorageException, TransactionContextException {
    if (isUnderConstruction()) {
      return;
    }
    if (isPathMetaEnabled()) {
      if (getPartitionId() == null) {
        throw new RuntimeException("Trying to log metadata for an inode that " +
            "wasn't committed to the database");
      }
      INodeDirectory datasetDir = getMetaEnabledParent();
      EntityManager.add(new MetadataLogEntry(datasetDir.getId(), getId(),
          getPartitionId(), getParentId(), getLocalName(), operation));
    }
  }
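  /*
   * Sketch of the entry written by logMetadataEvent() above when this inode
   * lives under a meta-enabled ("dataset") directory; the arguments mirror
   * the MetadataLogEntry constructor call in that method:
   *
   *   new MetadataLogEntry(
   *       datasetDir.getId(),  // closest meta-enabled ancestor
   *       getId(),             // this inode
   *       getPartitionId(),    // must already be set, see the check above
   *       getParentId(),
   *       getLocalName(),
   *       operation);
   *
   * Nothing is logged while the inode is still under construction.
   */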
  boolean isPathMetaEnabled()
      throws TransactionContextException, StorageException {
    return getMetaEnabledParent() != null;
  }

  INodeDirectory getMetaEnabledParent()
      throws TransactionContextException, StorageException {
    INodeDirectory dir = getParent();
    while (!isRoot() && !dir.isRoot()) {
      if (dir.isMetaEnabled()) {
        return dir;
      }
      dir = dir.getParent();
    }
    return null;
  }

  public Integer getPartitionId() {
    return partitionId;
  }

  public void setPartitionIdNoPersistance(Integer partitionId) {
    this.partitionId = partitionId;
  }

  public void setPartitionId(Integer partitionId)
      throws TransactionContextException, StorageException {
    setPartitionIdNoPersistance(partitionId);
    save();
  }

  public void calculateAndSetPartitionIdNoPersistance(int parentId, String name,
      short depth) {
    setPartitionIdNoPersistance(calculatePartitionId(parentId, name, depth));
  }

  public void calculateAndSetPartitionId(int parentId, String name, short depth)
      throws TransactionContextException, StorageException {
    setPartitionIdNoPersistance(calculatePartitionId(parentId, name, depth));
    save();
  }

  public static int calculatePartitionId(int parentId, String name, short depth) {
    if (isTreeLevelRandomPartitioned(depth)) {
      return partitionIdHashFunction(parentId, name, depth);
    } else {
      return parentId;
    }
  }

  private static int partitionIdHashFunction(int parentId, String name,
      short depth) {
    if (depth == INodeDirectory.ROOT_DIR_DEPTH) {
      return INodeDirectory.ROOT_DIR_PARTITION_KEY;
    } else {
      return (name + parentId).hashCode();
      // String partitionid = String.format("%04d%04d",parentId,depth);
      // return Integer.parseInt(partitionid);
    }
  }

  public static boolean isTreeLevelRandomPartitioned(short depth) {
    return depth <= RANDOM_PARTITIONING_MAX_LEVEL;
  }

  public static short getBlockReplication(long header) {
    long val = (header & REPLICATION_MASK);
    long val2 = val >> BLOCK_BITS;
    return (short) val2;
  }

  void setReplicationNoPersistance(short replication) {
    if (replication <= 0 || replication > (Math.pow(2, REPLICATION_BITS) - 1)) {
      throw new IllegalArgumentException("Unexpected value for the " +
          "replication [" + replication + "]. Expected [1:" +
          (Math.pow(2, REPLICATION_BITS) - 1) + "]");
    }
    header = ((long) replication << BLOCK_BITS) | (header & ~REPLICATION_MASK);
  }

  public static long getPreferredBlockSize(long header) {
    return header & BLOCK_SIZE_MASK;
  }

  protected void setPreferredBlockSizeNoPersistance(long preferredBlkSize) {
    if ((preferredBlkSize < 0) ||
        (preferredBlkSize > (Math.pow(2, BLOCK_BITS) - 1))) {
      throw new IllegalArgumentException("Unexpected value for the block " +
          "size [" + preferredBlkSize + "]. Expected [0:" +
          (Math.pow(2, BLOCK_BITS) - 1) + "]");
    }
    header = (header & ~BLOCK_SIZE_MASK) | (preferredBlkSize & BLOCK_SIZE_MASK);
  }

  public long getHeader() {
    return header;
  }

  public void setHeaderNoPersistance(long header) {
    long preferredBlkSize = getPreferredBlockSize(header);
    short replication = getBlockReplication(header);
    if (preferredBlkSize < 0) {
      throw new IllegalArgumentException("Unexpected value for the " +
          "block size [" + preferredBlkSize + "]");
    }
    if (replication < 0) {
      throw new IllegalArgumentException("Unexpected value for the " +
          "replication [" + replication + "]");
    }
    this.header = header;
  }

  public void setHasBlocks(boolean hasBlocks)
      throws TransactionContextException, StorageException {
    setHasBlocksNoPersistance(hasBlocks);
    save();
  }
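  /*
   * Illustrative summary of calculatePartitionId() above (ids and names are
   * hypothetical; the default RANDOM_PARTITIONING_MAX_LEVEL is 1):
   *
   *   // depth == INodeDirectory.ROOT_DIR_DEPTH
   *   //   -> INodeDirectory.ROOT_DIR_PARTITION_KEY
   *   // depth <= RANDOM_PARTITIONING_MAX_LEVEL (randomly partitioned levels)
   *   //   -> (name + parentId).hashCode(), e.g. ("user" + rootId).hashCode()
   *   // depth >  RANDOM_PARTITIONING_MAX_LEVEL
   *   //   -> parentId, i.e. the inode is stored in its parent's partition
   */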
  public void setHasBlocksNoPersistance(boolean hasBlocks) {
    long val = (hasBlocks) ? 1 : 0;
    header = ((long) val << (BLOCK_BITS + REPLICATION_BITS)) |
        (header & ~HAS_BLKS_MASK);
  }

  public static boolean hasBlocks(long header) {
    long val = (header & HAS_BLKS_MASK);
    long val2 = val >> (BLOCK_BITS + REPLICATION_BITS);
    if (val2 == 1) {
      return true;
    } else if (val2 == 0) {
      return false;
    } else {
      throw new IllegalStateException("Flags in the inode header are messed up");
    }
  }

  public boolean hasBlocks() {
    return hasBlocks(header);
  }

  public short myDepth() throws TransactionContextException, StorageException {
    if (id == NON_EXISTING_ID) {
      throw new IllegalStateException(
          "INode is not connected to the file system tree yet");
    }
    if (id == INodeDirectory.ROOT_ID) {
      return INodeDirectory.ROOT_DIR_DEPTH;
    }
    INode parentInode = EntityManager.find(Finder.ByINodeIdFTIS, getParentId());
    return (short) (parentInode.myDepth() + 1);
  }
}