/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.util; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hbase.ClusterId; import org.apache.hadoop.hbase.DeserializationException; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HDFSBlocksDistribution; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.FSProtos; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import com.google.common.primitives.Ints; import com.google.protobuf.InvalidProtocolBufferException; /** * Utility methods for interacting with the underlying file system. */ @InterfaceAudience.Public @InterfaceStability.Evolving public abstract class FSUtils { private static final Log LOG = LogFactory.getLog(FSUtils.class); /** Full access permissions (starting point for a umask) */ private static final String FULL_RWX_PERMISSIONS = "777"; protected FSUtils() { super(); } public static FSUtils getInstance(FileSystem fs, Configuration conf) { String scheme = fs.getUri().getScheme(); if (scheme == null) { LOG.warn("Could not find scheme for uri " + fs.getUri() + ", default to hdfs"); scheme = "hdfs"; } Class<?> fsUtilsClass = conf.getClass("hbase.fsutil." + scheme + ".impl", FSHDFSUtils.class); // Default to HDFS impl FSUtils fsUtils = (FSUtils)ReflectionUtils.newInstance(fsUtilsClass, conf); return fsUtils; } /** * Delete if exists. * @param fs filesystem object * @param dir directory to delete * @return True if deleted <code>dir</code> * @throws IOException e */ public static boolean deleteDirectory(final FileSystem fs, final Path dir) throws IOException { return fs.exists(dir) && fs.delete(dir, true); } /** * Check if directory exists. If it does not, create it. * @param fs filesystem object * @param dir path to check * @return Path * @throws IOException e */ public Path checkdir(final FileSystem fs, final Path dir) throws IOException { if (!fs.exists(dir)) { fs.mkdirs(dir); } return dir; } /** * Create the specified file on the filesystem. By default, this will: * <ol> * <li>overwrite the file if it exists</li> * <li>apply the umask in the configuration (if it is enabled)</li> * <li>use the fs configured buffer size (or {@value DEFAULT_BUFFER_SIZE} if * not set)</li> * <li>use the default replication</li> * <li>use the default block size</li> * <li>not track progress</li> * </ol> * * @param fs {@link FileSystem} on which to write the file * @param path {@link Path} to the file to write * @return output stream to the created file * @throws IOException if the file cannot be created */ public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm) throws IOException { return create(fs, path, perm, true); } /** * Create the specified file on the filesystem. By default, this will: * <ol> * <li>apply the umask in the configuration (if it is enabled)</li> * <li>use the fs configured buffer size (or {@value DEFAULT_BUFFER_SIZE} if * not set)</li> * <li>use the default replication</li> * <li>use the default block size</li> * <li>not track progress</li> * </ol> * * @param fs {@link FileSystem} on which to write the file * @param path {@link Path} to the file to write * @param perm * @param overwrite Whether or not the created file should be overwritten. * @return output stream to the created file * @throws IOException if the file cannot be created */ public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm, boolean overwrite) throws IOException { LOG.debug("Creating file=" + path + " with permission=" + perm); return fs.create(path, perm, overwrite, fs.getConf().getInt("io.file.buffer.size", 4096), fs.getDefaultReplication(), fs.getDefaultBlockSize(), null); } /** * Get the file permissions specified in the configuration, if they are * enabled. * * @param fs filesystem that the file will be created on. * @param conf configuration to read for determining if permissions are * enabled and which to use * @param permssionConfKey property key in the configuration to use when * finding the permission * @return the permission to use when creating a new file on the fs. If * special permissions are not specified in the configuration, then * the default permissions on the the fs will be returned. */ public static FsPermission getFilePermissions(final FileSystem fs, final Configuration conf, final String permssionConfKey) { boolean enablePermissions = conf.getBoolean( HConstants.ENABLE_DATA_FILE_UMASK, false); if (enablePermissions) { try { FsPermission perm = new FsPermission(FULL_RWX_PERMISSIONS); // make sure that we have a mask, if not, go default. String mask = conf.get(permssionConfKey); if (mask == null) return FsPermission.getDefault(); // appy the umask FsPermission umask = new FsPermission(mask); return perm.applyUMask(umask); } catch (IllegalArgumentException e) { LOG.warn( "Incorrect umask attempted to be created: " + conf.get(permssionConfKey) + ", using default file permissions.", e); return FsPermission.getDefault(); } } return FsPermission.getDefault(); } /** * Checks to see if the specified file system is available * * @param fs filesystem * @throws IOException e */ public static void checkFileSystemAvailable(final FileSystem fs) throws IOException { if (!(fs instanceof DistributedFileSystem)) { return; } IOException exception = null; DistributedFileSystem dfs = (DistributedFileSystem) fs; try { if (dfs.exists(new Path("/"))) { return; } } catch (IOException e) { exception = RemoteExceptionHandler.checkIOException(e); } try { fs.close(); } catch (Exception e) { LOG.error("file system close failed: ", e); } IOException io = new IOException("File system is not available"); io.initCause(exception); throw io; } /** * Check whether dfs is in safemode. * @param conf * @throws IOException */ public static void checkDfsSafeMode(final Configuration conf) throws IOException { boolean isInSafeMode = false; FileSystem fs = FileSystem.get(conf); if (fs instanceof DistributedFileSystem) { DistributedFileSystem dfs = (DistributedFileSystem)fs; // Check whether dfs is on safemode. isInSafeMode = dfs.setSafeMode(org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET); } if (isInSafeMode) { throw new IOException("File system is in safemode, it can't be written now"); } } /** * Verifies current version of file system * * @param fs filesystem object * @param rootdir root hbase directory * @return null if no version file exists, version string otherwise. * @throws IOException e * @throws DeserializationException */ public static String getVersion(FileSystem fs, Path rootdir) throws IOException, DeserializationException { Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME); FileStatus[] status = null; try { // hadoop 2.0 throws FNFE if directory does not exist. // hadoop 1.0 returns null if directory does not exist. status = fs.listStatus(versionFile); } catch (FileNotFoundException fnfe) { return null; } if (status == null || status.length == 0) return null; String version = null; byte [] content = new byte [(int)status[0].getLen()]; FSDataInputStream s = fs.open(versionFile); try { IOUtils.readFully(s, content, 0, content.length); if (ProtobufUtil.isPBMagicPrefix(content)) { version = parseVersionFrom(content); } else { // Presume it pre-pb format. InputStream is = new ByteArrayInputStream(content); DataInputStream dis = new DataInputStream(is); try { version = dis.readUTF(); } finally { dis.close(); } // Update the format LOG.info("Updating the hbase.version file format with version=" + version); setVersion(fs, rootdir, version, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS); } } catch (EOFException eof) { LOG.warn("Version file was empty, odd, will try to set it."); } finally { s.close(); } return version; } /** * Parse the content of the ${HBASE_ROOTDIR}/hbase.version file. * @param bytes The byte content of the hbase.version file. * @return The version found in the file as a String. * @throws DeserializationException */ static String parseVersionFrom(final byte [] bytes) throws DeserializationException { ProtobufUtil.expectPBMagicPrefix(bytes); int pblen = ProtobufUtil.lengthOfPBMagic(); FSProtos.HBaseVersionFileContent.Builder builder = FSProtos.HBaseVersionFileContent.newBuilder(); FSProtos.HBaseVersionFileContent fileContent; try { fileContent = builder.mergeFrom(bytes, pblen, bytes.length - pblen).build(); return fileContent.getVersion(); } catch (InvalidProtocolBufferException e) { // Convert throw new DeserializationException(e); } } /** * Create the content to write into the ${HBASE_ROOTDIR}/hbase.version file. * @param version Version to persist * @return Serialized protobuf with <code>version</code> content and a bit of pb magic for a prefix. */ static byte [] toVersionByteArray(final String version) { FSProtos.HBaseVersionFileContent.Builder builder = FSProtos.HBaseVersionFileContent.newBuilder(); return ProtobufUtil.prependPBMagic(builder.setVersion(version).build().toByteArray()); } /** * Verifies current version of file system * * @param fs file system * @param rootdir root directory of HBase installation * @param message if true, issues a message on System.out * * @throws IOException e * @throws DeserializationException */ public static void checkVersion(FileSystem fs, Path rootdir, boolean message) throws IOException, DeserializationException { checkVersion(fs, rootdir, message, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS); } /** * Verifies current version of file system * * @param fs file system * @param rootdir root directory of HBase installation * @param message if true, issues a message on System.out * @param wait wait interval * @param retries number of times to retry * * @throws IOException e * @throws DeserializationException */ public static void checkVersion(FileSystem fs, Path rootdir, boolean message, int wait, int retries) throws IOException, DeserializationException { String version = getVersion(fs, rootdir); if (version == null) { if (!rootRegionExists(fs, rootdir)) { // rootDir is empty (no version file and no root region) // just create new version file (HBASE-1195) setVersion(fs, rootdir, wait, retries); return; } } else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0) return; // version is deprecated require migration // Output on stdout so user sees it in terminal. String msg = "File system needs to be upgraded." + " You have version " + version + " and I want version " + HConstants.FILE_SYSTEM_VERSION + ". Run the '${HBASE_HOME}/bin/hbase migrate' script."; if (message) { System.out.println("WARNING! " + msg); } throw new FileSystemVersionException(msg); } /** * Sets version of file system * * @param fs filesystem object * @param rootdir hbase root * @throws IOException e */ public static void setVersion(FileSystem fs, Path rootdir) throws IOException { setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS); } /** * Sets version of file system * * @param fs filesystem object * @param rootdir hbase root * @param wait time to wait for retry * @param retries number of times to retry before failing * @throws IOException e */ public static void setVersion(FileSystem fs, Path rootdir, int wait, int retries) throws IOException { setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait, retries); } /** * Sets version of file system * * @param fs filesystem object * @param rootdir hbase root directory * @param version version to set * @param wait time to wait for retry * @param retries number of times to retry before throwing an IOException * @throws IOException e */ public static void setVersion(FileSystem fs, Path rootdir, String version, int wait, int retries) throws IOException { Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME); while (true) { try { FSDataOutputStream s = fs.create(versionFile); s.write(toVersionByteArray(version)); s.close(); LOG.debug("Created version file at " + rootdir.toString() + " with version=" + version); return; } catch (IOException e) { if (retries > 0) { LOG.warn("Unable to create version file at " + rootdir.toString() + ", retrying", e); fs.delete(versionFile, false); try { if (wait > 0) { Thread.sleep(wait); } } catch (InterruptedException ex) { // ignore } retries--; } else { throw e; } } } } /** * Checks that a cluster ID file exists in the HBase root directory * @param fs the root directory FileSystem * @param rootdir the HBase root directory in HDFS * @param wait how long to wait between retries * @return <code>true</code> if the file exists, otherwise <code>false</code> * @throws IOException if checking the FileSystem fails */ public static boolean checkClusterIdExists(FileSystem fs, Path rootdir, int wait) throws IOException { while (true) { try { Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME); return fs.exists(filePath); } catch (IOException ioe) { if (wait > 0) { LOG.warn("Unable to check cluster ID file in " + rootdir.toString() + ", retrying in "+wait+"msec: "+StringUtils.stringifyException(ioe)); try { Thread.sleep(wait); } catch (InterruptedException ie) { Thread.interrupted(); break; } } else { throw ioe; } } } return false; } /** * Returns the value of the unique cluster ID stored for this HBase instance. * @param fs the root directory FileSystem * @param rootdir the path to the HBase root directory * @return the unique cluster identifier * @throws IOException if reading the cluster ID file fails */ public static ClusterId getClusterId(FileSystem fs, Path rootdir) throws IOException { Path idPath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME); ClusterId clusterId = null; FileStatus status = fs.exists(idPath)? fs.getFileStatus(idPath): null; if (status != null) { int len = Ints.checkedCast(status.getLen()); byte [] content = new byte[len]; FSDataInputStream in = fs.open(idPath); try { in.readFully(content); } catch (EOFException eof) { LOG.warn("Cluster ID file " + idPath.toString() + " was empty"); } finally{ in.close(); } try { clusterId = ClusterId.parseFrom(content); } catch (DeserializationException e) { throw new IOException("content=" + Bytes.toString(content), e); } // If not pb'd, make it so. if (!ProtobufUtil.isPBMagicPrefix(content)) rewriteAsPb(fs, rootdir, idPath, clusterId); return clusterId; } else { LOG.warn("Cluster ID file does not exist at " + idPath.toString()); } return clusterId; } /** * @param cid * @throws IOException */ private static void rewriteAsPb(final FileSystem fs, final Path rootdir, final Path p, final ClusterId cid) throws IOException { // Rewrite the file as pb. Move aside the old one first, write new // then delete the moved-aside file. Path movedAsideName = new Path(p + "." + System.currentTimeMillis()); if (!fs.rename(p, movedAsideName)) throw new IOException("Failed rename of " + p); setClusterId(fs, rootdir, cid, 100); if (!fs.delete(movedAsideName, false)) { throw new IOException("Failed delete of " + movedAsideName); } LOG.debug("Rewrote the hbase.id file as pb"); } /** * Writes a new unique identifier for this cluster to the "hbase.id" file * in the HBase root directory * @param fs the root directory FileSystem * @param rootdir the path to the HBase root directory * @param clusterId the unique identifier to store * @param wait how long (in milliseconds) to wait between retries * @throws IOException if writing to the FileSystem fails and no wait value */ public static void setClusterId(FileSystem fs, Path rootdir, ClusterId clusterId, int wait) throws IOException { while (true) { try { Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME); FSDataOutputStream s = fs.create(filePath); try { s.write(clusterId.toByteArray()); } finally { s.close(); } if (LOG.isDebugEnabled()) { LOG.debug("Created cluster ID file at " + filePath.toString() + " with ID: " + clusterId); } return; } catch (IOException ioe) { if (wait > 0) { LOG.warn("Unable to create cluster ID file in " + rootdir.toString() + ", retrying in " + wait + "msec: " + StringUtils.stringifyException(ioe)); try { Thread.sleep(wait); } catch (InterruptedException ie) { Thread.interrupted(); break; } } else { throw ioe; } } } } /** * Verifies root directory path is a valid URI with a scheme * * @param root root directory path * @return Passed <code>root</code> argument. * @throws IOException if not a valid URI with a scheme */ public static Path validateRootPath(Path root) throws IOException { try { URI rootURI = new URI(root.toString()); String scheme = rootURI.getScheme(); if (scheme == null) { throw new IOException("Root directory does not have a scheme"); } return root; } catch (URISyntaxException e) { IOException io = new IOException("Root directory path is not a valid " + "URI -- check your " + HConstants.HBASE_DIR + " configuration"); io.initCause(e); throw io; } } /** * If DFS, check safe mode and if so, wait until we clear it. * @param conf configuration * @param wait Sleep between retries * @throws IOException e */ public static void waitOnSafeMode(final Configuration conf, final long wait) throws IOException { FileSystem fs = FileSystem.get(conf); if (!(fs instanceof DistributedFileSystem)) return; DistributedFileSystem dfs = (DistributedFileSystem)fs; // Make sure dfs is not in safe mode while (dfs.setSafeMode(org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET)) { LOG.info("Waiting for dfs to exit safe mode..."); try { Thread.sleep(wait); } catch (InterruptedException e) { //continue } } } /** * Return the 'path' component of a Path. In Hadoop, Path is an URI. This * method returns the 'path' component of a Path's URI: e.g. If a Path is * <code>hdfs://example.org:9000/hbase_trunk/TestTable/compaction.dir</code>, * this method returns <code>/hbase_trunk/TestTable/compaction.dir</code>. * This method is useful if you want to print out a Path without qualifying * Filesystem instance. * @param p Filesystem Path whose 'path' component we are to return. * @return Path portion of the Filesystem */ public static String getPath(Path p) { return p.toUri().getPath(); } /** * @param c configuration * @return Path to hbase root directory: i.e. <code>hbase.rootdir</code> from * configuration as a qualified Path. * @throws IOException e */ public static Path getRootDir(final Configuration c) throws IOException { Path p = new Path(c.get(HConstants.HBASE_DIR)); FileSystem fs = p.getFileSystem(c); return p.makeQualified(fs); } public static void setRootDir(final Configuration c, final Path root) throws IOException { c.set(HConstants.HBASE_DIR, root.toString()); } /** * Checks if root region exists * * @param fs file system * @param rootdir root directory of HBase installation * @return true if exists * @throws IOException e */ public static boolean rootRegionExists(FileSystem fs, Path rootdir) throws IOException { Path rootRegionDir = HRegion.getRegionDir(rootdir, HRegionInfo.ROOT_REGIONINFO); return fs.exists(rootRegionDir); } /** * Compute HDFS blocks distribution of a given file, or a portion of the file * @param fs file system * @param status file status of the file * @param start start position of the portion * @param length length of the portion * @return The HDFS blocks distribution */ static public HDFSBlocksDistribution computeHDFSBlocksDistribution( final FileSystem fs, FileStatus status, long start, long length) throws IOException { HDFSBlocksDistribution blocksDistribution = new HDFSBlocksDistribution(); BlockLocation [] blockLocations = fs.getFileBlockLocations(status, start, length); for(BlockLocation bl : blockLocations) { String [] hosts = bl.getHosts(); long len = bl.getLength(); blocksDistribution.addHostsAndBlockWeight(hosts, len); } return blocksDistribution; } /** * Runs through the hbase rootdir and checks all stores have only * one file in them -- that is, they've been major compacted. Looks * at root and meta tables too. * @param fs filesystem * @param hbaseRootDir hbase root directory * @return True if this hbase install is major compacted. * @throws IOException e */ public static boolean isMajorCompacted(final FileSystem fs, final Path hbaseRootDir) throws IOException { // Presumes any directory under hbase.rootdir is a table. FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, new DirFilter(fs)); for (FileStatus tableDir : tableDirs) { // Skip the .log directory. All others should be tables. Inside a table, // there are compaction.dir directories to skip. Otherwise, all else // should be regions. Then in each region, should only be family // directories. Under each of these, should be one file only. Path d = tableDir.getPath(); if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) { continue; } FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs)); for (FileStatus regionDir : regionDirs) { Path dd = regionDir.getPath(); if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) { continue; } // Else its a region name. Now look in region for families. FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs)); for (FileStatus familyDir : familyDirs) { Path family = familyDir.getPath(); // Now in family make sure only one file. FileStatus[] familyStatus = fs.listStatus(family); if (familyStatus.length > 1) { LOG.debug(family.toString() + " has " + familyStatus.length + " files."); return false; } } } } return true; } // TODO move this method OUT of FSUtils. No dependencies to HMaster /** * Returns the total overall fragmentation percentage. Includes .META. and * -ROOT- as well. * * @param master The master defining the HBase root and file system. * @return A map for each table and its percentage. * @throws IOException When scanning the directory fails. */ public static int getTotalTableFragmentation(final HMaster master) throws IOException { Map<String, Integer> map = getTableFragmentation(master); return map != null && map.size() > 0 ? map.get("-TOTAL-") : -1; } /** * Runs through the HBase rootdir and checks how many stores for each table * have more than one file in them. Checks -ROOT- and .META. too. The total * percentage across all tables is stored under the special key "-TOTAL-". * * @param master The master defining the HBase root and file system. * @return A map for each table and its percentage. * * @throws IOException When scanning the directory fails. */ public static Map<String, Integer> getTableFragmentation( final HMaster master) throws IOException { Path path = getRootDir(master.getConfiguration()); // since HMaster.getFileSystem() is package private FileSystem fs = path.getFileSystem(master.getConfiguration()); return getTableFragmentation(fs, path); } /** * Runs through the HBase rootdir and checks how many stores for each table * have more than one file in them. Checks -ROOT- and .META. too. The total * percentage across all tables is stored under the special key "-TOTAL-". * * @param fs The file system to use. * @param hbaseRootDir The root directory to scan. * @return A map for each table and its percentage. * @throws IOException When scanning the directory fails. */ public static Map<String, Integer> getTableFragmentation( final FileSystem fs, final Path hbaseRootDir) throws IOException { Map<String, Integer> frags = new HashMap<String, Integer>(); int cfCountTotal = 0; int cfFragTotal = 0; DirFilter df = new DirFilter(fs); // presumes any directory under hbase.rootdir is a table FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, df); for (FileStatus tableDir : tableDirs) { // Skip the .log directory. All others should be tables. Inside a table, // there are compaction.dir directories to skip. Otherwise, all else // should be regions. Then in each region, should only be family // directories. Under each of these, should be one file only. Path d = tableDir.getPath(); if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) { continue; } int cfCount = 0; int cfFrag = 0; FileStatus[] regionDirs = fs.listStatus(d, df); for (FileStatus regionDir : regionDirs) { Path dd = regionDir.getPath(); if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) { continue; } // else its a region name, now look in region for families FileStatus[] familyDirs = fs.listStatus(dd, df); for (FileStatus familyDir : familyDirs) { cfCount++; cfCountTotal++; Path family = familyDir.getPath(); // now in family make sure only one file FileStatus[] familyStatus = fs.listStatus(family); if (familyStatus.length > 1) { cfFrag++; cfFragTotal++; } } } // compute percentage per table and store in result list frags.put(d.getName(), Math.round((float) cfFrag / cfCount * 100)); } // set overall percentage for all tables frags.put("-TOTAL-", Math.round((float) cfFragTotal / cfCountTotal * 100)); return frags; } /** * Expects to find -ROOT- directory. * @param fs filesystem * @param hbaseRootDir hbase root directory * @return True if this a pre020 layout. * @throws IOException e */ public static boolean isPre020FileLayout(final FileSystem fs, final Path hbaseRootDir) throws IOException { Path mapfiles = new Path(new Path(new Path(new Path(hbaseRootDir, "-ROOT-"), "70236052"), "info"), "mapfiles"); return fs.exists(mapfiles); } /** * Runs through the hbase rootdir and checks all stores have only * one file in them -- that is, they've been major compacted. Looks * at root and meta tables too. This version differs from * {@link #isMajorCompacted(FileSystem, Path)} in that it expects a * pre-0.20.0 hbase layout on the filesystem. Used migrating. * @param fs filesystem * @param hbaseRootDir hbase root directory * @return True if this hbase install is major compacted. * @throws IOException e */ public static boolean isMajorCompactedPre020(final FileSystem fs, final Path hbaseRootDir) throws IOException { // Presumes any directory under hbase.rootdir is a table. FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, new DirFilter(fs)); for (FileStatus tableDir : tableDirs) { // Inside a table, there are compaction.dir directories to skip. // Otherwise, all else should be regions. Then in each region, should // only be family directories. Under each of these, should be a mapfile // and info directory and in these only one file. Path d = tableDir.getPath(); if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) { continue; } FileStatus[] regionDirs = fs.listStatus(d, new DirFilter(fs)); for (FileStatus regionDir : regionDirs) { Path dd = regionDir.getPath(); if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) { continue; } // Else its a region name. Now look in region for families. FileStatus[] familyDirs = fs.listStatus(dd, new DirFilter(fs)); for (FileStatus familyDir : familyDirs) { Path family = familyDir.getPath(); FileStatus[] infoAndMapfile = fs.listStatus(family); // Assert that only info and mapfile in family dir. if (infoAndMapfile.length != 0 && infoAndMapfile.length != 2) { LOG.debug(family.toString() + " has more than just info and mapfile: " + infoAndMapfile.length); return false; } // Make sure directory named info or mapfile. for (int ll = 0; ll < 2; ll++) { if (infoAndMapfile[ll].getPath().getName().equals("info") || infoAndMapfile[ll].getPath().getName().equals("mapfiles")) continue; LOG.debug("Unexpected directory name: " + infoAndMapfile[ll].getPath()); return false; } // Now in family, there are 'mapfile' and 'info' subdirs. Just // look in the 'mapfile' subdir. FileStatus[] familyStatus = fs.listStatus(new Path(family, "mapfiles")); if (familyStatus.length > 1) { LOG.debug(family.toString() + " has " + familyStatus.length + " files."); return false; } } } } return true; } /** * A {@link PathFilter} that returns directories. */ public static class DirFilter implements PathFilter { private final FileSystem fs; public DirFilter(final FileSystem fs) { this.fs = fs; } public boolean accept(Path p) { boolean isValid = false; try { if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(p.toString())) { isValid = false; } else { isValid = this.fs.getFileStatus(p).isDir(); } } catch (IOException e) { LOG.warn("An error occurred while verifying if [" + p.toString() + "] is a valid directory. Returning 'not valid' and continuing.", e); } return isValid; } } /** * Heuristic to determine whether is safe or not to open a file for append * Looks both for dfs.support.append and use reflection to search * for SequenceFile.Writer.syncFs() or FSDataOutputStream.hflush() * @param conf * @return True if append support */ public static boolean isAppendSupported(final Configuration conf) { boolean append = conf.getBoolean("dfs.support.append", false); if (append) { try { // TODO: The implementation that comes back when we do a createWriter // may not be using SequenceFile so the below is not a definitive test. // Will do for now (hdfs-200). SequenceFile.Writer.class.getMethod("syncFs", new Class<?> []{}); append = true; } catch (SecurityException e) { } catch (NoSuchMethodException e) { append = false; } } if (!append) { // Look for the 0.21, 0.22, new-style append evidence. try { FSDataOutputStream.class.getMethod("hflush", new Class<?> []{}); append = true; } catch (NoSuchMethodException e) { append = false; } } return append; } /** * @param conf * @return True if this filesystem whose scheme is 'hdfs'. * @throws IOException */ public static boolean isHDFS(final Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); String scheme = fs.getUri().getScheme(); return scheme.equalsIgnoreCase("hdfs"); } /** * Recover file lease. Used when a file might be suspect * to be had been left open by another process. * @param fs FileSystem handle * @param p Path of file to recover lease * @param conf Configuration handle * @throws IOException */ public abstract void recoverFileLease(final FileSystem fs, final Path p, Configuration conf) throws IOException; /** * @param fs * @param rootdir * @return All the table directories under <code>rootdir</code>. Ignore non table hbase folders such as * .logs, .oldlogs, .corrupt, .META., and -ROOT- folders. * @throws IOException */ public static List<Path> getTableDirs(final FileSystem fs, final Path rootdir) throws IOException { // presumes any directory under hbase.rootdir is a table FileStatus [] dirs = fs.listStatus(rootdir, new DirFilter(fs)); List<Path> tabledirs = new ArrayList<Path>(dirs.length); for (FileStatus dir: dirs) { Path p = dir.getPath(); String tableName = p.getName(); if (!HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName)) { tabledirs.add(p); } } return tabledirs; } public static Path getTablePath(Path rootdir, byte [] tableName) { return getTablePath(rootdir, Bytes.toString(tableName)); } public static Path getTablePath(Path rootdir, final String tableName) { return new Path(rootdir, tableName); } /** * Filter for all dirs that don't start with '.' */ public static class RegionDirFilter implements PathFilter { // This pattern will accept 0.90+ style hex region dirs and older numeric region dir names. final public static Pattern regionDirPattern = Pattern.compile("^[0-9a-f]*$"); final FileSystem fs; public RegionDirFilter(FileSystem fs) { this.fs = fs; } @Override public boolean accept(Path rd) { if (!regionDirPattern.matcher(rd.getName()).matches()) { return false; } try { return fs.getFileStatus(rd).isDir(); } catch (IOException ioe) { // Maybe the file was moved or the fs was disconnected. LOG.warn("Skipping file " + rd +" due to IOException", ioe); return false; } } } /** * Given a particular table dir, return all the regiondirs inside it, excluding files such as * .tableinfo * @param fs A file system for the Path * @param tableDir Path to a specific table directory <hbase.rootdir>/<tabledir> * @return List of paths to valid region directories in table dir. * @throws IOException */ public static List<Path> getRegionDirs(final FileSystem fs, final Path tableDir) throws IOException { // assumes we are in a table dir. FileStatus[] rds = fs.listStatus(tableDir, new RegionDirFilter(fs)); List<Path> regionDirs = new ArrayList<Path>(rds.length); for (FileStatus rdfs: rds) { Path rdPath = rdfs.getPath(); regionDirs.add(rdPath); } return regionDirs; } /** * Filter for all dirs that are legal column family names. This is generally used for colfam * dirs <hbase.rootdir>/<tabledir>/<regiondir>/<colfamdir>. */ public static class FamilyDirFilter implements PathFilter { final FileSystem fs; public FamilyDirFilter(FileSystem fs) { this.fs = fs; } @Override public boolean accept(Path rd) { try { // throws IAE if invalid HColumnDescriptor.isLegalFamilyName(Bytes.toBytes(rd.getName())); } catch (IllegalArgumentException iae) { // path name is an invalid family name and thus is excluded. return false; } try { return fs.getFileStatus(rd).isDir(); } catch (IOException ioe) { // Maybe the file was moved or the fs was disconnected. LOG.warn("Skipping file " + rd +" due to IOException", ioe); return false; } } } /** * Given a particular region dir, return all the familydirs inside it * * @param fs A file system for the Path * @param regionDir Path to a specific region directory * @return List of paths to valid family directories in region dir. * @throws IOException */ public static List<Path> getFamilyDirs(final FileSystem fs, final Path regionDir) throws IOException { // assumes we are in a region dir. FileStatus[] fds = fs.listStatus(regionDir, new FamilyDirFilter(fs)); List<Path> familyDirs = new ArrayList<Path>(fds.length); for (FileStatus fdfs: fds) { Path fdPath = fdfs.getPath(); familyDirs.add(fdPath); } return familyDirs; } /** * Filter for HFiles that excludes reference files. */ public static class HFileFilter implements PathFilter { // This pattern will accept 0.90+ style hex hfies files but reject reference files final public static Pattern hfilePattern = Pattern.compile("^([0-9a-f]+)$"); final FileSystem fs; public HFileFilter(FileSystem fs) { this.fs = fs; } @Override public boolean accept(Path rd) { if (!hfilePattern.matcher(rd.getName()).matches()) { return false; } try { // only files return !fs.getFileStatus(rd).isDir(); } catch (IOException ioe) { // Maybe the file was moved or the fs was disconnected. LOG.warn("Skipping file " + rd +" due to IOException", ioe); return false; } } } /** * @param conf * @return Returns the filesystem of the hbase rootdir. * @throws IOException */ public static FileSystem getCurrentFileSystem(Configuration conf) throws IOException { return getRootDir(conf).getFileSystem(conf); } /** * Runs through the HBase rootdir and creates a reverse lookup map for * table StoreFile names to the full Path. * <br> * Example...<br> * Key = 3944417774205889744 <br> * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744 * * @param fs The file system to use. * @param hbaseRootDir The root directory to scan. * @return Map keyed by StoreFile name with a value of the full Path. * @throws IOException When scanning the directory fails. */ public static Map<String, Path> getTableStoreFilePathMap( final FileSystem fs, final Path hbaseRootDir) throws IOException { Map<String, Path> map = new HashMap<String, Path>(); // if this method looks similar to 'getTableFragmentation' that is because // it was borrowed from it. DirFilter df = new DirFilter(fs); // presumes any directory under hbase.rootdir is a table FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, df); for (FileStatus tableDir : tableDirs) { // Skip the .log directory. All others should be tables. Inside a table, // there are compaction.dir directories to skip. Otherwise, all else // should be regions. Path d = tableDir.getPath(); if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) { continue; } FileStatus[] regionDirs = fs.listStatus(d, df); for (FileStatus regionDir : regionDirs) { Path dd = regionDir.getPath(); if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) { continue; } // else its a region name, now look in region for families FileStatus[] familyDirs = fs.listStatus(dd, df); for (FileStatus familyDir : familyDirs) { Path family = familyDir.getPath(); // now in family, iterate over the StoreFiles and // put in map FileStatus[] familyStatus = fs.listStatus(family); for (FileStatus sfStatus : familyStatus) { Path sf = sfStatus.getPath(); map.put( sf.getName(), sf); } } } } return map; } /** * Calls fs.listStatus() and treats FileNotFoundException as non-fatal * This accommodates differences between hadoop versions * * @param fs file system * @param dir directory * @param filter path filter * @return null if tabledir doesn't exist, otherwise FileStatus array */ public static FileStatus [] listStatus(final FileSystem fs, final Path dir, final PathFilter filter) throws IOException { FileStatus [] status = null; try { status = filter == null ? fs.listStatus(dir) : fs.listStatus(dir, filter); } catch (FileNotFoundException fnfe) { // if directory doesn't exist, return null LOG.info(dir + " doesn't exist"); } if (status == null || status.length < 1) return null; return status; } /** * Calls fs.listStatus() and treats FileNotFoundException as non-fatal * This would accommodates differences between hadoop versions * * @param fs file system * @param dir directory * @return null if tabledir doesn't exist, otherwise FileStatus array */ public static FileStatus[] listStatus(final FileSystem fs, final Path dir) throws IOException { return listStatus(fs, dir, null); } /** * Calls fs.delete() and returns the value returned by the fs.delete() * * @param fs * @param path * @param recursive * @return * @throws IOException */ public static boolean delete(final FileSystem fs, final Path path, final boolean recursive) throws IOException { return fs.delete(path, recursive); } /** * Calls fs.exists(). Checks if the specified path exists * * @param fs * @param path * @return * @throws IOException */ public static boolean isExists(final FileSystem fs, final Path path) throws IOException { return fs.exists(path); } /** * Log the current state of the filesystem from a certain root directory * @param fs filesystem to investigate * @param root root file/directory to start logging from * @param LOG log to output information * @throws IOException if an unexpected exception occurs */ public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG) throws IOException { LOG.debug("Current file system:"); logFSTree(LOG, fs, root, "|-"); } /** * Throw an exception if an action is not permitted by a user on a file. * * @param ugi * the user * @param file * the file * @param action * the action */ public static void checkAccess(UserGroupInformation ugi, FileStatus file, FsAction action) throws AccessControlException { if (ugi.getShortUserName().equals(file.getOwner())) { if (file.getPermission().getUserAction().implies(action)) { return; } } else if (contains(ugi.getGroupNames(), file.getGroup())) { if (file.getPermission().getGroupAction().implies(action)) { return; } } else if (file.getPermission().getOtherAction().implies(action)) { return; } throw new AccessControlException("Permission denied:" + " action=" + action + " path=" + file.getPath() + " user=" + ugi.getShortUserName()); } private static boolean contains(String[] groups, String user) { for (String group : groups) { if (group.equals(user)) { return true; } } return false; } /** * Recursive helper to log the state of the FS * * @see #logFileSystemState(FileSystem, Path, Log) */ private static void logFSTree(Log LOG, final FileSystem fs, final Path root, String prefix) throws IOException { FileStatus[] files = FSUtils.listStatus(fs, root, null); if (files == null) return; for (FileStatus file : files) { if (file.isDir()) { LOG.debug(prefix + file.getPath().getName() + "/"); logFSTree(LOG, fs, file.getPath(), prefix + "---"); } else { LOG.debug(prefix + file.getPath().getName()); } } } }