/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.util; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Arrays; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang.NotImplementedException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hbase.DeserializationException; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableDescriptors; import org.apache.hadoop.hbase.TableInfoMissingException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import com.google.common.primitives.Ints; /** * Implementation of {@link TableDescriptors} that reads descriptors from the * passed filesystem. It expects descriptors to be in a file under the * table's directory in FS. Can be read-only -- i.e. does not modify * the filesystem or can be read and write. * * <p>Also has utility for keeping up the table descriptors tableinfo file. * The table schema file is kept under the table directory in the filesystem. * It has a {@link #TABLEINFO_NAME} prefix and then a suffix that is the * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>. This sequenceid * is always increasing. It starts at zero. The table schema file with the * highest sequenceid has the most recent schema edit. Usually there is one file * only, the most recent but there may be short periods where there are more * than one file. Old files are eventually cleaned. Presumption is that there * will not be lots of concurrent clients making table schema edits. If so, * the below needs a bit of a reworking and perhaps some supporting api in hdfs. */ @InterfaceAudience.Private public class FSTableDescriptors implements TableDescriptors { private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class); private final FileSystem fs; private final Path rootdir; private final boolean fsreadonly; long cachehits = 0; long invocations = 0; /** The file name used to store HTD in HDFS */ public static final String TABLEINFO_NAME = ".tableinfo"; // This cache does not age out the old stuff. Thinking is that the amount // of data we keep up in here is so small, no need to do occasional purge. // TODO. private final Map<String, TableDescriptorModtime> cache = new ConcurrentHashMap<String, TableDescriptorModtime>(); /** * Data structure to hold modification time and table descriptor. */ static class TableDescriptorModtime { private final HTableDescriptor descriptor; private final long modtime; TableDescriptorModtime(final long modtime, final HTableDescriptor htd) { this.descriptor = htd; this.modtime = modtime; } long getModtime() { return this.modtime; } HTableDescriptor getTableDescriptor() { return this.descriptor; } } public FSTableDescriptors(final FileSystem fs, final Path rootdir) { this(fs, rootdir, false); } /** * @param fs * @param rootdir * @param fsreadOnly True if we are read-only when it comes to filesystem * operations; i.e. on remove, we do not do delete in fs. */ public FSTableDescriptors(final FileSystem fs, final Path rootdir, final boolean fsreadOnly) { super(); this.fs = fs; this.rootdir = rootdir; this.fsreadonly = fsreadOnly; } /* (non-Javadoc) * @see org.apache.hadoop.hbase.TableDescriptors#getHTableDescriptor(java.lang.String) */ @Override public HTableDescriptor get(final byte [] tablename) throws IOException { return get(Bytes.toString(tablename)); } /* (non-Javadoc) * @see org.apache.hadoop.hbase.TableDescriptors#getTableDescriptor(byte[]) */ @Override public HTableDescriptor get(final String tablename) throws IOException { invocations++; if (HTableDescriptor.ROOT_TABLEDESC.getNameAsString().equals(tablename)) { cachehits++; return HTableDescriptor.ROOT_TABLEDESC; } if (HTableDescriptor.META_TABLEDESC.getNameAsString().equals(tablename)) { cachehits++; return HTableDescriptor.META_TABLEDESC; } // .META. and -ROOT- is already handled. If some one tries to get the descriptor for // .logs, .oldlogs or .corrupt throw an exception. if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename)) { throw new IOException("No descriptor found for table = " + tablename); } // Look in cache of descriptors. TableDescriptorModtime cachedtdm = this.cache.get(tablename); if (cachedtdm != null) { // Check mod time has not changed (this is trip to NN). if (getTableInfoModtime(this.fs, this.rootdir, tablename) <= cachedtdm.getModtime()) { cachehits++; return cachedtdm.getTableDescriptor(); } } TableDescriptorModtime tdmt = null; try { tdmt = getTableDescriptorModtime(this.fs, this.rootdir, tablename); } catch (NullPointerException e) { LOG.debug("Exception during readTableDecriptor. Current table name = " + tablename, e); } catch (IOException ioe) { LOG.debug("Exception during readTableDecriptor. Current table name = " + tablename, ioe); } if (tdmt == null) { LOG.warn("The following folder is in HBase's root directory and " + "doesn't contain a table descriptor, " + "do consider deleting it: " + tablename); } else { this.cache.put(tablename, tdmt); } return tdmt == null ? null : tdmt.getTableDescriptor(); } /* (non-Javadoc) * @see org.apache.hadoop.hbase.TableDescriptors#getTableDescriptors(org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path) */ @Override public Map<String, HTableDescriptor> getAll() throws IOException { Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>(); List<Path> tableDirs = FSUtils.getTableDirs(fs, rootdir); for (Path d: tableDirs) { HTableDescriptor htd = null; try { htd = get(d.getName()); } catch (FileNotFoundException fnfe) { // inability of retrieving one HTD shouldn't stop getting the remaining LOG.warn("Trouble retrieving htd", fnfe); } if (htd == null) continue; htds.put(d.getName(), htd); } return htds; } @Override public void add(HTableDescriptor htd) throws IOException { if (Bytes.equals(HConstants.ROOT_TABLE_NAME, htd.getName())) { throw new NotImplementedException(); } if (Bytes.equals(HConstants.META_TABLE_NAME, htd.getName())) { throw new NotImplementedException(); } if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(htd.getNameAsString())) { throw new NotImplementedException(); } if (!this.fsreadonly) updateHTableDescriptor(this.fs, this.rootdir, htd); long modtime = getTableInfoModtime(this.fs, this.rootdir, htd.getNameAsString()); this.cache.put(htd.getNameAsString(), new TableDescriptorModtime(modtime, htd)); } @Override public HTableDescriptor remove(final String tablename) throws IOException { if (!this.fsreadonly) { Path tabledir = FSUtils.getTablePath(this.rootdir, tablename); if (this.fs.exists(tabledir)) { if (!this.fs.delete(tabledir, true)) { throw new IOException("Failed delete of " + tabledir.toString()); } } } TableDescriptorModtime tdm = this.cache.remove(tablename); return tdm == null ? null : tdm.getTableDescriptor(); } /** * Checks if <code>.tableinfo<code> exists for given table * * @param fs file system * @param rootdir root directory of HBase installation * @param tableName name of table * @return true if exists * @throws IOException */ public static boolean isTableInfoExists(FileSystem fs, Path rootdir, String tableName) throws IOException { FileStatus status = getTableInfoPath(fs, rootdir, tableName); return status == null? false: fs.exists(status.getPath()); } private static FileStatus getTableInfoPath(final FileSystem fs, final Path rootdir, final String tableName) throws IOException { Path tabledir = FSUtils.getTablePath(rootdir, tableName); return getTableInfoPath(fs, tabledir); } /** * Looks under the table directory in the filesystem for files with a * {@link #TABLEINFO_NAME} prefix. Returns reference to the 'latest' instance. * @param fs * @param tabledir * @return The 'current' tableinfo file. * @throws IOException */ public static FileStatus getTableInfoPath(final FileSystem fs, final Path tabledir) throws IOException { FileStatus [] status = FSUtils.listStatus(fs, tabledir, new PathFilter() { @Override public boolean accept(Path p) { // Accept any file that starts with TABLEINFO_NAME return p.getName().startsWith(TABLEINFO_NAME); } }); if (status == null || status.length < 1) return null; Arrays.sort(status, new FileStatusFileNameComparator()); if (status.length > 1) { // Clean away old versions of .tableinfo for (int i = 1; i < status.length; i++) { Path p = status[i].getPath(); // Clean up old versions if (!fs.delete(p, false)) { LOG.warn("Failed cleanup of " + p); } else { LOG.debug("Cleaned up old tableinfo file " + p); } } } return status[0]; } /** * Compare {@link FileStatus} instances by {@link Path#getName()}. * Returns in reverse order. */ static class FileStatusFileNameComparator implements Comparator<FileStatus> { @Override public int compare(FileStatus left, FileStatus right) { return -left.compareTo(right); } } /** * Width of the sequenceid that is a suffix on a tableinfo file. */ static final int WIDTH_OF_SEQUENCE_ID = 10; /* * @param number Number to use as suffix. * @return Returns zero-prefixed 5-byte wide decimal version of passed * number (Does absolute in case number is negative). */ static String formatTableInfoSequenceId(final int number) { byte [] b = new byte[WIDTH_OF_SEQUENCE_ID]; int d = Math.abs(number); for (int i = b.length - 1; i >= 0; i--) { b[i] = (byte)((d % 10) + '0'); d /= 10; } return Bytes.toString(b); } /** * Regex to eat up sequenceid suffix on a .tableinfo file. * Use regex because may encounter oldstyle .tableinfos where there is no * sequenceid on the end. */ private static final Pattern SUFFIX = Pattern.compile(TABLEINFO_NAME + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$"); /** * @param p Path to a <code>.tableinfo</code> file. * @return The current editid or 0 if none found. */ static int getTableInfoSequenceid(final Path p) { if (p == null) return 0; Matcher m = SUFFIX.matcher(p.getName()); if (!m.matches()) throw new IllegalArgumentException(p.toString()); String suffix = m.group(2); if (suffix == null || suffix.length() <= 0) return 0; return Integer.parseInt(m.group(2)); } /** * @param tabledir * @param sequenceid * @return Name of tableinfo file. */ static Path getTableInfoFileName(final Path tabledir, final int sequenceid) { return new Path(tabledir, TABLEINFO_NAME + "." + formatTableInfoSequenceId(sequenceid)); } /** * @param fs * @param rootdir * @param tableName * @return Modification time for the table {@link #TABLEINFO_NAME} file * or <code>0</code> if no tableinfo file found. * @throws IOException */ static long getTableInfoModtime(final FileSystem fs, final Path rootdir, final String tableName) throws IOException { FileStatus status = getTableInfoPath(fs, rootdir, tableName); return status == null? 0: status.getModificationTime(); } /** * Get HTD from HDFS. * @param fs * @param hbaseRootDir * @param tableName * @return Descriptor or null if none found. * @throws IOException */ public static HTableDescriptor getTableDescriptor(FileSystem fs, Path hbaseRootDir, byte[] tableName) throws IOException { HTableDescriptor htd = null; try { TableDescriptorModtime tdmt = getTableDescriptorModtime(fs, hbaseRootDir, Bytes.toString(tableName)); htd = tdmt == null ? null : tdmt.getTableDescriptor(); } catch (NullPointerException e) { LOG.debug("Exception during readTableDecriptor. Current table name = " + Bytes.toString(tableName), e); } return htd; } static HTableDescriptor getTableDescriptor(FileSystem fs, Path hbaseRootDir, String tableName) throws NullPointerException, IOException { TableDescriptorModtime tdmt = getTableDescriptorModtime(fs, hbaseRootDir, tableName); return tdmt == null ? null : tdmt.getTableDescriptor(); } static TableDescriptorModtime getTableDescriptorModtime(FileSystem fs, Path hbaseRootDir, String tableName) throws NullPointerException, IOException{ // ignore both -ROOT- and .META. tables if (Bytes.compareTo(Bytes.toBytes(tableName), HConstants.ROOT_TABLE_NAME) == 0 || Bytes.compareTo(Bytes.toBytes(tableName), HConstants.META_TABLE_NAME) == 0) { return null; } return getTableDescriptorModtime(fs, FSUtils.getTablePath(hbaseRootDir, tableName)); } static TableDescriptorModtime getTableDescriptorModtime(FileSystem fs, Path tableDir) throws NullPointerException, IOException { if (tableDir == null) throw new NullPointerException(); FileStatus status = getTableInfoPath(fs, tableDir); if (status == null) { throw new TableInfoMissingException("No .tableinfo file under " + tableDir.toUri()); } int len = Ints.checkedCast(status.getLen()); byte [] content = new byte[len]; FSDataInputStream fsDataInputStream = fs.open(status.getPath()); try { fsDataInputStream.readFully(content); } finally { fsDataInputStream.close(); } HTableDescriptor htd = null; try { htd = HTableDescriptor.parseFrom(content); } catch (DeserializationException e) { throw new IOException("content=" + Bytes.toShort(content), e); } if (!ProtobufUtil.isPBMagicPrefix(content)) { // Convert the file over to be pb before leaving here. createTableDescriptor(fs, tableDir.getParent(), htd, true); } return new TableDescriptorModtime(status.getModificationTime(), htd); } public static HTableDescriptor getTableDescriptor(FileSystem fs, Path tableDir) throws IOException, NullPointerException { TableDescriptorModtime tdmt = getTableDescriptorModtime(fs, tableDir); return tdmt == null ? null : tdmt.getTableDescriptor(); } /** * Update table descriptor * @param fs * @param conf * @param hTableDescriptor * @return New tableinfo or null if we failed update. * @throws IOException Thrown if failed update. */ static Path updateHTableDescriptor(FileSystem fs, Path rootdir, HTableDescriptor hTableDescriptor) throws IOException { Path tableDir = FSUtils.getTablePath(rootdir, hTableDescriptor.getName()); Path p = writeTableDescriptor(fs, hTableDescriptor, tableDir, getTableInfoPath(fs, tableDir)); if (p == null) throw new IOException("Failed update"); LOG.info("Updated tableinfo=" + p); return p; } /** * Deletes a table's directory from the file system if exists. Used in unit * tests. */ public static void deleteTableDescriptorIfExists(String tableName, Configuration conf) throws IOException { FileSystem fs = FSUtils.getCurrentFileSystem(conf); FileStatus status = getTableInfoPath(fs, FSUtils.getRootDir(conf), tableName); // The below deleteDirectory works for either file or directory. if (status != null && fs.exists(status.getPath())) { FSUtils.deleteDirectory(fs, status.getPath()); } } /** * @param fs * @param hTableDescriptor * @param tableDir * @param status * @return Descriptor file or null if we failed write. * @throws IOException */ private static Path writeTableDescriptor(final FileSystem fs, final HTableDescriptor hTableDescriptor, final Path tableDir, final FileStatus status) throws IOException { // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon. Path tmpTableDir = new Path(tableDir, ".tmp"); // What is current sequenceid? We read the current sequenceid from // the current file. After we read it, another thread could come in and // compete with us writing out next version of file. The below retries // should help in this case some but its hard to do guarantees in face of // concurrent schema edits. int currentSequenceid = status == null? 0: getTableInfoSequenceid(status.getPath()); int sequenceid = currentSequenceid; // Put arbitrary upperbound on how often we retry int retries = 10; int retrymax = currentSequenceid + retries; Path tableInfoPath = null; do { sequenceid += 1; Path p = getTableInfoFileName(tmpTableDir, sequenceid); if (fs.exists(p)) { LOG.debug(p + " exists; retrying up to " + retries + " times"); continue; } try { writeHTD(fs, p, hTableDescriptor); tableInfoPath = getTableInfoFileName(tableDir, sequenceid); if (!fs.rename(p, tableInfoPath)) { throw new IOException("Failed rename of " + p + " to " + tableInfoPath); } } catch (IOException ioe) { // Presume clash of names or something; go around again. LOG.debug("Failed write and/or rename; retrying", ioe); if (!FSUtils.deleteDirectory(fs, p)) { LOG.warn("Failed cleanup of " + p); } tableInfoPath = null; continue; } // Cleanup old schema file. if (status != null) { if (!FSUtils.deleteDirectory(fs, status.getPath())) { LOG.warn("Failed delete of " + status.getPath() + "; continuing"); } } break; } while (sequenceid < retrymax); return tableInfoPath; } private static void writeHTD(final FileSystem fs, final Path p, final HTableDescriptor htd) throws IOException { FSDataOutputStream out = fs.create(p, false); try { // We used to write this file out as a serialized HTD Writable followed by two '\n's and then // the toString version of HTD. Now we just write out the pb serialization. out.write(htd.toByteArray()); } finally { out.close(); } } /** * Create new HTableDescriptor in HDFS. Happens when we are creating table. * * @param htableDescriptor * @param conf */ public static boolean createTableDescriptor(final HTableDescriptor htableDescriptor, Configuration conf) throws IOException { return createTableDescriptor(htableDescriptor, conf, false); } /** * Create new HTableDescriptor in HDFS. Happens when we are creating table. If * forceCreation is true then even if previous table descriptor is present it * will be overwritten * * @param htableDescriptor * @param conf * @param forceCreation True if we are to overwrite existing file. */ static boolean createTableDescriptor(final HTableDescriptor htableDescriptor, final Configuration conf, boolean forceCreation) throws IOException { FileSystem fs = FSUtils.getCurrentFileSystem(conf); return createTableDescriptor(fs, FSUtils.getRootDir(conf), htableDescriptor, forceCreation); } /** * Create new HTableDescriptor in HDFS. Happens when we are creating table. * Used by tests. * @param fs * @param htableDescriptor * @param rootdir */ public static boolean createTableDescriptor(FileSystem fs, Path rootdir, HTableDescriptor htableDescriptor) throws IOException { return createTableDescriptor(fs, rootdir, htableDescriptor, false); } /** * Create new HTableDescriptor in HDFS. Happens when we are creating table. If * forceCreation is true then even if previous table descriptor is present it * will be overwritten * * @param fs * @param htableDescriptor * @param rootdir * @param forceCreation * @return True if we successfully created file. */ public static boolean createTableDescriptor(FileSystem fs, Path rootdir, HTableDescriptor htableDescriptor, boolean forceCreation) throws IOException { FileStatus status = getTableInfoPath(fs, rootdir, htableDescriptor.getNameAsString()); if (status != null) { LOG.info("Current tableInfoPath = " + status.getPath()); if (!forceCreation) { if (fs.exists(status.getPath()) && status.getLen() > 0) { LOG.info("TableInfo already exists.. Skipping creation"); return false; } } } Path p = writeTableDescriptor(fs, htableDescriptor, FSUtils.getTablePath(rootdir, htableDescriptor.getNameAsString()), status); return p != null; } }