// MetaDataPrettyFormatUtils.java example
//
// Explorer
// hive-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.metadata.formatting;

import java.util.List;
import java.util.StringTokenizer;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

/**
 * This class provides methods to format the output of DESCRIBE PRETTY
 * in a human-readable way.
 */
public final class MetaDataPrettyFormatUtils {

  /**
   * Upper bound on the number of padding spaces appended after a formatted
   * column (see {@link #appendFormattedColumn}).
   */
  public static final int PRETTY_MAX_INTERCOL_SPACING = 4;
  /** Alignment width used when formatting the column type field. */
  private static final int PRETTY_ALIGNMENT = 10;
  /**
   * Minimum length of the comment column. This is relevant only when the terminal width
   * or hive.cli.pretty.output.num.cols is too small, or when there are very large column
   * names.
   * 10 was arbitrarily chosen.
   */
  private static final int MIN_COMMENT_COLUMN_LEN = 10;
  /**
   * Regular expression used to split a wrapped comment into physical lines.
   * The two-character "\r\n" alternative must come first: alternation is tried
   * left to right, so listing "\r" before it would split every CRLF twice and
   * produce an empty segment in between.
   */
  private static final String LINE_BREAK_REGEX = "\r\n|\n|\r";

  private MetaDataPrettyFormatUtils() {
  }

  /**
   * Builds the DESCRIBE PRETTY text for the given columns, followed by a
   * "# Partition Information" section when partition columns are supplied.
   *
   * @param cols The columns to describe. A null or empty list produces only
   * the header.
   * @param partCols The partition columns; may be null or empty, in which case
   * the partition section is omitted.
   * @param prettyOutputNumCols The pretty output is formatted to fit within
   * these many columns.
   * @return The formatted column information.
   */
  public static String getAllColumnsInformation(List<FieldSchema> cols,
      List<FieldSchema> partCols, int prettyOutputNumCols) {
    StringBuilder columnInformation = new StringBuilder(
        MetaDataFormatUtils.DEFAULT_STRINGBUILDER_SIZE);
    int maxColNameLen = findMaxColumnNameLen(cols);
    formatColumnsHeaderPretty(columnInformation, maxColNameLen, prettyOutputNumCols);
    formatAllFieldsPretty(columnInformation, cols, maxColNameLen, prettyOutputNumCols);

    if ((partCols != null) && (!partCols.isEmpty())) {
      columnInformation.append(MetaDataFormatUtils.LINE_DELIM)
                        .append("# Partition Information")
                        .append(MetaDataFormatUtils.LINE_DELIM);
      formatColumnsHeaderPretty(columnInformation, maxColNameLen, prettyOutputNumCols);
      formatAllFieldsPretty(columnInformation, partCols, maxColNameLen, prettyOutputNumCols);
    }

    return columnInformation.toString();
  }

  /**
   * Find the length of the largest column name.
   *
   * @return The largest name length, or 0 when cols is null or empty. The
   * result is never negative: callers add 1 to it to obtain a format width,
   * and a width of 0 makes String.format throw.
   */
  private static int findMaxColumnNameLen(List<FieldSchema> cols) {
    int maxLen = 0;
    if (cols == null) {
      return maxLen;
    }
    for (FieldSchema col : cols) {
      int colNameLen = col.getName().length();
      if (colNameLen > maxLen) {
        maxLen = colNameLen;
      }
    }
    return maxLen;
  }

  /**
   * Appends the header row (as returned by MetaDataFormatUtils.getColumnsHeader)
   * followed by a separator line made of two field delimiters.
   *
   * @param maxColNameLen The length of the largest column name
   */
  private static void formatColumnsHeaderPretty(StringBuilder columnInformation,
      int maxColNameLen, int prettyOutputNumCols) {
    String[] columnHeaders = MetaDataFormatUtils.getColumnsHeader(null);
    formatOutputPretty(columnHeaders[0], columnHeaders[1], columnHeaders[2],
                        columnInformation, maxColNameLen, prettyOutputNumCols);
    columnInformation.append(MetaDataFormatUtils.FIELD_DELIM)
        .append(MetaDataFormatUtils.FIELD_DELIM).append(MetaDataFormatUtils.LINE_DELIM);
  }

  /**
   * Appends one formatted row (name, type, comment) per column to tableInfo.
   * A null column list is treated as empty.
   */
  private static void formatAllFieldsPretty(StringBuilder tableInfo,
      List<FieldSchema> cols, int maxColNameLen, int prettyOutputNumCols) {
    if (cols == null) {
      return;
    }
    for (FieldSchema col : cols) {
      formatOutputPretty(col.getName(), col.getType(),
          MetaDataFormatUtils.getComment(col), tableInfo, maxColNameLen,
          prettyOutputNumCols);
    }
  }

  /**
   * If the specified comment is too long, add line breaks at appropriate
   * locations.  Note that the comment may already include line-breaks
   * specified by the user at table creation time.
   * @param columnsAlreadyConsumed The number of columns on the current line
   * that have already been consumed by the column name, column type
   * and the surrounding delimiters.
   * @param prettyOutputNumCols The total number of columns available; -1
   * selects a fixed default of 79.
   * @return The comment with line breaks added at appropriate locations.
   */
  private static String breakCommentIntoMultipleLines(String comment,
      int columnsAlreadyConsumed, int prettyOutputNumCols) {

    if (prettyOutputNumCols == -1) {
      // XXX fixed to 80 to remove jline dep
      prettyOutputNumCols = 80 - 1;
    }

    // Never let the comment column shrink below the minimum, even if that
    // makes the line wider than requested.
    int commentNumCols = prettyOutputNumCols - columnsAlreadyConsumed;
    if (commentNumCols < MIN_COMMENT_COLUMN_LEN) {
      commentNumCols = MIN_COMMENT_COLUMN_LEN;
    }

    // Track the number of columns allocated for the comment that have
    // already been consumed on the current line.
    int commentNumColsConsumed = 0;

    // Delimiters are returned as (single-character) tokens so that the
    // original whitespace and user-supplied line breaks are preserved.
    StringTokenizer st = new StringTokenizer(comment, " \t\n\r\f", true);
    // We use a StringTokenizer instead of a BreakIterator, because
    // table comments often contain text that looks like code. For eg:
    // 'Type0' => 0, // This is Type 0
    // 'Type1' => 1, // This is Type 1
    // BreakIterator is meant for regular text, and was found to give
    // bad line breaks when we tried it out.

    StringBuilder commentBuilder = new StringBuilder(comment.length());
    while (st.hasMoreTokens()) {
      String currWord = st.nextToken();
      if (currWord.equals("\n") || currWord.equals("\r") || currWord.equals("\f")) {
        // A break already present in the comment starts a fresh line.
        commentBuilder.append(currWord);
        commentNumColsConsumed = 0;
        continue;
      }
      if (commentNumColsConsumed + currWord.length() > commentNumCols) {
        // currWord won't fit on the current line
        if (currWord.length() > commentNumCols) {
          // currWord is too long to fit on a line even all by itself.
          // Hence we have no option but to split it.  The first chunk
          // will go to the end of the current line.  Subsequent chunks
          // will be of length commentNumCols.  The last chunk
          // may be smaller.
          while (currWord.length() > commentNumCols) {
            int remainingLineLen = commentNumCols - commentNumColsConsumed;
            String wordChunk = currWord.substring(0, remainingLineLen);
            commentBuilder.append(wordChunk);
            commentBuilder.append(MetaDataFormatUtils.LINE_DELIM);
            commentNumColsConsumed = 0;
            currWord = currWord.substring(remainingLineLen);
          }
          // Handle the last chunk
          if (currWord.length() > 0) {
            commentBuilder.append(currWord);
            commentNumColsConsumed = currWord.length();
          }
        } else {
          // Start on a new line
          commentBuilder.append(MetaDataFormatUtils.LINE_DELIM);
          if (!currWord.equals(" ")) {
            // When starting a new line, do not start with a space.
            commentBuilder.append(currWord);
            commentNumColsConsumed = currWord.length();
          } else {
            commentNumColsConsumed = 0;
          }
        }
      } else {
        commentBuilder.append(currWord);
        commentNumColsConsumed += currWord.length();
      }
    }
    return commentBuilder.toString();
  }

  /**
   * Appends the specified text with alignment to sb.
   * Also appends an appropriately sized delimiter, followed by a field
   * delimiter.
   * @param alignment The minimum width the text is left-justified to; must be
   * at least 1.
   * @return The number of columns consumed by the aligned string and the
   * space delimiter (the trailing field delimiter is not counted).
   */
  private static int appendFormattedColumn(StringBuilder sb, String text,
      int alignment) {
    String paddedText = String.format("%-" + alignment + "s", text);
    int delimCount = 0;
    if (paddedText.length() < alignment + PRETTY_MAX_INTERCOL_SPACING) {
      // Pad up to the full column width (alignment + spacing).
      delimCount = (alignment + PRETTY_MAX_INTERCOL_SPACING)
                      - paddedText.length();
    } else {
      // The text overflows the column; still keep it apart from the next one.
      delimCount = PRETTY_MAX_INTERCOL_SPACING;
    }
    String delim = StringUtils.repeat(" ", delimCount);
    sb.append(paddedText);
    sb.append(delim);
    sb.append(MetaDataFormatUtils.FIELD_DELIM);

    return paddedText.length() + delim.length();
  }

  /**
   * Appends one output row made of the column name, the column type and the
   * (possibly multi-line) comment to tableInfo.
   *
   * @param maxColNameLength The length of the largest column name; the name
   * field is aligned to this length plus one.
   * @param prettyOutputNumCols The number of columns the row should fit in.
   */
  private static void formatOutputPretty(String colName, String colType,
      String colComment, StringBuilder tableInfo, int maxColNameLength,
      int prettyOutputNumCols) {
    int colsNameConsumed = appendFormattedColumn(tableInfo, colName, maxColNameLength + 1);
    int colsTypeConsumed = appendFormattedColumn(tableInfo, colType, PRETTY_ALIGNMENT);

    colComment = breakCommentIntoMultipleLines(colComment, colsNameConsumed + colsTypeConsumed,
        prettyOutputNumCols);

    /* Comment indent processing for multi-line comments.
     * Every line of a comment should start at the same position. The first
     * line follows the name and type fields; each following line is written
     * as a new row, so it is prefixed with colsNameConsumed spaces in place
     * of the name, colsTypeConsumed spaces in place of the type, and the two
     * FIELD_DELIMs that separate the three fields.
     */
    String[] commentSegments = colComment.split(LINE_BREAK_REGEX);
    // String.split drops trailing empty strings, so a comment consisting
    // solely of line breaks yields an empty array; treat it as an empty
    // first line instead of indexing past the end.
    String firstSegment = commentSegments.length > 0 ? commentSegments[0] : "";
    tableInfo.append(trimTrailingWS(firstSegment));
    tableInfo.append(MetaDataFormatUtils.LINE_DELIM);

    // The indentation is the same for every continuation line.
    String continuationFormat = "%" + colsNameConsumed + "s" + MetaDataFormatUtils.FIELD_DELIM
        + "%" + colsTypeConsumed + "s" + MetaDataFormatUtils.FIELD_DELIM + "%s";
    for (int i = 1; i < commentSegments.length; i++) {
      tableInfo.append(String.format(continuationFormat, "", "", commentSegments[i]));
      tableInfo.append(MetaDataFormatUtils.LINE_DELIM);
    }
  }

  /**
   * Removes trailing whitespace from str.
   */
  private static String trimTrailingWS(String str) {
    return str.replaceAll("\\s+$", "");
  }
}