MySQLManager.java example

Explorer
sqoop-master
- sqoop-trunk
  - src
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.sqoop.manager;

import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.avro.Schema.Type;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.util.ImportException;
import com.cloudera.sqoop.util.ExportException;
import com.cloudera.sqoop.mapreduce.JdbcUpsertExportJob;
import org.apache.sqoop.mapreduce.mysql.MySQLUpsertOutputFormat;
import org.apache.sqoop.util.LoggingUtils;

/**
 * Manages connections to MySQL databases.
 */
public class MySQLManager
    extends com.cloudera.sqoop.manager.InformationSchemaManager {

  public static final Log LOG = LogFactory.getLog(MySQLManager.class.getName());

  // driver class to ensure is loaded when making db connection.
  private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";

  // set to true after we warn the user that we can use direct fastpath.
  private static boolean warningPrinted = false;

  private static final String EXPORT_OPERATION = "export";

  public MySQLManager(final SqoopOptions opts) {
    super(DRIVER_CLASS, opts);
  }

  @Override
  protected void initOptionDefaults() {
    if (options.getFetchSize() == null) {
      LOG.info("Preparing to use a MySQL streaming resultset.");
        String operation = options.getToolName();
      if (StringUtils.isNotBlank(operation) && operation.equalsIgnoreCase(EXPORT_OPERATION)) {
          // Set fetch size to zero for export operation, see SQOOP-2787 for more details. Setting fetchsize to 0
          // would not overwhelm the JVM as the queries during export are primarily metadata queries
          options.setFetchSize(0);
      } else {
          options.setFetchSize(Integer.MIN_VALUE);
      }
    } else if (
        !options.getFetchSize().equals(Integer.MIN_VALUE)
        && !options.getFetchSize().equals(0)) {
      LOG.info("Argument '--fetch-size " + options.getFetchSize()
          + "' will probably get ignored by MySQL JDBC driver.");
      // see also
      // http://dev.mysql.com/doc/refman/5.5/en
      //                       /connector-j-reference-implementation-notes.html
    }
  }

  @Override
  protected String getPrimaryKeyQuery(String tableName) {
    return "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS "
         + "WHERE TABLE_SCHEMA = (" + getSchemaQuery() + ") "
         + "AND TABLE_NAME = '"+tableName+"' "
         + "AND COLUMN_KEY = 'PRI'";
  }

  @Override
  protected String getColNamesQuery(String tableName) {
    // Use mysql-specific hints and LIMIT to return fast
    return "SELECT t.* FROM " + escapeTableName(tableName) + " AS t LIMIT 1";
  }

  @Override
  public void importTable(com.cloudera.sqoop.manager.ImportJobContext context)
      throws IOException, ImportException {

    // Check that we're not doing a MapReduce from localhost. If we are, point
    // out that we could use mysqldump.
    if (!MySQLManager.warningPrinted) {
      String connectString = context.getOptions().getConnectString();

      if (null != connectString) {
        // DirectMySQLManager will probably be faster.
        LOG.warn("It looks like you are importing from mysql.");
        LOG.warn("This transfer can be faster! Use the --direct");
        LOG.warn("option to exercise a MySQL-specific fast path.");

        MySQLManager.markWarningPrinted(); // don't display this twice.
      }
    }

    checkDateTimeBehavior(context);

    // Then run the normal importTable() method.
    super.importTable(context);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void upsertTable(com.cloudera.sqoop.manager.ExportJobContext context)
      throws IOException, ExportException {
    context.setConnManager(this);
    LOG.warn("MySQL Connector upsert functionality is using INSERT ON");
    LOG.warn("DUPLICATE KEY UPDATE clause that relies on table's unique key.");
    LOG.warn("Insert/update distinction is therefore independent on column");
    LOG.warn("names specified in --update-key parameter. Please see MySQL");
    LOG.warn("documentation for additional limitations.");

    JdbcUpsertExportJob exportJob =
      new JdbcUpsertExportJob(context, MySQLUpsertOutputFormat.class);
    exportJob.runExport();
  }

  @Override
  /**
   * {@inheritDoc}
   */
  public void configureDbOutputColumns(SqoopOptions options) {
    // In case that we're running upsert, we do not want to change column order
    // as we're actually going to use INSERT INTO ... ON DUPLICATE KEY UPDATE
    // clause.
    if (options.getUpdateMode() == SqoopOptions.UpdateMode.AllowInsert) {
      return;
    }

    super.configureDbOutputColumns(options);
  }

  /**
   * Set a flag to prevent printing the --direct warning twice.
   */
  protected static void markWarningPrinted() {
    MySQLManager.warningPrinted = true;
  }

  /**
   * MySQL allows TIMESTAMP fields to have the value '0000-00-00 00:00:00',
   * which causes errors in import. If the user has not set the
   * zeroDateTimeBehavior property already, we set it for them to coerce
   * the type to null.
   */
  private void checkDateTimeBehavior(ImportJobContext context) {
    final String ZERO_BEHAVIOR_STR = "zeroDateTimeBehavior";
    final String CONVERT_TO_NULL = "=convertToNull";

    String connectStr = context.getOptions().getConnectString();
    if (connectStr.indexOf("jdbc:") != 0) {
      // This connect string doesn't have the prefix we expect.
      // We can't parse the rest of it here.
      return;
    }

    // This starts with 'jdbc:mysql://' ... let's remove the 'jdbc:'
    // prefix so that java.net.URI can parse the rest of the line.
    String uriComponent = connectStr.substring(5);
    try {
      URI uri = new URI(uriComponent);
      String query = uri.getQuery(); // get the part after a '?'

      // If they haven't set the zeroBehavior option, set it to
      // squash-null for them.
      if (null == query) {
        connectStr = connectStr + "?" + ZERO_BEHAVIOR_STR + CONVERT_TO_NULL;
        LOG.info("Setting zero DATETIME behavior to convertToNull (mysql)");
      } else if (query.length() == 0) {
        connectStr = connectStr + ZERO_BEHAVIOR_STR + CONVERT_TO_NULL;
        LOG.info("Setting zero DATETIME behavior to convertToNull (mysql)");
      } else if (query.indexOf(ZERO_BEHAVIOR_STR) == -1) {
        if (!connectStr.endsWith("&")) {
          connectStr = connectStr + "&";
        }
        connectStr = connectStr + ZERO_BEHAVIOR_STR + CONVERT_TO_NULL;
        LOG.info("Setting zero DATETIME behavior to convertToNull (mysql)");
      }

      LOG.debug("Rewriting connect string to " + connectStr);
      context.getOptions().setConnectString(connectStr);
    } catch (URISyntaxException use) {
      // Just ignore this. If we can't parse the URI, don't attempt
      // to add any extra flags to it.
      LOG.debug("mysql: Couldn't parse connect str in checkDateTimeBehavior: "
          + use);
    }
  }

  @Override
  public void execAndPrint(String s) {
    // Override default execAndPrint() with a special version that forces
    // use of fully-buffered ResultSets (MySQLManager uses streaming ResultSets
    // in the default execute() method; but the execAndPrint() method needs to
    // issue overlapped queries for metadata.)

    ResultSet results = null;
    try {
      // Explicitly setting fetchSize to zero disables streaming.
      results = super.execute(s, 0);
    } catch (SQLException sqlE) {
      LoggingUtils.logAll(LOG, "Error executing statement: ", sqlE);
      release();
      return;
    }

    PrintWriter pw = new PrintWriter(System.out, true);
    try {
      formatAndPrintResultSet(results, pw);
    } finally {
      pw.close();
    }
  }

  /**
   * When using a column name in a generated SQL query, how (if at all)
   * should we escape that column name? e.g., a column named "table"
   * may need to be quoted with backtiks: "`table`".
   *
   * @param colName the column name as provided by the user, etc.
   * @return how the column name should be rendered in the sql text.
   */
  public String escapeColName(String colName) {
    if (null == colName) {
      return null;
    }
    return "`" + colName + "`";
  }

  /**
   * When using a table name in a generated SQL query, how (if at all)
   * should we escape that column name? e.g., a table named "table"
   * may need to be quoted with backtiks: "`table`".
   *
   * @param tableName the table name as provided by the user, etc.
   * @return how the table name should be rendered in the sql text.
   */
  public String escapeTableName(String tableName) {
    if (null == tableName) {
      return null;
    }
    return "`" + tableName + "`";
  }

  @Override
  public boolean supportsStagingForExport() {
    return true;
  }

  @Override
  public String[] getColumnNamesForProcedure(String procedureName) {
    List<String> ret = new ArrayList<String>();
    try {
      DatabaseMetaData metaData = this.getConnection().getMetaData();
      ResultSet results = metaData.getProcedureColumns(null, null,
        procedureName, null);
      if (null == results) {
        LOG.debug("Get Procedure Columns returns null");
        return null;
      }

      try {
        while (results.next()) {
          if (results.getInt("COLUMN_TYPE")
          != DatabaseMetaData.procedureColumnReturn) {
            String name = results.getString("COLUMN_NAME");
            ret.add(name);
          }
        }
        String[] result = ret.toArray(new String[ret.size()]);
        LOG.debug("getColumnsNamesForProcedure returns "
          + StringUtils.join(ret, ","));
        return result;
      } finally {
        results.close();
        getConnection().commit();
      }
    } catch (SQLException e) {
      LoggingUtils.logAll(LOG, "Error reading procedure metadata: ", e);
      throw new RuntimeException("Can't fetch column names for procedure.", e);
    }
  }

  @Override
  public Map<String, Integer> getColumnTypesForProcedure(String procedureName) {
    Map<String, Integer> ret = new TreeMap<String, Integer>();
    try {
      DatabaseMetaData metaData = this.getConnection().getMetaData();
      ResultSet results = metaData.getProcedureColumns(null, null,
        procedureName, null);
      if (null == results) {
        LOG.debug("getColumnTypesForProcedure returns null");
        return null;
      }

      try {
        while (results.next()) {
          if (results.getInt("COLUMN_TYPE")
          != DatabaseMetaData.procedureColumnReturn) {
            // we don't care if we get several rows for the
            // same ORDINAL_POSITION (e.g. like H2 gives us)
            // as we'll just overwrite the entry in the map:
            ret.put(
              results.getString("COLUMN_NAME"),
              results.getInt("DATA_TYPE"));
          }
        }

        LOG.debug("Columns returned = " + StringUtils.join(ret.keySet(), ","));
        LOG.debug("Types returned = " + StringUtils.join(ret.values(), ","));

        return ret.isEmpty() ? null : ret;
      } finally {
        if (results != null) {
          results.close();
        }
        getConnection().commit();
      }
    } catch (SQLException sqlException) {
      LoggingUtils.logAll(LOG, "Error reading primary key metadata: "
        + sqlException.toString(), sqlException);
      return null;
    }
  }

  @Override
  public Map<String, String>
    getColumnTypeNamesForProcedure(String procedureName) {
    Map<String, String> ret = new TreeMap<String, String>();
    try {
      DatabaseMetaData metaData = this.getConnection().getMetaData();
      ResultSet results = metaData.getProcedureColumns(null, null,
        procedureName, null);
      if (null == results) {
        LOG.debug("getColumnTypesForProcedure returns null");
        return null;
      }

      try {
        while (results.next()) {
          if (results.getInt("COLUMN_TYPE")
          != DatabaseMetaData.procedureColumnReturn) {
            // we don't care if we get several rows for the
            // same ORDINAL_POSITION (e.g. like H2 gives us)
            // as we'll just overwrite the entry in the map:
            ret.put(
              results.getString("COLUMN_NAME"),
              results.getString("TYPE_NAME"));
          }
        }

        LOG.debug("Columns returned = " + StringUtils.join(ret.keySet(), ","));
        LOG.debug(
          "Type names returned = " + StringUtils.join(ret.values(), ","));

        return ret.isEmpty() ? null : ret;
      } finally {
        if (results != null) {
          results.close();
        }
        getConnection().commit();
      }
    } catch (SQLException sqlException) {
      LoggingUtils.logAll(LOG, "Error reading primary key metadata: "
        + sqlException.toString(), sqlException);
      return null;
    }
  }

  @Override
  protected String getListDatabasesQuery() {
    return "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA";
  }

  @Override
  protected String getSchemaQuery() {
    return "SELECT SCHEMA()";
  }

  private Map<String, String> colTypeNames;
  private static final int YEAR_TYPE_OVERWRITE = Types.SMALLINT;

  private int overrideSqlType(String tableName, String columnName,
      int sqlType) {

    if (colTypeNames == null) {
      colTypeNames = getColumnTypeNames(tableName, options.getCall(),
        options.getSqlQuery());
    }

    if ("YEAR".equalsIgnoreCase(colTypeNames.get(columnName))) {
      sqlType = YEAR_TYPE_OVERWRITE;
    }
    return sqlType;
  }

  @Override
  public String toJavaType(String tableName, String columnName, int sqlType) {
    sqlType = overrideSqlType(tableName, columnName, sqlType);
    return super.toJavaType(tableName, columnName, sqlType);
  }

  @Override
  public String toHiveType(String tableName, String columnName, int sqlType) {
    sqlType = overrideSqlType(tableName, columnName, sqlType);
    return super.toHiveType(tableName, columnName, sqlType);
  }

  @Override
  public Type toAvroType(String tableName, String columnName, int sqlType) {
    sqlType = overrideSqlType(tableName, columnName, sqlType);
    return super.toAvroType(tableName, columnName, sqlType);
  }
}