/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.vertica;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;

public class VerticaUtil {
  private static final Log LOG = LogFactory.getLog(VerticaUtil.class);

  /**
   * Returns the Vertica server version encoded as major * 100 + minor
   * (for example, version 3.5 is reported as 305).
   */
  public static int verticaVersion(Configuration conf, boolean output)
      throws IOException {
    int ver = -1;
    try {
      VerticaConfiguration vtconfig = new VerticaConfiguration(conf);
      Connection conn = vtconfig.getConnection(output);
      DatabaseMetaData dbmd = conn.getMetaData();
      ver = dbmd.getDatabaseMajorVersion() * 100;
      ver += dbmd.getDatabaseMinorVersion();
    } catch (ClassNotFoundException e) {
      throw new IOException(
          "Vertica Driver required to use Vertica Input or Output Formatters", e);
    } catch (SQLException e) {
      throw new IOException(e);
    }
    return ver;
  }

  /**
   * Validates the output configuration and prepares the target table:
   * truncates it (Vertica 3.5+), drops and recreates it (older versions),
   * or creates it from the configured table definition.
   */
  public static void checkOutputSpecs(Configuration conf) throws IOException {
    VerticaConfiguration vtconfig = new VerticaConfiguration(conf);

    String writerTable = vtconfig.getOutputTableName();
    if (writerTable == null)
      throw new IOException("Vertica output requires a table name defined by "
          + VerticaConfiguration.OUTPUT_TABLE_NAME_PROP);

    String[] def = vtconfig.getOutputTableDef();
    boolean dropTable = vtconfig.getDropTable();

    // Split an optional "schema.table" qualifier.
    String schema = null;
    String table = null;
    String[] schemaTable = writerTable.split("\\.");
    if (schemaTable.length == 2) {
      schema = schemaTable[0];
      table = schemaTable[1];
    } else
      table = schemaTable[0];

    Statement stmt = null;
    try {
      Connection conn = vtconfig.getConnection(true);
      DatabaseMetaData dbmd = conn.getMetaData();
      ResultSet rs = dbmd.getTables(null, schema, table, null);
      boolean tableExists = rs.next();

      stmt = conn.createStatement();

      if (tableExists && dropTable) {
        if (verticaVersion(conf, true) >= 305) {
          // Vertica 3.5 and later support TRUNCATE TABLE.
          stmt.execute("TRUNCATE TABLE " + writerTable);
        } else {
          // For versions earlier than 3.5, drop the table if it exists.
          // If def is empty, grab the columns first to redefine the table.
          if (def == null) {
            rs = dbmd.getColumns(null, schema, table, null);
            ArrayList<String> defs = new ArrayList<String>();
            while (rs.next())
              defs.add(rs.getString(4) + " " + rs.getString(5));
            def = defs.toArray(new String[0]);
          }
          stmt.execute("DROP TABLE " + writerTable + " CASCADE");
          tableExists = false; // force create
        }
      }

      // Create the table if it doesn't exist.
      if (!tableExists) {
        if (def == null)
          throw new RuntimeException("Table " + writerTable
              + " does not exist and no table definition provided");
        if (schema != null) {
          rs = dbmd.getSchemas(null, schema);
          if (!rs.next())
            stmt.execute("CREATE SCHEMA " + schema);
        }
        StringBuilder tabledef = new StringBuilder("CREATE TABLE ")
            .append(writerTable).append(" (");
        for (String column : def)
          tabledef.append(column).append(",");
        tabledef.replace(tabledef.length() - 1, tabledef.length(), ")");
        stmt.execute(tabledef.toString());
        // TODO: create segmented projections
        stmt.execute("select implement_temp_design('" + writerTable + "')");
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    } finally {
      if (stmt != null)
        try {
          stmt.close();
        } catch (SQLException e) {
          throw new RuntimeException(e);
        }
    }
  }

  // TODO: catch when params required but missing
  // TODO: better error message when count query is bad
  /**
   * Builds the input splits for a job. Creates one split per parameter set
   * when a parameter query or explicit parameters are configured; otherwise
   * divides the input query into numSplits LIMIT/OFFSET ranges.
   */
  public static List<InputSplit> getSplits(JobContext context)
      throws IOException {
    Configuration conf = context.getConfiguration();
    int numSplits = conf.getInt("mapreduce.job.maps", 1);
    LOG.debug("creating splits up to " + numSplits);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    long start = 0;
    long end = 0;
    boolean limitOffset = true;

    // This is the fancy part of mapping inputs. Here's how we figure out
    // splits: get the params query or the params.
    VerticaConfiguration config = new VerticaConfiguration(conf);
    String inputQuery = config.getInputQuery();
    if (inputQuery == null)
      throw new IOException("Vertica input requires query defined by "
          + VerticaConfiguration.QUERY_PROP);

    String paramsQuery = config.getParamsQuery();
    Collection<List<Object>> params = config.getInputParameters();

    // TODO: limit needs order by unique key
    // TODO: what if there are more parameters than numSplits?

    // Prep a count(*) wrapper query, used when splitting by limit and offset.
    String countQuery = "SELECT COUNT(*) FROM (\n" + inputQuery + "\n) count";

    if (paramsQuery != null) {
      // One split per row returned by the parameter query; each row's columns
      // become the bind parameters for that split's input query.
      LOG.debug("creating splits using paramsQuery: " + paramsQuery);
      Connection conn = null;
      Statement stmt = null;
      try {
        conn = config.getConnection(false);
        stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(paramsQuery);
        ResultSetMetaData rsmd = rs.getMetaData();
        while (rs.next()) {
          limitOffset = false;
          List<Object> segmentParams = new ArrayList<Object>();
          for (int j = 1; j <= rsmd.getColumnCount(); j++) {
            segmentParams.add(rs.getObject(j));
          }
          splits.add(new VerticaInputSplit(inputQuery, segmentParams, start, end));
        }
      } catch (Exception e) {
        throw new IOException(e);
      } finally {
        try {
          if (stmt != null)
            stmt.close();
        } catch (SQLException e) {
          throw new IOException(e);
        }
      }
    } else if (params != null && params.size() > 0) {
      // One split per explicitly configured parameter set.
      LOG.debug("creating splits using " + params.size() + " params");
      limitOffset = false;
      for (List<Object> segmentParams : params) {
        // If numSplits exceeds the number of params, each segment could be
        // further divided with limits and offsets.
        // TODO: write code to generate the start/end pairs for each group
        splits.add(new VerticaInputSplit(inputQuery, segmentParams, start, end));
      }
    }

    if (limitOffset) {
      // No parameters configured: count the rows, then carve the query into
      // numSplits roughly equal LIMIT/OFFSET ranges.
      LOG.debug("creating splits using limit and offset");
      Connection conn = null;
      Statement stmt = null;
      long count = 0;
      try {
        conn = config.getConnection(false);
        stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(countQuery);
        rs.next();
        count = rs.getLong(1);
      } catch (Exception e) {
        throw new IOException(e);
      } finally {
        try {
          if (stmt != null)
            stmt.close();
        } catch (SQLException e) {
          throw new IOException(e);
        }
      }

      long splitSize = count / numSplits;
      end = splitSize;

      LOG.debug("creating " + numSplits + " splits for " + count + " records");

      for (int i = 0; i < numSplits; i++) {
        // Extend the last split to the full count so records left over by the
        // integer division are not dropped.
        if (i == numSplits - 1)
          end = count;
        splits.add(new VerticaInputSplit(inputQuery, null, start, end));
        start += splitSize;
        end += splitSize;
      }
    }

    LOG.debug("returning " + splits.size() + " final splits");
    return splits;
  }
}