/* Copyright (c) 2005 - 2012 Vertica, an HP company -*- Java -*- */ package com.vertica.hadoop.deprecated; import java.io.ByteArrayInputStream; import java.io.IOException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.DatabaseMetaData; import java.sql.ResultSet; import java.text.ParseException; import java.util.Vector; import java.util.Map; import java.util.HashMap; import java.util.StringTokenizer; import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.RecordWriter; import org.apache.hadoop.mapred.Reporter; import com.vertica.hadoop.VerticaRecord; import com.vertica.hadoop.Relation; import com.vertica.hadoop.VerticaConfiguration; public class VerticaStreamingRecordWriter implements RecordWriter<Text, Text> { private static final Log LOG = LogFactory.getLog("com.vertica.hadoop"); Relation vTable = null; Connection connection = null; PreparedStatement statement = null; long batchSize = 0; long numRecords = 0; String delimiter = VerticaConfiguration.DELIMITER; String terminator = VerticaConfiguration.RECORD_TERMINATOR; VerticaRecord record = null; public VerticaStreamingRecordWriter(Connection conn, VerticaConfiguration vtconfig) throws IOException, SQLException, ClassNotFoundException { connection = conn; batchSize = vtconfig.getBatchSize(); delimiter = Pattern.quote(vtconfig.getOutputDelimiter()); terminator = vtconfig.getOutputRecordTerminator(); vTable = new Relation(vtconfig.getOutputTableName()); StringBuilder sb = new StringBuilder(); sb.append("INSERT INTO "); sb.append(vTable.getQualifiedName()); StringBuilder values = new StringBuilder(); values.append(" VALUES("); sb.append("("); String metaStmt = "select ordinal_position, column_name, data_type, is_identity, data_type_name " + "from v_catalog.odbc_columns " + "where schema_name = ? and table_name = ? " + "order by ordinal_position;"; PreparedStatement stmt = conn.prepareStatement(metaStmt); stmt.setString(1, vTable.getSchema()); stmt.setString(2, vTable.getTable()); ResultSet rs = stmt.executeQuery(); boolean addComma = false; while (rs.next()) { if (!rs.getBoolean(4)) { if (addComma) { sb.append(','); values.append(','); } sb.append(rs.getString(2)); values.append('?'); addComma = true; } else { LOG.debug("Skipping identity column " + rs.getString(4)); } } sb.append(')'); values.append(')'); sb.append(values.toString()); statement = conn.prepareStatement(sb.toString()); record = new VerticaRecord(vtconfig.getConfiguration()); } @Override public void close(Reporter arg0) throws IOException { try { statement.executeBatch(); connection.close(); } catch (Exception e) { throw new IOException(e); } } @Override public void write(Text table, Text row) throws IOException { try { String line = row.toString(); line = line.replaceAll(terminator + "$", ""); if (line.length() == 0) return; String [] tokens = line.split(delimiter); for(int count = 0; count < tokens.length; count++) { record.setFromString(count, tokens[count]); } record.write(statement); numRecords++; if (numRecords % batchSize == 0) { statement.executeBatch(); } } catch (SQLException e) { e.printStackTrace(); throw new IOException(e); } catch (ParseException e) { e.printStackTrace(); throw new IOException(e); } } }