package com.linkedin.databus.bootstrap.producer;
/*
*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
import java.nio.ByteBuffer;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.avro.Schema;
import org.apache.log4j.Logger;
import com.linkedin.databus.bootstrap.api.BootstrapProducerStatus;
import com.linkedin.databus.bootstrap.common.BootstrapConn;
import com.linkedin.databus.bootstrap.common.BootstrapDBMetaDataDAO;
import com.linkedin.databus.bootstrap.common.BootstrapProducerStatsCollector;
import com.linkedin.databus.bootstrap.common.BootstrapReadOnlyConfig;
import com.linkedin.databus.bootstrap.common.SourceInfo;
import com.linkedin.databus.client.consumer.AbstractDatabusStreamConsumer;
import com.linkedin.databus.client.pub.ConsumerCallbackResult;
import com.linkedin.databus.client.pub.DbusEventDecoder;
import com.linkedin.databus.client.pub.SCN;
import com.linkedin.databus.core.DbusEvent;
import com.linkedin.databus.core.DbusEventInternalWritable;
import com.linkedin.databus.core.DbusPrettyLogUtils;
import com.linkedin.databus.core.ScnNotFoundException;
import com.linkedin.databus.core.util.RateMonitor;
import com.linkedin.databus.core.util.StringUtils;
import com.linkedin.databus2.core.BackoffTimer;
import com.linkedin.databus2.core.DatabusException;
import com.linkedin.databus2.util.DBHelper;
public class BootstrapProducerCallback extends AbstractDatabusStreamConsumer
{
public static final String MODULE = BootstrapProducerCallback.class.getName();
public static final Logger LOG = Logger.getLogger(MODULE);
// DAO wrapping the bootstrap DB connection; created lazily by getConnection().
private BootstrapDBMetaDataDAO _bootstrapDao = null;
// Insert statement for the current source's log table; prepared in
// onStartSource() and closed in onEndSource() / reset().
private PreparedStatement _stmt = null;
// Cached update statement for bootstrap_producer_state (see getLogPositionStmt()).
private PreparedStatement _logScnStmt = null;
// Events written for the source currently being processed.
private int _numEvents = 0;
// Events written in the current window, accumulated in onEndSource().
private int _totalNumEvents = 0;
// max(endscn) from bootstrap_seeder_state, loaded by initWindowScn().
private long _seedCatchupScn = -1;
// Producer status shared by all tracked sources (a BootstrapProducerStatus value).
private int _state = BootstrapProducerStatus.ACTIVE;
// SCN of the last completed window; -1 forces a reload in onStartDataEventSequence().
private long _oldWindowScn = -1;
// SCN of the most recent data event accepted by onDataEvent().
private long _newWindowScn = -1;
// max(scn) of log at producer startup
private long _producerStartScn = -1;
// Source name -> SourceInfo for the sources this producer writes.
private Map<String, SourceInfo> _trackedSources = null;
// Source id -> source name, rebuilt by init().
private Map<Integer, String> _trackedSrcIdsToNames = null;
private BootstrapReadOnlyConfig _config = null;
// Name of the source passed to the latest onStartSource() call.
private String _currentSource = null;
// Backoff timer used by reset() while re-establishing the DB connection.
private BackoffTimer _retryTimer = null;
// Logical source names supplied at construction; used by init().
private List<String> _logicalSources = null;
/* Stats Specific */
// May be null; every use is guarded by a null check.
private BootstrapProducerStatsCollector _statsCollector = null;
private final RateMonitor _srcRm = new RateMonitor(
"ProducerSourceRateMonitor");
private final RateMonitor _totalRm = new RateMonitor(
"ProducerTotalRateMonitor");
// Log id / row id recorded by updateProducerSourceMetaData() and reported to stats.
private int _currentLogId;
private int _currentRowId;
// Row-count threshold (from the config's bootstrap log size) at which
// updateSourcesInDB() switches a source to a new log table.
private final int _maxRowsInLog;
// Set by reset() once the retry budget is used up; makes onRollback() return ERROR_FATAL.
private boolean _errorRetriesExceeded;
// May be null: the two-argument constructor passes null for the handler.
private ErrorCaseHandler _errorHandler = null;
/**
 * Creates a producer callback without a stats collector or error handler.
 *
 * @param config         bootstrap configuration
 * @param logicalSources names of the logical sources to track
 * @throws Exception if the delegated constructor fails
 */
public BootstrapProducerCallback(BootstrapReadOnlyConfig config,
    List<String> logicalSources) throws Exception
{
  this(config, null, null, logicalSources);
}

/**
 * Creates a producer callback, opens the bootstrap DB connection and loads
 * the in-memory state for the tracked sources via {@link #init()}.
 *
 * @param config         bootstrap configuration (DB coordinates, log size, retry settings)
 * @param statsCollector optional stats collector; may be null
 * @param errorHandler   optional handler notified when connection retries run out; may be null
 * @param logicalSources names of the logical sources to track
 * @throws SQLException     if the bootstrap DB cannot be queried
 * @throws DatabusException propagated from {@link #init()}
 */
public BootstrapProducerCallback(BootstrapReadOnlyConfig config,
    BootstrapProducerStatsCollector statsCollector,
    ErrorCaseHandler errorHandler, List<String> logicalSources)
    throws SQLException, DatabusException
{
  // Collaborators handed in by the caller.
  _errorHandler = errorHandler;
  _statsCollector = statsCollector;
  _logicalSources = logicalSources;
  _config = config;

  // Settings derived from the configuration.
  _errorRetriesExceeded = false;
  _maxRowsInLog = config.getBootstrapLogSize();
  _retryTimer = new BackoffTimer("BootstrapProducer", config.getRetryConfig());

  // Open the DB connection first; init() needs the DAO it creates.
  getConnection();
  init();
}
/**
 * Loads the tracked sources from the bootstrap DB, verifies that they all
 * report the same status, adopts that status as the producer state and then
 * reloads the window SCNs.
 *
 * @throws SQLException     if a bootstrap DB query fails
 * @throws DatabusException propagated from the DAO
 * @throws RuntimeException if the tracked sources disagree on their status
 */
public void init()
    throws SQLException, DatabusException
{
  _trackedSources = _bootstrapDao.getDBTrackedSources(new HashSet<String>(_logicalSources));

  // Reverse index: source id -> source name.
  _trackedSrcIdsToNames = new HashMap<Integer, String>();
  for (Entry<String, SourceInfo> tracked : _trackedSources.entrySet())
  {
    final String srcName = tracked.getKey();
    _trackedSrcIdsToNames.put(tracked.getValue().getSrcId(), srcName);
  }

  LOG.info("maxRowsInLog=" + _maxRowsInLog);
  LOG.info("trackedSources: ");

  // The first source fixes the expected status; every later one must match it.
  int commonState = BootstrapProducerStatus.UNKNOWN;
  boolean firstSource = true;
  for (SourceInfo source : _trackedSources.values())
  {
    if (firstSource)
    {
      commonState = source.getStatus();
      firstSource = false;
    }
    else if (source.getStatus() != commonState)
    {
      String msg = "Bootstrap Source state does not seem to be consistent for all the sources that this producer listens to. "
          + " Found atleast 2 different states :"
          + commonState
          + ","
          + source.getStatus();
      LOG.error(msg);
      throw new RuntimeException(msg);
    }
    LOG.info(source.toString());
  }

  _state = commonState;
  initWindowScn();
}
/**
 * Called at the start of an event window. Starts the window-level rate
 * monitor, clears the per-window event counter and, if the window SCN is
 * not known yet (-1), reloads it from the bootstrap DB.
 *
 * @param startScn SCN at which the window starts (not used here)
 * @return SUCCESS normally; ERROR if a SQLException occurred and the
 *         connection was reset (or the reset raised a DatabusException);
 *         ERROR_FATAL if the reset itself failed with a SQLException
 */
@Override
public ConsumerCallbackResult onStartDataEventSequence(SCN startScn)
{
  // Time the whole window: onEndDataEventSequence() stops _totalRm and reports
  // its duration. The per-source monitor is started in onStartSource().
  _totalRm.start();
  _totalNumEvents = 0;
  ConsumerCallbackResult success = ConsumerCallbackResult.SUCCESS;
  try
  {
    if (_oldWindowScn == -1)
    {
      initWindowScn();
    }
  }
  catch (SQLException e)
  {
    if (null != _statsCollector)
    {
      _statsCollector.registerSQLException();
    }
    LOG.error(
        "Got SQLException in startDataEventSequence Hanlder!! Connections will be reset !!",
        e);
    try
    {
      reset();
    }
    catch (DatabusException e2)
    {
      DbusPrettyLogUtils.logExceptionAtError("Unable to reset connection", e2, LOG);
    }
    catch (SQLException sqlEx)
    {
      DbusPrettyLogUtils.logExceptionAtError("Got exception while resetting connections. Stopping Client !!", sqlEx, LOG);
      return ConsumerCallbackResult.ERROR_FATAL;
    }
    success = ConsumerCallbackResult.ERROR;
  }
  return success;
}
/**
 * Called at the end of an event window. Persists the log position and source
 * info for every tracked source, promotes the sources to ACTIVE once the
 * producer has moved past the seeding-catchup or fell-off-relay SCN, commits
 * the transaction and reports window stats.
 *
 * @param endScn SCN at which the window ends (not used here)
 * @return SUCCESS on commit; ERROR if a SQLException occurred and the
 *         connection was reset (or the reset raised a DatabusException);
 *         ERROR_FATAL if the reset itself failed with a SQLException
 */
@Override
public ConsumerCallbackResult onEndDataEventSequence(SCN endScn)
{
  try
  {
    // Persist per-source metadata, then the source rows (rolling log tables if needed).
    updateAllProducerSourcesMetaData();
    _oldWindowScn = _newWindowScn;
    updateSourcesInDB();

    // Decide whether this window moves the sources back to ACTIVE.
    boolean activate = false;
    if (_state == BootstrapProducerStatus.SEEDING_CATCHUP && _newWindowScn > _seedCatchupScn)
    {
      LOG.info("Bootstrap DB for sources ("
          + _trackedSources.values()
          + ") has completed the seeding catchup phase. Marking them active in bootstrap_sources table !! SeedCatchupSCN was :"
          + _seedCatchupScn);
      activate = true;
    }
    else if (_state == BootstrapProducerStatus.FELL_OFF_RELAY && _newWindowScn > _producerStartScn)
    {
      LOG.info("Bootstrap DB for sources ("
          + _trackedSources.values()
          + ") has started getting events since last fell-off relay !! Marking them active !!");
      activate = true;
    }

    if (activate)
    {
      _bootstrapDao.updateSourcesStatus(_trackedSources.keySet(),
          BootstrapProducerStatus.ACTIVE);
    }

    final Connection dbConn = getConnection();
    try
    {
      DBHelper.commit(dbConn);
    }
    catch (SQLException commitFailure)
    {
      DBHelper.rollback(dbConn);
      throw commitFailure;
    }

    // Only mirror the new status in memory after the commit went through.
    if (activate)
    {
      _state = BootstrapProducerStatus.ACTIVE;
      for (SourceInfo tracked : _trackedSources.values())
      {
        tracked.setStatus(BootstrapProducerStatus.ACTIVE);
      }
    }
    LOG.info("bootstrap producer upto scn " + _newWindowScn);
    return ConsumerCallbackResult.SUCCESS;
  }
  catch (SQLException e)
  {
    if (null != _statsCollector)
    {
      _statsCollector.registerSQLException();
    }
    LOG.error(
        "Got SQLException in endDataEventSequence Handler !! Connections will be reset !!",
        e);
    try
    {
      reset();
    }
    catch (DatabusException e2)
    {
      DbusPrettyLogUtils.logExceptionAtError("Unable to reset connection", e2, LOG);
    }
    catch (SQLException sqlEx)
    {
      LOG.error(
          "Got exception while resetting connections. Stopping Client !!",
          sqlEx);
      return ConsumerCallbackResult.ERROR_FATAL;
    }
    return ConsumerCallbackResult.ERROR;
  }
  finally
  {
    // Window stats are reported on every exit path.
    _totalRm.stop();
    final long latency = _totalRm.getDuration() / 1000000L;
    if (null != _statsCollector)
    {
      _statsCollector.registerEndWindow(latency, _totalNumEvents,
          _newWindowScn);
    }
  }
}
/**
 * Called when the client rolls back to an earlier SCN.
 *
 * @param startScn SCN being rolled back to (not used here)
 * @return ERROR_FATAL once reset() has exhausted its retries, SUCCESS otherwise
 */
@Override
public ConsumerCallbackResult onRollback(SCN startScn)
{
  if (_errorRetriesExceeded)
  {
    return ConsumerCallbackResult.ERROR_FATAL;
  }
  return ConsumerCallbackResult.SUCCESS;
}
/**
 * Called when events for a source start arriving. Resets the per-source
 * counter, starts the per-source rate monitor and prepares the insert
 * statement for the source's log table.
 *
 * @param source       name of the source
 * @param sourceSchema schema of the source (not used here)
 * @return SUCCESS when the statement was prepared; ERROR if the source is
 *         not tracked, or a SQLException occurred and the connection was
 *         reset; ERROR_FATAL if the reset itself failed with a SQLException
 */
@Override
public ConsumerCallbackResult onStartSource(String source, Schema sourceSchema)
{
  _numEvents = 0;
  _currentSource = source;
  _srcRm.start();

  final SourceInfo tracked = _trackedSources.get(source);
  if (tracked == null)
  {
    LOG.error("Source :"
        + source
        + " not managed in this bootstrap DB instance !! Managed Sources : ("
        + _trackedSources + ")");
    return ConsumerCallbackResult.ERROR;
  }

  try
  {
    final boolean prepared = prepareStatement(tracked.getSrcId());
    return prepared ? ConsumerCallbackResult.SUCCESS : ConsumerCallbackResult.ERROR;
  }
  catch (SQLException e)
  {
    if (null != _statsCollector)
    {
      _statsCollector.registerSQLException();
    }
    LOG.error(
        "Got SQLException in startSource Hanlder!! Connections will be reset !!",
        e);
    try
    {
      reset();
    }
    catch (DatabusException e2)
    {
      DbusPrettyLogUtils.logExceptionAtError("Unable to reset connection", e2, LOG);
    }
    catch (SQLException sqlEx)
    {
      LOG.error(
          "Got exception while resetting connections. Stopping Client !!",
          sqlEx);
      return ConsumerCallbackResult.ERROR_FATAL;
    }
    return ConsumerCallbackResult.ERROR;
  }
}
/**
 * Called when the events of a source within the current window are done.
 * Records the source's log position, closes the insert statement and
 * reports per-source batch stats.
 *
 * @param source       name of the source
 * @param sourceSchema schema of the source (not used here)
 * @return SUCCESS normally; ERROR if a SQLException occurred and the
 *         connection was reset (or the reset raised a DatabusException);
 *         ERROR_FATAL if the reset itself failed with a SQLException
 */
@Override
public ConsumerCallbackResult onEndSource(String source, Schema sourceSchema)
{
  try
  {
    updateProducerSourceMetaData(source);

    // The insert statement is per source; release it now.
    final PreparedStatement insertStmt = _stmt;
    if (insertStmt != null)
    {
      insertStmt.close();
      _stmt = null;
    }
    return ConsumerCallbackResult.SUCCESS;
  }
  catch (SQLException e)
  {
    if (null != _statsCollector)
    {
      _statsCollector.registerSQLException();
    }
    LOG.error(
        "Got SQLException in endSource Hanlder!! Connections will be reset !!",
        e);
    try
    {
      reset();
    }
    catch (DatabusException e2)
    {
      DbusPrettyLogUtils.logExceptionAtError("Unable to reset connection", e2, LOG);
    }
    catch (SQLException sqlEx)
    {
      LOG.error(
          "Got exception while resetting connections. Stopping Client !!",
          sqlEx);
      return ConsumerCallbackResult.ERROR_FATAL;
    }
    return ConsumerCallbackResult.ERROR;
  }
  finally
  {
    // Batch stats and counter roll-up happen on every exit path.
    _srcRm.stop();
    final long latency = _srcRm.getDuration() / 1000000L;
    if (null != _statsCollector)
    {
      _statsCollector.registerBatch(_currentSource, latency, _numEvents,
          _newWindowScn, _currentLogId, _currentRowId);
    }
    _totalNumEvents += _numEvents;
    _numEvents = 0;
  }
}
/**
 * Writes one data event into the current source's log table. Events whose
 * sequence is lower than the SCN already seen are dropped.
 *
 * @param e            the event to persist
 * @param eventDecoder decoder for the event payload (not used here)
 * @return SUCCESS when the row was inserted or the event was dropped as old;
 *         ERROR for unsupported key types, or when a SQLException occurred
 *         and the connection was reset; ERROR_FATAL if the reset itself
 *         failed with a SQLException
 * @throws UnsupportedClassVersionError if the event does not expose its raw bytes
 */
@Override
public ConsumerCallbackResult onDataEvent(DbusEvent e,
    DbusEventDecoder eventDecoder)
{
  final long eventScn = e.sequence();
  if (eventScn < _newWindowScn)
  {
    LOG.warn("Seeing an Old event. Dropping it !! Current SCN : "
        + _newWindowScn + ". Event :" + e.toString());
    return ConsumerCallbackResult.SUCCESS;
  }

  _numEvents++;
  _newWindowScn = eventScn;
  try
  {
    // TODO (DDSDBUS-776) : remove erstwhile windowscn column
    _stmt.setLong(1, _newWindowScn);
    _stmt.setLong(2, _newWindowScn);

    // Column 3: the event key rendered as a string.
    final String keyStr;
    if (e.isKeyNumber())
    {
      keyStr = Long.toString(e.key());
    }
    else if (e.isKeyString())
    {
      keyStr = StringUtils.bytesToString(e.keyBytes());
    }
    else
    {
      final String reason = e.isKeySchema()
          ? "schema key type not supported: "
          : "unknown event key type: ";
      LOG.error(reason + e);
      return ConsumerCallbackResult.ERROR;
    }
    _stmt.setString(3, keyStr);

    // Column 4: the serialized event, copied out of its buffer.
    if (!(e instanceof DbusEventInternalWritable))
    {
      throw new UnsupportedClassVersionError(
          "Cannot get raw bytes out of DbusEvent");
    }
    final ByteBuffer raw = ((DbusEventInternalWritable) e).getRawBytes();
    final byte[] payload = new byte[raw.remaining()];
    raw.get(payload);
    _stmt.setBytes(4, payload);

    _stmt.executeUpdate();
    return ConsumerCallbackResult.SUCCESS;
  }
  catch (SQLException e1)
  {
    if (null != _statsCollector)
    {
      _statsCollector.registerSQLException();
    }
    LOG.error(
        "Got SQLException in dataEvent Hanlder!! Connections will be reset !!",
        e1);
    try
    {
      reset();
    }
    catch (DatabusException e2)
    {
      DbusPrettyLogUtils.logExceptionAtError("Unable to reset connection", e2, LOG);
    }
    catch (SQLException sqlEx)
    {
      LOG.error(
          "Got exception while resetting connections. Stopping Client !!",
          sqlEx);
      return ConsumerCallbackResult.ERROR_FATAL;
    }
    return ConsumerCallbackResult.ERROR;
  }
}
/**
 * Checkpoint notification; this callback does no work for it.
 *
 * @param checkpointScn SCN of the checkpoint (not used here)
 * @return always SUCCESS
 */
@Override
public ConsumerCallbackResult onCheckpoint(SCN checkpointScn)
{
  final ConsumerCallbackResult outcome = ConsumerCallbackResult.SUCCESS;
  return outcome;
}
/**
 * Error notification from the client. When the error is a
 * {@link ScnNotFoundException} the open transaction is rolled back and all
 * tracked sources are marked FELL_OFF_RELAY; any other error is ignored.
 *
 * @param err the error reported by the client
 * @return SUCCESS when handling completed, ERROR if handling itself threw
 */
@Override
public ConsumerCallbackResult onError(Throwable err)
{
  try
  {
    if (!(err instanceof ScnNotFoundException))
    {
      return ConsumerCallbackResult.SUCCESS;
    }

    // Producer fell off the relay. It may be in the middle of a transaction
    // (last valid event was not EOP), so roll back; the checkpoint is
    // consistent with the rolled-back state.
    try
    {
      getConnection().rollback();
    }
    catch (SQLException sqlEx)
    {
      if (null != _statsCollector)
      {
        _statsCollector.registerSQLException();
      }
      LOG.error("Got exception while rolling back transaction !!", sqlEx);
    }

    _bootstrapDao.updateSourcesStatus(_trackedSources.keySet(),
        BootstrapProducerStatus.FELL_OFF_RELAY);
    if (null != _statsCollector)
    {
      _statsCollector.registerFellOffRelay();
    }
    return ConsumerCallbackResult.SUCCESS;
  }
  catch (Exception e)
  {
    LOG.error("Got exception onError() ", e);
    return ConsumerCallbackResult.ERROR;
  }
}
/**
 * Resets the bootstrap DB connection and the in-memory state of the producer.
 *
 * Closes the cached statements and the connection, reopens the connection,
 * runs a dummy query against it and re-runs {@link #init()}. On a
 * SQLException the attempt is repeated after a backoff from the retry timer.
 * When the timer reports no retries left, the failure is flagged through
 * {@code _errorRetriesExceeded} and the error handler is notified.
 *
 * @throws SQLException     if retries are exhausted and no error handler is
 *                          installed to stop the client
 * @throws DatabusException propagated from {@link #init()}
 */
private void reset()
    throws SQLException, DatabusException
{
  boolean success = false;
  _retryTimer.reset();
  while (!success)
  {
    try
    {
      // Close automatically rollbacks the transaction
      DBHelper.close(_stmt);
      _stmt = null;
      DBHelper.close(_logScnStmt);
      _logScnStmt = null;
      _bootstrapDao.getBootstrapConn().close();
      _bootstrapDao.getBootstrapConn().getDBConn(); // recreate the Connection
      _bootstrapDao.getBootstrapConn().executeDummyBootstrapDBQuery();
      // Initialize in memory state
      init();
      success = true;
    }
    catch (SQLException sqlEx)
    {
      if (null != _statsCollector)
      {
        _statsCollector.registerSQLException();
      }
      LOG.error("Unable to reset the Bootstrap DB connection !!", sqlEx);
      if (_retryTimer.getRemainingRetriesNum() <= 0)
      {
        String message = "Producer Thread reached max retries trying to reset the MySQL Connections. Stopping !!";
        LOG.fatal(message);
        _errorRetriesExceeded = true;
        if (null == _errorHandler)
        {
          // The handler is optional (the two-argument constructor passes null).
          // Without one, nothing else will stop the client, so surface the
          // failure; callers translate it into ERROR_FATAL.
          throw sqlEx;
        }
        /*
         * ERROR_FATAL is not implemented yet, Hence using callback to let
         * HttpClientImpl stop
         */
        _errorHandler.onErrorRetryLimitExceeded(message, sqlEx);
      }
      _retryTimer.backoffAndSleep();
    }
  }
}
/**
 * Records the current log position for every tracked source.
 *
 * @throws SQLException if updating any source fails
 */
private void updateAllProducerSourcesMetaData() throws SQLException
{
  for (String trackedName : _trackedSources.keySet())
  {
    updateProducerSourceMetaData(trackedName);
  }
}
/**
 * Records the log position of one source: reads the highest row id of its
 * log table, writes (log id, row id, window SCN) to bootstrap_producer_state
 * and mirrors the row id and SCN into the in-memory SourceInfo.
 *
 * @param source name of a tracked source
 * @throws SQLException if reading the row id or writing the position fails
 */
private void updateProducerSourceMetaData(String source) throws SQLException
{
  final SourceInfo tracked = _trackedSources.get(source);

  // The fields are set before the DB write because they are also reported to stats.
  _currentRowId = getLastLogEntry(source);
  _currentLogId = tracked.getCurrLogId();
  setLogPosition(_currentLogId, _currentRowId, _newWindowScn, source);

  // Keep the in-memory view in step with what was just written.
  tracked.setMaxRowId(_currentRowId);
  tracked.setWindowScn(_newWindowScn);
}
/**
 * Prepares the insert statement for the log table of the given source and
 * stores it in {@code _stmt}.
 *
 * @param srcId id of the source whose log table receives the events
 * @return always true when the statement was prepared
 * @throws SQLException if the connection or the statement cannot be obtained
 */
private boolean prepareStatement(int srcId) throws SQLException
{
  try
  {
    final Connection dbConn = getConnection();
    final String insertSql = "insert into "
        + getTableName(srcId)
        + "(scn, windowscn, srckey, val) "
        + " values(?,?,?,?)";
    _stmt = dbConn.prepareStatement(insertSql);
    return true;
  }
  catch (SQLException e)
  {
    LOG.error("Got SQLException in prepareStatement!! ", e);
    throw e;
  }
}
/**
 * Loads the starting SCNs for the tracked sources from the bootstrap DB:
 * max(windowscn) becomes the producer start / old / new window SCN and
 * max(endscn) becomes the seed catchup SCN. On failure the window SCNs are
 * set back to -1 and the exception is rethrown.
 *
 * @throws SQLException if the query fails
 */
private void initWindowScn() throws SQLException
{
  Statement query = null;
  ResultSet rows = null;
  try
  {
    final StringBuilder sql = new StringBuilder(
        "select max(p.windowscn), max(s.endscn) from bootstrap_producer_state p, bootstrap_seeder_state s ");
    sql.append("where p.srcid = s.srcid and p.srcid in (");

    // Comma-separated list of the tracked source ids.
    String separator = "";
    for (SourceInfo tracked : _trackedSources.values())
    {
      sql.append(separator);
      sql.append(tracked.getSrcId());
      separator = ",";
    }
    sql.append(")");

    query = getConnection().createStatement();
    LOG.info("sql query = " + sql.toString());
    rows = query.executeQuery(sql.toString());
    if (rows.next())
    {
      final long maxWindowScn = rows.getLong(1);
      _newWindowScn = maxWindowScn;
      _oldWindowScn = maxWindowScn;
      _producerStartScn = maxWindowScn;
      _seedCatchupScn = rows.getLong(2);
    }
  }
  catch (SQLException e)
  {
    if (null != _statsCollector)
    {
      _statsCollector.registerSQLException();
    }
    LOG.error(
        "Unable to select producer's max windowscn. Setting windowscn to -1",
        e);
    _oldWindowScn = -1;
    _newWindowScn = -1;
    _producerStartScn = -1;
    throw e;
  }
  finally
  {
    DBHelper.close(rows, query, null);
  }
}
/**
 * Returns the highest row id currently stored in the log table of a source.
 *
 * @param source name of a tracked source
 * @return max(id) of the source's log table, or 0 if the query yields no row
 * @throws SQLException if the query fails; the exception is logged and rethrown
 */
private int getLastLogEntry(String source) throws SQLException
{
  int rid = 0;
  Statement stmt = null;
  ResultSet rs = null;
  int srcId = _trackedSources.get(source).getSrcId();
  try
  {
    stmt = getConnection().createStatement();
    rs = stmt.executeQuery("select max(id) from " + getTableName(srcId));
    if (rs.next())
    {
      rid = rs.getInt(1);
    }
  }
  catch (SQLException e)
  {
    // The exception is rethrown, so no row id is returned on this path.
    LOG.error("Unable to find max. rid. Setting current rid to -1", e);
    throw e;
  }
  finally
  {
    // Same cleanup helper as initWindowScn(); replaces the manual statement-then-
    // result-set closes, where a failing close skipped the other resource and
    // could mask the query's own exception.
    DBHelper.close(rs, stmt, null);
  }
  return rid;
}
/**
 * Writes the log position of a source to bootstrap_producer_state using the
 * cached update statement.
 *
 * @param logid     id of the log the source is writing to
 * @param logrid    row id within that log
 * @param windowscn window SCN to record
 * @param source    name of the source whose row is updated
 * @throws SQLException if the statement cannot be prepared or executed
 */
private void setLogPosition(int logid, int logrid, long windowscn,
    String source) throws SQLException
{
  final PreparedStatement update = getLogPositionStmt();
  // Parameter order follows the statement: logid, rid, windowscn, src.
  update.setInt(1, logid);
  update.setInt(2, logrid);
  update.setLong(3, windowscn);
  update.setString(4, source);
  update.executeUpdate();
}
/**
 * Returns the statement that updates bootstrap_producer_state, preparing and
 * caching it in {@code _logScnStmt} on first use.
 *
 * @return the cached update statement
 * @throws SQLException if the connection or the statement cannot be obtained
 */
private PreparedStatement getLogPositionStmt() throws SQLException
{
  if (_logScnStmt == null)
  {
    try
    {
      final Connection dbConn = getConnection();
      final String updateSql =
          "update bootstrap_producer_state set logid = ?, rid = ? , windowscn = ? where srcid = (select id from bootstrap_sources where src = ?)";
      _logScnStmt = dbConn.prepareStatement(updateSql);
    }
    catch (SQLException e)
    {
      if (null != _statsCollector)
      {
        _statsCollector.registerSQLException();
      }
      LOG.error(
          "Exception occurred while getting the bootstrap_producer statement",
          e);
      throw e;
    }
  }
  return _logScnStmt;
}
/**
 * Saves every tracked source to the DB and then, for each source whose row
 * count has reached {@code _maxRowsInLog}, switches it to a new log, resets
 * its recorded position to row 0 and creates the new log table.
 *
 * @throws SQLException if any of the DB operations fails
 */
private void updateSourcesInDB() throws SQLException
{
  // Pass 1: persist the current state of all sources.
  for (SourceInfo tracked : _trackedSources.values())
  {
    tracked.saveToDB(getConnection());
  }

  // Pass 2: roll over the logs that are full.
  for (Map.Entry<String, SourceInfo> trackedEntry : _trackedSources.entrySet())
  {
    final SourceInfo tracked = trackedEntry.getValue();
    if (tracked.getNumRows() < _maxRowsInLog)
    {
      continue;
    }
    tracked.switchLogFile(getConnection());
    setLogPosition(tracked.getCurrLogId(), 0, _newWindowScn, trackedEntry.getKey());
    // getConnection().commit();
    _bootstrapDao.createNewLogTable(tracked.getSrcId());
  }
}
/**
 * Returns the bootstrap DB connection, creating the connection wrapper and
 * the DAO on first use (with auto-commit disabled).
 *
 * @return the connection obtained from the DAO's bootstrap connection
 * @throws SQLException if the bootstrap connection cannot be initialised or
 *                      the DB connection cannot be obtained
 */
private Connection getConnection() throws SQLException
{
  if (_bootstrapDao == null)
  {
    BootstrapConn dbConn = new BootstrapConn();
    try
    {
      final boolean autoCommit = false;
      dbConn.initBootstrapConn(autoCommit, _config.getBootstrapDBUsername(),
          _config.getBootstrapDBPassword(), _config.getBootstrapDBHostname(),
          _config.getBootstrapDBName());
      _bootstrapDao = new BootstrapDBMetaDataDAO(dbConn,
          _config.getBootstrapDBHostname(), _config.getBootstrapDBUsername(),
          _config.getBootstrapDBPassword(), _config.getBootstrapDBName(),
          autoCommit);
    }
    catch (Exception e)
    {
      LOG.fatal("Unable to open BootstrapDB Connection !!", e);
      // Returning null here would leave _bootstrapDao unset and make the next
      // caller fail with a NullPointerException that hides the real cause.
      if (e instanceof SQLException)
      {
        throw (SQLException) e;
      }
      throw new SQLException("Unable to open BootstrapDB Connection", e);
    }
  }

  try
  {
    return _bootstrapDao.getBootstrapConn().getDBConn();
  }
  catch (SQLException e)
  {
    if (null != _statsCollector)
    {
      _statsCollector.registerSQLException();
    }
    LOG.fatal("Not able to open BootstrapDB Connection !!", e);
    throw e;
  }
}
/**
 * Looks up the name of the log table that events of the given source are
 * written to.
 *
 * @param srcId id of the source
 * @return the table name reported by the bootstrap connection
 * @throws SQLException if the lookup fails
 */
private String getTableName(int srcId) throws SQLException
{
  final BootstrapConn bootstrapConn = _bootstrapDao.getBootstrapConn();
  return bootstrapConn.getLogTableNameToProduce(srcId);
}
/**
 * Hook invoked by the producer when it gives up re-establishing the
 * bootstrap DB connection.
 */
public interface ErrorCaseHandler
{
/**
 * Callback for handling case when error retries limit reached.
 *
 * @param message   description of the failure
 * @param exception the last exception seen while retrying
 */
void onErrorRetryLimitExceeded(String message, Throwable exception);
}
}