/*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.linkedin.databus2.producers.db;
import java.io.StringReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.log4j.Logger;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import com.linkedin.databus.core.DatabusRuntimeException;
import com.linkedin.databus.core.ScnTxnPos;
import com.linkedin.databus.core.TrailFilePositionSetter;
import com.linkedin.databus.core.TrailFilePositionSetter.TransactionSCNFinderCallback;
import com.linkedin.databus.core.util.RateMonitor;
import com.linkedin.databus2.core.DatabusException;
/**
 * Transaction callback that scans XML trail files line by line, locates transaction boundaries
 * ({@code <transaction ...>} / {@code </transaction>}) and extracts the minimum and maximum SCN
 * (the {@code TK-CSN} token) of every complete transaction.
 *
 * <p>SCN extraction uses either a regular expression (default) or an XPath query, selected by
 * {@link #_enableRegex}. A transaction in which no valid SCN can be found is counted as invalid
 * and the current position is rolled back to the previous transaction.
 *
 * <p>An instance carries mutable cursor state (current file, offset, line number, buffered
 * transaction text); use {@link #reset()} to clear it.
 */
public class GGXMLTrailTransactionFinder implements TransactionSCNFinderCallback
{
  public static final String MODULE = GGXMLTrailTransactionFinder.class.getName();
  public static final Logger LOG = Logger.getLogger(MODULE);

  /**
   * The patterns for detecting SCN and transaction are selected in such a way that they won't be ambiguous.
   */
  public static final String TRANSACTION_BEGIN_PREFIX = "<transaction"; // Uniquely identifies the transaction begin.
  public static final String TRANSACTION_END_PREFIX = "</transaction"; // Uniquely identifies the transaction end
  public static final String TRANSACTION_END_STR = "</transaction>";

  /** XPath expression for extracting the SCN tokens of a transaction. */
  public static final String SCN_XPATH_STR = "//transaction/dbupdate/tokens/token[@name=\"TK-CSN\"]/text()";
  /** Regular expression for extracting the SCN tokens of a transaction; group 2 holds the digits. */
  public static final String SCN_REGEX_STR = "(<token\\s+name=\"TK-CSN\"\\s*>([0-9]+)\\s*</token>)";

  /** If true, SCNs are extracted with {@link #SCN_REGEX_STR}; otherwise with {@link #SCN_XPATH_STR}. */
  public boolean _enableRegex = true;

  /** Position and SCN range of the current transaction. */
  private ScnTxnPos _txnPos;
  /** Position and SCN range of the previous transaction. */
  private ScnTxnPos _prevTxnPos;

  /** File the cursor is currently in. */
  private String _currFile;
  /** Byte offset within the file where the current cursor is */
  private long _currFileByteOffset;
  /** Line number of the cursor */
  private long _currLineNumber;
  /** The string buffer containing the current txn */
  private StringBuilder _currTxnStr = new StringBuilder();
  /** TargetSCN to be located */
  private long _targetScn = TrailFilePositionSetter.USE_LATEST_SCN;
  /** Flag to indicate if end of txn is seen */
  private boolean _txnEndSeen = false;
  /** Number of valid txns completely seen by this instance */
  private long _numTxnsSeen = 0;
  /** Number of invalid txns (i.e., containing no valid SCNs) seen by this instance */
  private long _numInvalidTxnsSeen = 0;
  /** Flag to indicate if at least one txn is completely seen */
  private boolean _firstTxnSeen = false;

  private final XPathExpression _expr;
  private final Pattern _rexpr;

  /** Member variables used in intermediate computations in regexQuery() / xpathQuery() */
  private long _minScn, _maxScn;

  // Both monitors are (re)created by reset(), which the constructor always calls first.
  /** Measures time/volume spent extracting SCNs (regex or XPath). */
  private RateMonitor _queryRateMonitor;
  /** Measures time/volume spent in processLine() overall. */
  private RateMonitor _rateMonitor;

  /** Flag to indicate that a transaction begin tag has been seen and its end is still pending. */
  private boolean _beginTxnSeen = false;

  /**
   * Creates a finder.
   *
   * @param enableRegex true to extract SCNs with the regular expression, false to use XPath
   * @throws Exception if the XPath expression cannot be compiled
   */
  public GGXMLTrailTransactionFinder(boolean enableRegex) throws Exception
  {
    reset();

    XPathFactory xpathFactory = XPathFactory.newInstance();
    XPath xpath = xpathFactory.newXPath();
    _expr = xpath.compile(SCN_XPATH_STR);
    _rexpr = Pattern.compile(SCN_REGEX_STR);
    _enableRegex = enableRegex;
    _minScn = Long.MAX_VALUE;
    _maxScn = Long.MIN_VALUE;
  }

  /**
   * Creates a finder that uses the regular expression for SCN extraction.
   *
   * @throws Exception if the XPath expression cannot be compiled
   */
  public GGXMLTrailTransactionFinder() throws Exception
  {
    this(true);
  }

  /**
   * Records the switch to a new file and rewinds the line and offset cursors to its start.
   *
   * @param file the file about to be processed
   */
  @Override
  public void beginFileProcessing(String file)
  {
    if (LOG.isDebugEnabled())
    {
      LOG.debug("Switching to file :" + file);
    }

    _currFile = file;
    _currLineNumber = 0;
    _currFileByteOffset = 0;
  }

  /**
   * Consumes one line of the trail file, updating transaction positions and SCN ranges for every
   * transaction boundary found on it.
   *
   * @param line            the line contents, without the line terminator
   * @param newLineCharLen  length of the line terminator that followed the line; added to the
   *                        file offset only when positive
   * @return true if a transaction satisfying the target SCN has been completely seen
   * @throws DatabusException if the first complete transaction already has a minimum SCN greater
   *                          than the requested target SCN
   */
  @Override
  public boolean processLine(String line, int newLineCharLen) throws DatabusException
  {
    try
    {
      _rateMonitor.resume();
      _rateMonitor.ticks(line.length());

      boolean done = false;

      /**
       * The general XML syntax allow for newlines to be optional between XML elements. Even though GG is empirically
       * shown to be inserting newlines between XML element tags, this assumption is not made here. A single line
       * can contain zero or more complete transactions.
       */
      boolean hasBoundary = (line.indexOf(TRANSACTION_BEGIN_PREFIX) >= 0)
                         || (line.indexOf(TRANSACTION_END_PREFIX) >= 0);

      if (!hasBoundary)
      {
        // continue accumulating "middle stuff" (between begin- and end-transaction tags)
        _currTxnStr.append(line);
      }
      else
      {
        String rest = line;  // part of the line not yet consumed
        int consumed = 0;    // offset of rest's first character within line

        /**
         * A transaction can be contained in a single or multiple lines. No assumptions should be made about its placement.
         */
        while (true)
        {
          _txnEndSeen = false;

          // Start and end of txns can be in a single line. Moreover, many such transactions be on a line.
          int beginOffset = rest.indexOf(TRANSACTION_BEGIN_PREFIX);
          int endOffset = rest.indexOf(TRANSACTION_END_PREFIX);

          // no more endpoints (begin or end of transactions)
          if ((endOffset == -1) && (beginOffset == -1))
          {
            break;
          }

          if ((endOffset == -1) || ((beginOffset >= 0) && (beginOffset < endOffset)))
          {
            // Case where only beginning of transaction tag or a complete transaction is present.
            // beginOffset is >= 0 here: either it precedes the end tag or it is the only tag found.
            _currTxnStr.setLength(0);
            processBegin(consumed + beginOffset);

            if (endOffset == -1)
            {
              _currTxnStr.append(rest);
              break;
            }

            _currTxnStr.append(rest, beginOffset, endOffset);
            _currTxnStr.append(TRANSACTION_END_STR);
            processEnd();

            // Clamp so that a line truncated right after "</transaction" does not overrun the string.
            int next = Math.min(rest.length(), endOffset + TRANSACTION_END_STR.length());
            consumed += next;
            rest = rest.substring(next);
          }
          else
          {
            // Case where only endTag is seen or a transaction completes and another starts in the same line
            if (beginOffset == -1)
            {
              _currTxnStr.append(rest);
              processEnd();
              if (isDone())
              {
                done = true;
              }
              break; // nothing left to process on this line => must break unconditionally
            }

            _currTxnStr.append(rest, 0, beginOffset);
            processEnd();
            consumed += beginOffset;
            rest = rest.substring(beginOffset);
          }

          if (isDone())
          {
            done = true;
            break;
          }
        }
      }

      // NOTE(review): line.length() counts chars, not bytes; this matches byte offsets only if the
      // trail file uses a single-byte encoding -- confirm against the caller.
      _currFileByteOffset += line.length();
      if (newLineCharLen > 0)
      {
        _currFileByteOffset += newLineCharLen;
      }
      _currLineNumber++;

      return done;
    }
    finally
    {
      _rateMonitor.suspend();
    }
  }

  /**
   * Tells whether the search can stop: a transaction end was just seen and either the earliest SCN
   * was requested, or a concrete target SCN was requested and the current transaction reaches it.
   */
  private boolean isDone()
  {
    // last condition works only because code elsewhere has checked for _targetScn >= min SCN ?
    if (!_txnEndSeen)
    {
      return false;
    }

    if (_targetScn == TrailFilePositionSetter.USE_EARLIEST_SCN)
    {
      return true;
    }

    return (_targetScn != TrailFilePositionSetter.USE_LATEST_SCN)
        && (_txnPos.getMaxScn() >= _targetScn);
  }

  /**
   * When transaction begin is seen, this should be called to save the positions
   * @param byteLineOffset offset of the transaction begin tag within the current line
   */
  private void processBegin(int byteLineOffset)
  {
    _prevTxnPos.copyFrom(_txnPos);

    _txnPos.setFile(_currFile);
    _txnPos.setFileOffset(_currFileByteOffset + byteLineOffset);
    _txnPos.setLineNumber(_currLineNumber + 1);
    _txnPos.setLineOffset(byteLineOffset);
    _txnPos.setMinScn(-1);
    _txnPos.setMaxScn(-1);
    _txnPos.setTxnRank(_numTxnsSeen); // Rank = Number of transactions before this transactions.

    _beginTxnSeen = true;
  }

  /**
   * Extracts all SCNs of the buffered transaction with the XPath expression and folds them into
   * {@code _minScn} / {@code _maxScn}.
   *
   * @throws DatabusTrailFileParseException if the transaction cannot be parsed, contains no SCN
   *                                        token, or contains an SCN that is not a valid long
   */
  private void xpathQuery() throws DatabusTrailFileParseException
  {
    try
    {
      InputSource source = new InputSource(new StringReader(_currTxnStr.toString()));

      Object result;
      _queryRateMonitor.resume(); // count time consumed by XML parsing
      try
      {
        result = _expr.evaluate(source, XPathConstants.NODESET);
        _queryRateMonitor.ticks(_currTxnStr.length());
      }
      finally
      {
        // never leave the monitor running, even when the XML is malformed
        _queryRateMonitor.suspend();
      }

      NodeList nodes = (NodeList) result;
      if (nodes.getLength() == 0)
      {
        // Same contract as regexQuery(): a transaction without any SCN is invalid.
        throw new DatabusTrailFileParseException("Could not find TK-CSN with XPath; " +
                                                 "likely error in trail-file entry: " + _currTxnStr);
      }

      for (int i = 0; i < nodes.getLength(); i++)
      {
        long newScn = Long.parseLong(nodes.item(i).getNodeValue().trim());
        _minScn = Math.min(_minScn, newScn);
        _maxScn = Math.max(_maxScn, newScn);
      }
    }
    catch (XPathExpressionException xpxe)
    {
      throw new DatabusTrailFileParseException("Got XPath exception for trail-file entry: " + _currTxnStr, xpxe);
    }
    catch (NumberFormatException nfe)
    {
      throw new DatabusTrailFileParseException("Got parseLong() exception for trail-file entry: " + _currTxnStr, nfe);
    }
  }

  /**
   * Extracts all SCNs of the buffered transaction with the regular expression and folds them into
   * {@code _minScn} / {@code _maxScn}.
   *
   * @throws DatabusTrailFileParseException if the transaction contains no SCN token or contains an
   *                                        SCN that is not a valid long
   */
  private void regexQuery() throws DatabusTrailFileParseException
  {
    String source = _currTxnStr.toString();

    _queryRateMonitor.resume(); // count time consumed by regex parsing
    Matcher result = _rexpr.matcher(source);
    boolean foundScn = result.find();
    _queryRateMonitor.ticks(source.length());
    _queryRateMonitor.suspend();

    if (!foundScn)
    {
      throw new DatabusTrailFileParseException("Could not find TK-CSN with regex; " +
                                               "likely error in trail-file entry: " + _currTxnStr);
    }

    try
    {
      // Loop through all SCNs in the transaction and save max/min ones.
      while (foundScn)
      {
        // The regex guarantees digits only, but the value can still overflow a long.
        long newScn = Long.parseLong(result.group(2));
        _minScn = Math.min(_minScn, newScn);
        _maxScn = Math.max(_maxScn, newScn);

        _queryRateMonitor.resume(); // also count time consumed by regex find() calls
        foundScn = result.find();
        _queryRateMonitor.suspend();
      }
    }
    catch (NumberFormatException nfe)
    {
      throw new DatabusTrailFileParseException("Got parseLong() exception for trail-file entry: " + _currTxnStr, nfe);
    }
  }

  /**
   * When the transaction end is seen, this should be called to save SCN
   *
   * <p>An end tag without a preceding begin tag only discards the buffered text. A transaction
   * whose SCNs cannot be extracted is counted as invalid and the current position is restored
   * from the previous transaction.
   *
   * @throws DatabusException if this is the first complete transaction and the non-negative
   *                          target SCN is smaller than its minimum SCN
   */
  private void processEnd() throws DatabusException
  {
    if (!_beginTxnSeen)
    {
      // Tail of a transaction whose beginning was never seen: nothing to record.
      _currTxnStr.setLength(0);
      return;
    }

    _maxScn = -1L;
    _minScn = Long.MAX_VALUE;

    try
    {
      if (_enableRegex)
      {
        regexQuery();
      }
      else
      {
        xpathQuery();
      }
    }
    catch (DatabusTrailFileParseException ex)
    {
      LOG.warn("empty/corrupted txn (" + ex.getMessage() + "); resetting invalid _txnPos (" + _txnPos +
               ") to _prevTxnPos (" + _prevTxnPos + ")");
      _txnPos.copyFrom(_prevTxnPos);
      ++_numInvalidTxnsSeen;
      return;
    }

    _txnPos.setMaxScn(_maxScn);
    _txnPos.setMinScn(_minScn);
    _txnEndSeen = true;
    _numTxnsSeen++;

    if (!_firstTxnSeen)
    {
      if ((_targetScn >= 0) && (_targetScn < _minScn)) // common case: need to try previous trail file instead
      {
        throw new DatabusException("SinceSCN is less than MinScn available in trail file. Requested SinceSCN is :"
                                   + _targetScn + " but found only : " + _minScn
                                   + " in Location " + _txnPos);
      }
    }

    _firstTxnSeen = true;
    _beginTxnSeen = false;

    if (LOG.isDebugEnabled())
    {
      LOG.debug("Seen Txn : " + _txnPos);
    }
  }

  /** No per-file cleanup is needed. */
  @Override
  public void endFileProcessing(String file)
  {
  }

  /**
   * Returns the position of the current transaction, falling back to the previous transaction when
   * the current one is empty.
   *
   * @return the transaction position, or null if neither the current nor the previous is set
   */
  @Override
  public ScnTxnPos getTxnPos()
  {
    if (!_txnPos.isEmpty())
    {
      return _txnPos;
    }

    return _prevTxnPos.isEmpty() ? null : _prevTxnPos;
  }

  /**
   * Clears all positions, counters and flags and installs fresh (started, then suspended) rate
   * monitors. The target SCN and the regex/XPath selection are left untouched.
   */
  @Override
  public void reset()
  {
    _txnPos = new ScnTxnPos();
    _prevTxnPos = new ScnTxnPos();
    _currFile = null;
    _currFileByteOffset = 0;
    _currLineNumber = 0;
    _numTxnsSeen = 0;
    _numInvalidTxnsSeen = 0;
    _txnEndSeen = false;
    _beginTxnSeen = false;
    _firstTxnSeen = false;
    _currTxnStr.setLength(0);

    _queryRateMonitor = new RateMonitor("XPath_GGTransactionFinder");
    _queryRateMonitor.start();
    _queryRateMonitor.suspend();

    _rateMonitor = new RateMonitor("GGTransactionFinder");
    _rateMonitor.start();
    _rateMonitor.suspend();
  }

  /**
   * Sets the SCN to search for.
   *
   * @param targetScn the target SCN, or one of the special {@link TrailFilePositionSetter} values
   */
  @Override
  public void begin(long targetScn)
  {
    _targetScn = targetScn;
  }

  /** @return number of valid transactions completely seen since the last {@link #reset()} */
  @Override
  public long getNumTxnsSeen()
  {
    return _numTxnsSeen;
  }

  /** @return number of transactions rejected for lack of a valid SCN since the last {@link #reset()} */
  public long getNumInvalidTxnsSeen()
  {
    return _numInvalidTxnsSeen;
  }

  /** @return offset of the cursor within the current file, as advanced by {@link #processLine} */
  @Override
  public long getCurrentFileOffset()
  {
    return _currFileByteOffset;
  }

  /** @return the monitor tracking SCN extraction (regex / XPath) */
  public RateMonitor getQueryRateMonitor()
  {
    return _queryRateMonitor;
  }

  /** @return the monitor tracking overall line processing */
  public RateMonitor getRateMonitor()
  {
    return _rateMonitor;
  }

  /** @return a printable summary of both rate monitors */
  @Override
  public String getPerfStats()
  {
    String overallRateMonitor = _rateMonitor.toString();
    String queryRateMonitor = _queryRateMonitor.toString();
    return "queryRM : " + queryRateMonitor +
           ", OverallRM : " + overallRateMonitor;
  }

  /** Special-purpose exception used only by processEnd() and the xpath and regex parsers. */
  private static class DatabusTrailFileParseException extends Exception
  {
    private static final long serialVersionUID = 1L;

    public DatabusTrailFileParseException()
    {
      super();
    }

    public DatabusTrailFileParseException(String message, Throwable cause)
    {
      super(message, cause);
    }

    public DatabusTrailFileParseException(String message)
    {
      super(message);
    }

    public DatabusTrailFileParseException(Throwable cause)
    {
      super(cause);
    }
  }
}