package com.tesora.dve.tools.analyzer;
/*
 * #%L
 * Tesora Inc.
 * Database Virtualization Engine
 * %%
 * Copyright (C) 2011 - 2014 Tesora Inc.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */

import static com.tesora.dve.tools.analyzer.StatementCounter.DMLCounters.DELETE;
import static com.tesora.dve.tools.analyzer.StatementCounter.DMLCounters.INSERT_INTO_SELECT;
import static com.tesora.dve.tools.analyzer.StatementCounter.DMLCounters.INSERT_INTO_VALUES;
import static com.tesora.dve.tools.analyzer.StatementCounter.DMLCounters.SELECT;
import static com.tesora.dve.tools.analyzer.StatementCounter.DMLCounters.TRUNCATE;
import static com.tesora.dve.tools.analyzer.StatementCounter.DMLCounters.UNION;
import static com.tesora.dve.tools.analyzer.StatementCounter.DMLCounters.UPDATE;
import static com.tesora.dve.tools.analyzer.StatementCounter.NonDMLCounters.ALTER;
import static com.tesora.dve.tools.analyzer.StatementCounter.NonDMLCounters.CREATE;
import static com.tesora.dve.tools.analyzer.StatementCounter.NonDMLCounters.DROP;
import static com.tesora.dve.tools.analyzer.StatementCounter.NonDMLCounters.GRANT;
import static com.tesora.dve.tools.analyzer.StatementCounter.NonDMLCounters.SESSION;
import static com.tesora.dve.tools.analyzer.StatementCounter.NonDMLCounters.SHOW;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;

import org.apache.log4j.Logger;

import com.tesora.dve.common.MultiMap;
import com.tesora.dve.common.PEFileUtils;
import com.tesora.dve.common.PEXmlUtils;
import com.tesora.dve.db.DBNative;
import com.tesora.dve.db.Emitter;
import com.tesora.dve.db.Emitter.EmitOptions;
import com.tesora.dve.errmap.ErrorMapper;
import com.tesora.dve.errmap.FormattedErrorInfo;
import com.tesora.dve.exceptions.HasErrorInfo;
import com.tesora.dve.exceptions.PEException;
import com.tesora.dve.server.global.HostService;
import com.tesora.dve.singleton.Singletons;
import com.tesora.dve.sql.parser.CandidateParser;
import com.tesora.dve.sql.parser.InvokeParser;
import com.tesora.dve.sql.parser.ParserInvoker;
import com.tesora.dve.sql.schema.Database;
import com.tesora.dve.sql.schema.ValueManager;
import com.tesora.dve.sql.statement.EmptyStatement;
import com.tesora.dve.sql.statement.Statement;
import com.tesora.dve.sql.statement.ddl.AlterStatement;
import com.tesora.dve.sql.statement.ddl.DropStatement;
import com.tesora.dve.sql.statement.ddl.GrantStatement;
import com.tesora.dve.sql.statement.ddl.PECreateStatement;
import com.tesora.dve.sql.statement.ddl.SchemaQueryStatement;
import com.tesora.dve.sql.statement.dml.DMLStatement;
import com.tesora.dve.sql.statement.dml.DeleteStatement;
import com.tesora.dve.sql.statement.dml.InsertIntoSelectStatement;
import com.tesora.dve.sql.statement.dml.InsertIntoValuesStatement;
import com.tesora.dve.sql.statement.dml.SelectStatement;
import com.tesora.dve.sql.statement.dml.TruncateStatement;
import com.tesora.dve.sql.statement.dml.UnionStatement;
import com.tesora.dve.sql.statement.dml.UpdateStatement;
import com.tesora.dve.sql.statement.session.SessionStatement;
import com.tesora.dve.tools.analyzer.jaxb.DbAnalyzerCorpus;
import com.tesora.dve.tools.analyzer.jaxb.HasStatement;
import com.tesora.dve.tools.analyzer.jaxb.InsertTuples;
import com.tesora.dve.tools.analyzer.jaxb.StatementInsertIntoValuesType;
import com.tesora.dve.tools.analyzer.jaxb.StatementNonDMLType;
import com.tesora.dve.tools.analyzer.jaxb.StatementNonInsertType;
import com.tesora.dve.tools.analyzer.jaxb.StatementPopulationType;
import com.tesora.dve.tools.analyzer.stats.EnumStatsCollector;
import com.tesora.dve.tools.analyzer.stats.IntegerHistogram;

/**
 * Analyzer pass that counts the statements in a query stream and builds a
 * {@link DbAnalyzerCorpus}. DML statements are keyed by their parameterized
 * (literal-shrunk) form so repeated statements are counted rather than
 * duplicated, inserts are tracked by prefix and tuple width, and non-DML
 * statements are tallied separately. The corpus is periodically checkpointed
 * to disk and written out, sorted by frequency, when the run finishes.
 */
public class StatementCounter extends Analyzer {

    private static final Logger logger = Logger.getLogger(StatementCounter.class);

    private final HashMap<String, NonInsertEntry> byParam = new HashMap<String, NonInsertEntry>();
    private final HashMap<String, NonInsertEntry> byShrunk = new HashMap<String, NonInsertEntry>();
    private final HashMap<String, InsertEntry> inserts = new HashMap<String, InsertEntry>();
    private final HashMap<String, StatementNonDMLType> otherByInput = new HashMap<String, StatementNonDMLType>();
    private final HashMap<String, StatementNonDMLType> otherByShrunk = new HashMap<String, StatementNonDMLType>();

    private final EmitOptions emitOptions;
    private final CachingInvoker myInvoker;
    private final DbAnalyzerCorpus corpus;
    private final File corpusFile;
    private final File checkpointFile;
    private final int checkpointInterval;

    // errors are parse errors
    // warnings are other interesting things -
    // something we don't fully support across the entire planning chain

    static enum GlobalCounters {
        LINES, PROCESSED_STATEMENTS, EMPTY_STATEMENTS, SHRINK_CACHE_ADDS, SHRINK_CACHE_HITS, SHRINK_CACHE_MISSES, ERRORS, WARNINGS
    }

    private static final EnumStatsCollector<GlobalCounters> globalCounters = new EnumStatsCollector<>(GlobalCounters.class);

    static enum NonDMLCounters {
        SESSION, ALTER, SHOW, CREATE, GRANT, DROP, UNKNOWN
    }

    private static final EnumStatsCollector<NonDMLCounters> nonDMLBreakout = new EnumStatsCollector<>(NonDMLCounters.class);

    static enum DMLCounters {
        DELETE, UPDATE, SELECT, UNION, TRUNCATE, INSERT_INTO_SELECT, INSERT_INTO_VALUES, UNKNOWN
    }

    private static final EnumStatsCollector<DMLCounters> dmlBreakout = new EnumStatsCollector<>(DMLCounters.class);

    private static final IntegerHistogram literalCounts = new IntegerHistogram();

    private final BufferedWriter errorLog;
    private final PrintStream outputStream;
    private final boolean stackTraces;

    public StatementCounter(AnalyzerOptions opts, File corpusFile, File checkpointFile, int checkpointInterval, File errorFileName, PrintStream outputStream) throws Throwable {
        super(opts);
        emitOptions = EmitOptions.NONE.analyzerLiteralsAsParameters();
        myInvoker = new CachingInvoker(this);
        corpus = new DbAnalyzerCorpus();
        this.corpusFile = corpusFile;
        this.checkpointFile = checkpointFile;
        this.checkpointInterval = checkpointInterval;
        this.errorLog = new BufferedWriter(new FileWriter(errorFileName));
        this.outputStream = outputStream;
        this.stackTraces = getOptions().isVerboseErrors();
    }

    @Override
    public void setSource(AnalyzerSource as) {
        if (as != null) {
            corpus.setDescription(as.getDescription());
        }
    }

    @Override
    public ParserInvoker getInvoker() {
        return myInvoker;
    }

    @Override
    public void onFinished()
            throws PEException {
        checkpoint(true);
    }

    public void close() {
        try {
            errorLog.close();
        } catch (final IOException e) {
            logger.warn("Failed to close the error log file");
        }
    }

    /**
     * Write the corpus to the checkpoint file every checkpointInterval lines.
     * On the final call, also write the frequency-sorted corpus to the corpus
     * file and print a summary.
     */
    private void checkpoint(boolean last) throws PEException {
        final long total = globalCounters.getOccurances(GlobalCounters.LINES);
        if (last || ((total % checkpointInterval) == 0)) {
            outputStream.println("Writing checkpoint file, total stmts: " + total);
            final String corpusString = PEXmlUtils.marshalJAXB(corpus);
            final File prevFile = new File(checkpointFile.getAbsolutePath() + ".prev");
            if (prevFile.exists()) {
                prevFile.delete();
            }
            if (checkpointFile.exists()) {
                checkpointFile.renameTo(prevFile);
            }
            PEFileUtils.writeToFile(checkpointFile, corpusString, true);
        }
        if (last) {
            final String corpusString = PEXmlUtils.marshalJAXB(sortByFrequency());
            PEFileUtils.writeToFile(corpusFile, corpusString, true);
            printSummary(outputStream);
        }
        flushErrorLog();
    }

    private void flushErrorLog() throws PEException {
        try {
            errorLog.flush();
        } catch (final IOException ioe) {
            throw new PEException("Unable to flush error log", ioe);
        }
    }

    private static void printSummary(final PrintStream outputStream) {
        nonDMLBreakout.printTo("\nNon-DML Breakout : %s\n", " %-20s : %s\n", outputStream);
        dmlBreakout.printTo("\nDML Breakout : %s\n", " %-20s : %s\n", outputStream);
        outputStream.println();
        outputStream.println("Literal counts / shrunk DML statements");
        outputStream.printf(" %-20s : %s\n", "count", literalCounts.getTotalOccurances());
        outputStream.printf(" %-20s : %s\n", "min", literalCounts.getMinimum());
        outputStream.printf(" %-20s : %4.4f\n", "avg", literalCounts.getAverage());
        outputStream.printf(" %-20s : %4.4f\n", "std dev(approx)", literalCounts.getStandardDeviation());
        outputStream.printf(" %-20s : %s\n", "90.0%", literalCounts.getPercentile(0.900d));
        outputStream.printf(" %-20s : %s\n", "99.0%", literalCounts.getPercentile(0.990d));
        outputStream.printf(" %-20s : %s\n", "99.9%", literalCounts.getPercentile(0.999d));
        outputStream.printf(" %-20s : %8.8f%%\n", "percentile of 100", literalCounts.findPercentile(100) * 100.0d);
        outputStream.printf(" %-20s : %s\n", "max", literalCounts.getMaximum());
        globalCounters.printTo("\nSummary : \n", " %-20s : %s\n", outputStream);
    }

    @Override
    public void onStatement(String sql, SourcePosition sp, Statement s) throws Throwable {
        saveIntermediateCheckpoint();
        globalCounters.increment(GlobalCounters.PROCESSED_STATEMENTS);
        if (s instanceof EmptyStatement) {
            globalCounters.increment(GlobalCounters.EMPTY_STATEMENTS);
            return;
        }
        if (!s.isDML()) {
            trackNonDML(s, sql);
            return;
        }
        final Emitter emitter = Singletons.require(DBNative.class).getEmitter();
        emitter.setOptions(emitOptions);
        final StringBuilder buf = new StringBuilder();
        final DMLStatement dmls = (DMLStatement) s;
        final DMLCounters incCounter = lookupDMLCounter(dmls);
        if (dmls instanceof InsertIntoValuesStatement) {
            final InsertIntoValuesStatement iivs = (InsertIntoValuesStatement) dmls;
            emitter.emitInsertPrefix(tee.getPersistenceContext(), tee.getPersistenceContext().getValues(), iivs, buf);
            final String prefix = buf.toString();
            InsertEntry ie = inserts.get(prefix);
            if (ie == null) {
                final Database<?> db = dmls.getDatabase(tee.getPersistenceContext());
                ie = new InsertEntry(corpus, prefix, iivs.getColumnSpecification().size(), iivs.getClass().getSimpleName(), (db == null ?
                        null : db.getName().get()));
                inserts.put(prefix, ie);
            }
            ie.bump(iivs.getValues().size());
        } else {
            emitter.emitStatement(tee.getPersistenceContext(), tee.getPersistenceContext().getValues(), dmls, buf);
            final String p = buf.toString();
            NonInsertEntry se = byParam.get(p);
            if (se == null) {
                String shrunk = null;
                int litCount = -1;
                if (!(dmls instanceof InsertIntoValuesStatement)) {
                    final CandidateParser cp = new CandidateParser(sql);
                    if (cp.shrink()) {
                        shrunk = cp.getShrunk();
                        // also verify we get the same number of literals
                        final ValueManager valueManager = tee.getPersistenceContext().getValueManager();
                        litCount = cp.getLiterals().size();
                        if (litCount != valueManager.getNumberOfLiterals()) {
                            final ValueManager.CacheStatus cacheStatus = valueManager.getCacheStatus();
                            String reason;
                            switch (cacheStatus) {
                            case NOCACHE_DYNAMIC_FUNCTION:
                                reason = "contains a non-cacheable dynamic function";
                                break;
                            case NOCACHE_TOO_MANY_LITERALS:
                                reason = "literal count exceeded configured max_cached_plan_literals";
                                break;
                            case CACHEABLE:
                            default:
                                reason = "unknown";
                            }
                            logError(sql, sp, "Mismatched literal size; parse=" + valueManager.getNumberOfLiterals()
                                    + "/shrink=" + litCount + " , reason=" + reason, null, false);
                        }
                    } else {
                        logError(sql, sp, "Unable to shrink", null, false);
                    }
                }
                final Database<?> db = dmls.getDatabase(tee.getPersistenceContext());
                se = new NonInsertEntry(corpus, sql, dmls.getClass().getSimpleName(),
                        (db == null ? null : db.getName().get()), litCount, incCounter);
                byParam.put(p, se);
                if (shrunk != null) {
                    globalCounters.increment(GlobalCounters.SHRINK_CACHE_ADDS);
                    byShrunk.put(shrunk, se);
                }
            }
            if (se.populationObject.getLiteralCount() >= 0) {
                literalCounts.sample(se.populationObject.getLiteralCount());
            }
            se.bump(dmlBreakout, literalCounts);
        }
    }

    private DMLCounters lookupDMLCounter(DMLStatement dmls) {
        if (dmls instanceof DeleteStatement) {
            return DELETE;
        } else if (dmls instanceof UpdateStatement) {
            return UPDATE;
        } else if (dmls instanceof SelectStatement) {
            return SELECT;
        } else if (dmls instanceof UnionStatement) {
            return UNION;
        } else if (dmls instanceof TruncateStatement) {
            return TRUNCATE;
        } else if (dmls instanceof InsertIntoValuesStatement) {
            return INSERT_INTO_VALUES;
        } else if (dmls instanceof InsertIntoSelectStatement) {
            return INSERT_INTO_SELECT;
        } else {
            return DMLCounters.UNKNOWN;
        }
    }

    private NonDMLCounters lookupNonDMLCounter(Statement s) {
        NonDMLCounters incCounter;
        if (s instanceof SessionStatement) {
            incCounter = SESSION;
        } else if (s instanceof AlterStatement) {
            incCounter = ALTER;
        } else if (s instanceof SchemaQueryStatement) {
            incCounter = SHOW;
        } else if (s instanceof PECreateStatement) {
            incCounter = CREATE;
        } else if (s instanceof GrantStatement) {
            incCounter = GRANT;
        } else if (s instanceof DropStatement) {
            incCounter = DROP;
        } else {
            incCounter = NonDMLCounters.UNKNOWN;
        }
        return incCounter;
    }

    @Override
    public void onException(String sql, SourcePosition sp, Throwable t) {
        saveIntermediateCheckpoint();
        logError(sql, sp, t.getMessage(), t, true);
    }

    @Override
    public void onNotice(String sql, SourcePosition sp, String message) {
        logger.warn(message + " [" + sp.getLineInfo().toString() + "]: " + sql);
    }

    protected void saveIntermediateCheckpoint() {
        try {
            checkpoint(false);
        } catch (final Throwable ct) {
            logger.error("Unable to save a checkpoint", ct);
        }
    }

    static final String invokerClass = InvokeParser.class.getName();
    static final String invokerMethod = "parse";

    /**
     * This is a simple format:
     * sql: <the sql>
     * error | warn: <the message>
     */
    private void
            logError(String sql, SourcePosition sp, String message, Throwable cause, boolean error) {
        if (error) {
            globalCounters.increment(GlobalCounters.ERRORS);
        } else {
            globalCounters.increment(GlobalCounters.WARNINGS);
        }
        try {
            final String type = error ? "error" : "warn";
            final String line = "(line=" + sp.getPosition() + ")";
            if (message == null && cause instanceof HasErrorInfo) {
                FormattedErrorInfo formatted = ErrorMapper.makeResponse((HasErrorInfo) cause);
                if (formatted != null)
                    message = formatted.getErrorMessage();
            }
            errorLog.write("---- " + type + " " + line + " ---------------");
            errorLog.newLine();
            errorLog.write("msg: " + message);
            errorLog.newLine();
            errorLog.write("sql: " + sql);
            errorLog.newLine();
            if (error && stackTraces && cause != null) {
                for (StackTraceElement ste : cause.getStackTrace()) {
                    errorLog.write("trace: " + ste);
                    errorLog.newLine();
                    if (invokerClass.equals(ste.getClassName()))
                        break;
                }
            }
        } catch (final Throwable ct) {
            logger.error("Unable to write error log record", ct);
        }
    }

    private void trackNonDML(Statement s, String sql) {
        String rep = null;
        if (s instanceof SchemaQueryStatement) {
            final SchemaQueryStatement sqs = (SchemaQueryStatement) s;
            rep = "show " + sqs.getTag();
        } else {
            rep = sql;
        }
        nonDMLBreakout.increment(lookupNonDMLCounter(s));
        trackOther(rep);
    }

    private void trackOther(String sql) {
        StatementNonDMLType nd = null;
        final String shrunk = CandidateParser.shrinkAnything(sql);
        if (shrunk != null) {
            nd = otherByShrunk.get(shrunk);
        }
        if (nd == null) {
            nd = otherByInput.get(sql);
        }
        if (nd == null) {
            nd = new StatementNonDMLType();
            nd.setStmt(sql);
            otherByInput.put(sql, nd);
            if (shrunk != null) {
                otherByShrunk.put(shrunk, nd);
            }
            corpus.getNonDml().add(nd);
        }
        nd.setFreq(nd.getFreq() + 1);
    }

    private DbAnalyzerCorpus sortByFrequency() {
        final MultiMap<Integer, StatementPopulationType> byFrequency = new MultiMap<Integer, StatementPopulationType>();
        for (final StatementPopulationType spt : corpus.getPopulation()) {
            byFrequency.put(spt.getFreq(), spt);
        }
        final List<Integer> freqs = new ArrayList<Integer>(byFrequency.keySet());
        Collections.sort(freqs, Collections.reverseOrder());
        corpus.getPopulation().clear();
        for (final Integer i : freqs) {
            final Collection<StatementPopulationType> sub = byFrequency.get(i);
            if ((sub == null) || sub.isEmpty()) {
                continue;
            }
            corpus.getPopulation().addAll(sub);
        }
        return corpus;
    }

    private static class NonInsertEntry {

        private final StatementNonInsertType populationObject;
        private final DMLCounters dmlCategory;

        public NonInsertEntry(DbAnalyzerCorpus reportObject, String rep, String kind, String db, int literals, DMLCounters incCounter) {
            this.populationObject = new StatementNonInsertType();
            this.populationObject.setStmt(rep);
            this.populationObject.setKind(kind);
            this.populationObject.setDb(db);
            this.populationObject.setLiteralCount(literals);
            reportObject.getPopulation().add(populationObject);
            dmlCategory = incCounter;
        }

        public void bump(EnumStatsCollector<DMLCounters> dmlBreakout, IntegerHistogram literalCounts) {
            populationObject.setFreq(populationObject.getFreq() + 1);
            dmlBreakout.increment(dmlCategory);
            literalCounts.sample(populationObject.getLiteralCount(), 1L);
        }
    }

    private static class InsertEntry {

        private final StatementInsertIntoValuesType populationObject;
        private final HashMap<Integer, InsertTuples> byWidth;

        public InsertEntry(DbAnalyzerCorpus reportObject, String prefix, int colwidth, String kind, String db) {
            this.populationObject = new StatementInsertIntoValuesType();
            this.populationObject.setInsertPrefix(prefix);
            this.populationObject.setKind(kind);
            this.populationObject.setDb(db);
            this.populationObject.setColWidth(colwidth);
            reportObject.getPopulation().add(populationObject);
            byWidth = new HashMap<Integer, InsertTuples>();
        }

        public void bump(int width) {
            InsertTuples tuple = byWidth.get(width);
            if (tuple == null) {
                tuple = new InsertTuples();
                tuple.setTupleCount(width);
                tuple.setTuplePop(0);
                byWidth.put(width, tuple);
                populationObject.getPopulation().add(tuple);
            }
            tuple.setTuplePop(tuple.getTuplePop() + 1);
            populationObject.setFreq(populationObject.getFreq() + 1);
        }
    }

    private static class CachingInvoker extends AnalyzerInvoker {

        public CachingInvoker(Analyzer a) {
            super(a);
        }

        // Skip the full parse when the shrunk form of this statement has already
        // been seen; just bump the counters on the cached entry instead.
        @Override
        public boolean omit(LineInfo info, String sql) {
            globalCounters.increment(GlobalCounters.LINES);
            final StatementCounter sc = (StatementCounter) sink;
            if (CandidateParser.isInsert(sql)) {
                return false;
            }
            final CandidateParser cp = new CandidateParser(sql);
            if (cp.shrink()) {
                final String shrunk = cp.getShrunk();
                final NonInsertEntry se = sc.byShrunk.get(shrunk);
                if (se != null) {
                    globalCounters.increment(GlobalCounters.SHRINK_CACHE_HITS);
                    sc.saveIntermediateCheckpoint();
                    se.bump(dmlBreakout, literalCounts);
                    return true;
                }
                globalCounters.increment(GlobalCounters.SHRINK_CACHE_MISSES);
            }
            return false;
        }
    }
}