package org.olap4j; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Scanner; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import edu.kit.aifb.exrunner.ExperimentRun; import edu.kit.aifb.exrunner.model.ExperimentSystem; import edu.kit.aifb.exrunner.model.ParameterSet; import edu.kit.aifb.exrunner.model.ParameterSetProvider; import edu.kit.aifb.exrunner.model.attribute.Attribute; import edu.kit.aifb.exrunner.model.attribute.IntAttribute; import edu.kit.aifb.exrunner.model.attribute.StringAttribute; /** * Parse an Apache log file with StringTokenizer * * Results in sqlite can be queried as follows: * * sqlite3 -header /home/benedikt/Workspaces/Git-Repositories/olap4ld/OLAP4LD-trunk/testresources/bottleneck.db 'select querytime,queryname,avg(triplescount),avg(lookupscount),avg(loadvalidatedatasetstime),avg(executemetadataqueriestime),avg(generatelogicalqueryplantime),avg(generatephysicalqueryplantime),avg(executephysicalqueryplantime) from bottleneck group by queryname order by triplescount, queryname desc' * * If there is an MDX query, the logs look different. * * If there is not, we need to add/remove an earlier complete = true. * * An error with the sqlite may be because of "incomplete" query logs loaded. * */ public class LDCX_Performance_Evaluation_LogParse_Experiments { /* * We are interested in creating such a table: */ public static class LDCX_Query_Attributes { public static final Attribute QUERYTIME = new StringAttribute( "querytime"); public static final Attribute QUERYNAME = new StringAttribute( "queryname"); public static final Attribute DATASETSCOUNT = new IntAttribute( "datasetscount"); public static final Attribute TRIPLESCOUNT = new IntAttribute( "triplescount"); public static final Attribute OBSERVATIONSCOUNT = new IntAttribute( "observationscount"); public static final Attribute LOOKUPSCOUNT = new IntAttribute( "lookupscount"); public static final Attribute LOADVALIDATEDATASETSTIME = new IntAttribute( "loadvalidatedatasetstime"); public static final Attribute EXECUTEMETADATAQUERIESTIME = new IntAttribute( "executemetadataqueriestime"); public static final Attribute EXECUTEMETADATAQUERIESCOUNT = new IntAttribute( "executemetadataqueriescount"); public static final Attribute GENERATELOGICALQUERYPLANTIME = new IntAttribute( "generatelogicalqueryplantime"); public static final Attribute GENERATEPHYSICALQUERYPLANTIME = new IntAttribute( "generatephysicalqueryplantime"); public static final Attribute EXECUTEPHYSICALQUERYPLAN = new IntAttribute( "executephysicalqueryplantime"); } public static class LDCX_ParameterSetProvider extends ParameterSetProvider { public LDCX_ParameterSetProvider() { registerAttributes(LDCX_Query_Attributes.QUERYTIME, LDCX_Query_Attributes.QUERYNAME, LDCX_Query_Attributes.DATASETSCOUNT, LDCX_Query_Attributes.TRIPLESCOUNT, LDCX_Query_Attributes.OBSERVATIONSCOUNT, LDCX_Query_Attributes.LOOKUPSCOUNT, LDCX_Query_Attributes.LOADVALIDATEDATASETSTIME, LDCX_Query_Attributes.EXECUTEMETADATAQUERIESTIME, LDCX_Query_Attributes.EXECUTEMETADATAQUERIESCOUNT, LDCX_Query_Attributes.GENERATELOGICALQUERYPLANTIME, LDCX_Query_Attributes.GENERATEPHYSICALQUERYPLANTIME, LDCX_Query_Attributes.EXECUTEPHYSICALQUERYPLAN); } @Override public List<ParameterSet> getParameterSets() { List<ParameterSet> params = Lists.newArrayList(); // Parse log file and add. List<QueryLog> queryloglist = parseLogs(); // Now we have all querylogs System.out .println("Querytime | Query | Triples | Loading and validating dataset | Generating logical query plan | Executing logical query plan | Transmission"); for (QueryLog queryLog : queryloglist) { params.add(new QueryLogParameterSet(queryLog)); } return params; } /** * Parses olap4ld logs via regex specified for info logs. * * @return */ public List<QueryLog> parseLogs() { List<QueryLog> queryloglist = new ArrayList<QueryLog>(); // Example // Pattern p = Pattern.compile("^[a-zA-Z]+([0-9]+).*"); // Matcher m = p.matcher("Testing123Testing"); // // if (m.find()) { // System.out.println(m.group(1)); // } Scanner s; try { s = new Scanner(SCANNEDFILE); QueryLog query = new QueryLog(); System.out.println("Lines"); // Full example boolean complete = false; while (s.hasNextLine()) { String line = s.nextLine(); // System.out.println(line); // Oct 19, 2013 9:19:24 PM // org.olap4j.driver.olap4ld.Olap4ldStatement // executeOlapQuery if (line.contains("org.olap4j.driver.olap4ld.Olap4ldStatement executeOlapQuery")) { query.querytime = line .replace( "org.olap4j.driver.olap4ld.Olap4ldStatement executeOlapQuery", ""); } // INFO: Parse MDX: SELECT /* $session: // ldcx_performance_evaluation_testSsbOlapCustomers */ NON // EMPTY // {[httpXXX3AXXX2FXXX2Folap4ldYYYgooglecodeYYYcomXXX2FgitXXX2FOLAP4LDZZZtrunkXXX2FtestsXXX2Fssb001XXX2FttlXXX2FexampleYYYttlXXX23lo_discount],[httpXXX3AXXX2FXXX2Folap4ldYYYgooglecodeYYYcomXXX2FgitXXX2FOLAP4LDZZZtrunkXXX2FtestsXXX2Fssb001XXX2FttlXXX2FexampleYYYttlXXX23lo_extendedprice],[httpXXX3AXXX2FXXX2Folap4ldYYYgooglecodeYYYcomXXX2FgitXXX2FOLAP4LDZZZtrunkXXX2FtestsXXX2Fssb001XXX2FttlXXX2FexampleYYYttlXXX23lo_quantity],[httpXXX3AXXX2FXXX2Folap4ldYYYgooglecodeYYYcomXXX2FgitXXX2FOLAP4LDZZZtrunkXXX2FtestsXXX2Fssb001XXX2FttlXXX2FexampleYYYttlXXX23lo_revenue],[httpXXX3AXXX2FXXX2Folap4ldYYYgooglecodeYYYcomXXX2FgitXXX2FOLAP4LDZZZtrunkXXX2FtestsXXX2Fssb001XXX2FttlXXX2FexampleYYYttlXXX23lo_supplycost]} // ON COLUMNS, NON EMPTY // {Members([httpXXX3AXXX2FXXX2Folap4ldYYYgooglecodeYYYcomXXX2FgitXXX2FOLAP4LDZZZtrunkXXX2FtestsXXX2Fssb001XXX2FttlXXX2FexampleYYYttlXXX23lo_custkeyCodeList])} // ON ROWS FROM // [httpXXX3AXXX2FXXX2Folap4ldYYYgooglecodeYYYcomXXX2FgitXXX2FOLAP4LDZZZtrunkXXX2FtestsXXX2Fssb001XXX2FttlXXX2FexampleYYYttlXXX23ds] if (line.contains("Parse MDX: SELECT ")) { // Complete would be: // "^INFO: Parse MDX: SELECT /\\\\*(.+)\\\\*/.*" Pattern p = Pattern.compile("^.*/\\*(.+)\\*/.*"); Matcher m = p.matcher(line); if (m.find()) { System.out.println(m.group(1)); query.queryname = new String(m.group(1)); // We only use distinguishing part query.queryname = query.queryname.replace( "$session: ldcx_performance_evaluation_", ""); } } // INFO: Parse MDX: finished in 97ms. if (line.contains("Parse MDX: finished")) { Pattern p = Pattern.compile("^.*finished in (.+)ms.*"); Matcher m = p.matcher(line); if (m.find()) { System.out.println(m.group(1)); query.parsemdxtime = new Integer(m.group(1)); } } // INFO: Execute metadata query on Linked Data Engine: // finished // in 4038ms. if (line.contains("Execute metadata query on Linked Data Engine: finished")) { Pattern p = Pattern.compile("^.*finished in (.+)ms.*"); Matcher m = p.matcher(line); if (m.find()) { System.out.println(m.group(1)); if (query.executemetadataqueriestime == -1) { query.executemetadataqueriestime = new Integer( m.group(1)); query.executemetadataqueriescount = 1; } else { // Increase query.executemetadataqueriestime += new Integer( m.group(1)); query.executemetadataqueriescount++; } } } // INFO: Load dataset: 2 datasets to load. Pattern myp = Pattern .compile("^.*Load dataset: (.+) datasets crawled.*"); Matcher mym = myp.matcher(line); if (mym.find()) { System.out.println(mym.group(1)); if (query.datasetscount < (new Integer(mym.group(1)))) { query.datasetscount = new Integer(mym.group(1)); } } // INFO: Load dataset: loading 175 triples finished in // 3933ms. if (line.contains("Load dataset: directed crawling algorithm finished in")) { // Two groups not used, anymore. // Pattern p = Pattern // .compile("^.*loading (.+) triples finished in (.+)ms.*"); Pattern p = Pattern .compile("^.*directed crawling algorithm finished in (.+)ms.*"); Matcher m = p.matcher(line); if (m.find()) { if (query.directedcrawlingdatasetstime == -1) { System.out.println("Directed crawling:" + m.group(1)); query.directedcrawlingdatasetstime = new Integer( m.group(1)); } else { System.out.println("Directed crawling:" + m.group(1)); query.directedcrawlingdatasetstime += new Integer( m.group(1)); } } } // INFO: Run normalisation algorithm on dataset: finished in // 17ms. if (line.contains("Run normalisation algorithm on dataset: finished")) { Pattern p = Pattern.compile("^.*finished in (.+)ms.*"); Matcher m = p.matcher(line); if (m.find()) { if (query.normalisationtime == -1) { System.out.println(m.group(1)); query.normalisationtime = new Integer( m.group(1)); } else { System.out.println(m.group(1)); query.normalisationtime += new Integer( m.group(1)); } } } // INFO: Check integrity constraints on dataset: finished in // 70ms. if (line.contains("Check integrity constraints on dataset: finished")) { Pattern p = Pattern.compile("^.*finished in (.+)ms.*"); Matcher m = p.matcher(line); if (m.find()) { if (query.integrityconstraintstime == -1) { System.out.println(m.group(1)); query.integrityconstraintstime = new Integer( m.group(1)); } else { System.out.println(m.group(1)); query.integrityconstraintstime += new Integer( m.group(1)); } } } // INFO: Lookup on resource: http:// if (line.contains("Lookup on resource:")) { Pattern p = Pattern.compile("^.*Lookup on resource:.*"); Matcher m = p.matcher(line); if (m.find()) { if (query.lookupscount == -1) { query.lookupscount = 1; } else { query.lookupscount++; } } } // INFO: Load datasets: Number of loaded triples for all // datasets: 500 if (line.contains("Load datasets: Number of loaded triples for all datasets:")) { Pattern p = Pattern .compile("^.*Load datasets: Number of loaded triples for all datasets: (.+)"); Matcher m = p.matcher(line); if (m.find()) { // At the end will be largest one query.triplescount = new Integer(m.group(1)); } } // INFO: Load datasets: Number of observations for all // datasets: 500 if (line.contains("Load datasets: Number of observations for all datasets:")) { Pattern p = Pattern .compile("^.*Load datasets: Number of observations for all datasets: (.+)"); Matcher m = p.matcher(line); if (m.find()) { query.observationscount = new Integer(m.group(1)); } } // INFO: Transform MDX parse tree: finished in 5131ms. if (line.contains("Transform MDX parse tree: finished")) { Pattern p = Pattern.compile("^.*finished in (.+)ms.*"); Matcher m = p.matcher(line); if (m.find()) { System.out.println(m.group(1)); query.transformmdxparsetreetime = new Integer( m.group(1)); } } if (line.contains("Execute logical query plan: Generate physical query plan finished")) { Pattern p = Pattern.compile("^.*finished in (.+)ms.*"); Matcher m = p.matcher(line); if (m.find()) { System.out.println(m.group(1)); query.generatephysicalqueryplantime = new Integer( m.group(1)); } } if (line.contains("Execute logical query plan: Execute physical query plan finished in")) { Pattern p = Pattern.compile("^.*finished in (.+)ms.*"); Matcher m = p.matcher(line); if (m.find()) { System.out.println(m.group(1)); query.executephysicalqueryplantime = new Integer( m.group(1)); } // XXX Has to be added for LQP test since no caching complete = true; } if (line.contains("Execute logical query plan: Cache results finished in")) { Pattern p = Pattern.compile("^.*finished in (.+)ms.*"); Matcher m = p.matcher(line); if (m.find()) { System.out.println(m.group(1)); query.cacheresultstime = new Integer(m.group(1)); } complete = true; } // // This information is not available from olap4ld log // directly. Instead, we compute the userquery time by // waiting until the next query can be issued. // if // (line.contains("Run performance evaluation query: finished")) // { // // Pattern p = Pattern.compile("^.*finished in (.+)ms.*"); // Matcher m = p.matcher(line); // // if (m.find()) { // System.out.println(m.group(1)); // query.userquerytime = new Integer( // m.group(1)); // } // // // // continue; // } if (complete) { // Now, we can create a new query log queryloglist.add(query); query = new QueryLog(); complete = false; } } // Store all that has been collected, so far. // if (!complete) { // queryloglist.add(query); // } s.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } return queryloglist; } } public static class QueryLogParameterSet extends ParameterSet { public QueryLogParameterSet(QueryLog queryLog) { /* * Computes the necessary metrics: */ m_attrValues.put(LDCX_Query_Attributes.QUERYTIME, queryLog.querytime); m_attrValues.put(LDCX_Query_Attributes.QUERYNAME, queryLog.queryname); m_attrValues.put(LDCX_Query_Attributes.TRIPLESCOUNT, queryLog.triplescount); m_attrValues.put(LDCX_Query_Attributes.DATASETSCOUNT, queryLog.datasetscount); m_attrValues.put(LDCX_Query_Attributes.OBSERVATIONSCOUNT, queryLog.observationscount); m_attrValues.put(LDCX_Query_Attributes.LOOKUPSCOUNT, queryLog.lookupscount); m_attrValues.put(LDCX_Query_Attributes.LOADVALIDATEDATASETSTIME, queryLog.directedcrawlingdatasetstime + queryLog.normalisationtime + queryLog.integrityconstraintstime); // Included getCubes, therefore remove dataset loading time from it m_attrValues .put(LDCX_Query_Attributes.EXECUTEMETADATAQUERIESTIME, queryLog.executemetadataqueriestime - (queryLog.directedcrawlingdatasetstime + queryLog.normalisationtime + queryLog.integrityconstraintstime)); m_attrValues.put(LDCX_Query_Attributes.EXECUTEMETADATAQUERIESCOUNT, queryLog.executemetadataqueriescount); m_attrValues.put( LDCX_Query_Attributes.GENERATELOGICALQUERYPLANTIME, queryLog.parsemdxtime + queryLog.transformmdxparsetreetime - queryLog.executemetadataqueriestime); m_attrValues.put( LDCX_Query_Attributes.GENERATEPHYSICALQUERYPLANTIME, queryLog.generatephysicalqueryplantime); m_attrValues.put(LDCX_Query_Attributes.EXECUTEPHYSICALQUERYPLAN, queryLog.executephysicalqueryplantime + queryLog.cacheresultstime); } } public static class Bottleneck_ExperimentSystem extends ExperimentSystem { public Bottleneck_ExperimentSystem() { super("bottleneck"); } @Override public Map<Attribute, Object> open() throws IOException { return ImmutableMap.<Attribute, Object> of(); } @Override public void close() throws IOException { } @Override public Map<Attribute, Object> execute(ParameterSet query) throws IOException { return ImmutableMap.<Attribute, Object> of(); } } /** * Common fields for Apache Log demo. */ public static class QueryLog { // Ordered as happening in stream String querytime = "Default"; // Name of query String queryname = "Default"; // Time parsing MDX long parsemdxtime = 0; // Metadata queries long executemetadataqueriestime = 0; long executemetadataqueriescount = 0; // Datasets long datasetscount = 0; // Crawling long directedcrawlingdatasetstime = 0; // Normalisation long normalisationtime = 0; // Validation long integrityconstraintstime = 0; // Lookups long lookupscount = 0; // Triples long triplescount = 0; // Observations long observationscount = 0; // Transform MDX parse tree to logical query plan long transformmdxparsetreetime = 0; // Execute logical query plan long generatephysicalqueryplantime = 0; long executephysicalqueryplantime = 0; long cacheresultstime = 0; } private static final File SCANNEDFILE = new File( "/home/benedikt/Workspaces/Git-Repositories/olap4ld/OLAP4LD-trunk/log/olap4ld_0.log"); // private static final File SCANNEDFILE = new // File("/media/84F01919F0191352/Projects/2013/paper/paper-olap4ld-demo/Experiments/PerformanceStudy/catalina_bigger_5_runs_test_including_slice_2013-10-22_v2.out"); // private static final File SCANNEDFILE = new // File("/media/84F01919F0191352/Projects/2013/paper/paper-olap4ld-demo/Experiments/PerformanceStudy/catalina_bigger_5_runs_test_2013-10-20.out"); private static final File SQLITEFILE = new File( "/home/benedikt/Workspaces/Git-Repositories/olap4ld/OLAP4LD-trunk/testresources/bottleneck.db"); public static void main(String argv[]) throws ClassNotFoundException, SQLException, IOException { ExperimentRun run1 = new ExperimentRun( new Bottleneck_ExperimentSystem(), new LDCX_ParameterSetProvider(), 1, SQLITEFILE, null); run1.run(); } }