/* * ReportGenerator.java * * Version: $Revision: 3734 $ * * Date: $Date: 2009-04-24 04:00:19 +0000 (Fri, 24 Apr 2009) $ * * Copyright (c) 2002-2009, The DSpace Foundation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the DSpace Foundation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ package org.dspace.app.statistics; import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.sql.SQLException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.dspace.content.DCValue; import org.dspace.content.Item; import org.dspace.core.ConfigurationManager; import org.dspace.core.Context; import org.dspace.handle.HandleManager; /** * This class performs the action of coordinating a usage report being * generated using the standard internal aggregation file format as a basis. * All it's configuration information must come from that file. There is the * opportunity for different output format options such as HTML. * * Use the -help flag for more information * * @author Richard Jones */ public class ReportGenerator { // set up our class globals ///////////////// // aggregators ///////////////// /** aggregator for all actions performed in the system */ private static Map actionAggregator; /** aggregator for all searches performed */ private static Map searchAggregator; /** aggregator for user logins */ private static Map userAggregator; /** aggregator for item views */ private static Map itemAggregator; /** aggregator for current archive state statistics */ private static Map archiveStats; ////////////////// // statistics config data ////////////////// /** bottom limit to output for search word analysis */ private static int searchFloor; /** bottom limit to output for item view analysis */ private static int itemFloor; /** number of items from most popular to be looked up in the database */ private static int itemLookup; /** mode to use for user email display */ private static String userEmail; /** URL of the service being analysed */ private static String url; /** Name of the service being analysed */ private static String name; /** average number of views per item */ private static int avgItemViews; /** name of the server being analysed */ private static String serverName; /** start date of this report */ private static Date startDate = null; /** end date of this report */ private static Date endDate = null; /** the time taken to build the aggregation file from the log */ private static int processTime; /** the number of log lines analysed */ private static int logLines; /** the number of warnings encountered */ private static int warnings; /** the list of results to be displayed in the general summary */ private static List generalSummary; ////////////////// // regular expressions ////////////////// /** pattern that matches an unqualified aggregator property */ private static Pattern real = Pattern.compile("^(.+)=(.+)"); ////////////////////////// // Miscellaneous variables ////////////////////////// /** process timing clock */ private static Calendar startTime = null; /** a map from log file action to human readable action */ private static Map actionMap = null; ///////////////// // report generator config data //////////////// /** the input file to build the report from */ private static String input = null; /** the log file action to human readable action map */ private static String map = ConfigurationManager.getProperty("dspace.dir") + File.separator + "config" + File.separator + "dstat.map"; /** * main method to be run from command line. See usage information for * details as to how to use the command line flags */ public static void main(String [] argv) throws Exception, SQLException { // create context as super user Context context = new Context(); context.setIgnoreAuthorization(true); String myFormat = null; String myInput = null; String myOutput = null; String myMap = null; // read in our command line options for (int i = 0; i < argv.length; i++) { if (argv[i].equals("-format")) { myFormat = argv[i+1].toLowerCase(); } if (argv[i].equals("-in")) { myInput = argv[i+1]; } if (argv[i].equals("-out")) { myOutput = argv[i+1]; } if (argv[i].equals("-map")) { myMap = argv[i+1]; } if (argv[i].equals("-help")) { usage(); System.exit(0); } } processReport(context, myFormat, myInput, myOutput, myMap); } /** * using the pre-configuration information passed here, read in the * aggregation data and output a file containing the report in the * requested format * * this method is retained for backwards compatibility, but delegates the actual * wprk to a new method * * @param context the DSpace context in which this action is performed * @param myFormat the desired output format (currently on HTML supported) * @param myInput the aggregation file to be turned into a report * @param myOutput the file into which to write the report */ public static void processReport(Context context, String myFormat, String myInput, String myOutput, String myMap) throws Exception, SQLException { // create the relevant report type // FIXME: at the moment we only support HTML report generation Report report = null; if (myFormat.equals("html")) { report = new HTMLReport(); ((HTMLReport)report).setOutput(myOutput); } if (myMap != null) { map = myMap; } ReportGenerator.processReport(context, report, myInput); } /** * using the pre-configuration information passed here, read in the * aggregation data and output a file containing the report in the * requested format */ public static void processReport(Context context, Report report, String myInput) throws Exception, SQLException { startTime = new GregorianCalendar(); /** instantiate aggregators */ actionAggregator = new HashMap(); searchAggregator = new HashMap(); userAggregator = new HashMap(); itemAggregator = new HashMap(); archiveStats = new HashMap(); actionMap = new HashMap(); /** instantite lists */ generalSummary = new ArrayList(); // set the parameters for this analysis setParameters(myInput); // pre prepare our standard file readers and buffered readers FileReader fr = null; BufferedReader br = null; // read the input file readInput(input); // load the log file action to human readable action map readMap(map); report.setStartDate(startDate); report.setEndDate(endDate); report.setMainTitle(name, serverName); // define our standard variables for re-use // FIXME: we probably don't need these once we've finished re-factoring Iterator keys = null; int i = 0; String explanation = null; int value; // FIXME: All of these sections should probably be buried in their own // custom methods Statistics overview = new Statistics(); overview.setSectionHeader("General Overview"); Iterator summaryEntries = generalSummary.iterator(); while (summaryEntries.hasNext()) { String entry = (String) summaryEntries.next(); if (actionAggregator.containsKey(entry)) { int count = Integer.parseInt((String) actionAggregator.get(entry)); overview.add(new Stat(translate(entry), count)); } } report.addBlock(overview); // prepare the archive statistics package if (archiveStats.size() > 0) { Statistics archiveInfo = prepareStats(archiveStats, true, false); archiveInfo.setSectionHeader("Archive Information"); archiveInfo.setStatName("Content Type"); archiveInfo.setResultName("Number of items"); report.addBlock(archiveInfo); } // process the items in preparation to be displayed. This includes sorting // by view number, building the links, and getting further info where // necessary Statistics viewedItems = new Statistics("Item/Handle", "Number of views", itemFloor); viewedItems.setSectionHeader("Items Viewed"); Stat[] items = new Stat[itemAggregator.size()]; keys = itemAggregator.keySet().iterator(); i = 0; while (keys.hasNext()) { String key = (String) keys.next(); String link = url + "handle/" + key; value = Integer.parseInt((String) itemAggregator.get(key)); items[i] = new Stat(key, value, link); i++; } Arrays.sort(items); String info = null; for (i = 0; i < items.length; i++) { // Allow negative value to say that all items should be looked up if (itemLookup < 0 || i < itemLookup) { info = getItemInfo(context, items[i].getKey()); } // if we get something back from the db then set it as the key, // else just use the link if (info != null) { items[i].setKey(info + " (" + items[i].getKey() + ")"); } else { items[i].setKey(items[i].getReference()); } // reset the info register info = null; } viewedItems.add(items); report.addBlock(viewedItems); // prepare a report of the full action statistics Statistics fullInfo = prepareStats(actionAggregator, true, true); fullInfo.setSectionHeader("All Actions Performed"); fullInfo.setStatName("Action"); fullInfo.setResultName("Number of times"); report.addBlock(fullInfo); // prepare the user login statistics package if (!userEmail.equals("off")) { Statistics userLogins = prepareStats(userAggregator, true, false); userLogins.setSectionHeader("User Logins"); userLogins.setStatName("User"); userLogins.setResultName("Number of logins"); if (userEmail.equals("alias")) { explanation = "(distinct addresses)"; userLogins.setExplanation(explanation); } report.addBlock(userLogins); } // prepare the search word statistics package Statistics searchWords = prepareStats(searchAggregator, true, false); searchWords.setSectionHeader("Words Searched"); searchWords.setStatName("Word"); searchWords.setResultName("Number of searches"); searchWords.setFloor(searchFloor); report.addBlock(searchWords); // FIXME: because this isn't an aggregator it can't be passed to // prepareStats; should we overload this method for use with this kind // of data? // prepare the average item views statistics if (avgItemViews > 0) { Statistics avg = new Statistics(); avg.setSectionHeader("Averaging Information"); Stat[] average = new Stat[1]; average[0] = new Stat("Average views per item", avgItemViews); avg.add(average); report.addBlock(avg); } // prepare the log line level statistics // FIXME: at the moment we only know about warnings, but future versions // should aggregate all log line levels and display here Statistics levels = new Statistics("Level", "Number of lines"); levels.setSectionHeader("Log Level Information"); Stat[] level = new Stat[1]; level[0] = new Stat("Warnings", warnings); levels.add(level); report.addBlock(levels); // get the display processing time information Calendar endTime = new GregorianCalendar(); long timeInMillis = (endTime.getTimeInMillis() - startTime.getTimeInMillis()); int outputProcessTime = (new Long(timeInMillis).intValue() / 1000); // prepare the processing information statistics Statistics process = new Statistics("Operation", ""); process.setSectionHeader("Processing Information"); Stat[] proc = new Stat[3]; proc[0] = new Stat("Log Processing Time", processTime); proc[0].setUnits("seconds"); proc[1] = new Stat("Output Processing Time", outputProcessTime); proc[1].setUnits("seconds"); proc[2] = new Stat("Log File Lines Analysed", logLines); proc[2].setUnits("lines"); process.add(proc); report.addBlock(process); report.render(); return; } /** * a standard stats block preparation method for use when an aggregator * has to be put out in its entirity. This method will not be able to * deal with complex cases, although it will perform sorting by value and * translations as per the map file if requested * * @param aggregator the aggregator that should be converted * @param sort should the resulting stats be sorted by value * @param translate translate the stat name using the map file * * @return a Statistics object containing all the relevant information */ public static Statistics prepareStats(Map aggregator, boolean sort, boolean translate) { Stat[] stats = new Stat[aggregator.size()]; if (aggregator.size() > 0) { Iterator keys = aggregator.keySet().iterator(); int i = 0; while (keys.hasNext()) { String key = (String) keys.next(); int value = Integer.parseInt((String) aggregator.get(key)); if (translate) { stats[i] = new Stat(translate(key), value); } else { stats[i] = new Stat(key, value); } i++; } if (sort) { Arrays.sort(stats); } } // add the results to the statistics object Statistics statistics = new Statistics(); statistics.add(stats); return statistics; } /** * look the given text up in the action map table and return a translated * value if one exists. If no translation exists the original text is * returned * * @param text the text to be translated * * @return a string containing either the translated text or the original * text */ public static String translate(String text) { if (actionMap.containsKey(text)) { return (String) actionMap.get(text); } else { return text; } } /** * read in the action map file which converts log file line actions into * actions which are more understandable to humans * * @param map the map file */ public static void readMap(String map) throws IOException { FileReader fr = null; BufferedReader br = null; try { // read in the map file, printing a warning if none is found String record = null; try { fr = new FileReader(map); br = new BufferedReader(fr); } catch (IOException e) { System.err.println("Failed to read map file: log file actions will be displayed without translation"); return; } // loop through the map file and read in the values while ((record = br.readLine()) != null) { Matcher matchReal = real.matcher(record); // if the line is real then read it in if (matchReal.matches()) { actionMap.put(matchReal.group(1).trim(), matchReal.group(2).trim()); } } } finally { if (br != null) try { br.close(); } catch (IOException ioe) { } if (fr != null) try { fr.close(); } catch (IOException ioe) { } } } /** * set the passed parameters up as global class variables. This has to * be done in a separate method because the API permits for running from * the command line with args or calling the processReport method statically * from elsewhere * * @param myInput regex for log file names */ public static void setParameters(String myInput) { if (myInput != null) { input = myInput; } return; } /** * read the input file and populate all the class globals with the contents * The values that come from this file form the basis of the analysis report * * @param input the aggregator file */ public static void readInput(String input) throws IOException, ParseException { FileReader fr = null; BufferedReader br = null; // read in the analysis information, throwing an error if we fail to open // the given file String record = null; try { fr = new FileReader(input); br = new BufferedReader(fr); } catch (IOException e) { System.out.println("Failed to read input file: " + input); return; } // first initialise a date format object to do our date processing // if necessary SimpleDateFormat sdf = new SimpleDateFormat("dd'/'MM'/'yyyy"); // FIXME: although this works, it is not very elegant // loop through the aggregator file and read in the values while ((record = br.readLine()) != null) { // match real lines Matcher matchReal = real.matcher(record); // pre-prepare our input strings String section = null; String key = null; String value = null; // temporary string to hold the left hand side of the equation String left = null; // match the line or skip this record if (matchReal.matches()) { // lift the values out of the matcher's result groups left = matchReal.group(1).trim(); value = matchReal.group(2).trim(); // now analyse the left hand side, splitting by ".", taking the // first token as the section and the remainder of the string // as they key if it exists StringTokenizer tokens = new StringTokenizer(left, "."); int numTokens = tokens.countTokens(); if (tokens.hasMoreTokens()) { section = tokens.nextToken(); if (numTokens > 1) { key = left.substring(section.length() + 1); } else { key = ""; } } } else { continue; } // if the line is real, then we carry on // read the analysis contents in if (section.equals("archive")) { archiveStats.put(key, value); } else if (section.equals("action")) { actionAggregator.put(key, value); } else if (section.equals("user")) { userAggregator.put(key, value); } else if (section.equals("search")) { searchAggregator.put(key, value); } else if (section.equals("item")) { itemAggregator.put(key, value); } else if (section.equals("user_email")) { userEmail = value; } else if (section.equals("item_floor")) { itemFloor = Integer.parseInt(value); } else if (section.equals("search_floor")) { searchFloor = Integer.parseInt(value); } else if (section.equals("host_url")) { url = value; } else if (section.equals("item_lookup")) { itemLookup = Integer.parseInt(value); } else if (section.equals("avg_item_views")) { try { avgItemViews = Integer.parseInt(value); } catch (NumberFormatException e) { avgItemViews = 0; } } else if (section.equals("server_name")) { serverName = value; } else if (section.equals("service_name")) { name = value; } else if (section.equals("start_date")) { startDate = sdf.parse(value); } else if (section.equals("end_date")) { endDate = sdf.parse(value); } else if (section.equals("analysis_process_time")) { processTime = Integer.parseInt(value); } else if (section.equals("general_summary")) { generalSummary.add(value); } else if (section.equals("log_lines")) { logLines = Integer.parseInt(value); } else if (section.equals("warnings")) { warnings = Integer.parseInt(value); } } // close the inputs br.close(); fr.close(); } /** * get the information for the item with the given handle * * @param context the DSpace context we are operating under * @param handle the handle of the item being looked up, in the form * 1234/567 and so forth * * @return a string containing a reference (almost citation) to the * article */ public static String getItemInfo(Context context, String handle) throws SQLException { Item item = null; // ensure that the handle exists try { item = (Item) HandleManager.resolveToObject(context, handle); } catch (Exception e) { return null; } // if no handle that matches is found then also return null if (item == null) { return null; } // build the referece // FIXME: here we have blurred the line between content and presentation // and it should probably be un-blurred DCValue[] title = item.getDC("title", null, Item.ANY); DCValue[] author = item.getDC("contributor", "author", Item.ANY); StringBuffer authors = new StringBuffer(); if (author.length > 0) { authors.append("(" + author[0].value); } if (author.length > 1) { authors.append(" et al"); } if (author.length > 0) { authors.append(")"); } String content = title[0].value + " " + authors.toString(); return content; } /** * output the usage information to the terminal */ public static void usage() { String usage = "Usage Information:\n" + "ReportGenerator [options [parameters]]\n" + "-format [output format]\n" + "\tRequired\n" + "\tSpecify the format that you would like the output in\n" + "\tOptions:\n" + "\t\thtml\n" + "-in [aggregation file]\n" + "\tRequired\n" + "\tSpecify the aggregation data file to display\n" + "-out [output file]\n" + "\tOptional\n" + "\tSpecify the file to output the report to\n" + "\tDefault uses [dspace log directory]/report\n" + "-map [map file]\n" + "\tOptional\n" + "\tSpecify the map file to translate log file actions into human readable actions\n" + "\tDefault uses [dspace config directory]/dstat.map\n" + "-help\n" + "\tdisplay this usage information\n"; System.out.println(usage); } }