/*
* StatsAggregator.java
*
* Version: $Revision: $
*
* Date: $Date: $
*
* Copyright (C) 2008, the DSpace Foundation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the DSpace Foundation nor the names of their
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.app.stats;
import java.sql.SQLException;
import java.sql.Time;
import java.sql.Types;
import java.util.List;
import java.util.ArrayList;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.MissingOptionException;
import org.apache.commons.cli.MissingArgumentException;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.dspace.core.Context;
public class StatsAggregator
{
private static boolean isVerbose = false;
private static boolean aggregate = false;
private static boolean clean = false;
private static List tables = new ArrayList();
private static String tablesLine = null;
private static ArrayList<EventTable> eventTables = new ArrayList<EventTable>();
public static void main(String[] argv) throws Exception
{
readCommandLineOptions(argv);
initAggregations();
processAggregations();
}
private static void processAggregations()
{
if (isVerbose)
System.out.println("PROCESSING tables: " + tablesLine);
Context context = null;
try
{
context = new Context();
for (EventTable table : eventTables)
{
if (isVerbose)
System.out.println("Processing table " + table.getTable());
if (clean)
{
for (Aggregation a : table.getAggregations())
{
if (isVerbose)
System.out.println(" " + new Time(new java.util.Date().getTime()).toString() +
"\tCleaning " + a.getDestination());
a.clean(context);
}
if (isVerbose)
System.out.println(" " + new Time(new java.util.Date().getTime()).toString() +
"\tFlagging records as unaggregated ");
table.flagAsUnaggregated(context);
}
if (aggregate)
{
if (isVerbose)
System.out.println(" " + new Time(new java.util.Date().getTime()).toString() +
"\tFlagging spiders ");
table.flagSpiders(context);
for (Aggregation a : table.getAggregations())
{
if (isVerbose)
System.out.println(" " + new Time(new java.util.Date().getTime()).toString() +
"\tAggregating " + a.getDestination());
a.aggregate(context);
}
if (isVerbose)
System.out.println(" " + new Time(new java.util.Date().getTime()).toString() +
"\tFlagging records as aggregated ");
table.flagAsAggregated(context);
}
if (isVerbose)
System.out.println(" End Processing table " + table.getTable());
}
context.commit();
}
catch (SQLException e)
{
System.out.println("Database error: " + e.getMessage());
System.exit(1);
}
finally
{
if ((context != null) && context.isValid())
context.abort();
}
if (isVerbose)
System.out.println("END PROCESSING ");
}
private static void initAggregations()
{
EventTable table;
Aggregation agg;
for (int i = 0; i < tables.size(); i++)
{
if (tables.get(i).equals("view"))
{
table = new EventTable("stats.view");
agg = new Aggregation("stats.z_view_unagg_month", "stats.view_month");
agg.addValueColumn("value", Types.BIGINT);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_country_month", "stats.view_country_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("country_code", Types.VARCHAR);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_item_month", "stats.view_item_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("item_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_comm_month", "stats.view_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_coll_month", "stats.view_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_country_comm_month", "stats.view_country_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("country_code", Types.VARCHAR);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_country_coll_month", "stats.view_country_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("country_code", Types.VARCHAR);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_item_comm_month", "stats.view_item_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("item_id", Types.INTEGER);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_item_coll_month", "stats.view_item_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("item_id", Types.INTEGER);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_metadata_month_1", "stats.view_metadata_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_metadata_month_2", "stats.view_metadata_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_metadata_comm_month_1", "stats.view_metadata_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_metadata_comm_month_2", "stats.view_metadata_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_metadata_coll_month_1", "stats.view_metadata_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_view_unagg_metadata_coll_month_2", "stats.view_metadata_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
eventTables.add(table);
}
if (tables.get(i).equals("download"))
{
table = new EventTable("stats.download");
agg = new Aggregation("stats.z_download_unagg_month", "stats.download_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_country_month", "stats.download_country_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("country_code", Types.VARCHAR);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_item_month", "stats.download_item_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("item_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_comm_month", "stats.download_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_coll_month", "stats.download_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_country_comm_month", "stats.download_country_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("country_code", Types.VARCHAR);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_country_coll_month", "stats.download_country_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("country_code", Types.VARCHAR);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_item_comm_month", "stats.download_item_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("item_id", Types.INTEGER);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_item_coll_month", "stats.download_item_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("item_id", Types.INTEGER);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_metadata_month_1", "stats.download_metadata_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_metadata_month_2", "stats.download_metadata_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_metadata_comm_month_1", "stats.download_metadata_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_metadata_comm_month_2", "stats.download_metadata_comm_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
agg.addAggregationColumn("community_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_metadata_coll_month_1", "stats.download_metadata_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
agg = new Aggregation("stats.z_download_unagg_metadata_coll_month_2", "stats.download_metadata_coll_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addValueColumn("relative_value", Types.DOUBLE);
agg.addAggregationColumn("field_id", Types.INTEGER);
agg.addAggregationColumn("field_value", Types.VARCHAR);
agg.addAggregationColumn("collection_id", Types.INTEGER);
table.addAggregation(agg);
eventTables.add(table);
}
if (tables.get(i).equals("search"))
{
table = new EventTable("stats.search");
agg = new Aggregation("stats.z_search_unagg_month", "stats.search_month");
agg.addValueColumn("value", Types.BIGINT);
table.addAggregation(agg);
agg = new Aggregation("stats.z_search_unagg_words_month", "stats.search_words_month");
agg.addValueColumn("value", Types.BIGINT);
agg.addAggregationColumn("word", Types.VARCHAR);
table.addAggregation(agg);
eventTables.add(table);
}
}
}
private static Options setCommandLineOptions()
{
// create an options object and populate it
Options options = new Options();
OptionBuilder.withLongOpt("tables");
OptionBuilder.withValueSeparator(',');
OptionBuilder.withDescription(
"Run the clean and/or aggregation for the \n specified table(s).\n" +
"Possible values are:\n all, view, download, search\n" +
"Separate multiple with a comma (,)\n" +
"Default is all");
Option tables = OptionBuilder.create('t');
tables.setArgs(Option.UNLIMITED_VALUES);
options.addOption(tables);
options.addOption("a", "aggregate", false, "aggregate the unaggregated tables");
options.addOption("c", "clean", false, "clean the current aggregation");
options.addOption("v", "verbose", false, "print aggregation logging to STDOUT");
options.addOption("h", "help", false, "help");
return options;
}
private static void readCommandLineOptions(String[] argv)
{
// set up command line parser
CommandLineParser parser = new PosixParser();
CommandLine line = null;
Options options = setCommandLineOptions();
try
{
line = parser.parse(options, argv);
}
catch(MissingArgumentException e)
{
System.out.println("Missing Argument: " + e.getMessage());
new HelpFormatter().printHelp("StatsAggregator\n", options);
System.exit(1);
}
catch (ParseException e)
{
System.out.println("ERROR: " + e.getMessage());
new HelpFormatter().printHelp("StatsAggregator\n", options);
System.exit(1);
}
if (line.hasOption('h'))
{
new HelpFormatter().printHelp("StatsAggregator\n", options);
System.exit(0);
}
if (!line.hasOption('a') && !line.hasOption('c'))
{
System.out.println("You have to specify if you want to \n" +
"clean the current aggregation or \n" +
"aggregate the unaggregated tables (or both).");
new HelpFormatter().printHelp("StatsAggregator\n", options);
System.exit(0);
}
if (line.hasOption('v'))
{
isVerbose = true;
}
if (line.hasOption('c'))
{
clean = true;
}
if (line.hasOption('a'))
{
aggregate = true;
}
String temp[] = null;
if(line.hasOption('t'))
{
temp = line.getOptionValues('t');
}
if(temp==null || temp.length==0)
{
tables.add("view");
tables.add("download");
tables.add("search");
}
else
{
for (int i = 0; i < temp.length; i++)
{
if (temp[i].equals("all"))
{
tables.add("view");
tables.add("download");
tables.add("search");
break;
}
if (temp[i].equals("view"))
tables.add("view");
else
if (temp[i].equals("download"))
tables.add("download");
else
if (temp[i].equals("search"))
tables.add("search");
}
}
tablesLine = "";
for (int i = 0; i < tables.size(); i++)
tablesLine = tablesLine + tables.get(i) + (i == tables.size() - 1 ? "" : ", ");
}
}