/* $Id: RepositoryHistoryManager.java 999670 2010-09-21 22:18:19Z kwright $ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.repository;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import java.util.*;
/** This class is the manager for the history records belonging to the repository connector.
*
* <br><br>
* <b>repohistory</b>
* <table border="1" cellpadding="3" cellspacing="0">
* <tr class="TableHeadingColor">
* <th>Field</th><th>Type</th><th>Description </th></tr>
* <tr><td>id</td><td>BIGINT</td><td>Primary Key</td></tr>
* <tr><td>owner</td><td>VARCHAR(32)</td><td>Reference:repoconnections.connectionname</td></tr>
* <tr><td>starttime</td><td>BIGINT</td><td></td></tr>
* <tr><td>endtime</td><td>BIGINT</td><td></td></tr>
* <tr><td>datasize</td><td>BIGINT</td><td></td></tr>
* <tr><td>activitytype</td><td>VARCHAR(64)</td><td></td></tr>
* <tr><td>entityid</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>resultcode</td><td>VARCHAR(255)</td><td></td></tr>
* <tr><td>resultdesc</td><td>LONGTEXT</td><td></td></tr>
* </table>
* <br><br>
*
*/
public class RepositoryHistoryManager extends org.apache.manifoldcf.core.database.BaseTable
{
public static final String _rcsid = "@(#)$Id: RepositoryHistoryManager.java 999670 2010-09-21 22:18:19Z kwright $";
// Fields
// Column names of the "repohistory" table; see the class comment for the schema.
protected final static String idField = "id";
protected final static String ownerNameField = "owner";
protected final static String startTimeField = "starttime";
protected final static String endTimeField = "endtime";
protected final static String dataSizeField = "datasize";
protected final static String activityTypeField = "activitytype";
protected final static String entityIdentifierField = "entityid";
protected final static String resultCodeField = "resultcode";
protected final static String resultDescriptionField = "resultdesc";
/** Thread context */
protected IThreadContext threadContext;
/** A lock manager handle. */
protected final ILockManager lockManager;
/** Constructor.
*@param tc is the thread context.
*@param database is the database instance.
*/
public RepositoryHistoryManager(IThreadContext tc, IDBInterface database)
  throws ManifoldCFException
{
  // The underlying table is named "repohistory".
  super(database,"repohistory");
  lockManager = LockManagerFactory.make(tc);
  threadContext = tc;
}
/** Install or upgrade the table.
*@param parentTable is the parent table that the owner column refers to.
*@param parentField is the field in the parent table that the owner column refers to.
*/
public void install(String parentTable, String parentField)
  throws ManifoldCFException
{
  // Always have an outer loop, in case of upgrade
  while (true)
  {
    Map existing = getTableSchema(null,null);
    if (existing == null)
    {
      // Table does not exist yet; create it.
      HashMap map = new HashMap();
      map.put(ownerNameField,new ColumnDescription("VARCHAR(32)",false,false,parentTable,parentField,false));
      map.put(idField,new ColumnDescription("BIGINT",true,false,null,null,false));
      map.put(startTimeField,new ColumnDescription("BIGINT",false,false,null,null,false));
      map.put(endTimeField,new ColumnDescription("BIGINT",false,false,null,null,false));
      map.put(dataSizeField,new ColumnDescription("BIGINT",false,false,null,null,false));
      map.put(activityTypeField,new ColumnDescription("VARCHAR(64)",false,false,null,null,false));
      map.put(entityIdentifierField,new ColumnDescription("LONGTEXT",false,false,null,null,false));
      map.put(resultCodeField,new ColumnDescription("VARCHAR(255)",false,true,null,null,false));
      map.put(resultDescriptionField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
      performCreate(map,null);
    }
    else
    {
      // Upgrade code goes here, when a schema change is needed.
    }
    // Index management: describe the indexes that should exist.
    IndexDescription ownerIndex = new IndexDescription(false,new String[]{ownerNameField});
    IndexDescription startTimeIndex = new IndexDescription(false,new String[]{startTimeField});
    IndexDescription endTimeIndex = new IndexDescription(false,new String[]{endTimeField});
    IndexDescription activityTypeIndex = new IndexDescription(false,new String[]{activityTypeField});
    // Get rid of indexes that shouldn't be there, and null out the descriptions
    // of those that are already present.
    Map indexes = getTableIndexes(null,null);
    Iterator iter = indexes.keySet().iterator();
    while (iter.hasNext())
    {
      String indexName = (String)iter.next();
      IndexDescription id = (IndexDescription)indexes.get(indexName);
      if (ownerIndex != null && id.equals(ownerIndex))
        ownerIndex = null;
      else if (startTimeIndex != null && id.equals(startTimeIndex))
        startTimeIndex = null;
      // Bug fix: this branch previously tested "activityTypeIndex == null", which
      // could never be taken, so an existing activity-type index was dropped and
      // recreated on every install.
      else if (endTimeIndex != null && id.equals(endTimeIndex))
        endTimeIndex = null;
      else if (activityTypeIndex != null && id.equals(activityTypeIndex))
        activityTypeIndex = null;
      else if (indexName.indexOf("_pkey") == -1)
        // This index shouldn't be here; drop it
        performRemoveIndex(indexName);
    }
    // Add the indexes we didn't find already present.
    if (ownerIndex != null)
      performAddIndex(null,ownerIndex);
    if (startTimeIndex != null)
      performAddIndex(null,startTimeIndex);
    if (endTimeIndex != null)
      performAddIndex(null,endTimeIndex);
    if (activityTypeIndex != null)
      performAddIndex(null,activityTypeIndex);
    break;
  }
}
/** Uninstall the table.
* Drops the entire repohistory table, discarding all stored history records.
*/
public void deinstall()
throws ManifoldCFException
{
performDrop(null);
}
/** Delete all history records associated with a given owning connection.
*@param owner is the name of the owning connection.
*/
public void deleteOwner(String owner)
  throws ManifoldCFException
{
  ArrayList params = new ArrayList();
  ClauseDescription[] clauses = new ClauseDescription[]{
    new UnitaryClause(ownerNameField,owner)};
  performDelete("WHERE "+buildConjunctionClause(params,clauses),params,null);
}
/** Delete records older than a specified time.
*@param timeCutoff is the time; records whose start time is earlier than this are removed.
*/
public void deleteOldRows(long timeCutoff)
  throws ManifoldCFException
{
  ArrayList params = new ArrayList();
  // Long.valueOf() replaces the deprecated new Long() constructor.
  String query = buildConjunctionClause(params,new ClauseDescription[]{
    new UnitaryClause(startTimeField,"<",Long.valueOf(timeCutoff))});
  performDelete("WHERE "+query,params,null);
}
/** Add a row to the table, and reanalyze if necessary.
*@param connectionName is the connection that owns the event.
*@param startTime is the event start time.
*@param endTime is the event end time.
*@param dataSize is the number of bytes involved in the event.
*@param activityType is the type of activity.
*@param entityIdentifier identifies the entity the event applies to.
*@param resultCode is the result code, or null if none.
*@param resultDescription is the result description, or null if none.
*@return the id of the new row; an id is generated even when history storage is disabled.
*/
public Long addRow(String connectionName, long startTime, long endTime, long dataSize, String activityType,
  String entityIdentifier, String resultCode, String resultDescription)
  throws ManifoldCFException
{
  Long id = Long.valueOf(IDFactory.make(threadContext));
  // Only store the event if history-keeping is enabled via shared configuration.
  if (lockManager.getSharedConfiguration().getBooleanProperty("org.apache.manifoldcf.crawler.repository.store_history",true))
  {
    HashMap map = new HashMap();
    map.put(idField,id);
    map.put(ownerNameField,connectionName);
    // Long.valueOf() replaces the deprecated new Long() constructor.
    map.put(startTimeField,Long.valueOf(startTime));
    map.put(endTimeField,Long.valueOf(endTime));
    map.put(dataSizeField,Long.valueOf(dataSize));
    map.put(activityTypeField,activityType);
    map.put(entityIdentifierField,entityIdentifier);
    // Result code and description are nullable columns; only set them when present.
    if (resultCode != null)
      map.put(resultCodeField,resultCode);
    if (resultDescription != null)
      map.put(resultDescriptionField,resultDescription);
    performInsert(map,null);
    // Not accurate, but best we can do without overhead
    noteModifications(1,0,0);
  }
  return id;
}
// For result analysis, we make heavy use of Postgresql's more advanced posix regular expression
// handling. The queries in general are fairly messy. There's a "front aligned" way of doing things,
// which uses the start time of a row and finds everything that overlaps the interval from "start time"
// to "start time + interval". Then there's a "rear aligned" way of doing things, which uses the
// time range from "end time - interval" to "end time". Both sets of data must be evaluated to have a
// complete set of possible unique window positions.
//
// Some of the examples below only use one or the other alignment; they're meant to be illustrative rather
// than complete.
//
// 1) How to come up with the "total count" or "total bytes" of the events in the time window:
//
// SELECT substring(entityid from '<expr>') AS entitybucket, COUNT('x') AS eventcount
// FROM table WHERE starttime > xxx AND endtime <= yyy AND <everything else> GROUP BY entitybucket
// SELECT substring(entityid from '<expr>') AS entitybucket, SUM(bytecount) AS bytecount
// FROM table WHERE starttime > xxx AND endtime <= yyy AND <everything else> GROUP BY entitybucket
//
// Sample queries tried against test table:
// SELECT substring(url from 'gov$') AS urlbucket, COUNT('x') AS eventcount, MIN(starttime) as minstarttime, MAX(endtime) AS maxendtime FROM testtable GROUP BY urlbucket;
// SELECT substring(lower(url) from 'gov$') AS urlbucket, SUM(bytes) AS bytecount, MIN(starttime) as minstarttime, MAX(endtime) AS maxendtime FROM testtable GROUP BY urlbucket;
// SELECT substring(upper(url) from 'gov$') AS urlbucket, COUNT('x') AS eventcount, MIN(starttime) as minstarttime, MAX(endtime) AS maxendtime FROM testtable GROUP BY urlbucket;
//
// 2) How to find a set of rows within the interval window for each row in the greater range (FRONT ALIGNED!!!):
//
// SELECT t0.url AS starturl,t0.starttime AS starttime,t1.url AS secondurl,t1.starttime AS secondstart,t1.endtime AS secondend FROM testtable t0,testtable t1
// WHERE t1.starttime < t0.starttime + 15 AND t1.endtime > t0.starttime
//
// 3) Another way to do it (REAR ALIGNED!!!):
//
// SELECT t0.url AS starturl,t0.endtime AS endtime,t1.url AS secondurl,t1.starttime AS secondstart,t1.endtime AS secondend FROM testtable t0,testtable t1
// WHERE t1.starttime < t0.endtime AND t1.endtime > t0.endtime - 15
//
// 4) How to find the byte count for each of the intervals:
//
// SELECT t0.url AS starturl, t0.starttime AS windowstart, SUM(t1.bytes) AS bytecount FROM testtable t0, testtable t1
// WHERE t1.starttime < t0.starttime + 15 AND t1.endtime > t0.starttime GROUP BY starturl,windowstart;
//
// 5) How to find the byte count per bucket for each of the intervals:
//
// SELECT substring(t0.url from '^.*(gov|com)$') AS bucket, t0.starttime AS windowstart, SUM(t1.bytes) AS bytecount FROM testtable t0, testtable t1
// WHERE substring(t0.url from '^.*(gov|com)$')=substring(t1.url from '^.*(gov|com)$') AND t1.starttime < t0.starttime + 15 AND t1.endtime > t0.starttime GROUP BY bucket,windowstart;
//
// 6) How to find the max byte count for the highest interval for each bucket:
//
// SELECT t2.bucket AS bucketname, MAX(t2.bytecount) AS maxbytecount FROM (SELECT substring(t0.url from '^.*(gov|com)$') AS bucket, t0.starttime AS windowstart, SUM(t1.bytes) AS bytecount FROM testtable t0, testtable t1
// WHERE substring(t0.url from '^.*(gov|com)$')=substring(t1.url from '^.*(gov|com)$') AND t1.starttime < t0.starttime + 15 AND t1.endtime > t0.starttime GROUP BY bucket,windowstart) t2 GROUP BY bucketname;
//
// 7) But, how do we include the right start time? We want the start time from the row that yielded the max bytecount!
// So, use select distinct:
//
// SELECT DISTINCT ON (bucketname) t2.bucket AS bucketname, t2.bytecount AS maxbytecount, t2.windowstart AS windowstart FROM (SELECT substring(t0.url from '^.*(gov|com)$') AS bucket, t0.starttime AS windowstart, SUM(t1.bytes) AS bytecount FROM testtable t0, testtable t1
// WHERE substring(t0.url from '^.*(gov|com)$')=substring(t1.url from '^.*(gov|com)$') AND t1.starttime < t0.starttime + 15 AND t1.endtime > t0.starttime GROUP BY bucket,windowstart) t2 ORDER BY bucketname ASC,maxbytecount DESC;
//
// 8) How do we account for boundary conditions? E.g., fetches that start within the window but go over the window boundary?
// A: We can try to prorate based on window size. This would involve a more complex query:
//
// ... least(t0.starttime + <interval>,t1.endtime) - greatest(t0.starttime,t1.starttime) AS overlaptime ...
//
// 9) Prorated byte count, FRONT ALIGNED form and BACK ALIGNED form:
//
// ... bytes * (least(t0.starttime + <interval>,t1.endtime) - greatest(t0.starttime,t1.starttime))/(t1.endtime-t1.starttime) AS bytecount ...
// OR
// ... bytes * (least(t0.endtime,t1.endtime) - greatest(t0.endtime - <interval>,t1.starttime))/(t1.endtime-t1.starttime) AS bytecount ...
//
// But, our version of postgresql doesn't know about greatest() and least(), so do this:
//
// SELECT t0.url AS starturl,t0.starttime AS starttime,t1.url AS secondurl,t1.starttime AS secondstart,t1.endtime AS secondend,
// t1.bytes AS fullbytes,
// t1.bytes * ((case when t0.starttime + 15<t1.endtime then t0.starttime + 15 else t1.endtime end) -
// (case when t0.starttime>t1.starttime then t0.starttime else t1.starttime end))/(t1.endtime - t1.starttime) AS proratedbytes
// FROM testtable t0,testtable t1 WHERE t1.starttime < t0.starttime + 15 AND t1.endtime > t0.starttime
/** Get a simple history, based on the passed-in filtering criteria and sort order.
* The resultset returned has the columns: "activity","starttime","elapsedtime","resultcode","resultdesc","bytes","identifier".
*@param connectionName is the connection whose history is queried.
*@param criteria is the filtering criteria, which selects the records of interest.
*@param sort is the requested sort order.
*@param startRow is the first row to return.
*@param maxRowCount is the maximum number of rows to return.
*@return the history rows.
*/
public IResultSet simpleReport(String connectionName, FilterCriteria criteria, SortOrder sort, int startRow, int maxRowCount)
  throws ManifoldCFException
{
  // Assemble the SELECT column list up front; criteria, ordering, and limit
  // clauses are appended by the shared helpers below.
  String columnList = idField + " AS id," +
    activityTypeField + " AS activity," +
    startTimeField + " AS starttime,(" +
    endTimeField + "-" + startTimeField + ") AS elapsedtime," +
    resultCodeField + " AS resultcode," +
    resultDescriptionField + " AS resultdesc," +
    dataSizeField + " AS bytes," +
    entityIdentifierField + " AS identifier FROM " + getTableName();
  StringBuilder sb = new StringBuilder("SELECT ");
  sb.append(columnList);
  ArrayList params = new ArrayList();
  addCriteria(sb,params,"",connectionName,criteria,false);
  // We can't order by "identifier" in all databases, so "id" is used instead to
  // pin a deterministic internal order for the OFFSET/LIMIT clause; "starttime"
  // is included because it is the default ordering.
  addOrdering(sb,new String[]{"starttime","id"},sort);
  addLimits(sb,startRow,maxRowCount);
  return performQuery(sb.toString(),params,null,null,maxRowCount);
}
/** Count the number of rows specified by a given set of criteria. This can be used to make decisions
* as to whether a query based on those rows will complete in an acceptable amount of time.
*@param connectionName is the name of the connection.
*@param criteria is the filtering criteria, which selects the records of interest.
*@return the number of rows included by the criteria.
*/
public long countHistoryRows(String connectionName, FilterCriteria criteria)
  throws ManifoldCFException
{
  ArrayList params = new ArrayList();
  StringBuilder query = new StringBuilder("SELECT ");
  query.append(constructCountClause("*")).append(" AS countcol FROM ").append(getTableName());
  addCriteria(query,params,"",connectionName,criteria,false);
  IResultSet results = performQuery(query.toString(),params,null,null);
  // A COUNT query always yields one row; anything else indicates a problem.
  if (results.getRowCount() == 0)
    throw new ManifoldCFException("Expected at least one row");
  IResultRow countRow = results.getRow(0);
  return ((Long)countRow.getValue("countcol")).longValue();
}
/** Get the maximum number of rows a window-based report can work with.
* Callers can compare this against countHistoryRows() to decide whether a
* windowed report query is feasible.
*@return the maximum rows.
*/
public long getMaxRows()
throws ManifoldCFException
{
// Delegate to the database layer's configured limit.
return getWindowedReportMaxRows();
}
/** Get a bucketed history, with sliding window, of maximum activity level.
* The resultset returned should have the following columns: "starttime","endtime","activitycount","idbucket".
* An activity is counted as being within the interval window on a prorated basis, which can lead to fractional
* counts.
*@param connectionName is the connection whose history is queried.
*@param filterCriteria selects the records of interest.
*@param sort is the requested sort order.
*@param idBucket describes how entity identifiers are grouped into buckets.
*@param interval is the sliding window width.
*@param startRow is the first row to return.
*@param maxRowCount is the maximum number of rows to return.
*@return the bucketed result set.
*/
public IResultSet maxActivityCountReport(String connectionName, FilterCriteria filterCriteria, SortOrder sort, BucketDescription idBucket,
long interval, int startRow, int maxRowCount)
throws ManifoldCFException
{
// The query we will generate here looks like this:
// SELECT *
// FROM
// (SELECT DISTINCT ON (idbucket) t3.bucket AS idbucket, t3.activitycount AS activitycount,
// t3.windowstart AS starttime, t3.windowend AS endtime
// FROM (SELECT * FROM (SELECT t0.bucket AS bucket, t0.starttime AS windowstart, t0.starttime + <interval> AS windowend,
// SUM(CAST(((case when t0.starttime + <interval> < t1.endtime then t0.starttime + <interval> else t1.endtime end) -
// (case when t0.starttime>t1.starttime then t0.starttime else t1.starttime end)) AS double precision)
// / CAST((t1.endtime - t1.starttime) AS double precision)) AS activitycount
// FROM (SELECT DISTINCT substring(entityid from '<bucketregexp>') AS bucket, starttime FROM repohistory WHERE <criteria>) t0, repohistory t1
// WHERE t0.bucket=substring(t1.entityid from '<bucket_regexp>')
// AND t1.starttime < t0.starttime + <interval> AND t1.endtime > t0.starttime
// AND <criteria on t1>
// GROUP BY bucket,windowstart,windowend
// UNION SELECT t0a.bucket AS bucket, t0a.endtime - <interval> AS windowstart, t0a.endtime AS windowend,
// SUM(CAST(((case when t0a.endtime < t1a.endtime then t0a.endtime else t1a.endtime end) -
// (case when t0a.endtime - <interval> > t1a.starttime then t0a.endtime - <interval> else t1a.starttime end)) AS double precision)
// / CAST((t1a.endtime - t1a.starttime) AS double precision)) AS activitycount
// FROM (SELECT DISTINCT substring(entityid from '<bucketregexp>') AS bucket, endtime FROM repohistory WHERE <criteria>) t0a, repohistory t1a
// WHERE t0a.bucket=substring(t1a.entityid from '<bucket_regexp>')
// AND t1a.starttime < t0a.endtime AND t1a.endtime > t0a.endtime - <interval>
// AND <criteria on t1a>
// GROUP BY bucket,windowstart,windowend) t2
// ORDER BY bucket ASC,activitycount DESC) t3) t4 ORDER BY xxx LIMIT yyy OFFSET zzz;
//
// There are two different intervals being considered; each one may independently contribute possible
// items to the list. One is based on the start time of the current record; the other is based on the
// end time of the current record. That's why there are two inner clauses with a UNION.
StringBuilder sb = new StringBuilder();
ArrayList list = new ArrayList();
// FRONT-ALIGNED half: windows anchored at each record's start time.
sb.append("SELECT * FROM (SELECT t6.bucket AS bucket,")
.append("t6.windowstart AS windowstart,t6.windowend AS windowend, SUM(t6.activitycount) AS activitycount")
.append(" FROM (SELECT ");
// Turn the interval into a string, since we'll need it a lot.
String intervalString = new Long(interval).toString();
// Prorated activity count: (overlap of t1's span with the window) / (t1's full span).
sb.append("t0.bucket AS bucket, t0.").append(startTimeField).append(" AS windowstart, t0.")
.append(startTimeField).append("+").append(intervalString).append(" AS windowend, ")
.append(constructDoubleCastClause("((CASE WHEN t0."+
startTimeField+"+"+intervalString+"<t1."+endTimeField+" THEN t0."+
startTimeField+"+"+intervalString+" ELSE t1."+endTimeField+" END) - (CASE WHEN t0."+
startTimeField+">t1."+startTimeField+" THEN t0."+startTimeField+" ELSE t1."+startTimeField+" END))"))
.append(" / ").append(constructDoubleCastClause("(t1."+endTimeField+"-t1."+startTimeField+")"))
.append(" AS activitycount FROM (SELECT DISTINCT ");
addBucketExtract(sb,list,"",entityIdentifierField,idBucket);
sb.append(" AS bucket,").append(startTimeField).append(" FROM ").append(getTableName());
addCriteria(sb,list,"",connectionName,filterCriteria,false);
sb.append(") t0,")
.append(getTableName()).append(" t1 WHERE ");
sb.append("t0.bucket=");
addBucketExtract(sb,list,"t1.",entityIdentifierField,idBucket);
// Overlap condition: t1's span intersects [t0.starttime, t0.starttime + interval).
sb.append(" AND t1.").append(startTimeField).append("<t0.").append(startTimeField).append("+").append(intervalString)
.append(" AND t1.").append(endTimeField).append(">t0.").append(startTimeField);
addCriteria(sb,list,"t1.",connectionName,filterCriteria,true);
// REAR-ALIGNED half: windows anchored at each record's end time; combined via UNION.
sb.append(") t6 GROUP BY bucket,windowstart,windowend UNION SELECT t6a.bucket AS bucket,")
.append("t6a.windowstart AS windowstart, t6a.windowend AS windowend, SUM(t6a.activitycount) AS activitycount")
.append(" FROM (SELECT ");
sb.append("t0a.bucket AS bucket, t0a.").append(endTimeField).append("-").append(intervalString).append(" AS windowstart, t0a.")
.append(endTimeField).append(" AS windowend, ")
.append(constructDoubleCastClause("((CASE WHEN t0a."+
endTimeField+"<t1a."+endTimeField+" THEN t0a."+endTimeField+
" ELSE t1a."+endTimeField+" END) - (CASE WHEN t0a."+
endTimeField+"-"+intervalString+">t1a."+startTimeField+
" THEN t0a."+endTimeField+"-"+intervalString+" ELSE t1a."+startTimeField+" END))"))
.append(" / ").append(constructDoubleCastClause("(t1a."+endTimeField+"-t1a."+startTimeField+")"))
.append(" AS activitycount FROM (SELECT DISTINCT ");
addBucketExtract(sb,list,"",entityIdentifierField,idBucket);
sb.append(" AS bucket,").append(endTimeField).append(" FROM ").append(getTableName());
addCriteria(sb,list,"",connectionName,filterCriteria,false);
sb.append(") t0a,")
.append(getTableName()).append(" t1a WHERE ");
sb.append("t0a.bucket=");
addBucketExtract(sb,list,"t1a.",entityIdentifierField,idBucket);
sb.append(" AND t1a.").append(startTimeField).append("<t0a.").append(endTimeField)
.append(" AND t1a.").append(endTimeField).append(">t0a.").append(endTimeField).append("-").append(intervalString);
addCriteria(sb,list,"t1a.",connectionName,filterCriteria,true);
sb.append(") t6a GROUP BY bucket,windowstart,windowend) t2");
// Wrap with DISTINCT ON so that, per bucket, only the window with the highest
// activity count survives; map the inner column names to the report columns.
Map otherColumns = new HashMap();
otherColumns.put("idbucket","bucket");
otherColumns.put("activitycount","activitycount");
otherColumns.put("starttime","windowstart");
otherColumns.put("endtime","windowend");
StringBuilder newsb = new StringBuilder("SELECT * FROM (");
ArrayList newList = new ArrayList();
newsb.append(constructDistinctOnClause(newList,sb.toString(),list,new String[]{"idbucket"},
new String[]{"activitycount"},new boolean[]{false},otherColumns)).append(") t4");
addOrdering(newsb,new String[]{"activitycount","starttime","endtime","idbucket"},sort);
addLimits(newsb,startRow,maxRowCount);
return performQuery(newsb.toString(),newList,null,null,maxRowCount);
}
/** Get a bucketed history, with sliding window, of maximum byte count.
* The resultset returned should have the following columns: "starttime","endtime","bytecount","idbucket".
*@param connectionName is the connection whose history is queried.
*@param filterCriteria selects the records of interest.
*@param sort is the requested sort order.
*@param idBucket describes how entity identifiers are grouped into buckets.
*@param interval is the sliding window width.
*@param startRow is the first row to return.
*@param maxRowCount is the maximum number of rows to return.
*@return the bucketed result set.
*/
public IResultSet maxByteCountReport(String connectionName, FilterCriteria filterCriteria, SortOrder sort, BucketDescription idBucket,
long interval, int startRow, int maxRowCount)
throws ManifoldCFException
{
// The query we will generate here looks like this:
// SELECT *
// FROM
// (SELECT DISTINCT ON (idbucket) t3.bucket AS idbucket, t3.bytecount AS bytecount,
// t3.windowstart AS starttime, t3.windowend AS endtime
// FROM (SELECT * FROM (SELECT t0.bucket AS bucket, t0.starttime AS windowstart, t0.starttime + <interval> AS windowend,
// SUM(t1.datasize * ((case when t0.starttime + <interval> < t1.endtime then t0.starttime + <interval> else t1.endtime end) -
// (case when t0.starttime>t1.starttime then t0.starttime else t1.starttime end))
// / (t1.endtime - t1.starttime)) AS bytecount
// FROM (SELECT DISTINCT substring(entityid from '<bucketregexp>') AS bucket, starttime FROM repohistory WHERE <criteria>) t0, repohistory t1
// WHERE t0.bucket=substring(t1.entityid from '<bucket_regexp>')
// AND t1.starttime < t0.starttime + <interval> AND t1.endtime > t0.starttime
// AND <criteria on t1>
// GROUP BY bucket,windowstart,windowend
// UNION SELECT t0a.bucket AS bucket, t0a.endtime - <interval> AS windowstart, t0a.endtime AS windowend,
// SUM(t1a.datasize * ((case when t0a.endtime < t1a.endtime then t0a.endtime else t1a.endtime end) -
// (case when t0a.endtime - <interval> > t1a.starttime then t0a.endtime - <interval> else t1a.starttime end))
// / (t1a.endtime - t1a.starttime)) AS bytecount
// FROM (SELECT DISTINCT substring(entityid from '<bucketregexp>') AS bucket, endtime FROM repohistory WHERE <criteria>) t0a, repohistory t1a
// WHERE t0a.bucket=substring(t1a.entityid from '<bucket_regexp>')
// AND t1a.starttime < t0a.endtime AND t1a.endtime > t0a.endtime - <interval>
// AND <criteria on t1a>
// GROUP BY bucket,windowstart,windowend) t2
// ORDER BY bucket ASC,bytecount DESC) t3) t4 ORDER BY xxx LIMIT yyy OFFSET zzz;
//
// There are two different intervals being considered; each one may independently contribute possible
// items to the list. One is based on the start time of the current record; the other is based on the
// end time of the current record. That's why there are two inner clauses with a UNION.
StringBuilder sb = new StringBuilder();
ArrayList list = new ArrayList();
// FRONT-ALIGNED half: windows anchored at each record's start time.
sb.append("SELECT * FROM (SELECT t6.bucket AS bucket,")
.append("t6.windowstart AS windowstart, t6.windowend AS windowend, SUM(t6.bytecount) AS bytecount")
.append(" FROM (SELECT ");
// Turn the interval into a string, since we'll need it a lot.
String intervalString = new Long(interval).toString();
// Prorated byte count: datasize scaled by (overlap of t1's span with the window) / (t1's full span).
sb.append("t0.bucket AS bucket, t0.").append(startTimeField).append(" AS windowstart, t0.")
.append(startTimeField).append("+").append(intervalString).append(" AS windowend, ")
.append("t1.").append(dataSizeField)
.append(" * ((CASE WHEN t0.")
.append(startTimeField).append("+").append(intervalString).append("<t1.").append(endTimeField)
.append(" THEN t0.").append(startTimeField).append("+").append(intervalString).append(" ELSE t1.")
.append(endTimeField).append(" END) - (CASE WHEN t0.").append(startTimeField).append(">t1.").append(startTimeField)
.append(" THEN t0.").append(startTimeField).append(" ELSE t1.").append(startTimeField)
.append(" END)) / (t1.").append(endTimeField).append("-t1.").append(startTimeField)
.append(")")
.append(" AS bytecount FROM (SELECT DISTINCT ");
addBucketExtract(sb,list,"",entityIdentifierField,idBucket);
sb.append(" AS bucket,").append(startTimeField).append(" FROM ").append(getTableName());
addCriteria(sb,list,"",connectionName,filterCriteria,false);
sb.append(") t0,")
.append(getTableName()).append(" t1 WHERE ");
sb.append("t0.bucket=");
addBucketExtract(sb,list,"t1.",entityIdentifierField,idBucket);
// Overlap condition: t1's span intersects [t0.starttime, t0.starttime + interval).
sb.append(" AND t1.").append(startTimeField).append("<t0.").append(startTimeField).append("+").append(intervalString)
.append(" AND t1.").append(endTimeField).append(">t0.").append(startTimeField);
addCriteria(sb,list,"t1.",connectionName,filterCriteria,true);
// REAR-ALIGNED half: windows anchored at each record's end time; combined via UNION.
sb.append(") t6 GROUP BY bucket,windowstart,windowend UNION SELECT t6a.bucket AS bucket,")
.append("t6a.windowstart AS windowstart, t6a.windowend AS windowend, SUM(t6a.bytecount) AS bytecount")
.append(" FROM (SELECT ")
.append("t0a.bucket AS bucket, t0a.").append(endTimeField).append("-").append(intervalString).append(" AS windowstart, t0a.")
.append(endTimeField).append(" AS windowend, ")
.append("t1a.").append(dataSizeField).append(" * ((CASE WHEN t0a.")
.append(endTimeField).append("<t1a.").append(endTimeField)
.append(" THEN t0a.").append(endTimeField).append(" ELSE t1a.")
.append(endTimeField).append(" END) - (CASE WHEN t0a.").append(endTimeField).append("-").append(intervalString)
.append(">t1a.").append(startTimeField)
.append(" THEN t0a.").append(endTimeField).append("-").append(intervalString).append(" ELSE t1a.")
.append(startTimeField)
.append(" END)) / (t1a.").append(endTimeField).append("-t1a.").append(startTimeField)
.append(")")
.append(" AS bytecount FROM (SELECT DISTINCT ");
addBucketExtract(sb,list,"",entityIdentifierField,idBucket);
sb.append(" AS bucket,").append(endTimeField).append(" FROM ").append(getTableName());
addCriteria(sb,list,"",connectionName,filterCriteria,false);
sb.append(") t0a,")
.append(getTableName()).append(" t1a WHERE ");
sb.append("t0a.bucket=");
addBucketExtract(sb,list,"t1a.",entityIdentifierField,idBucket);
sb.append(" AND t1a.").append(startTimeField).append("<t0a.").append(endTimeField)
.append(" AND t1a.").append(endTimeField).append(">t0a.").append(endTimeField).append("-").append(intervalString);
addCriteria(sb,list,"t1a.",connectionName,filterCriteria,true);
sb.append(") t6a GROUP BY bucket,windowstart,windowend) t2");
// Wrap with DISTINCT ON so that, per bucket, only the window with the highest
// byte count survives; map the inner column names to the report columns.
Map otherColumns = new HashMap();
otherColumns.put("idbucket","bucket");
otherColumns.put("bytecount","bytecount");
otherColumns.put("starttime","windowstart");
otherColumns.put("endtime","windowend");
StringBuilder newsb = new StringBuilder("SELECT * FROM (");
ArrayList newList = new ArrayList();
newsb.append(constructDistinctOnClause(newList,sb.toString(),list,new String[]{"idbucket"},
new String[]{"bytecount"},new boolean[]{false},otherColumns)).append(") t4");
addOrdering(newsb,new String[]{"bytecount","starttime","endtime","idbucket"},sort);
addLimits(newsb,startRow,maxRowCount);
return performQuery(newsb.toString(),newList,null,null,maxRowCount);
}
/** Get a bucketed history of different result code/identifier combinations.
* The resultset returned has the columns: "eventcount","resultcodebucket","idbucket".
*@param connectionName is the connection whose history is queried.
*@param filterCriteria selects the records of interest.
*@param sort is the requested sort order.
*@param resultCodeBucket describes how result codes are grouped into buckets.
*@param idBucket describes how entity identifiers are grouped into buckets.
*@param startRow is the first row to return.
*@param maxRowCount is the maximum number of rows to return.
*@return the bucketed result set.
*/
public IResultSet resultCodesReport(String connectionName, FilterCriteria filterCriteria, SortOrder sort,
  BucketDescription resultCodeBucket, BucketDescription idBucket, int startRow, int maxRowCount)
  throws ManifoldCFException
{
  // Overall shape of the generated query:
  //
  // SELECT * FROM (SELECT substring(resultcode FROM '<result_regexp>') AS resultcodebucket,
  //      substring(entityidentifier FROM '<id_regexp>') AS idbucket,
  //      COUNT('x') AS eventcount FROM repohistory WHERE <criteria>) t1
  //    GROUP BY t1.resultcodebucket,t1.idbucket
  //    ORDER BY xxx LIMIT yyy OFFSET zzz
  ArrayList params = new ArrayList();
  StringBuilder queryBuffer = new StringBuilder("SELECT t1.resultcodebucket,t1.idbucket,");
  queryBuffer.append(constructCountClause("'x'")).append(" AS eventcount FROM (SELECT ");
  addBucketExtract(queryBuffer,params,"",resultCodeField,resultCodeBucket);
  queryBuffer.append(" AS resultcodebucket, ");
  addBucketExtract(queryBuffer,params,"",entityIdentifierField,idBucket);
  queryBuffer.append(" AS idbucket FROM ").append(getTableName());
  addCriteria(queryBuffer,params,"",connectionName,filterCriteria,false);
  queryBuffer.append(") t1 GROUP BY resultcodebucket,idbucket");
  addOrdering(queryBuffer,new String[]{"eventcount","resultcodebucket","idbucket"},sort);
  addLimits(queryBuffer,startRow,maxRowCount);
  return performQuery(queryBuffer.toString(),params,null,null,maxRowCount);
}
/** Turn a bucket description into a return column.
* The underlying substring extraction is inherently case sensitive, so when a
* case-insensitive bucket is requested the column value is lowercased before
* the regexp match is applied.
*@param sb is the query buffer.
*@param list accumulates the query parameters; the bucket regexp is appended.
*@param columnPrefix is the table alias prefix (e.g. "t1."), or "".
*@param columnName is the column to extract the bucket value from.
*@param bucketDesc describes the bucket regexp and its case sensitivity.
*/
protected void addBucketExtract(StringBuilder sb, ArrayList list, String columnPrefix, String columnName, BucketDescription bucketDesc)
{
  boolean caseInsensitive = !bucketDesc.isSensitive();
  sb.append(constructSubstringClause(columnPrefix+columnName,"?",caseInsensitive));
  list.add(bucketDesc.getRegexp());
}
/** Append the WHERE/AND clauses for the given filter criteria to a query under construction.
*@param sb is the query buffer.
*@param list accumulates the query parameters.
*@param fieldPrefix is the table alias prefix (e.g. "t1."), or "".
*@param connectionName is the connection name, which is always matched.
*@param criteria is the filter criteria to render.
*@param whereEmitted is true if a WHERE clause has already been emitted.
*@return true if a WHERE clause has been emitted by the time this method returns.
*/
protected boolean addCriteria(StringBuilder sb, ArrayList list, String fieldPrefix, String connectionName, FilterCriteria criteria, boolean whereEmitted)
{
  // The owning connection is always a constraint.
  whereEmitted = emitClauseStart(sb,whereEmitted);
  sb.append(fieldPrefix).append(ownerNameField).append("=?");
  list.add(connectionName);

  String[] activities = criteria.getActivities();
  if (activities != null)
  {
    whereEmitted = emitClauseStart(sb,whereEmitted);
    if (activities.length == 0)
    {
      // An empty activity list can match nothing; emit an always-false clause.
      sb.append("0>1");
    }
    else
    {
      sb.append(fieldPrefix).append(activityTypeField).append(" IN(");
      for (int i = 0; i < activities.length; i++)
      {
        if (i > 0)
          sb.append(",");
        sb.append("?");
        list.add(activities[i]);
      }
      sb.append(")");
    }
  }

  // The time interval is open at the low end and closed at the high end.
  Long startTime = criteria.getStartTime();
  if (startTime != null)
  {
    whereEmitted = emitClauseStart(sb,whereEmitted);
    sb.append(fieldPrefix).append(startTimeField).append(">").append(startTime.toString());
  }

  Long endTime = criteria.getEndTime();
  if (endTime != null)
  {
    whereEmitted = emitClauseStart(sb,whereEmitted);
    sb.append(fieldPrefix).append(endTimeField).append("<=").append(endTime.toString());
  }

  RegExpCriteria entityMatch = criteria.getEntityMatch();
  if (entityMatch != null)
  {
    whereEmitted = emitClauseStart(sb,whereEmitted);
    sb.append(constructRegexpClause(fieldPrefix+entityIdentifierField,"?",entityMatch.isInsensitive()));
    list.add(entityMatch.getRegexpString());
  }

  RegExpCriteria resultCodeMatch = criteria.getResultCodeMatch();
  if (resultCodeMatch != null)
  {
    whereEmitted = emitClauseStart(sb,whereEmitted);
    sb.append(constructRegexpClause(fieldPrefix+resultCodeField,"?",resultCodeMatch.isInsensitive()));
    list.add(resultCodeMatch.getRegexpString());
  }

  return whereEmitted;
}
/** Emit the correct clause-joining keyword: " WHERE " if no WHERE clause has
* been emitted yet for this query, or " AND " otherwise.
*@param sb is the query buffer.
*@param whereEmitted is true if a WHERE clause was already emitted.
*@return always true, since after this call a WHERE clause exists.
*/
protected boolean emitClauseStart(StringBuilder sb, boolean whereEmitted)
{
  sb.append(whereEmitted ? " AND " : " WHERE ");
  return true;
}
/** Add an ORDER BY clause covering both the user-specified sort and all remaining
* report columns, so that OFFSET/LIMIT paging yields consistent results.
*@param sb is the query buffer.
*@param completeFieldList is the full set of report columns that must appear in the ordering.
*@param sort is the user-specified sort order.
*/
protected void addOrdering(StringBuilder sb, String[] completeFieldList, SortOrder sort)
{
  // Keep track of the columns the explicit sort already covers.
  // (A Set expresses the intent directly; previously a Map was used as a set.)
  Set seenColumns = new HashSet();
  // Emit the "Order by"
  sb.append(" ORDER BY ");
  // Go through the specified list
  int emitted = 0;
  int count = sort.getCount();
  for (int i = 0; i < count; i++)
  {
    if (emitted > 0)
      sb.append(",");
    String column = sort.getColumn(i);
    sb.append(column);
    // Reference the direction constant via the class rather than the instance.
    if (sort.getDirection(i) == SortOrder.SORT_ASCENDING)
      sb.append(" ASC");
    else
      sb.append(" DESC");
    seenColumns.add(column);
    emitted++;
  }
  // Now, go through the complete field list, and emit sort criteria for everything
  // not actually specified. This is so LIMIT and OFFSET give consistent results.
  for (int j = 0; j < completeFieldList.length; j++)
  {
    String field = completeFieldList[j];
    if (!seenColumns.contains(field))
    {
      if (emitted > 0)
        sb.append(",");
      // Always make the implicit tie-breaker columns DESC order.
      sb.append(field).append(" DESC");
      emitted++;
    }
  }
}
/** Add limit and offset.
*@param sb is the query buffer.
*@param startRow is the first row to return.
*@param maxRowCount is the maximum number of rows to return.
*/
protected void addLimits(StringBuilder sb, int startRow, int maxRowCount)
{
// The database layer supplies the dialect-correct OFFSET/LIMIT syntax.
sb.append(" ").append(constructOffsetLimitClause(startRow,maxRowCount));
}
}