/*******************************************************************************
* Copyright (c) 2004, 2007 IBM Corporation and Cambridge Semantics Incorporated.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* File: $Source: /cvsroot/slrp/boca/com.ibm.adtech.boca.model.indexer.lucene/src/com/ibm/adtech/boca/model/indexer/lucene/ModelIndexQuery.java,v $
* Created by: Wing Yung ( <a href="mailto:wingyung@us.ibm.com">wingyung@us.ibm.com </a>)
* Created on: 10/11/2005
* Revision: $Id: ModelIndexQuery.java 161 2007-07-31 14:11:06Z mroy $
*
* Contributors:
* IBM Corporation - initial API and implementation
* Cambridge Semantics Incorporated - Fork to Anzo
*******************************************************************************/
package org.openanzo.datasource.nodecentric.indexer;
import java.util.Calendar;
import java.util.Collection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.util.Version;
import org.openanzo.exceptions.ExceptionConstants;
import org.openanzo.indexer.IndexerException;
import org.openanzo.indexer.lucene.LuceneConstants;
import org.openanzo.indexer.lucene.LuceneQuery;
import org.openanzo.rdf.Constants.INDEXER;
/**
* Adds a date filter to the query.
*
* This stuff should be in another class since it's not indexer-specific.
*
* A date range should be specified as part of the query string. It will have the following format:
*
* modified:<i>date_expression</i>
*
* The date expression defines a date range determined by two time expressions.
*
* Time expressions can assume one of four forms: 1) a string containing the number of ms that have elapsed since January 1, 1970, 2) * for before/after
* expressions (see below), 3) a relative time expression, or 4) an absolute time expression.
*
* Relative time expressions begin with y (year), mo (month), w (week), d (date), h (hour) or mi (minute). The next character in a relative time expression is
* '-' (can be interpreted as minus). mo-2 means two months ago, h-4 means four hours ago, etc.
*
* Absolute time expressions also begin with y, mo, d, h, or mi. The numbers following specify the exact time, with 4 chars for the year and two for each
* subsequent time unit. mo200404 means April 2004, mi200505051921 means 7:21pm on May 5, 2005.
*
* An absolute time may be passed in alone as a time range. The implied time range goes from the specified time to one time unit beyond the specified time. so
* modified:mo200506 encapsulates all of June 2005, modified:d20050621 encapsulates all of June 21, 2005.
*
* Ranges are of the form <i>start_char</i> <i>time_expr_1</i> to <i>time_expr_2</i> <i>end_char</i>. start_char is '[' or '{'. end_char is ']' or '}'. [ and ]
* include the dates that they are adjacent to, and { and } exclude the dates that they are adjacent to. Note that in the case where both time expressions are
* relative time expressions, the start_char and end_char must be present but they will be ignored. For relative time expressions, the implied
* inclusion/exclusion is as follows:
*
* [time_expr_1 to time_expr_2}
*
* @author Wing Yung (<a href="mailto:wingyung@us.ibm.com">wingyung@us.ibm.com</a>)
*/
public class ModelIndexQuery extends LuceneQuery {
    //private static final Logger log = LoggerFactory.getLogger(ModelIndexQuery.class);
    private static final String PREFIX_YEAR = "y";
    private static final String PREFIX_MONTH = "mo";
    private static final String PREFIX_WEEK = "w";
    private static final String PREFIX_DAY = "d";
    private static final String PREFIX_HOUR = "h";
    private static final String PREFIX_MINUTE = "mi";

    /** Value returned by the time-expression parser for "*", meaning the bound is open-ended. */
    private static final long OPEN_ENDED = -1;

    /**
     * Matches <code>modified:[xxx to yyy]</code> (also with <code>TO</code>, and with '{' / '}' as delimiters).
     * Groups: 1 = opening delimiter, 2 = first time expression, 3 = second time expression, 4 = closing delimiter.
     * The second expression stops at the first ']' or '}' so that it cannot run into a later clause of the query.
     */
    private static final Pattern MODIFIED_RANGE_PATTERN = Pattern.compile(LuceneConstants.INDEXER_FIELD_MODIFIED + ":(\\[|\\{)(\\S+) (?:to|TO) ([^\\]}]+)(\\]|\\})");

    /** Matches <code>modified:xxx</code>; group 1 is the time expression. */
    private static final Pattern MODIFIED_SINGLE_PATTERN = Pattern.compile(LuceneConstants.INDEXER_FIELD_MODIFIED + ":(\\S+)");

    /** Relative time expression such as <code>mo-2</code>: group 1 = unit prefix, group 2 = number of units. */
    private static final Pattern RELATIVE_TIME_PATTERN = Pattern.compile("([a-z]+)-(\\d+)");

    /** Absolute time expression such as <code>mi200505051921</code>: group 1 = unit prefix, groups 2-6 = year, month, day, hour, minute. */
    private static final Pattern ABSOLUTE_TIME_PATTERN = Pattern.compile("([a-z]+)(\\d\\d\\d\\d)(\\d\\d)?(\\d\\d)?(\\d\\d)?(\\d\\d)?");

    /**
     * Create a new ModelIndexQuery.
     *
     * Uses a StandardAnalyzer by default and a WhitespaceAnalyzer for the predicate, subject, graph URI and created-by fields.
     */
    public ModelIndexQuery() {
        super();
        PerFieldAnalyzerWrapper perField = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_30));
        perField.addAnalyzer(INDEXER.INDEXER_FIELD_PREDICATE, new WhitespaceAnalyzer());
        perField.addAnalyzer(INDEXER.INDEXER_FIELD_SUBJECT, new WhitespaceAnalyzer());
        perField.addAnalyzer(INDEXER.INDEXER_FIELD_GRAPH_URI, new WhitespaceAnalyzer());
        perField.addAnalyzer(LuceneConstants.INDEXER_FIELD_CREATED_BY, new WhitespaceAnalyzer());
        analyzer = perField;
    }

    /** Delegates unchanged to the superclass. */
    @Override
    public void initialize(String defaultField, String queryStr) throws IndexerException {
        super.initialize(defaultField, queryStr);
    }

    /** Delegates unchanged to the superclass. */
    @Override
    public void initialize(String defaultField, String queryStr, Collection<Query> terms) throws IndexerException {
        super.initialize(defaultField, queryStr, terms);
    }

    /**
     * Strips a <code>modified:</code> clause (if any) out of the query string, turns it into a range filter on the modified field, and initializes the
     * superclass with the remaining query text.
     *
     * @param queryStr
     *            the raw query string, possibly containing a modified clause
     * @throws IndexerException
     *             if the modified clause contains a time expression that cannot be parsed
     */
    @Override
    public void initialize(String queryStr) throws IndexerException {
        long[] range = new long[2];
        String strippedQuery = parseModifiedOut(queryStr, range);
        if (!strippedQuery.equals(queryStr)) {
            boolean openStart = range[0] == OPEN_ENDED;
            boolean openEnd = range[1] == OPEN_ENDED;
            if (!openStart && !openEnd) {
                filter = new TermRangeFilter(LuceneConstants.INDEXER_FIELD_MODIFIED, modifiedTerm(range[0]), modifiedTerm(range[1]), true, true);
            } else if (openStart && !openEnd) {
                // "before": everything from the epoch up to the end bound.
                filter = new TermRangeFilter(LuceneConstants.INDEXER_FIELD_MODIFIED, modifiedTerm(0), modifiedTerm(range[1]), true, true);
            } else if (!openStart) {
                // "after": a null upper term leaves the range open-ended. (Encoding Integer.MAX_VALUE milliseconds
                // as the upper bound would cap the range in January 1970 and match nothing.)
                filter = new TermRangeFilter(LuceneConstants.INDEXER_FIELD_MODIFIED, modifiedTerm(range[0]), null, false, false);
            }
            // Both bounds open ("* to *"): there is no date restriction, so the filter is left untouched.
        }
        super.initialize(strippedQuery);
    }

    /** Encodes a millisecond timestamp the way the filter terms for the modified field are written (millisecond resolution). */
    private static String modifiedTerm(long millis) {
        return DateTools.timeToString(millis, DateTools.Resolution.MILLISECOND);
    }

    /**
     * Extracts the first modified clause from the query string.
     *
     * Recognized forms:
     *   modified:xxx
     *   modified:[xxx TO yyy]
     *   modified:[xxx to yyy]
     * where '[' / ']' may also be '{' / '}'.
     *
     * @param queryStr
     *            the raw query string
     * @param newRange
     *            receives the start (index 0) and end (index 1) of the parsed range in milliseconds; ignored if null or shorter than two elements. A
     *            value of -1 marks a bound given as "*". For a single expression that is "*" or a plain millisecond value no interval is implied and
     *            the end is reported as 0.
     * @return the query string with the modified clause removed and surrounding whitespace trimmed, or the original string if no clause was found
     * @throws IndexerException
     *             if a time expression cannot be parsed
     */
    static String parseModifiedOut(String queryStr, long[] newRange) throws IndexerException {
        Matcher matcher = MODIFIED_RANGE_PATTERN.matcher(queryStr);
        if (matcher.find()) {
            boolean beginIsInclusive = "[".equals(matcher.group(1));
            boolean endIsInclusive = "]".equals(matcher.group(4));
            long start = parseTimeExpression(matcher.group(2), true, true, beginIsInclusive, null);
            // Group 3 may carry whitespace before the closing delimiter.
            long end = parseTimeExpression(matcher.group(3).trim(), true, false, endIsInclusive, null);
            storeRange(newRange, start, end);
            return removeSpan(queryStr, matcher.start(), matcher.end());
        }
        matcher = MODIFIED_SINGLE_PATTERN.matcher(queryStr);
        if (matcher.find()) {
            long[] implied = new long[2];
            long start = parseTimeExpression(matcher.group(1), false, false, false, implied);
            storeRange(newRange, start, implied[1]);
            return removeSpan(queryStr, matcher.start(), matcher.end());
        }
        return queryStr;
    }

    /** Writes start/end into the target array when the caller supplied one with room for both values. */
    private static void storeRange(long[] target, long start, long end) {
        if (target != null && target.length >= 2) {
            target[0] = start;
            target[1] = end;
        }
    }

    /** Returns text with the characters in [from, to) removed; the two remaining halves are trimmed and joined by a single space. */
    private static String removeSpan(String text, int from, int to) {
        String joined = text.substring(0, from).trim() + " " + text.substring(to).trim();
        return joined.trim();
    }

    /**
     * Parses a stand-alone time expression (not part of a range).
     *
     * @param timeExpression
     *            the expression to parse
     * @return the time in milliseconds, or -1 for "*"
     * @throws IndexerException
     *             if the expression cannot be parsed
     */
    static long parseTimeExpression(String timeExpression) throws IndexerException {
        return parseTimeExpression(timeExpression, false, false, false, null);
    }

    /**
     * Parses the given time expression.
     *
     * Accepted forms are "*" (open-ended, returns -1), a plain long (positive values are taken as milliseconds since the epoch, zero or negative values
     * as an offset from the current time), a relative expression such as <code>mo-2</code>, or an absolute expression such as <code>d20050621</code>.
     *
     * @param timeExpression
     *            the expression to parse
     * @param isRange
     *            - true if this time expression is part of a range (xxx to yyy)
     * @param isBegin
     *            - true if this time expression is the beginning marker, false if it is the ending marker. Only considered if isRange == true
     * @param isInclusive
     *            - true if this time expression is inclusive (it is adjacent to a '[' or ']'), false if it is adjacent to a '{' or '}'. Only considered if
     *            isRange == true, and only for absolute expressions.
     * @param range
     *            - this gets filled in by the method, if there is some reasonable interpretation of the time as a range. For example, y2004 could be
     *            interpreted as 1/1/2004 - 1/1/2005. Only filled for relative and absolute expressions when isRange == false.
     * @return the parsed time in milliseconds, or -1 for "*"
     * @throws IndexerException
     *             if the expression has an unknown unit prefix or is otherwise malformed
     */
    private static long parseTimeExpression(String timeExpression, boolean isRange, boolean isBegin, boolean isInclusive, long[] range) throws IndexerException {
        if (timeExpression.equals("*")) {
            return OPEN_ENDED;
        }
        try {
            // If it's a plain long, just parse it.
            long time = Long.parseLong(timeExpression);
            return time <= 0 ? System.currentTimeMillis() + time : time;
        } catch (NumberFormatException nfe) {
            try {
                Matcher relative = RELATIVE_TIME_PATTERN.matcher(timeExpression);
                if (relative.find()) {
                    return parseRelativeTime(relative, isRange, range, nfe);
                }
                Matcher absolute = ABSOLUTE_TIME_PATTERN.matcher(timeExpression);
                if (absolute.find()) {
                    return parseAbsoluteTime(absolute, isRange, isBegin, isInclusive, range, nfe);
                }
                throw new IndexerException(ExceptionConstants.INDEX.INVALID_TIME_SPECIFIER, nfe, timeExpression);
            } catch (NumberFormatException nfe2) {
                // Date wasn't formatted correctly (e.g. a required date component is missing).
                throw new IndexerException(ExceptionConstants.INDEX.INVALID_MODIFIED_TIME, nfe2, timeExpression);
            } catch (StringIndexOutOfBoundsException oe) {
                throw new IndexerException(ExceptionConstants.INDEX.INVALID_MODIFIED_TIME, oe, timeExpression);
            }
        }
    }

    /**
     * Resolves a relative expression (unit prefix, '-', count) against the current time.
     *
     * The result is the current time moved back by the given number of units; the current seconds and milliseconds are kept. When the expression is not
     * part of a range, the implied interval of one unit (seven days for weeks) starting at that time is stored in range.
     *
     * Inside a range the inclusive/exclusive delimiters are ignored for relative expressions: "h-3 to h-1" is unambiguous (three hours ago to one hour
     * ago), whereas shifting a bound by a whole unit, as is done for absolute expressions, would be confusing here.
     */
    private static long parseRelativeTime(Matcher match, boolean isRange, long[] range, NumberFormatException cause) throws IndexerException {
        Calendar cal = Calendar.getInstance();
        // Fields in the order year, month (0-based, as Calendar reports it), day, hour, minute.
        int[] start = { cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DATE), cal.get(Calendar.HOUR_OF_DAY), cal.get(Calendar.MINUTE) };
        String type = match.group(1);
        int length = Integer.parseInt(match.group(2));
        int field;
        int unit = 1;
        if (type.equals(PREFIX_YEAR)) {
            field = 0;
        } else if (type.equals(PREFIX_MONTH)) {
            field = 1;
        } else if (type.equals(PREFIX_WEEK)) {
            field = 2;
            unit = 7;
        } else if (type.equals(PREFIX_DAY)) {
            field = 2;
        } else if (type.equals(PREFIX_HOUR)) {
            field = 3;
        } else if (type.equals(PREFIX_MINUTE)) {
            field = 4;
        } else {
            throw new IndexerException(ExceptionConstants.INDEX.INVALID_TIME_SPECIFIER, cause, type);
        }
        start[field] -= unit * length;
        int[] end = start.clone();
        end[field] += unit;
        // The calendar is lenient, so out-of-range field values (e.g. a negative month) roll over into the larger fields.
        cal.set(start[0], start[1], start[2], start[3], start[4]);
        long startMillis = cal.getTimeInMillis();
        if (!isRange) {
            cal.set(end[0], end[1], end[2], end[3], end[4]);
            storeRange(range, startMillis, cal.getTimeInMillis());
        }
        return startMillis;
    }

    /**
     * Resolves an absolute expression (unit prefix followed by yyyy[MM[dd[HH[mm]]]]).
     *
     * The expression denotes the interval of one unit starting at the given time, e.g. y2004 is 1/1/2004 - 1/1/2005. Outside a range the start of that
     * interval is returned and the whole interval is stored in range. Inside a range the start of the interval is returned for an inclusive begin or
     * an exclusive end, and the end of the interval for an exclusive begin or an inclusive end.
     */
    private static long parseAbsoluteTime(Matcher match, boolean isRange, boolean isBegin, boolean isInclusive, long[] range, NumberFormatException cause) throws IndexerException {
        String type = match.group(1);
        // Number of leading date components (year, month, day, hour, minute) the unit requires.
        int depth;
        if (type.equals(PREFIX_YEAR)) {
            depth = 1;
        } else if (type.equals(PREFIX_MONTH)) {
            depth = 2;
        } else if (type.equals(PREFIX_DAY)) {
            depth = 3;
        } else if (type.equals(PREFIX_HOUR)) {
            depth = 4;
        } else if (type.equals(PREFIX_MINUTE)) {
            depth = 5;
        } else {
            // Includes the week prefix, which has no absolute form.
            throw new IndexerException(ExceptionConstants.INDEX.INVALID_TIME_SPECIFIER, cause, type);
        }
        // Defaults: month 0, day 1, hour 0, minute 0.
        int[] start = { 0, 0, 1, 0, 0 };
        for (int i = 0; i < depth; i++) {
            // A missing component yields a null group; parseInt then throws NumberFormatException, which the caller reports.
            start[i] = Integer.parseInt(match.group(i + 2));
        }
        int[] end = start.clone();
        end[depth - 1]++;
        // Calendar month is 0-indexed.
        if (start[1] > 0) {
            start[1] -= 1;
        }
        if (end[1] > 0) {
            end[1] -= 1;
        }
        if (!isRange) {
            // If it's not a range, it's possible that it is an implicit range -
            // for example, "y2004" implies the range from 1/1/2004 to 1/1/2005.
            long startMillis = startOfMinute(start);
            storeRange(range, startMillis, startOfMinute(end));
            return startMillis;
        }
        // An inclusive begin and an exclusive end use the start of the implied interval;
        // the other two cases use its end.
        boolean useIntervalEnd = isBegin != isInclusive;
        return startOfMinute(useIntervalEnd ? end : start);
    }

    /**
     * Converts year, month (0-based), day, hour, minute to milliseconds in the default time zone, with seconds and milliseconds set to zero.
     * Calendar.set(year, month, date, hourOfDay, minute) retains the other fields of the "now" instance, so they are cleared explicitly; otherwise
     * the boundary would drift with the time of the call.
     */
    private static long startOfMinute(int[] fields) {
        Calendar cal = Calendar.getInstance();
        cal.set(fields[0], fields[1], fields[2], fields[3], fields[4]);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);
        return cal.getTimeInMillis();
    }
}