/* The contents of this file are subject to the license and copyright terms
* detailed in the license directory at the root of the source tree (also
* available online at http://fedora-commons.org/license/).
*/
package fedora.server.search;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.log4j.Logger;
import fedora.server.ReadOnlyContext;
import fedora.server.Server;
import fedora.server.errors.ObjectIntegrityException;
import fedora.server.errors.QueryParseException;
import fedora.server.errors.RepositoryConfigurationException;
import fedora.server.errors.ServerException;
import fedora.server.errors.StorageDeviceException;
import fedora.server.errors.StreamIOException;
import fedora.server.errors.UnrecognizedFieldException;
import fedora.server.storage.ConnectionPool;
import fedora.server.storage.DOReader;
import fedora.server.storage.RepositoryReader;
import fedora.server.storage.types.DatastreamXMLMetadata;
import fedora.server.utilities.DateUtility;
import fedora.server.utilities.MD5Utility;
/**
* A FieldSearchResults object returned as the result of a FieldSearchSQLImpl
* search.
* <p>
* A FieldSearchResultSQLImpl is intended to be re-used in cases where the
* results of a query require more than one call to the server.
* </p>
*
* @author Chris Wilper
*/
public class FieldSearchResultSQLImpl
implements FieldSearchResult {
/** Logger for this class. */
private static final Logger LOG =
Logger.getLogger(FieldSearchResultSQLImpl.class.getName());
/* fields supporting public accessors */
private ArrayList m_objectFields;
private String m_token;
private long m_cursor = -1;
private final long m_completeListSize = -1;
private Date m_expirationDate;
private String m_nextPID;
/* invariants */
private final Connection m_conn;
private final ConnectionPool m_cPool;
private final RepositoryReader m_repoReader;
private final String[] m_resultFields;
private final int m_maxResults;
private final int m_maxSeconds;
private long m_startMillis;
/* internal state */
private Statement m_statement;
private ResultSet m_resultSet;
private long m_nextCursor = 0;
private boolean m_expired;
/**
* Construct a FieldSearchResultSQLImpl object.
* <p />
* Upon construction, a connection is obtained from the connectionPool, and
* the query is executed. (The connection will be returned to the pool only
* after the last result has been obtained from the ResultSet, the session
* is expired, or some non-recoverable error has occurred)
* <p />
* Once the ResultSet is obtained, one result is requested of it (and
* remembered for use in step()); then the call returns.
*
* @param cPool
* the connectionPool
* @param repoReader
* the provider of object field information for results
* @param resultFields
* which fields should be returned in results
* @param maxResults
* how many results should be returned at one time. This should be
* the smaller of a) the FieldSearchImpl's limit [the server limit]
* and b) the requested limit [the client limit]
* @param query
* the end-user query
*/
protected FieldSearchResultSQLImpl(ConnectionPool cPool,
RepositoryReader repoReader,
String[] resultFields,
int maxResults,
int maxSeconds,
FieldSearchQuery query)
throws SQLException, QueryParseException {
m_cPool = cPool;
m_repoReader = repoReader;
m_resultFields = resultFields;
m_maxResults = maxResults;
m_maxSeconds = maxSeconds;
m_conn = m_cPool.getConnection();
try {
m_statement = m_conn.createStatement();
m_resultSet =
m_statement
.executeQuery(logAndGetQueryText(query,
m_resultFields)); //2004.05.02 wdn5e
} catch (SQLException sqle) {
// if there's any kind of problem getting the resultSet,
// give the connection back to the pool
try {
if (m_resultSet != null) {
m_resultSet.close();
}
if (m_statement != null) {
m_statement.close();
}
if (m_conn != null) {
m_cPool.free(m_conn);
}
throw sqle;
} catch (SQLException sqle2) {
throw sqle2;
} finally {
m_resultSet = null;
m_statement = null;
}
}
}
//2004.05.02 wdn5e -- sort on selected fields
private String logAndGetQueryText(FieldSearchQuery query,
String[] resultFields) //2004.05.02 wdn5e
throws SQLException, QueryParseException {
StringBuffer queryText = new StringBuffer("SELECT");
if (query.getType() == FieldSearchQuery.TERMS_TYPE) {
queryText.append(" doFields.pid FROM doFields"
+ getWhereClause(query.getTerms()));
} else {
StringBuffer resultFieldsString = new StringBuffer();
if (resultFields.length > 0) {
String delimiter = " ";
for (String element : resultFields) {
String dbColumn = "doFields." + dcFixup(element);
resultFieldsString.append(delimiter + dbColumn);
delimiter = ", ";
}
}
queryText.append(resultFieldsString);
queryText.append(" FROM doFields");
queryText.append(getWhereClause(query.getConditions()));
// disabled sorting: see bug 78
// queryText.append(" ORDER BY");
// queryText.append(resultFieldsString);
}
String qt = queryText.toString();
LOG.debug(qt);
return qt;
}
private String getWhereClause(String terms) throws QueryParseException {
if (terms.indexOf("'") != -1) {
throw new QueryParseException("Query cannot contain the ' character.");
}
StringBuffer whereClause = new StringBuffer();
if (!terms.equals("*") && !terms.equals("")) {
whereClause.append(" WHERE");
// formulate the where clause if the terms aren't * or ""
int usedCount = 0;
boolean needsEscape = false;
for (String column : FieldSearchSQLImpl.DB_COLUMN_NAMES) {
// use only stringish columns in query
boolean use = column.indexOf("Date") == -1;
if (!use) {
if (column.equals("dcDate")) {
use = true;
}
}
if (use) {
if (usedCount > 0) {
whereClause.append(" OR");
}
String qPart = toSql(column, terms);
if (qPart.charAt(0) == ' ') {
needsEscape = true;
} else {
whereClause.append(" ");
}
whereClause.append(qPart);
usedCount++;
}
}
if (needsEscape) {
// whereClause.append(" {escape '/'}");
}
}
return whereClause.toString();
}
private String getWhereClause(List conditions) throws QueryParseException {
StringBuffer whereClause = new StringBuffer();
boolean willJoin = false;
if (conditions.size() > 0) {
boolean needsEscape = false;
whereClause.append(" WHERE");
for (int i = 0; i < conditions.size(); i++) {
Condition cond = (Condition) conditions.get(i);
if (i > 0) {
whereClause.append(" AND");
}
String op = cond.getOperator().getSymbol();
String prop = cond.getProperty();
if (prop.toLowerCase().endsWith("date")) {
// deal with dates ... cDate mDate dcmDate date
if (op.equals("~")) {
if (prop.equals("date")) {
// query for dcDate as string
String sqlPart =
toSql("doFields.dcDate", cond.getValue());
if (sqlPart.startsWith(" ")) {
needsEscape = true;
} else {
whereClause.append(' ');
}
whereClause.append(sqlPart);
} else {
throw new QueryParseException("The ~ operator "
+ "cannot be used with cDate, mDate, "
+ "or dcmDate because they are not "
+ "string-valued fields.");
}
} else { // =, <, <=, >, >=
// property must be parsable as a date... if ok,
// do (cDate, mDate, dcmDate)
// or (date) <- dcDate from dcDates table
Date dt = DateUtility.parseDateAsUTC(cond.getValue());
if (dt == null) {
throw new QueryParseException("When using "
+ "equality or inequality operators "
+ "with a date-based value, the date "
+ "must be in yyyy-MM-DD[THH:mm:ss[.SSS][Z]] "
+ "form.");
}
if (prop.equals("date")) {
// do a left join on the dcDates table...dcDate
// query will be of form:
// select pid
// from doFields
// left join dcDates on doFields.pid=dcDates.pid
// where...
if (!willJoin) {
willJoin = true;
whereClause.insert(0, " LEFT JOIN dcDates "
+ "ON doFields.pid=dcDates.pid");
}
whereClause.append(" dcDates.dcDate" + op
+ dt.getTime());
} else {
whereClause.append(" doFields." + prop + op
+ dt.getTime());
}
}
} else {
if (op.equals("=")) {
if (isDCProp(prop)) {
throw new QueryParseException("The = operator "
+ "can only be used with dates and "
+ "non-repeating fields.");
} else {
// do a real equals check... do a toSql but
// reject it if it uses "LIKE"
String sqlPart =
toSql("doFields." + prop, cond.getValue());
if (sqlPart.indexOf("LIKE ") != -1) {
throw new QueryParseException("The = "
+ "operator cannot be used with "
+ "wildcards.");
}
if (sqlPart.startsWith(" ")) {
needsEscape = true;
} else {
whereClause.append(' ');
}
whereClause.append(sqlPart);
}
} else if (op.equals("~")) {
if (isDCProp(prop)) {
// prepend dc and caps the first char first...
prop =
"dc" + prop.substring(0, 1).toUpperCase()
+ prop.substring(1);
}
// the field name is ok, so toSql it
String sqlPart =
toSql("doFields." + prop, cond.getValue());
if (sqlPart.startsWith(" ")) {
needsEscape = true;
} else {
whereClause.append(' ');
}
whereClause.append(sqlPart);
} else {
throw new QueryParseException("Can't use >, >=, <, "
+ "or <= operator on a string-based field.");
}
}
}
if (needsEscape) {
// whereClause.append(" {escape '/'}");
}
}
return whereClause.toString();
}
protected boolean isExpired() {
long passedSeconds =
(System.currentTimeMillis() - m_startMillis) / 1000;
m_expired = passedSeconds > m_maxSeconds;
LOG.debug("has fieldSearchResultSQL expired? "+m_expired +
", passed: "+passedSeconds);
if (m_expired) {
// clean up
try {
if (m_resultSet != null) {
m_resultSet.close();
}
if (m_statement != null) {
m_statement.close();
}
if (m_conn != null) {
m_cPool.free(m_conn);
}
} catch (SQLException sqle) {
} finally {
m_resultSet = null;
m_statement = null;
}
}
return m_expired;
}
/**
* Update object with the next chunk of results. if getToken() is null after
* this call, the resultSet was exhausted.
*/
protected void step() throws UnrecognizedFieldException,
ObjectIntegrityException, RepositoryConfigurationException,
StreamIOException, ServerException {
m_objectFields = new ArrayList();
int resultCount = 0;
// Run through resultSet, adding each result to m_objectFields
// for up to maxResults objects, or until the result set is
// empty, whichever comes first.
// Note: If this is the first chunk of results for the entire search,
// m_nextPID will be null, and will require the cursor to be advanced
// to the first result. For all remaining chunks, the cursor will
// already be in the correct position (and m_nextPID will have been set)
// so a call to m_resultSet.next() is not initially necessary.
try {
while (resultCount < m_maxResults
&& (m_nextPID != null || m_resultSet.next())) {
resultCount++;
// add the current object's info to m_objectFields
String pid;
if (m_nextPID == null) {
pid = m_resultSet.getString("pid");
} else {
pid = m_nextPID;
m_nextPID = null;
}
m_objectFields.add(getObjectFields(pid));
}
// done with this block. now, are there more results?
if (resultCount == m_maxResults && m_resultSet.next()) {
// yes, and we've now advanced the cursor so we must remember
// the pid so the next chunk can use it
m_nextPID = m_resultSet.getString("pid");
// generate a token, make sure the cursor is set,
// and make sure the expirationDate is set
long now = System.currentTimeMillis();
m_token = MD5Utility.getBase16Hash(hashCode() + "" + now);
m_cursor = m_nextCursor;
// keep m_nextCursor updated for next block
m_nextCursor += resultCount;
m_startMillis = now;
Date dt = new Date();
dt.setTime(m_startMillis + 1000 * m_maxSeconds);
m_expirationDate = dt;
} else {
// no, so make sure the token is null and clean up
m_token = null;
try {
if (m_resultSet != null) {
m_resultSet.close();
}
if (m_statement != null) {
m_statement.close();
}
if (m_conn != null) {
m_cPool.free(m_conn);
}
} catch (SQLException sqle2) {
throw new StorageDeviceException("Error closing statement "
+ "or result set." + sqle2.getMessage());
} finally {
m_resultSet = null;
m_statement = null;
}
}
} catch (SQLException sqle) {
try {
if (m_resultSet != null) {
m_resultSet.close();
}
if (m_statement != null) {
m_statement.close();
}
if (m_conn != null) {
m_cPool.free(m_conn);
}
throw new StorageDeviceException("Error with sql database. "
+ sqle.getMessage());
} catch (SQLException sqle2) {
throw new StorageDeviceException("Error closing statement "
+ "or result set." + sqle.getMessage()
+ sqle2.getMessage());
} finally {
m_resultSet = null;
m_statement = null;
}
}
}
/**
* For the given pid, get a reader on the object from the repository and
* return an ObjectFields object with resultFields fields populated.
*
* @param pid
* the unique identifier of the object for which the information is
* requested.
* @return ObjectFields populated with the requested fields
* @throws UnrecognizedFieldException
* if a resultFields value isn't valid
* @throws ObjectIntegrityException
* if the underlying digital object can't be parsed
* @throws RepositoryConfigurationException
* if the sax parser can't be constructed
* @throws StreamIOException
* if an error occurs while reading the serialized digital object
* stream
* @throws ServerException
* if any other kind of error occurs while reading the underlying
* object
*/
private ObjectFields getObjectFields(String pid)
throws UnrecognizedFieldException, ObjectIntegrityException,
RepositoryConfigurationException, StreamIOException,
ServerException {
DOReader r =
m_repoReader.getReader(Server.USE_DEFINITIVE_STORE,
ReadOnlyContext.EMPTY,
pid);
ObjectFields f;
// If there's a DC record available, use SAX to parse the most
// recent version of it into f.
DatastreamXMLMetadata dcmd = null;
try {
dcmd = (DatastreamXMLMetadata) r.GetDatastream("DC", null);
} catch (ClassCastException cce) {
throw new ObjectIntegrityException("Object " + r.GetObjectPID()
+ " has a DC datastream, but it's not inline XML.");
}
if (dcmd != null) {
f = new ObjectFields(m_resultFields, dcmd.getContentStream());
// add dcmDate if wanted
for (String element : m_resultFields) {
if (element.equals("dcmDate")) {
f.setDCMDate(dcmd.DSCreateDT);
}
}
} else {
f = new ObjectFields();
}
// add non-dc values from doReader for the others in m_resultFields[]
// Disseminator[] disses=null;
for (String n : m_resultFields) {
if (n.equals("pid")) {
f.setPid(pid);
}
if (n.equals("label")) {
f.setLabel(r.GetObjectLabel());
}
if (n.equals("state")) {
f.setState(r.GetObjectState());
}
if (n.equals("ownerId")) {
f.setOwnerId(r.getOwnerId());
}
if (n.equals("cDate")) {
f.setCDate(r.getCreateDate());
}
if (n.equals("mDate")) {
f.setMDate(r.getLastModDate());
}
}
return f;
}
public List objectFieldsList() {
return m_objectFields;
}
public String getToken() {
return m_token;
}
public long getCursor() {
return m_cursor;
}
public long getCompleteListSize() {
return m_completeListSize;
}
public Date getExpirationDate() {
return m_expirationDate;
}
/**
* Return a condition suitable for a SQL WHERE clause, given a column name
* and a string with a possible pattern (using * and questionmark
* wildcards).
* <p>
* </p>
* If the string has any characters that need to be escaped, it will begin
* with a space, indicating to the caller that the entire WHERE clause
* should end with " {escape '/'}".
*
* @param name
* the name of the field in the database
* @param in
* the query string, where * and ? are treated as wildcards
* @return String a suitable string for use in a SQL WHERE clause, as
* described above
*/
private static String toSql(String name, String in) {
if (!name.endsWith("pid")) {
in = in.toLowerCase(); // if it's not a PID-type field,
}
// it's case insensitive
if (name.startsWith("dc") || name.startsWith("doFields.dc")) {
StringBuffer newIn = new StringBuffer();
if (!in.startsWith("*")) {
newIn.append("* ");
}
newIn.append(in);
if (!in.endsWith("*")) {
newIn.append(" *");
}
in = newIn.toString();
}
if (in.indexOf("\\") != -1) {
// has one or more escapes, un-escape and translate
StringBuffer out = new StringBuffer();
out.append("\'");
boolean needLike = false;
boolean needEscape = false;
boolean lastWasEscape = false;
for (int i = 0; i < in.length(); i++) {
char c = in.charAt(i);
if (!lastWasEscape && c == '\\') {
lastWasEscape = true;
} else {
char nextChar = '!';
boolean useNextChar = false;
if (!lastWasEscape) {
if (c == '?') {
out.append('_');
needLike = true;
} else if (c == '*') {
out.append('%');
needLike = true;
} else {
nextChar = c;
useNextChar = true;
}
} else {
nextChar = c;
useNextChar = true;
}
if (useNextChar) {
if (nextChar == '\"') {
out.append("\\\"");
needEscape = true;
} else if (nextChar == '\'') {
out.append("\\\'");
needEscape = true;
} else if (nextChar == '%') {
out.append("\\%");
needEscape = true;
} else if (nextChar == '_') {
out.append("\\_");
needEscape = true;
} else {
out.append(nextChar);
}
}
lastWasEscape = false;
}
}
out.append("\'");
if (needLike) {
out.insert(0, " LIKE ");
} else {
// replace any \% and \_ in value string with % or _
String fixedString =
out.toString().replaceAll("\\\\%", "%")
.replaceAll("\\\\_", "_");
out = new StringBuffer();
out.append(fixedString);
out.insert(0, " = ");
}
out.insert(0, name);
if (needEscape) {
out.insert(0, ' ');
}
return out.toString();
} else {
// no escapes, just translate if needed
StringBuffer out = new StringBuffer();
out.append("\'");
boolean needLike = false;
boolean needEscape = false;
for (int i = 0; i < in.length(); i++) {
char c = in.charAt(i);
if (c == '?') {
out.append('_');
needLike = true;
} else if (c == '*') {
out.append('%');
needLike = true;
} else if (c == '\"') {
out.append("\\\"");
needEscape = true;
} else if (c == '\'') {
out.append("\\\'");
needEscape = true;
} else if (c == '%') {
out.append("\\%");
needEscape = true;
} else if (c == '_') {
out.append("\\_");
needEscape = true;
} else {
out.append(c);
}
}
out.append("\'");
if (needLike) {
out.insert(0, " LIKE ");
} else {
// replace any \% and \_ in value string with % or _
String fixedString =
out.toString().replaceAll("\\\\%", "%")
.replaceAll("\\\\_", "_");
out = new StringBuffer();
out.append(fixedString);
out.insert(0, " = ");
}
out.insert(0, name);
if (needEscape) {
out.insert(0, ' ');
}
return out.toString();
}
}
/**
* Tell whether a field name, as given in the search request, is a dublin
* core field.
*
* @param the
* field
* @return whether it's a dublin core field
*/
private static final boolean isDCProp(String in) {//2004.05.18 wdn5e wasn't static final
if (in.equals("mDate") || in.equals("dcmDate")) {
return false;
}
for (String n : FieldSearchSQLImpl.DB_COLUMN_NAMES) {
if (n.startsWith("dc")
&& n.toLowerCase().indexOf(in.toLowerCase()) == 2) { //2004.05.18 wdn5e (was -1)
return true;
}
}
return false;
}
//2004.05.18 wdn5e -- logic now needed > 1 places
private static final String dcFixup(String st) {
String dcFixed;
if (isDCProp(st)) {
dcFixed = "dc" + st.substring(0, 1).toUpperCase() + st.substring(1);
} else {
dcFixed = st;
}
return dcFixed;
}
}