/*******************************************************************************
* Australian National University Data Commons
* Copyright (C) 2013 The Australian National University
*
* This file is part of Australian National University Data Commons.
*
* Australian National University Data Commons is free software: you
* can redistribute it and/or modify it under the terms of the GNU
* General Public License as published by the Free Software Foundation,
* either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package au.edu.anu.datacommons.search;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import au.edu.anu.datacommons.config.Config;
import au.edu.anu.datacommons.properties.GlobalProps;
/**
* SparqlQuery
*
* Australian National University Data Commons
*
* Generates a SPARQL query with words to search as input. The terms can be words or phrases (enclosed in double quotes) and have 'AND' and 'OR' as binary
* operators. The default binary operator is AND, which means when two words are provided without a binary operator, results containing both the terms will be
* returned. The following are valid term strings:
*
* <ul>
* <li>first second "third fourth"</li>
* <li>first OR "second third"</li>
* <li>first AND second OR third</li>
* </ul>
*
* Usage:
*
* <code>
* SparqlQuery query = new SparqlQuery("term1 term2 etc...");
* String queryToExec = query.generateQuery();
* </code>
*
* <pre>
* Version Date Developer Description
* 0.1 14/03/2012 Rahul Khanna (RK) Initial
* 0.2 13/06/2012 Genevieve Turner (GT) Changed query string output to debug
* 0.1 17/07/2012 Genevieve Turner(GT) Added the ability to add a set of triples
* </pre>
*/
public final class SparqlQuery
{
    private static final Logger LOGGER = LoggerFactory.getLogger(SparqlQuery.class);

    /** Matches either a double-quoted phrase or a run of characters that contains neither whitespace nor double quotes. */
    private static final Pattern TERM_PATTERN = Pattern.compile("\"[^\"]+\"|[^\"\\s]+");

    /** Search keyword that joins the following term to the previous ones with a logical OR. */
    private static final String KEYWORD_OR = "OR";

    /** Search keyword that joins the following term to the previous ones with a logical AND. */
    private static final String KEYWORD_AND = "AND";

    // Search terms extracted by setTerms(), including any AND/OR keywords.
    private final ArrayList<String> terms;
    // Names of the fields returned by the default search query (from GlobalProps).
    private final String[] retFields;
    // Names of the Dublin Core fields the default search query looks in (from GlobalProps).
    private final String[] dcFieldsToSearch;
    // Complete "PREFIX x: <uri>" lines.
    private final ArrayList<String> prefixes;
    // Entries of the SELECT clause.
    private final ArrayList<String> vars;
    // Entries of the WHERE clause; each one is terminated with " ." when the query is generated.
    private final ArrayList<String> triples;
    // Parenthesised filter expressions, each optionally preceded by its joining operator.
    private final ArrayList<String> filters;
    private int offset = 0;
    private int limit = 0;

    /**
     * SparqlQuery
     *
     * Australian National University Data Commons
     *
     * Constructor for this class. Creates an empty query and reads the lists of fields to search and fields to return from the global properties
     * ({@code GlobalProps.PROP_SEARCH_SEARCHFIELDS} and {@code GlobalProps.PROP_SEARCH_RETURNFIELDS}, both comma-separated).
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      16/03/2012  Rahul Khanna (RK)   Initial
     * </pre>
     */
    public SparqlQuery()
    {
        terms = new ArrayList<String>();
        prefixes = new ArrayList<String>();
        vars = new ArrayList<String>();
        triples = new ArrayList<String>();
        filters = new ArrayList<String>();

        // Read the list of fields to search terms in.
        dcFieldsToSearch = GlobalProps.getProperty(GlobalProps.PROP_SEARCH_SEARCHFIELDS).split(",");

        // Read the list of fields that are returned by the query.
        retFields = GlobalProps.getProperty(GlobalProps.PROP_SEARCH_RETURNFIELDS).split(",");
    }

    /**
     * SparqlQuery
     *
     * Australian National University Data Commons
     *
     * Overloaded constructor for this class that accepts keywords to search for and calls the setTerms method automatically.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      28/03/2012  Rahul Khanna (RK)   Initial
     * </pre>
     *
     * @param termsString
     *            A space-separated list of keywords to search as a String.
     */
    public SparqlQuery(String termsString)
    {
        this();
        setTerms(termsString);
    }

    /**
     * SparqlQuery
     *
     * Australian National University Data Commons
     *
     * Overloaded constructor for this class that accepts keywords to search for as well as the paging values of the query.
     *
     * @param termsString
     *            A space-separated list of keywords to search as a String.
     * @param offset
     *            Offset as int. Only written to the query when greater than 0.
     * @param limit
     *            Limit as int. Only written to the query when greater than 0.
     */
    public SparqlQuery(String termsString, int offset, int limit)
    {
        this(termsString);
        this.offset = offset;
        this.limit = limit;
    }

    /**
     * setTerms
     *
     * Australian National University Data Commons
     *
     * Accepts a String of terms, splits it into individual terms and then adds the default search elements (prefix, vars, triples and filters) for them
     * by calling {@link #setDefaultSearchElements()}. A phrase enclosed in double quotes becomes a single term; the quotes themselves are removed.
     *
     * Note that the terms and the default elements are appended to whatever this object already holds, so calling this method more than once on the
     * same object adds the default elements again.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      16/03/2012  Rahul Khanna (RK)   Initial
     * </pre>
     *
     * @param termsString
     *            Keywords to search. For example: first OR "second third"
     */
    public void setTerms(String termsString)
    {
        // Separate out the phrases from individual words.
        Matcher matcher = TERM_PATTERN.matcher(termsString);
        LOGGER.info("Terms split up: ");
        while (matcher.find())
        {
            String term = matcher.group(0).replace("\"", "");
            terms.add(term);
            LOGGER.info("\"{}\"", term);
        }

        // Once the terms are set, add the other elements required to create a SPARQL query for running a search.
        setDefaultSearchElements();
    }

    /**
     * getOffset
     *
     * Australian National University Data Commons
     *
     * Gets the offset value in this query.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      08/05/2012  Rahul Khanna (RK)   Initial
     * </pre>
     *
     * @return Offset as int. 0 if not specified.
     */
    public int getOffset()
    {
        return offset;
    }

    /**
     * setOffset
     *
     * Australian National University Data Commons
     *
     * Sets the offset value in this query.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      08/05/2012  Rahul Khanna (RK)   Initial
     * </pre>
     *
     * @param offset
     *            Offset as int.
     */
    public void setOffset(int offset)
    {
        this.offset = offset;
    }

    /**
     * getLimit
     *
     * Australian National University Data Commons
     *
     * Gets the limit value in this query that limits the number of search results returned.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      08/05/2012  Rahul Khanna (RK)   Initial
     * </pre>
     *
     * @return Limit as int. 0 if not specified.
     */
    public int getLimit()
    {
        return limit;
    }

    /**
     * setLimit
     *
     * Australian National University Data Commons
     *
     * Sets the limit value in this query to limit the number of search results returned.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      08/05/2012  Rahul Khanna (RK)   Initial
     * </pre>
     *
     * @param limit
     *            Limit as int.
     */
    public void setLimit(int limit)
    {
        this.limit = limit;
    }

    /**
     * addPrefix
     *
     * Australian National University Data Commons
     *
     * Adds a namespace prefix element.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      28/03/2012  Rahul Khanna (RK)   Initial
     * </pre>
     *
     * @param prefix
     *            A prefix string that will translate into a namespace URI. Colon following a prefix is optional and is automatically added. E.g. dc
     * @param namespaceUri
     *            The namespace URI the prefix will be translated into. Angle brackets surrounding the URI are optional and are automatically added. E.g.
     *            "http://purl.org/dc/elements/1.1/"
     */
    public void addPrefix(String prefix, String namespaceUri)
    {
        StringBuilder prefixLine = new StringBuilder("PREFIX ");

        // startsWith/endsWith (rather than charAt) so that an empty prefix yields the default "PREFIX :" declaration instead of an exception.
        prefixLine.append(prefix);
        if (!prefix.endsWith(":"))
            prefixLine.append(":");

        prefixLine.append(" ");

        if (!namespaceUri.startsWith("<"))
            prefixLine.append("<");
        prefixLine.append(namespaceUri);
        if (!namespaceUri.endsWith(">"))
            prefixLine.append(">");

        prefixes.add(prefixLine.toString());
    }

    /**
     * addVar
     *
     * Australian National University Data Commons
     *
     * Adds a field to the list of fields whose value is included in the resultset by the SPARQL query for the rows that meet the criteria (filters). "*"
     * can be used to return all fields.
     *
     * A bare variable name (only letters, digits and underscores) gets the leading '?' added automatically. Anything else - a name that already starts
     * with '?', the "*" wildcard, the DISTINCT/REDUCED modifiers or a longer expression - is added to the SELECT clause exactly as given.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      28/03/2012  Rahul Khanna (RK)   Initial.
     * </pre>
     *
     * @param var
     *            Field that's included in the resultset. Must not be empty.
     * @throws IllegalArgumentException
     *             when var is an empty string.
     */
    public void addVar(String var)
    {
        if (var.length() == 0)
            throw new IllegalArgumentException("var must not be empty");

        if (isBareVariableName(var))
            vars.add("?" + var);
        else
            vars.add(var);
    }

    /**
     * Tells whether a non-empty SELECT entry is a plain variable name that is merely missing its leading '?'.
     *
     * @param candidate
     *            Non-empty entry passed to addVar.
     * @return true when candidate consists solely of letters, digits and underscores and is not the DISTINCT or REDUCED modifier.
     */
    private static boolean isBareVariableName(String candidate)
    {
        // Solution modifiers look like bare names but must never be turned into variables.
        if ("DISTINCT".equalsIgnoreCase(candidate) || "REDUCED".equalsIgnoreCase(candidate))
            return false;

        for (int i = 0; i < candidate.length(); i++)
        {
            char c = candidate.charAt(i);
            if (!Character.isLetterOrDigit(c) && c != '_')
                return false;
        }
        return true;
    }

    /**
     * addTriple
     *
     * Australian National University Data Commons
     *
     * Adds a triple to the list of triples that make up the WHERE clause of the query.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      28/03/2012  Rahul Khanna (RK)   Initial
     * </pre>
     *
     * @param subject
     *            Subject as string. '?' must precede a non-literal.
     * @param predicate
     *            A fully qualified predicate including namespace or namespace prefix if specified.
     * @param object
     *            Object as string, written to the query exactly as given.
     * @param isOptional
     *            When true the triple is wrapped in an OPTIONAL { } group.
     */
    public void addTriple(String subject, String predicate, String object, boolean isOptional)
    {
        StringBuilder tripleLine = new StringBuilder();
        if (isOptional)
            tripleLine.append("OPTIONAL {");

        tripleLine.append(subject).append(" ").append(predicate).append(" ").append(object);

        if (isOptional)
            tripleLine.append("}");

        triples.add(tripleLine.toString());
    }

    /**
     * addTripleSet
     *
     * Provides a more flexible way of adding triples. Useful for performing actions
     * such as UNION. The given text is added to the WHERE clause exactly as given.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      17/07/2012  Genevieve Turner(GT)    Initial
     * </pre>
     *
     * @param triple The set of triples to add
     */
    public void addTripleSet(String triple)
    {
        triples.add(triple);
    }

    /**
     * addFilter
     *
     * Australian National University Data Commons
     *
     * Adds a filter to the list of filters that comprise the criteria that each object should match to be included in the resultset. The filter is
     * wrapped in parentheses and, when an operator is given, preceded by that operator on its own line.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      28/03/2012  Rahul Khanna (RK)   Initial
     * </pre>
     *
     * @param filter
     *            A complete filter item as string.
     * @param operator
     *            The operator to use before this filter, such as {@code "&&"} or {@code "||"}. May be null or empty for the first filter.
     */
    public void addFilter(String filter, String operator)
    {
        StringBuilder filterLine = new StringBuilder();
        if (operator != null && !"".equals(operator))
        {
            filterLine.append(operator);
            filterLine.append(Config.NEWLINE);
        }

        filterLine.append("(");
        filterLine.append(Config.NEWLINE);
        filterLine.append(filter);
        filterLine.append(Config.NEWLINE);
        filterLine.append(")");

        filters.add(filterLine.toString());
    }

    /**
     * setDefaultSearchElements
     *
     * Australian National University Data Commons
     *
     * Adds the default elements for a SPARQL query for search: the "dc" prefix, one var per return field, one triple per field to search (the first one
     * required, the others optional) and one filter per search term.
     *
     * A term equal to OR or AND that is not the first term is treated as the operator joining the term that follows it; terms without an explicit
     * operator are joined with AND. An operator that is not followed by a term is ignored. Each term is used as a case-insensitive regular expression
     * and is escaped so that it forms a valid SPARQL string literal.
     *
     * <pre>
     * Version  Date        Developer           Description
     * 0.1      28/03/2012  Rahul Khanna (RK)   Initial.
     * </pre>
     */
    public void setDefaultSearchElements()
    {
        // Add DC prefix.
        addPrefix("dc", "http://purl.org/dc/elements/1.1/");

        // Add Vars.
        for (String retField : retFields)
            addVar("?" + retField);

        // Add triples. Only the first field is mandatory; the remaining ones are optional.
        for (int i = 0; i < dcFieldsToSearch.length; i++)
            addTriple("?item", "dc:" + dcFieldsToSearch[i], "?" + dcFieldsToSearch[i], i > 0);

        // Generate a filter for each search term that has a regular expression for each field that should be searched for the term.
        for (int iTerm = 0; iTerm < terms.size(); iTerm++)
        {
            String operator = "";
            if (iTerm > 0)
            {
                // Default binary operator is AND.
                operator = "&&";

                String candidate = terms.get(iTerm);
                boolean isOr = KEYWORD_OR.equals(candidate);
                if (isOr || KEYWORD_AND.equals(candidate))
                {
                    if (isOr)
                        operator = "||";

                    // The operator applies to the term that follows it.
                    iTerm++;

                    // A dangling operator at the end of the input has nothing to join - ignore it instead of reading past the end of the list.
                    if (iTerm >= terms.size())
                        break;
                }
            }

            String termLiteral = escapeForSparqlLiteral(terms.get(iTerm));

            StringBuilder regExpForTerm = new StringBuilder();
            for (int jDcField = 0; jDcField < dcFieldsToSearch.length; jDcField++)
            {
                if (jDcField > 0)
                {
                    regExpForTerm.append(Config.NEWLINE);
                    regExpForTerm.append("|| ");
                }

                regExpForTerm.append("regex(?");
                regExpForTerm.append(dcFieldsToSearch[jDcField]);
                regExpForTerm.append(", \"");
                regExpForTerm.append(termLiteral);
                regExpForTerm.append("\", \"i\")");
            }

            addFilter(regExpForTerm.toString(), operator);
        }
    }

    /**
     * Escapes a value so it can be placed between double quotes in a SPARQL query without ending the literal early or producing an invalid escape
     * sequence.
     *
     * @param value
     *            Raw text, such as a search term.
     * @return value with backslashes, double quotes, line feeds and carriage returns replaced by their SPARQL escape sequences.
     */
    private static String escapeForSparqlLiteral(String value)
    {
        StringBuilder escaped = new StringBuilder(value.length() + 8);
        for (int i = 0; i < value.length(); i++)
        {
            char c = value.charAt(i);
            switch (c)
            {
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * generateQuery
     *
     * Australian National University Data Commons
     *
     * Creates a SPARQL query using the elements of a SPARQL query (prefixes, vars, triples and filters) held by this object. With the default search
     * elements the query searches for terms in the Dublin Core fields specified and returns the fields specified as vars. OFFSET and LIMIT are only
     * written when their value is greater than 0.
     *
     * <pre>
     * PREFIX dc: &lt;http://purl.org/dc/elements/1.1/&gt;       // Prefixes
     * SELECT ?item ?title ?description                     // Vars
     * WHERE {
     * ?item dc:title ?title .                              // Triples (some may be optional)
     * OPTIONAL {?item dc:creator ?creator} .
     * OPTIONAL {?item dc:subject ?subject} .
     * OPTIONAL {?item dc:description ?description} .
     * FILTER (                                             // Filters
     * (
     * regex(?title, "condition", "i")                      // "condition" is a search keyword/phrase.
     * || regex(?creator, "condition", "i")
     * || regex(?subject, "condition", "i")
     * || regex(?description, "condition", "i")
     * )
     * )
     * }
     * OFFSET 20                                            // Offset
     * LIMIT 10                                             // Limit
     * </pre>
     *
     * <pre>
     * Version  Date        Developer               Description
     * 0.1      16/03/2012  Rahul Khanna (RK)       Initial
     * 0.2      13/06/2012  Genevieve Turner (GT)   Changed query string output to debug
     * </pre>
     *
     * @return The SPARQL query as a String.
     */
    public String generateQuery()
    {
        StringBuilder query = new StringBuilder();

        // Prefixes.
        for (String prefixLine : prefixes)
        {
            query.append(prefixLine);
            query.append(Config.NEWLINE);
        }

        // SELECT clause for vars.
        query.append("SELECT");
        for (String var : vars)
        {
            query.append(" ");
            query.append(var);
        }

        // WHERE clause for triples.
        query.append(Config.NEWLINE);
        query.append("WHERE {");
        query.append(Config.NEWLINE);
        for (String triple : triples)
        {
            query.append(triple);
            query.append(" .");
            query.append(Config.NEWLINE);
        }

        // FILTER clause for Filters.
        if (!filters.isEmpty())
        {
            query.append("FILTER (");
            query.append(Config.NEWLINE);
            for (String filter : filters)
            {
                query.append(filter);
                query.append(Config.NEWLINE);
            }
            query.append(")");
            query.append(Config.NEWLINE);
        }

        query.append("}");

        if (offset > 0)
        {
            query.append(Config.NEWLINE);
            query.append("OFFSET ");
            query.append(offset);
        }

        if (limit > 0)
        {
            query.append(Config.NEWLINE);
            query.append("LIMIT ");
            query.append(limit);
        }

        String queryString = query.toString();

        // Only flatten the query onto one line when it is actually going to be logged; replace() treats the newline as literal text, not as a regex.
        if (LOGGER.isTraceEnabled())
            LOGGER.trace("Returning SPARQL query: {}", queryString.replace(Config.NEWLINE, " "));

        return queryString;
    }
}