/*
* Copyright (C) 2012 Jan Pokorsky
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cz.cas.lib.proarc.common.fedora;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yourmediashelf.fedora.client.FedoraClient;
import com.yourmediashelf.fedora.client.FedoraClientException;
import com.yourmediashelf.fedora.client.request.RiSearch;
import com.yourmediashelf.fedora.client.response.FindObjectsResponse;
import com.yourmediashelf.fedora.client.response.RiSearchResponse;
import cz.cas.lib.proarc.common.fedora.RemoteStorage.RemoteObject;
import cz.cas.lib.proarc.common.fedora.relation.RelationEditor;
import cz.cas.lib.proarc.common.fedora.relation.RelationResource;
import cz.cas.lib.proarc.common.json.JsonUtils;
import cz.cas.lib.proarc.common.object.HasDataHandler;
import cz.cas.lib.proarc.common.object.model.MetaModel;
import cz.cas.lib.proarc.common.object.model.MetaModelRepository;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Implements search queries with ITQL.
*
* <p>It will require an interface to implement an alternative search that
* does not support ITQL.
*
* @author Jan Pokorsky
*/
public final class SearchView {
private static final Logger LOG = Logger.getLogger(SearchView.class.getName());
private static final String QUERY_LAST_CREATED = readQuery("lastCreated.itql");
private static final String QUERY_FIND_BY_MODEL = readQuery("findByModel.itql");
private static final String QUERY_FIND_MEMBERS = readQuery("findMembers.itql");
private static final String QUERY_FIND_MEMBER_HIERARCHY = readQuery("findMemberHierarchy.itql");
private static final String QUERY_FIND_PIDS = readQuery("findPids.itql");
private static final String QUERY_FIND_REFERRERS = readQuery("findReferrers.itql");
private static final String QUERY_FIND_DEVICE_REFERRERS = readQuery("findDeviceReferrers.itql");
private final FedoraClient fedora;
private final int maxLimit;
private final RemoteStorage storage;
private Locale locale = Locale.ENGLISH;
private ObjectMapper mapper;
SearchView(RemoteStorage storage) {
this(storage, 100);
}
SearchView(RemoteStorage storage, int maxLimit) {
this.storage = storage;
this.fedora = storage.getClient();
this.maxLimit = maxLimit;
}
public void setLocale(Locale locale) {
if (locale == null) {
throw new NullPointerException("locale");
}
this.locale = locale;
}
/**
* @see #findQuery(cz.cas.lib.proarc.common.fedora.SearchView.Query)
*/
public List<Item> findQuery(String title, String label, String identifier, String owner, String model, Collection<String> hasOwners)
throws FedoraClientException, IOException {
return findQuery(new Query().setTitle(title).setLabel(label)
.setIdentifier(identifier).setOwner(owner).setModel(model)
.setHasOwners(hasOwners));
}
/**
* Finds objects matching passed fields using the Fedora Basic Search.
* Matching objects are filtered with {@link #find(java.lang.String[]) }
* to return only ProArc objects.
*
* @return limited list of objects.
* @see <a href='https://wiki.duraspace.org/display/FEDORA35/Basic+Search'>Fedora Basic Search</a>
*/
public List<Item> findQuery(Query q)
throws FedoraClientException, IOException {
final int objectsLimit = 80;
StringBuilder query = new StringBuilder();
if (q.getModel() != null && !q.getModel().isEmpty()) {
query.append("type~").append(q.getModel());
}
// FedoraClient.findObjects() does not support OR operator!
if (!q.getHasOwners().isEmpty()) {
query.append(" rights~").append(q.getHasOwners().iterator().next());
}
buildQuery(query, "title", q.getTitle());
buildQuery(query, "label", q.getLabel());
buildQuery(query, "identifier", q.getIdentifier());
buildQuery(query, "ownerId", q.getOwner());
buildQuery(query, "creator", q.getCreator());
final String queryString = query.toString().trim();
LOG.fine(queryString);
FindObjectsResponse response = FedoraClient.findObjects().query(queryString).resultFormat("xml")
.pid()
.maxResults(objectsLimit)
.execute(fedora);
List<String> pids = response.getPids();
if (LOG.isLoggable(Level.FINE)) {
LOG.fine("pids count: " + pids.size() + ", token: " + response.getToken() + ", pids: " + pids.toString());
}
List<Item> result = new ArrayList<Item>(maxLimit);
while (!pids.isEmpty()) {
List<Item> items = find(pids.toArray(new String[pids.size()]));
result.addAll(items);
String token = response.getToken();
if (token == null || result.size() + objectsLimit > maxLimit) {
break;
}
response = FedoraClient.findObjects().query(queryString).resultFormat("xml").pid()
.maxResults(objectsLimit).sessionToken(token)
.execute(fedora);
pids = response.getPids();
if (LOG.isLoggable(Level.FINE)) {
LOG.fine("resumed: pids count: " + pids.size() + ", token: " + response.getToken() + ", pids: " + pids.toString());
}
}
return result;
}
/**
* Finds objects matching passed phrase using the Fedora Basic Search.
* Matching objects are filtered with {@link #find(java.lang.String[]) }
* to return only ProArc objects.
*
* @param phrase phrase to search in any field of the object
* @return limited list of objects.
* @see <a href='https://wiki.duraspace.org/display/FEDORA35/Basic+Search'>Fedora Basic Search</a>
*/
public List<Item> findPhrase(String phrase) throws FedoraClientException, IOException {
final int objectsLimit = 80;
phrase = normalizePhrase(phrase);
FindObjectsResponse response = FedoraClient.findObjects().terms(phrase).resultFormat("xml")
.pid()
.maxResults(objectsLimit)
.execute(fedora);
List<String> pids = response.getPids();
if (LOG.isLoggable(Level.FINE)) {
LOG.fine("pids count: " + pids.size() + ", token: " + response.getToken() + ", pids: " + pids.toString());
}
List<Item> result = new ArrayList<Item>(maxLimit);
while (!pids.isEmpty()) {
List<Item> items = find(true, pids.toArray(new String[pids.size()]));
result.addAll(items);
String token = response.getToken();
if (token == null || result.size() + objectsLimit > maxLimit) {
break;
}
response = FedoraClient.findObjects().terms(phrase).resultFormat("xml").pid()
.maxResults(objectsLimit).sessionToken(token)
.execute(fedora);
pids = response.getPids();
if (LOG.isLoggable(Level.FINE)) {
LOG.fine("resumed: pids count: " + pids.size() + ", token: " + response.getToken() + ", pids: " + pids.toString());
}
}
return result;
}
static StringBuilder buildQuery(StringBuilder builder, String field, String value) {
if (value == null || value.isEmpty()) {
return builder;
}
// remove leading and trailing white spaces and asterisks
value = value.replaceAll("^[\\s\\*]+|[\\s\\*]+$", "");
// Fedora query does not accept "'" char and does not allow to escape special chars *, ?
value = value.replaceAll("['*]", "?");
if (!value.isEmpty() && !"*".equals(value)) {
value = "'*" + value + "*'";
if (builder.length() > 0) {
builder.append(' ');
}
builder.append(field).append('~').append(value);
}
return builder;
}
/**
* Removes superfluous chars a and optimizes phrase to match the most relevant records.
* <p/>For ITQL it trims leading and trailing whitespaces and asterisks
* and wraps the result with asterisks.
*/
static String normalizePhrase(String phrase) {
phrase = phrase == null ? "" : phrase;
phrase = phrase.replaceAll("^[\\s\\*]+|[\\s\\*]+$", "");
phrase = phrase.isEmpty() ? "*" : "*" + phrase + "*";
return phrase;
}
public List<Item> find(String... pids) throws FedoraClientException, IOException {
return find(Arrays.asList(pids), true);
}
public List<Item> find(boolean onlyActive, String... pids) throws FedoraClientException, IOException {
return find(Arrays.asList(pids), onlyActive);
}
/**
* Finds active descriptors of passed PIDs.
*
* @param pids PIDs of digital objects
* @return list of descriptors
* @throws FedoraClientException
* @throws IOException
*/
public List<Item> find(List<String> pids) throws FedoraClientException, IOException {
return find(pids, true);
}
/**
* Finds descriptors of passed PIDs.
*
* @param pids PIDs of digital objects
* @param onlyActive {@code true} includes only active objects
* @return list of descriptors
* @throws FedoraClientException
* @throws IOException
*/
public List<Item> find(List<String> pids, boolean onlyActive) throws FedoraClientException, IOException {
// issue 85: reasonable count of PIDs per query to prevent StackOverflowError.
// Greater query page sizes (>1000, <2000) are acceptable but Mulgara responses are really slow.
// It does not make sence to add paging to API as load on demand of SmartGWT Tree
// does not support paging and it is not expected to have monograph or
// issue page counts grater than 10000.
final int queryPageSize = 100;
final int size = pids.size();
ArrayList<Item> result = new ArrayList<Item>(size);
for (int startOffset = 0; startOffset < size; ) {
int endOffset = Math.min(size, startOffset + queryPageSize);
List<String> subList = pids.subList(startOffset, endOffset);
List<Item> members = findImpl(subList, onlyActive);
startOffset = endOffset;
result.addAll(members);
}
return result;
}
List<Item> findImpl(List<String> pids, boolean onlyActive) throws FedoraClientException, IOException {
if (pids.isEmpty()) {
return Collections.emptyList();
}
StringBuilder expr = new StringBuilder(256);
for (String pid : pids) {
if (expr.length() > 0) {
expr.append("\n or ");
}
expr.append(String.format(
"$pid <http://mulgara.org/mulgara#is> <info:fedora/%s>",
pid));
}
String query = QUERY_FIND_PIDS.replace("${pids.expression}", expr);
String onlyActiveExpr = onlyActive
? "and $pid <info:fedora/fedora-system:def/model#state>"
+ " <info:fedora/fedora-system:def/model#Active>"
: "";
query = query.replace("${includeActive}", onlyActiveExpr);
LOG.fine(query);
RiSearch search = buildSearch(query);
return consumeSearch(search.execute(fedora));
}
/**
* Finds children of the passed remote object. The result list is sorted
* using RELS-EXT stream.
*
* @param parent PID of parent to query
* @return the sorted list
* @throws FedoraClientException
* @throws IOException
*/
public List<Item> findSortedChildren(String parentPid)
throws FedoraClientException, IOException, DigitalObjectException {
RemoteObject parent = storage.find(parentPid);
List<String> memberPids = new RelationEditor(parent).getMembers();
List<Item> items = find(memberPids, true);
ArrayList<Item> sortedItems = new ArrayList<Item>(memberPids.size());
for (String memberPid : memberPids) {
for (Iterator<Item> it = items.iterator(); it.hasNext();) {
Item item = it.next();
if (memberPid.equals(item.getPid())) {
sortedItems.add(item);
it.remove();
break;
}
}
}
return sortedItems;
}
public List<Item> findChildren(String pid) throws FedoraClientException, IOException {
String query = QUERY_FIND_MEMBERS.replace("${parent}", RelationResource.fromPid(pid).getResource());
RiSearch search = buildSearch(query);
return consumeSearch(search.execute(fedora));
}
/**
* Traverses a graph of PID's members.
*
* @param pid PID to traverse
* @return list of all PID's members
*/
public List<Item> findChildrenHierarchy(String pid) throws FedoraClientException, IOException {
String query = QUERY_FIND_MEMBER_HIERARCHY.replace("${ROOT}", RelationResource.fromPid(pid).getResource());
RiSearch search = buildSearch(query);
return consumeSearch(search.execute(fedora));
}
public List<Item> findLastCreated(int offset, String model, String user) throws FedoraClientException, IOException {
return findLastCreated(offset, model, user, 100);
}
public List<Item> findLastCreated(int offset, String model, String user, int limit) throws FedoraClientException, IOException {
return findLast(offset, model, user, limit, "$created desc");
}
public List<Item> findLastModified(int offset, String model, String user, int limit) throws FedoraClientException, IOException {
return findLast(offset, model, user, limit, "$modified desc");
}
private List<Item> findLast(int offset, String model, String user, int limit, String orderBy) throws FedoraClientException, IOException {
String modelFilter = "";
String ownerFilter = "";
if (model != null && !model.isEmpty()) {
modelFilter = String.format("and $pid <info:fedora/fedora-system:def/model#hasModel> <info:fedora/%s>", model);
}
if (user != null) {
ownerFilter = String.format("and $pid <http://proarc.lib.cas.cz/relations#hasOwner> $group\n"
+ "and <info:fedora/%s> <info:fedora/fedora-system:def/relations-external#isMemberOf> $group", user);
}
String query = QUERY_LAST_CREATED.replace("${OFFSET}", String.valueOf(offset));
query = query.replace("${MODEL_FILTER}", modelFilter);
query = query.replace("${OWNER_FILTER}", ownerFilter);
query = query.replace("${ORDERBY}", orderBy);
LOG.fine(query);
RiSearch search = buildSearch(query);
if (limit > 0) {
limit = Math.min(limit, maxLimit);
search.limit(limit);
}
return consumeSearch(search.execute(fedora));
}
public List<Item> findReferrers(String pid) throws IOException, FedoraClientException {
String query = QUERY_FIND_REFERRERS.replace("${PID}", pid);
RiSearch search = buildSearch(query);
return consumeSearch(search.execute(fedora));
}
/**
* Find objects that have the given model.
* @param modelId model PID to query
* @return list of objects
* @throws IOException
* @throws FedoraClientException
*/
public List<Item> findByModel(String modelId) throws IOException, FedoraClientException {
String query = QUERY_FIND_BY_MODEL.replace("${metaModelPid}", modelId);
RiSearch search = buildSearch(query);
search.limit(1000);
return consumeSearch(search.execute(fedora));
}
/**
* Is the device referred with {@code hasDevice} relation by any digital object?
* @param deviceId device PID
* @return {@code true} if it is connected
* @throws IOException
* @throws FedoraClientException
*/
public boolean isDeviceInUse(String deviceId) throws IOException, FedoraClientException {
String query = QUERY_FIND_DEVICE_REFERRERS.replace("${devicePid}", deviceId);
RiSearch search = buildSearch(query);
search.limit(1);
search.stream(true);
List<Item> result = consumeSearch(search.execute(fedora));
return !result.isEmpty();
}
private List<Item> consumeSearch(RiSearchResponse response) throws IOException {
String json = response.getEntity(String.class);
Result result = readResponse(json);
return consumeSearch(result.results);
}
Result readResponse(String json) throws IOException {
if (mapper == null) {
// requires mapper without mix in annotation of Item
mapper = JsonUtils.createObjectMapper();
}
return mapper.readValue(json, Result.class);
}
private List<Item> consumeSearch(List<Item> items) {
for (Item item : items) {
replaceUriWithPid(item);
resolveObjectLabel(item);
}
return items;
}
private static String replaceUriWithPid(String uri) {
return uri == null ? uri : RelationResource.toPid(uri);
}
private static Item replaceUriWithPid(Item item) {
item.pid = replaceUriWithPid(item.pid);
item.model = replaceUriWithPid(item.model);
item.state = replaceUriWithPid(item.state);
return item;
}
void resolveObjectLabel(Item item) {
// XXX implement a plugin cache
MetaModel model = MetaModelRepository.getInstance().find(item.getModel());
if (model == null) {
// other than digital object model (device, ...)
return ;
}
HasSearchViewHandler hasHandler = model.getPlugin().getHandlerProvider(HasSearchViewHandler.class);
if (hasHandler != null) {
String label = hasHandler.createSearchViewHandler().getObjectLabel(item, locale);
item.setLabel(label);
}
}
private static RiSearch buildSearch(String query) {
RiSearch search = FedoraClient.riSearch(query).distinct(true)
.type("tuples").lang("itql")
.flush(true) // required to get reliable responses
// .format("json")
.xParam("format", "json");
return search;
}
private static String readQuery(String file) {
InputStream is = SearchView.class.getResourceAsStream(file);
if (is == null) {
throw new IllegalStateException(file + " not found!");
}
Scanner scanner = new Scanner(is, "UTF-8").useDelimiter("\\A");
String content = scanner.next();
scanner.close();
return content;
}
/**
* A plug-in capability.
*/
public interface HasSearchViewHandler extends HasDataHandler {
SearchViewHandler createSearchViewHandler();
}
/**
* Implement to customize a result label of a search.
*/
public interface SearchViewHandler {
String getObjectLabel(Item item, Locale locale);
}
public static class Item {
private String pid;
private String model;
private String owner;
private String label;
private String state;
private String created;
private String modified;
/** Parent PID. Optional for some queries */
private String parent;
/** batch import ID. Optional for some queries */
private Integer batchId;
/**
* Synthetic name of count query. count(hasExport)
* @see <a href='http://docs.mulgara.org/itqlcommands/select.html#o194'>
* Count Function</a>
*/
private String k0;
public Item() {
}
public Item(String pid) {
this.pid = pid;
}
public String getCreated() {
return created;
}
public void setCreated(String created) {
this.created = created;
}
public String getLabel() {
return label;
}
public void setLabel(String label) {
this.label = label;
}
public String getModel() {
return model;
}
public void setModel(String model) {
this.model = model;
}
public String getModified() {
return modified;
}
public void setModified(String modified) {
this.modified = modified;
}
public String getOwner() {
return owner;
}
public void setOwner(String owner) {
this.owner = owner;
}
public String getPid() {
return pid;
}
public void setPid(String pid) {
this.pid = pid;
}
public String getState() {
return state;
}
public void setState(String state) {
this.state = state;
}
public String getParentPid() {
return parent;
}
public void setParentPid(String parentPid) {
this.parent = parentPid;
}
public Integer getBatchId() {
return batchId;
}
public void setBatchId(Integer batchId) {
this.batchId = batchId;
}
public String getK0() {
return k0;
}
public void setK0(String k0) {
this.k0 = k0;
}
public Integer getHasExport() {
if (k0 != null && !k0.isEmpty()) {
try {
return Integer.parseInt(k0);
} catch (NumberFormatException ex) {
// ignore
}
}
return null;
}
}
static class Result {
private List<Item> results;
public List<Item> getResults() {
return results;
}
public void setResults(List<Item> results) {
this.results = results;
}
}
public static class Query {
private String title;
private String creator;
private String label;
private String identifier;
private String owner;
private String model;
private Collection<String> hasOwners;
public String getTitle() {
return title;
}
public Query setTitle(String title) {
this.title = title;
return this;
}
public String getCreator() {
return creator;
}
public Query setCreator(String creator) {
this.creator = creator;
return this;
}
public String getLabel() {
return label;
}
public Query setLabel(String label) {
this.label = label;
return this;
}
public String getIdentifier() {
return identifier;
}
public Query setIdentifier(String identifier) {
this.identifier = identifier;
return this;
}
public String getOwner() {
return owner;
}
public Query setOwner(String owner) {
this.owner = owner;
return this;
}
public String getModel() {
return model;
}
public Query setModel(String model) {
this.model = model;
return this;
}
public Collection<String> getHasOwners() {
return hasOwners != null ? hasOwners : Collections.<String>emptyList();
}
public Query setHasOwners(Collection<String> hasOwners) {
this.hasOwners = hasOwners;
return this;
}
}
}