/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.search;

import org.apache.log4j.Logger;
import org.dspace.authorize.factory.AuthorizeServiceFactory;
import org.dspace.authorize.service.AuthorizeService;
import org.dspace.content.*;
import org.dspace.content.Collection;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.discovery.DiscoverQuery;
import org.dspace.discovery.DiscoverResult;
import org.dspace.discovery.SearchServiceException;
import org.dspace.discovery.SearchUtils;
import org.dspace.eperson.Group;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;

import java.sql.SQLException;
import java.text.ParseException;
import java.util.*;

/**
 * Utility class for extracting information about items, possibly just within a
 * certain community or collection, that have been created, modified or
 * withdrawn within a particular range of dates.
 *
 * @author Robert Tansley
 * @version $Revision$
 */
public class Harvest {
    /** log4j logger */
    private static final Logger log = Logger.getLogger(Harvest.class);

    protected static final AuthorizeService authorizeService =
            AuthorizeServiceFactory.getInstance().getAuthorizeService();
    protected static final HandleService handleService =
            HandleServiceFactory.getInstance().getHandleService();
    protected static final ItemService itemService =
            ContentServiceFactory.getInstance().getItemService();

    /**
     * Obtain information about items that have been created, modified or
     * withdrawn within a given date range. You can also specify 'offset' and
     * 'limit' so that a big harvest can be split up into smaller sections.
     * <P>
     * Note that dates are passed in the standard ISO8601 format used by DSpace
     * (and OAI-PMH).
     * <P>
     * FIXME: Assumes all in_archive items have public metadata. In particular,
     * when <code>items</code> is <code>false</code> the anonymous-access check
     * is skipped, because it needs the <code>item</code> field to be filled out.
     * <P>
     * If the underlying search fails, the error is logged and whatever has been
     * collected so far (possibly an empty list) is returned.
     *
     * @param context
     *            DSpace context
     * @param scope
     *            a Collection, Community, or <code>null</code> indicating the scope is
     *            all of DSpace
     * @param startDate
     *            start of date range, or <code>null</code>
     * @param endDate
     *            end of date range, or <code>null</code>
     * @param offset
     *            for a partial harvest, the point in the overall list of
     *            matching items to start at. 0 means just start at the
     *            beginning.
     * @param limit
     *            the number of matching items to return in a partial harvest.
     *            Specify 0 to return the whole list (or the rest of the list if
     *            an offset was specified.)
     * @param items
     *            if <code>true</code> the <code>item</code> field of each
     *            <code>HarvestedItemInfo</code> object is filled out
     * @param collections
     *            if <code>true</code> the <code>collectionHandles</code>
     *            field of each <code>HarvestedItemInfo</code> object is
     *            filled out
     * @param withdrawn
     *            If <code>true</code>, information about withdrawn items is
     *            included
     * @param nonAnon
     *            If items without anonymous access should be included or not
     * @return List of <code>HarvestedItemInfo</code> objects
     * @throws SQLException if database error
     * @throws java.text.ParseException If the date is not in a supported format
     */
    public static List<HarvestedItemInfo> harvest(Context context, DSpaceObject scope,
            String startDate, String endDate, int offset, int limit,
            boolean items, boolean collections, boolean withdrawn,
            boolean nonAnon) throws SQLException, ParseException {
        DiscoverQuery discoverQuery = new DiscoverQuery();
        discoverQuery.addFilterQueries("search.resourcetype:" + Constants.ITEM);

        if (scope != null) {
            // NOTE(review): a "field:value" string is passed to the field-present
            // query list here; confirm this restricts results to the scope as intended.
            discoverQuery.addFieldPresentQueries("location:" + scope.getID());
        }

        // NOTE(review): the "=>" / "<=" comparison strings below are kept as they
        // were; verify they are understood by the search backend in use.
        if (startDate != null) {
            discoverQuery.addFilterQueries("lastModified => " + new DCDate(startDate).toString());
        }

        if (endDate != null) {
            // The upper bound must be derived from endDate, not startDate.
            discoverQuery.addFilterQueries("lastModified <= " + new DCDate(endDate).toString());
        }

        if (!withdrawn) {
            discoverQuery.addFilterQueries("archived: true OR withdrawn: false");
        } else {
            discoverQuery.addFilterQueries("archived: true OR withdrawn: true");
        }

        // Order by item ID, so that for a given harvest the order will be
        // consistent. This is so that big harvests can be broken up into
        // several smaller operations (e.g. for OAI resumption tokens.)
        discoverQuery.setSortField("search.resourceid", DiscoverQuery.SORT_ORDER.asc);

        List<HarvestedItemInfo> infoObjects = new ArrayList<>();

        // Count of items read from the result set that match the selection criteria.
        // Note: until 'index > offset' the records are not added to the output list.
        int index = 0;

        // Count of items added to the output list.
        int itemCounter = 0;

        try {
            DiscoverResult discoverResult =
                    SearchUtils.getSearchService().search(context, discoverQuery);

            // Process results of query into HarvestedItemInfo objects
            for (DSpaceObject dso : discoverResult.getDspaceObjects()) {
                // A limit of 0 means "no limit".
                if (limit != 0 && itemCounter >= limit) {
                    break;
                }

                // The query is filtered on the item resource type, so every hit
                // is treated as an Item.
                Item item = (Item) dso;

                HarvestedItemInfo itemInfo = new HarvestedItemInfo();
                itemInfo.context = context;
                itemInfo.handle = dso.getHandle();
                itemInfo.itemID = dso.getID();
                itemInfo.datestamp = item.getLastModified();
                itemInfo.withdrawn = item.isWithdrawn();

                if (collections) {
                    // Use the item from the search hit: itemInfo.item is not
                    // populated yet (and never is when 'items' is false).
                    fillCollections(item, itemInfo);
                }

                if (items) {
                    // Add the item reference
                    itemInfo.item = itemService.find(context, itemInfo.itemID);
                }

                // An item is accepted without an access check when non-anonymous
                // items are wanted, when there is no item object to check, or when
                // it is a withdrawn item and withdrawn items were requested.
                // Otherwise only items that allow anonymous read access are accepted.
                boolean accepted = nonAnon
                        || itemInfo.item == null
                        || (withdrawn && itemInfo.withdrawn)
                        || anonAccessAllowed(context, itemInfo);

                if (accepted) {
                    index++;
                    if (index > offset) {
                        infoObjects.add(itemInfo);
                        itemCounter++;
                    }
                }
            }
        } catch (SearchServiceException e) {
            log.error(e.getMessage(), e);
        }

        return infoObjects;
    }

    /**
     * Get harvested item info for a single item. <code>item</code> field in
     * returned <code>HarvestedItemInfo</code> object is always filled out.
     *
     * @param context
     *            DSpace context
     * @param handle
     *            Prefix-less Handle of item
     * @param collections
     *            if <code>true</code> the <code>collectionHandles</code>
     *            field of the <code>HarvestedItemInfo</code> object is filled
     *            out
     *
     * @return <code>HarvestedItemInfo</code> object for the single item, or
     *         <code>null</code> if the handle does not resolve to an item
     * @throws SQLException if database error
     */
    public static HarvestedItemInfo getSingle(Context context, String handle,
            boolean collections) throws SQLException {
        DSpaceObject resolved = handleService.resolveToObject(context, handle);

        // Covers both an unknown handle (null) and a handle that belongs to
        // something other than an item.
        if (!(resolved instanceof Item)) {
            return null;
        }

        Item item = (Item) resolved;

        // Fill out OAI info item object
        HarvestedItemInfo itemInfo = new HarvestedItemInfo();
        itemInfo.context = context;
        itemInfo.item = item;
        itemInfo.handle = handle;
        itemInfo.withdrawn = item.isWithdrawn();
        itemInfo.datestamp = item.getLastModified();
        itemInfo.itemID = item.getID();

        // Get the sets
        if (collections) {
            fillCollections(item, itemInfo);
        }

        return itemInfo;
    }

    /**
     * Fill out the <code>collectionHandles</code> field of the
     * HarvestedItemInfo object with the handles of the collections the given
     * item belongs to.
     *
     * @param item
     *            item whose collections are read
     * @param itemInfo
     *            HarvestedItemInfo object to fill out
     * @throws SQLException if database error
     */
    private static void fillCollections(Item item, HarvestedItemInfo itemInfo)
            throws SQLException {
        itemInfo.collectionHandles = new ArrayList<>();

        for (Collection collection : item.getCollections()) {
            itemInfo.collectionHandles.add(collection.getHandle());
        }
    }

    /**
     * Does the item allow anonymous access? That is, do the groups authorized
     * to READ the item include the anonymous group?
     *
     * @param context
     *            DSpace context
     * @param itemInfo
     *            HarvestedItemInfo whose <code>item</code> field is checked
     * @return <code>true</code> if the anonymous group may read the item
     * @throws SQLException if database error
     */
    private static boolean anonAccessAllowed(Context context, HarvestedItemInfo itemInfo)
            throws SQLException {
        List<Group> authorizedGroups =
                authorizeService.getAuthorizedGroups(context, itemInfo.item, Constants.READ);

        for (Group authorizedGroup : authorizedGroups) {
            // Constant-first comparison so a group without a name cannot cause an NPE.
            if (Group.ANONYMOUS.equals(authorizedGroup.getName())) {
                return true;
            }
        }

        return false;
    }
}