/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at * * http://www.dspace.org/license/ */ package org.dspace.discovery; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.time.DateFormatUtils; import org.apache.log4j.Logger; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; import org.apache.solr.client.solrj.response.FacetField; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.FacetParams; import org.dspace.content.*; import org.dspace.content.Collection; import org.dspace.core.*; import org.dspace.discovery.configuration.*; import org.dspace.handle.HandleManager; import org.dspace.utils.DSpace; import org.springframework.stereotype.Service; import java.io.*; import java.net.URLEncoder; import java.sql.SQLException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.*; /** * SolrIndexer contains the methods that index Items and their metadata, * collections, communities, etc. It is meant to either be invoked from the * command line (see dspace/bin/index-all) or via the indexContent() methods * within DSpace. * <p/> * The Administrator can choose to run SolrIndexer in a cron that repeats * regularly, a failed attempt to index from the UI will be "caught" up on in * that cron. 
*
 * The SolrServiceImpl is registered as a Service in the ServiceManager via
 * A spring configuration file located under
 * classpath://spring/spring-dspace-applicationContext.xml
 *
 * Its configuration is Autowired by the ApplicationContext
 *
 * @author Kevin Van de Velde (kevin at atmire dot com)
 * @author Mark Diggory (markd at atmire dot com)
 * @author Ben Bosman (ben at atmire dot com)
 */
@Service
public class SolrServiceImpl implements SearchService, IndexingService {

    private static final Logger log = Logger.getLogger(SolrServiceImpl.class);

    // Name of the Solr field that records when a document was last (re)indexed;
    // compared against the object's lastModified date in requiresIndexing().
    private static final String LAST_INDEXED_FIELD = "SolrIndexer.lastIndexed";

    // Separator placed between the lowercased and original-case variants of a
    // facet filter value (see the "_filter" fields written for items).
    public static final String FILTER_SEPARATOR = "|||";

    /**
     * Non-Static CommonsHttpSolrServer for processing indexing events.
     */
    private CommonsHttpSolrServer solr = null;

    /**
     * Non-Static Singleton instance of Configuration Service
     */
//    private ConfigurationService configurationService;

//    @Autowired
//    @Required
//    public void setConfigurationService(ConfigurationService configurationService) {
//        this.configurationService = configurationService;
//    }

    /**
     * Lazily builds the Solr client from the "discovery.search.server"
     * configuration property and caches it in the {@code solr} field.
     * The trial query issued on first use makes an unreachable or
     * misconfigured server fail fast here rather than mid-index.
     *
     * @return the shared CommonsHttpSolrServer instance
     * @throws java.net.MalformedURLException if the configured URL is invalid
     * @throws org.apache.solr.client.solrj.SolrServerException if the trial query fails
     */
    protected CommonsHttpSolrServer getSolr() throws java.net.MalformedURLException, org.apache.solr.client.solrj.SolrServerException {
        if ( solr == null) {
            String solrService = new DSpace().getConfigurationService().getProperty("discovery.search.server");

            log.debug("Solr URL: " + solrService);
            solr = new CommonsHttpSolrServer(solrService);

            solr.setBaseURL(solrService);

            // Probe query: "resourcetype 2, resourceid 1" is the site object;
            // any response proves the core is up and answering.
            SolrQuery solrQuery = new SolrQuery()
                    .setQuery("search.resourcetype:2 AND search.resourceid:1");

            solr.query(solrQuery);
        }

        return solr;
    }

    /**
     * If the handle for the "dso" already exists in the index, and the "dso"
     * has a lastModified timestamp that is newer than the document in the index
     * then it is updated, otherwise a new document is added.
* * @param context Users Context * @param dso DSpace Object (Item, Collection or Community * @throws SQLException * @throws IOException */ public void indexContent(Context context, DSpaceObject dso) throws SQLException { indexContent(context, dso, false); } /** * If the handle for the "dso" already exists in the index, and the "dso" * has a lastModified timestamp that is newer than the document in the index * then it is updated, otherwise a new document is added. * * @param context Users Context * @param dso DSpace Object (Item, Collection or Community * @param force Force update even if not stale. * @throws SQLException * @throws IOException */ public void indexContent(Context context, DSpaceObject dso, boolean force) throws SQLException { String handle = dso.getHandle(); if (handle == null) { handle = HandleManager.findHandle(context, dso); } try { switch (dso.getType()) { case Constants.ITEM: Item item = (Item) dso; if (item.isArchived() && !item.isWithdrawn()) { /** * If the item is in the repository now, add it to the index */ if (requiresIndexing(handle, ((Item) dso).getLastModified()) || force) { unIndexContent(context, handle); buildDocument(context, (Item) dso); } } else { /** * Make sure the item is not in the index if it is not in * archive. TODO: Someday DSIndexer should block withdrawn * content on search/retrieval and allow admins the ability * to still search for withdrawn Items. 
*/ unIndexContent(context, handle); log.info("Removed Item: " + handle + " from Index"); } break; case Constants.COLLECTION: buildDocument((Collection) dso); log.info("Wrote Collection: " + handle + " to Index"); break; case Constants.COMMUNITY: buildDocument((Community) dso); log.info("Wrote Community: " + handle + " to Index"); break; default: log .error("Only Items, Collections and Communities can be Indexed"); } } catch (Exception e) { log.error(e.getMessage(), e); } } /** * unIndex removes an Item, Collection, or Community only works if the * DSpaceObject has a handle (uses the handle for its unique ID) * * @param context * @param dso DSpace Object, can be Community, Item, or Collection * @throws SQLException * @throws IOException */ public void unIndexContent(Context context, DSpaceObject dso) throws SQLException, IOException { try { unIndexContent(context, dso.getHandle()); } catch (Exception exception) { log.error(exception.getMessage(), exception); emailException(exception); } } /** * Unindex a Document in the Lucene index. * @param context the dspace context * @param handle the handle of the object to be deleted * @throws IOException * @throws SQLException */ public void unIndexContent(Context context, String handle) throws IOException, SQLException { unIndexContent(context, handle, false); } /** * Unindex a Document in the Lucene Index. 
* @param context the dspace context * @param handle the handle of the object to be deleted * @throws SQLException * @throws IOException */ public void unIndexContent(Context context, String handle, boolean commit) throws SQLException, IOException { try { getSolr().deleteById(handle); if(commit) { getSolr().commit(); } } catch (SolrServerException e) { log.error(e.getMessage(), e); } } /** * reIndexContent removes something from the index, then re-indexes it * * @param context context object * @param dso object to re-index */ public void reIndexContent(Context context, DSpaceObject dso) throws SQLException, IOException { try { indexContent(context, dso); } catch (Exception exception) { log.error(exception.getMessage(), exception); emailException(exception); } } /** * create full index - wiping old index * * @param c context to use */ public void createIndex(Context c) throws SQLException, IOException { /* Reindex all content preemptively. */ updateIndex(c, true); } /** * Iterates over all Items, Collections and Communities. And updates them in * the index. Uses decaching to control memory footprint. Uses indexContent * and isStale to check state of item in index. * * @param context the dspace context */ public void updateIndex(Context context) { updateIndex(context, false); } /** * Iterates over all Items, Collections and Communities. And updates them in * the index. Uses decaching to control memory footprint. Uses indexContent * and isStale to check state of item in index. * <p/> * At first it may appear counterintuitive to have an IndexWriter/Reader * opened and closed on each DSO. But this allows the UI processes to step * in and attain a lock and write to the index even if other processes/jvms * are running a reindex. 
* * @param context the dspace context * @param force whether or not to force the reindexing */ public void updateIndex(Context context, boolean force) { try { ItemIterator items = null; try { for (items = Item.findAll(context); items.hasNext();) { Item item = items.next(); indexContent(context, item, force); item.decache(); } } finally { if (items != null) { items.close(); } } Collection[] collections = Collection.findAll(context); for (Collection collection : collections) { indexContent(context, collection, force); context.removeCached(collection, collection.getID()); } Community[] communities = Community.findAll(context); for (Community community : communities) { indexContent(context, community, force); context.removeCached(community, community.getID()); } getSolr().commit(); } catch (Exception e) { log.error(e.getMessage(), e); } } /** * Iterates over all documents in the Lucene index and verifies they are in * database, if not, they are removed. * * @param force whether or not to force a clean index * @throws IOException IO exception * @throws SQLException sql exception * @throws SearchServiceException occurs when something went wrong with querying the solr server */ public void cleanIndex(boolean force) throws IOException, SQLException, SearchServiceException { Context context = new Context(); context.turnOffAuthorisationSystem(); try { if (force) { getSolr().deleteByQuery("*:*"); } else { SolrQuery query = new SolrQuery(); query.setQuery("*:*"); QueryResponse rsp = getSolr().query(query); SolrDocumentList docs = rsp.getResults(); Iterator iter = docs.iterator(); while (iter.hasNext()) { SolrDocument doc = (SolrDocument) iter.next(); String handle = (String) doc.getFieldValue("handle"); DSpaceObject o = HandleManager.resolveToObject(context, handle); if (o == null) { log.info("Deleting: " + handle); /* * Use IndexWriter to delete, its easier to manage * write.lock */ unIndexContent(context, handle); } else { context.removeCached(o, o.getID()); 
log.debug("Keeping: " + handle); } } } } catch(Exception e){ throw new SearchServiceException(e.getMessage(), e); } finally { context.abort(); } } /** * Maintenance to keep a SOLR index efficient. * Note: This might take a long time. */ public void optimize() { try { long start = System.currentTimeMillis(); System.out.println("SOLR Search Optimize -- Process Started:"+start); getSolr().optimize(); long finish = System.currentTimeMillis(); System.out.println("SOLR Search Optimize -- Process Finished:"+finish); System.out.println("SOLR Search Optimize -- Total time taken:"+(finish-start) + " (ms)."); } catch (SolrServerException sse) { System.err.println(sse.getMessage()); } catch (IOException ioe) { System.err.println(ioe.getMessage()); } } // ////////////////////////////////// // Private // ////////////////////////////////// private void emailException(Exception exception) { // Also email an alert, system admin may need to check for stale lock try { String recipient = ConfigurationManager .getProperty("alert.recipient"); if (recipient != null) { Email email = ConfigurationManager .getEmail(I18nUtil.getEmailFilename( Locale.getDefault(), "internal_error")); email.addRecipient(recipient); email.addArgument(ConfigurationManager .getProperty("dspace.url")); email.addArgument(new Date()); String stackTrace; if (exception != null) { StringWriter sw = new StringWriter(); PrintWriter pw = new PrintWriter(sw); exception.printStackTrace(pw); pw.flush(); stackTrace = sw.toString(); } else { stackTrace = "No exception"; } email.addArgument(stackTrace); email.send(); } } catch (Exception e) { // Not much we can do here! log.warn("Unable to send email alert", e); } } /** * Is stale checks the lastModified time stamp in the database and the index * to determine if the index is stale. 
* * @param handle the handle of the dso * @param lastModified the last modified date of the DSpace object * @return a boolean indicating if the dso should be re indexed again * @throws SQLException sql exception * @throws IOException io exception * @throws SearchServiceException if something went wrong with querying the solr server */ private boolean requiresIndexing(String handle, Date lastModified) throws SQLException, IOException, SearchServiceException { boolean reindexItem = false; boolean inIndex = false; SolrQuery query = new SolrQuery(); query.setQuery("handle:" + handle); QueryResponse rsp; try { rsp = getSolr().query(query); } catch (SolrServerException e) { throw new SearchServiceException(e.getMessage(),e); } for (SolrDocument doc : rsp.getResults()) { inIndex = true; Object value = doc.getFieldValue(LAST_INDEXED_FIELD); if(value instanceof Date) { Date lastIndexed = (Date) value; if (lastIndexed.before(lastModified)) { reindexItem = true; } } } return reindexItem || !inIndex; } /** * @param myitem the item for which our locations are to be retrieved * @return a list containing the identifiers of the communities & collections * @throws SQLException sql exception */ private List<String> getItemLocations(Item myitem) throws SQLException { List<String> locations = new Vector<String>(); // build list of community ids Community[] communities = myitem.getCommunities(); // build list of collection ids Collection[] collections = myitem.getCollections(); // now put those into strings int i = 0; for (i = 0; i < communities.length; i++) { locations.add("m" + communities[i].getID()); } for (i = 0; i < collections.length; i++) { locations.add("l" + collections[i].getID()); } return locations; } private List<String> getCollectionLocations(Collection target) throws SQLException { List<String> locations = new Vector<String>(); // build list of community ids Community[] communities = target.getCommunities(); // now put those into strings for (Community community : 
communities) { locations.add("m" + community.getID()); } return locations; } /** * Write the document to the index under the appropriate handle. * @param doc the solr document to be written to the server * @throws IOException IO exception */ private void writeDocument(SolrInputDocument doc) throws IOException { try { getSolr().add(doc); } catch (SolrServerException e) { log.error(e.getMessage(), e); } } /** * Build a Lucene document for a DSpace Community. * * @param community Community to be indexed * @throws SQLException * @throws IOException */ private void buildDocument(Community community) throws SQLException, IOException { // Create Document SolrInputDocument doc = buildDocument(Constants.COMMUNITY, community.getID(), community.getHandle(), null); // and populate it String name = community.getMetadata("name"); if (name != null) { doc.addField("name", name); } //Do any additional indexing, depends on the plugins List<SolrServiceIndexPlugin> solrServiceIndexPlugins = new DSpace().getServiceManager().getServicesByType(SolrServiceIndexPlugin.class); for (SolrServiceIndexPlugin solrServiceIndexPlugin : solrServiceIndexPlugins) { solrServiceIndexPlugin.additionalIndex(community, doc); } writeDocument(doc); } /** * Build a Lucene document for a DSpace Collection. 
* @param collection Collection to be indexed
     * @throws SQLException sql exception
     * @throws IOException IO exception
     */
    private void buildDocument(Collection collection) throws SQLException, IOException {
        // Owning-community identifiers; indexed so searches can be scoped.
        List<String> locations = getCollectionLocations(collection);

        // Create Lucene Document
        SolrInputDocument doc = buildDocument(Constants.COLLECTION, collection.getID(),
                collection.getHandle(), locations);

        // and populate it
        String name = collection.getMetadata("name");

        if (name != null) {
            doc.addField("name", name);
        }

        //Do any additional indexing, depends on the plugins
        List<SolrServiceIndexPlugin> solrServiceIndexPlugins = new DSpace().getServiceManager().getServicesByType(SolrServiceIndexPlugin.class);
        for (SolrServiceIndexPlugin solrServiceIndexPlugin : solrServiceIndexPlugins) {
            solrServiceIndexPlugin.additionalIndex(collection, doc);
        }

        writeDocument(doc);
    }

    /**
     * Build a Lucene document for a DSpace Item and write the index
     *
     * @param context Users Context
     * @param item The DSpace Item to be indexed
     * @throws SQLException
     * @throws IOException
     */
    private void buildDocument(Context context, Item item)
            throws SQLException, IOException {
        String handle = item.getHandle();

        if (handle == null) {
            handle = HandleManager.findHandle(context, item);
        }

        // get the location string (for searching by collection & community)
        List<String> locations = getItemLocations(item);

        SolrInputDocument doc = buildDocument(Constants.ITEM, item.getID(), handle,
                locations);

        log.debug("Building Item: " + handle);

        //Keep a list of our sort values which we added, sort values can only be added once
        List<String> sortFieldsAdded = new ArrayList<String>();
        try {
            List<DiscoveryConfiguration> discoveryConfigurations = SearchUtils.getAllDiscoveryConfigurations(item);

            // Flatten every discovery configuration into per-metadata-field
            // lookup maps so the metadata loop below is a simple map probe.
            //A map used to save each sidebarFacet config by the metadata fields
            Map<String, List<SidebarFacetConfiguration>> sidebarFacets = new HashMap<String, List<SidebarFacetConfiguration>>();
            Map<String, List<DiscoverySearchFilter>> searchFilters = new HashMap<String, List<DiscoverySearchFilter>>();
            Map<String, DiscoverySortFieldConfiguration> sortFields = new HashMap<String, DiscoverySortFieldConfiguration>();
            Map<String, DiscoveryRecentSubmissionsConfiguration> recentSubmissionsConfigurationMap = new HashMap<String, DiscoveryRecentSubmissionsConfiguration>();

            for (DiscoveryConfiguration discoveryConfiguration : discoveryConfigurations) {
                //Sidebar facet mapping configuration read in
                for(SidebarFacetConfiguration facet : discoveryConfiguration.getSidebarFacets()){
                    for (int i = 0; i < facet.getMetadataFields().size(); i++) {
                        String metadataField = facet.getMetadataFields().get(i);
                        List<SidebarFacetConfiguration> resultingList;
                        if(sidebarFacets.get(metadataField) != null){
                            resultingList = sidebarFacets.get(metadataField);
                        }else{
                            //New metadata field, create a new list for it
                            resultingList = new ArrayList<SidebarFacetConfiguration>();
                        }
                        resultingList.add(facet);

                        sidebarFacets.put(metadataField, resultingList);
                    }
                }

                for (int i = 0; i < discoveryConfiguration.getSearchFilters().size(); i++) {
                    DiscoverySearchFilter discoverySearchFilter = discoveryConfiguration.getSearchFilters().get(i);
                    for (int j = 0; j < discoverySearchFilter.getMetadataFields().size(); j++) {
                        String metadataField = discoverySearchFilter.getMetadataFields().get(j);
                        List<DiscoverySearchFilter> resultingList;
                        if(searchFilters.get(metadataField) != null){
                            resultingList = searchFilters.get(metadataField);
                        }else{
                            //New metadata field, create a new list for it
                            resultingList = new ArrayList<DiscoverySearchFilter>();
                        }
                        resultingList.add(discoverySearchFilter);

                        searchFilters.put(metadataField, resultingList);
                    }
                }

                DiscoverySortConfiguration sortConfiguration = discoveryConfiguration.getSearchSortConfiguration();
                if(sortConfiguration != null){
                    for (DiscoverySortFieldConfiguration discoverySortConfiguration : sortConfiguration.getSortFields()) {
                        sortFields.put(discoverySortConfiguration.getMetadataField(), discoverySortConfiguration);
                    }
                }

                DiscoveryRecentSubmissionsConfiguration recentSubmissionConfiguration = discoveryConfiguration.getRecentSubmissionConfiguration();
                if(recentSubmissionConfiguration != null){
                    recentSubmissionsConfigurationMap.put(recentSubmissionConfiguration.getMetadataSortField(), recentSubmissionConfiguration);
                }
            }

            // Metadata fields listed in "discovery.index.ignore" are skipped.
            List<String> toIgnoreFields = new ArrayList<String>();
            String ignoreFieldsString = new DSpace().getConfigurationService().getProperty("discovery.index.ignore");
            if(ignoreFieldsString != null){
                if(ignoreFieldsString.indexOf(",") != -1){
                    for (int i = 0; i < ignoreFieldsString.split(",").length; i++) {
                        toIgnoreFields.add(ignoreFieldsString.split(",")[i].trim());
                    }
                } else {
                    toIgnoreFields.add(ignoreFieldsString);
                }
            }

            DCValue[] mydc = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
            for (DCValue meta : mydc){
                String field = meta.schema + "." + meta.element;
                String unqualifiedField = field;

                String value = meta.value;

                if (value == null) {
                    continue;
                }

                if (meta.qualifier != null && !meta.qualifier.trim().equals("")) {
                    field += "." + meta.qualifier;
                }

                //We are not indexing provenance, this is useless
                if (toIgnoreFields.contains(field) || toIgnoreFields.contains(unqualifiedField + "." + Item.ANY)) {
                    continue;
                }

                // Search-filter fields: match on the qualified field first,
                // then fall back to the schema.element.* wildcard config.
                if ((searchFilters.get(field) != null || searchFilters.get(unqualifiedField + "." + Item.ANY) != null)) {
                    List<DiscoverySearchFilter> searchFilterConfigs = searchFilters.get(field);
                    if(searchFilterConfigs == null){
                        searchFilterConfigs = searchFilters.get(unqualifiedField + "." + Item.ANY);
                    }

                    for (DiscoverySearchFilter searchFilter : searchFilterConfigs) {
                        if(searchFilter.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE)){
                            //For our search filters that are dates we format them properly
                            Date date = toDate(value);
                            if(date != null){
                                //TODO: make this date format configurable !
                                value = DateFormatUtils.formatUTC(date, "yyyy-MM-dd");
                            }
                        }
                        doc.addField(searchFilter.getIndexFieldName(), value);

                        //Add a dynamic fields for auto complete in search
                        if(searchFilter.isFullAutoComplete()){
                            doc.addField(searchFilter.getIndexFieldName() + "_ac", value);
                        }else{
                            // Word-level autocomplete: index each token separately.
                            String[] values = value.split(" ");
                            for (String val : values) {
                                doc.addField(searchFilter.getIndexFieldName() + "_ac", val);
                            }
                        }
                    }
                }

                if (sidebarFacets.get(field) != null || sidebarFacets.get(unqualifiedField + "." + Item.ANY) != null) {
                    //Retrieve the configurations
                    List<SidebarFacetConfiguration> facetConfigurations = sidebarFacets.get(field);
                    if(facetConfigurations == null){
                        facetConfigurations = sidebarFacets.get(unqualifiedField + "." + Item.ANY);
                    }

                    for (SidebarFacetConfiguration configuration : facetConfigurations) {
                        if(configuration.getType().equals(DiscoveryConfigurationParameters.TYPE_TEXT)){
                            //Add a special filter
                            //We use a separator to split up the lowercase and regular case, this is needed to get our filters in regular case
                            //Solr has issues with facet prefix and cases
                            String separator = new DSpace().getConfigurationService().getProperty("discovery.solr.facets.split.char");
                            if(separator == null){
                                separator = FILTER_SEPARATOR;
                            }
                            doc.addField(configuration.getIndexFieldName() + "_filter", value.toLowerCase() + separator + value);
                        }else if(configuration.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE)){
                            //For our sidebar filters that are dates we only add the year
                            Date date = toDate(value);
                            if(date != null){
                                String indexField = configuration.getIndexFieldName() + ".year";
                                doc.addField(indexField, DateFormatUtils.formatUTC(date, "yyyy"));
                                //Also save a sort value of this year, this is required for determining the upper & lower bound year of our facet
                                if(doc.getField(indexField + "_sort") == null){
                                    //We can only add one year so take the first one
                                    doc.addField(indexField + "_sort", DateFormatUtils.formatUTC(date, "yyyy"));
                                }
                            } else {
                                // NOTE(review): "date" is always null on this branch;
                                // logging "value" was probably intended — confirm.
                                log.warn("Error while indexing sidebar date field, item: " + item.getHandle() + " metadata field: " + field + " date value: " + date);
                            }
                        }
                    }
                }

                if ((sortFields.get(field) != null || recentSubmissionsConfigurationMap.get(field) != null) && !sortFieldsAdded.contains(field)) {
                    //Only add sort value once
                    String type;
                    if(sortFields.get(field) != null){
                        type = sortFields.get(field).getType();
                    }else{
                        type = recentSubmissionsConfigurationMap.get(field).getType();
                    }
                    if(type.equals(DiscoveryConfigurationParameters.TYPE_DATE)){
                        Date date = toDate(value);
                        if(date != null){
                            doc.addField(field + "_dt", date);
                        }else{
                            log.warn("Error while indexing sort date field, item: " + item.getHandle() + " metadata field: " + field + " date value: " + date);
                        }
                    }else{
                        doc.addField(field + "_sort", value);
                    }
                    sortFieldsAdded.add(field);
                }

                doc.addField(field, value.toLowerCase());

                if (meta.language != null && !meta.language.trim().equals("")) {
                    String langField = field + "." + meta.language;
                    doc.addField(langField, value);
                }
            }

        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }

        log.debug(" Added Metadata");

        try {
            // Group related publications under one "publication_grp" key:
            // the parent handle when dc.relation.ispartof is set, else self.
            DCValue[] values = item.getMetadata("dc.relation.ispartof");

            if(values != null && values.length > 0 && values[0] != null && values[0].value != null) {
                // group on parent
                String handlePrefix = ConfigurationManager.getProperty("handle.canonical.prefix");
                if (handlePrefix == null || handlePrefix.length() == 0) {
                    handlePrefix = "http://hdl.handle.net/";
                }

                doc.addField("publication_grp",values[0].value.replaceFirst(handlePrefix,"") );

            } else {
                // group on self
                doc.addField("publication_grp", item.getHandle());
            }

        } catch (Exception e){
            log.error(e.getMessage(),e);
        }

        log.debug(" Added Grouping");

        Vector<InputStreamReader> readers = new Vector<InputStreamReader>();

        try {
            // now get full text of any bitstreams in the TEXT bundle
            // trundle through the bundles
            Bundle[] myBundles = item.getBundles();

            for (Bundle myBundle : myBundles) {
                if ((myBundle.getName() != null)
                        && myBundle.getName().equals("TEXT")) {
                    // a-ha! grab the text out of the bitstreams
                    Bitstream[] myBitstreams = myBundle.getBitstreams();

                    for (Bitstream myBitstream : myBitstreams) {
                        try {
                            InputStreamReader is = new InputStreamReader(
                                    myBitstream.retrieve()); // get input
                            readers.add(is);

                            // Add each InputStream to the Indexed Document
                            doc.addField("fulltext", IOUtils.toString(is));

                            log.debug(" Added BitStream: "
                                    + myBitstream.getStoreNumber() + " "
                                    + myBitstream.getSequenceID() + " "
                                    + myBitstream.getName());

                        } catch (Exception e) {
                            // this will never happen, but compiler is now
                            // happy.
                            log.trace(e.getMessage(), e);
                        }
                    }
                }
            }
        } catch (RuntimeException e) {
            log.error(e.getMessage(), e);
        }

        //Do any additional indexing, depends on the plugins
        List<SolrServiceIndexPlugin> solrServiceIndexPlugins = new DSpace().getServiceManager().getServicesByType(SolrServiceIndexPlugin.class);
        for (SolrServiceIndexPlugin solrServiceIndexPlugin : solrServiceIndexPlugins) {
            solrServiceIndexPlugin.additionalIndex(item, doc);
        }

        // write the index and close the inputstreamreaders
        try {
            writeDocument(doc);
            log.info("Wrote Item: " + handle + " to Index");
        } catch (RuntimeException e) {
            log.error("Error while writing item to discovery index: " + handle + " message:"+ e.getMessage(), e);
        } finally {
            // Always close the bitstream readers opened above.
            Iterator<InputStreamReader> itr = readers.iterator();
            while (itr.hasNext()) {
                InputStreamReader reader = itr.next();
                if (reader != null) {
                    reader.close();
                }
            }
            log.debug("closed " + readers.size() + " readers");
        }
    }

    /**
     * Create Lucene document with all the shared fields initialized.
* @param type Type of DSpace Object
     * @param id the database id of the object
     * @param handle the handle of the object (may be null)
     * @param locations community/collection location identifiers (may be null)
     * @return a SolrInputDocument pre-populated with the shared fields
     */
    private SolrInputDocument buildDocument(int type, int id, String handle,
            List<String> locations) {
        SolrInputDocument doc = new SolrInputDocument();

        // want to be able to check when last updated
        // (not tokenized, but it is indexed)
        doc.addField(LAST_INDEXED_FIELD, new Date());

        // New fields to weaken the dependence on handles, and allow for faster
        // list display
        doc.addField("search.resourcetype", Integer.toString(type));

        doc.addField("search.resourceid", Integer.toString(id));

        // want to be able to search for handle, so use keyword
        // (not tokenized, but it is indexed)
        if (handle != null) {
            // want to be able to search for handle, so use keyword
            // (not tokenized, but it is indexed)
            doc.addField("handle", handle);
        }

        if (locations != null) {
            for (String location : locations) {
                // "m"-prefixed values are communities, "l"-prefixed are collections.
                doc.addField("location", location);
                if (location.startsWith("m")) {
                    doc.addField("location.comm", location.substring(1));
                } else {
                    doc.addField("location.coll", location.substring(1));
                }
            }
        }

        return doc;
    }

    /**
     * Helper function to retrieve a date using a best guess of the potential
     * date encodings on a field
     *
     * @param t the string to be transformed to a date
     * @return a date if the formatting was successful, null if not able to transform to a date
     */
    public static Date toDate(String t) {
        SimpleDateFormat[] dfArr;

        // Choose the likely date formats based on string length
        switch (t.length()) {
            case 4:
                dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyy")};
                break;
            case 6:
                dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyyMM")};
                break;
            case 7:
                dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyy-MM")};
                break;
            case 8:
                dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyyMMdd"),
                        new SimpleDateFormat("yyyy MMM")};
                break;
            case 10:
                dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyy-MM-dd")};
                break;
            case 11:
                dfArr = new SimpleDateFormat[]{new SimpleDateFormat("yyyy MMM dd")};
                break;
            case 20:
                dfArr = new SimpleDateFormat[]{new SimpleDateFormat(
                        "yyyy-MM-dd'T'HH:mm:ss'Z'")};
                break;
            default:
                dfArr = new SimpleDateFormat[]{new SimpleDateFormat(
                        "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")};
                break;
        }

        // Try each candidate format; the first successful strict UTC parse wins.
        for (SimpleDateFormat df : dfArr) {
            try {
                // Parse the date
                df.setCalendar(Calendar
                        .getInstance(TimeZone.getTimeZone("UTC")));
                df.setLenient(false);
                return df.parse(t);
            } catch (ParseException pe) {
                log.error("Unable to parse date format", pe);
            }
        }

        return null;
    }

    /**
     * Resolves a "location.comm"/"location.coll" facet value (a database id)
     * to the name of the community/collection; any other field passes through.
     */
    public static String locationToName(Context context, String field, String value) throws SQLException {
        if("location.comm".equals(field) || "location.coll".equals(field)){
            int type = field.equals("location.comm") ? Constants.COMMUNITY : Constants.COLLECTION;
            DSpaceObject commColl = DSpaceObject.find(context, type, Integer.parseInt(value));
            if(commColl != null) {
                return commColl.getName();
            }

        }
        return value;
    }

    //******** SearchService implementation

    // Scopes the query to the given container (community/collection) or item
    // before delegating to the plain search(context, query) overload.
    public DiscoverResult search(Context context, DSpaceObject dso, DiscoverQuery discoveryQuery) throws SearchServiceException {
        if(dso != null){
            if (dso instanceof Community) {
                discoveryQuery.addFilterQueries("location:m" + dso.getID());
            } else if (dso instanceof Collection) {
                discoveryQuery.addFilterQueries("location:l" + dso.getID());
            } else if (dso instanceof Item){
                discoveryQuery.addFilterQueries("handle:" + dso.getHandle());
            }
        }
        return search(context, discoveryQuery);
    }

    // Translates a DiscoverQuery into a SolrQuery, executes it, and maps the
    // Solr response back into a DiscoverResult.
    public DiscoverResult search(Context context, DiscoverQuery discoveryQuery) throws SearchServiceException {
        try {
            SolrQuery solrQuery = new SolrQuery();

            String query = "*:*";
            if(discoveryQuery.getQuery() != null){
                query = discoveryQuery.getQuery();
            }

            solrQuery.setQuery(query);

            for (int i = 0; i < discoveryQuery.getFilterQueries().size(); i++) {
                String filterQuery = discoveryQuery.getFilterQueries().get(i);
                solrQuery.addFilterQuery(filterQuery);
            }
            if(discoveryQuery.getDSpaceObjectFilter() != -1){
                solrQuery.addFilterQuery("search.resourcetype:" + discoveryQuery.getDSpaceObjectFilter());
            }

            for (int i = 0; i < discoveryQuery.getFieldPresentQueries().size(); i++) {
                String filterQuery = discoveryQuery.getFieldPresentQueries().get(i);
                // "[* TO *]" matches any document that has the field present.
                solrQuery.addFilterQuery(filterQuery + ":[* TO *]");
            }

            if(discoveryQuery.getStart() != -1){
                solrQuery.setStart(discoveryQuery.getStart());
            }

            if(discoveryQuery.getMaxResults() != -1){
                solrQuery.setRows(discoveryQuery.getMaxResults());
            }

            if(discoveryQuery.getSortField() != null){
                SolrQuery.ORDER order = SolrQuery.ORDER.asc;
                if(discoveryQuery.getSortOrder().equals(DiscoverQuery.SORT_ORDER.desc))
                    order = SolrQuery.ORDER.desc;

                solrQuery.addSortField(discoveryQuery.getSortField(), order);
            }

            for(String property : discoveryQuery.getProperties().keySet()){
                List<String> values = discoveryQuery.getProperties().get(property);
                solrQuery.add(property, values.toArray(new String[values.size()]));
            }

            List<DiscoverFacetField> facetFields = discoveryQuery.getFacetFields();
            if(0 < facetFields.size()){
                //Only add facet information if there are any facets
                for (DiscoverFacetField facetFieldConfig : facetFields) {
                    String field = transformFacetField(facetFieldConfig, facetFieldConfig.getField(), false);
                    solrQuery.addFacetField(field);

                    // Setting the facet limit in this fashion ensures that each facet can have its own max
                    solrQuery.add("f." + field + "." + FacetParams.FACET_LIMIT, String.valueOf(facetFieldConfig.getLimit()));
                    String facetSort;
                    if(DiscoveryConfigurationParameters.SORT.COUNT.equals(facetFieldConfig.getSortOrder())){
                        facetSort = FacetParams.FACET_SORT_COUNT;
                    }else{
                        facetSort = FacetParams.FACET_SORT_INDEX;
                    }
                    solrQuery.add("f." + field + "." + FacetParams.FACET_SORT, facetSort);
                    if(facetFieldConfig.getPrefix() != null){
                        solrQuery.setFacetPrefix(field, facetFieldConfig.getPrefix());
                    }
                }

                List<String> facetQueries = discoveryQuery.getFacetQueries();
                for (String facetQuery : facetQueries) {
                    solrQuery.addFacetQuery(facetQuery);
                }

                /*
                if(discoveryQuery.getFacetLimit() != -1){
                    solrQuery.setFacetLimit(discoveryQuery.getFacetLimit());
                }
                */

                if(discoveryQuery.getFacetMinCount() != -1){
                    solrQuery.setFacetMinCount(discoveryQuery.getFacetMinCount());
                }

                solrQuery.setParam(FacetParams.FACET_OFFSET, String.valueOf(discoveryQuery.getFacetOffset()));
            }

            QueryResponse queryResponse = getSolr().query(solrQuery);
            return retrieveResult(context, discoveryQuery, queryResponse);

        } catch (Exception e) {
            throw new org.dspace.discovery.SearchServiceException(e.getMessage(),e);
        }
    }

    @Override
    public InputStream searchJSON(DiscoverQuery query, DSpaceObject dso, String jsonIdentifier) throws SearchServiceException {
        // Scope the query to the given DSpace object, mirroring search(...).
        if(dso != null){
            if (dso instanceof Community) {
                query.addFilterQueries("location:m" + dso.getID());
            } else if (dso instanceof Collection) {
                query.addFilterQueries("location:l" + dso.getID());
            } else if (dso instanceof Item){
                query.addFilterQueries("handle:" + dso.getHandle());
            }
        }
        return searchJSON(query, jsonIdentifier);
    }

    public InputStream searchJSON(DiscoverQuery query, String jsonIdentifier) throws SearchServiceException {
        Map<String, String> params = new HashMap<String, String>();

        // NOTE(review): reads the "solr" field directly; getSolr() may not
        // have initialized it yet at this point — confirm callers' ordering.
        String solrRequestUrl = solr.getBaseURL() + "/select";

        //Add our default parameters
        params.put(CommonParams.ROWS, "0");
        params.put(CommonParams.WT, "json");
        //We use json as our output type
        params.put("json.nl", "map");
        params.put("json.wrf", jsonIdentifier);
        params.put(FacetParams.FACET, Boolean.TRUE.toString());

        //Generate our json out of the given params
        try {
            params.put(CommonParams.Q, URLEncoder.encode(query.getQuery(), org.dspace.constants.Constants.DEFAULT_ENCODING));
        } catch (UnsupportedEncodingException uee) {
            //Should never occur
return null; } params.put(FacetParams.FACET_MINCOUNT, String.valueOf(query.getFacetMinCount())); solrRequestUrl = generateURL(solrRequestUrl, params); if (query.getFacetFields() != null || query.getFilterQueries() != null) { StringBuilder urlBuilder = new StringBuilder(solrRequestUrl); if(query.getFacetFields() != null){ //Add our facet fields for (DiscoverFacetField facetFieldConfig : query.getFacetFields()) { urlBuilder.append("&").append(FacetParams.FACET_FIELD).append("="); //This class can only be used for autocomplete facet fields try { String field = facetFieldConfig.getField() + "_ac"; urlBuilder.append(URLEncoder.encode(field, org.dspace.constants.Constants.DEFAULT_ENCODING)); //Add the sort order urlBuilder.append("&f.").append(field).append("." + FacetParams.FACET_SORT).append("="); if(DiscoveryConfigurationParameters.SORT.COUNT.equals(facetFieldConfig.getSortOrder())){ urlBuilder.append(FacetParams.FACET_SORT_COUNT); }else{ urlBuilder.append(FacetParams.FACET_SORT_INDEX); } urlBuilder.append("&f.").append(field).append("." 
+ FacetParams.FACET_LIMIT).append("=").append(facetFieldConfig.getLimit()); } catch (UnsupportedEncodingException e) { //Ignore this } } } if(query.getFilterQueries() != null){ for (String filterQuery : query.getFilterQueries()) { try { urlBuilder.append("&").append(CommonParams.FQ).append("=").append(URLEncoder.encode(filterQuery, org.dspace.constants.Constants.DEFAULT_ENCODING)); } catch (UnsupportedEncodingException e) { //Ignore this } } } solrRequestUrl = urlBuilder.toString(); } try { GetMethod get = new GetMethod(solrRequestUrl); new HttpClient().executeMethod(get); return get.getResponseBodyAsStream(); } catch (Exception e) { log.error("Error while getting json solr result for discovery search recommendation", e); e.printStackTrace(); } return null; } private String generateURL(String baseURL, Map<String, String> parameters) { boolean first = true; for (String key : parameters.keySet()) { if (first) { baseURL += "?"; first = false; } else { baseURL += "&"; } baseURL += key + "=" + parameters.get(key); } return baseURL; } private DiscoverResult retrieveResult(Context context, DiscoverQuery query, QueryResponse solrQueryResponse) throws SQLException { DiscoverResult result = new DiscoverResult(); if(solrQueryResponse != null){ result.setStart(query.getStart()); result.setMaxResults(query.getMaxResults()); result.setTotalSearchResults(solrQueryResponse.getResults().getNumFound()); List<String> searchFields = query.getSearchFields(); for (SolrDocument doc : solrQueryResponse.getResults()) { DSpaceObject dso = findDSpaceObject(context, doc); if(dso != null){ result.addDSpaceObject(dso); } else { log.error(LogManager.getHeader(context, "Error while retrieving DSpace object from discovery index", "Handle: " + doc.getFirstValue("handle"))); continue; } DiscoverResult.SearchDocument resultDoc = new DiscoverResult.SearchDocument(); //Add information about our search fields for (String field : searchFields){ List<String> valuesAsString = new ArrayList<String>(); for 
(Object o : doc.getFieldValues(field)) { valuesAsString.add(String.valueOf(o)); } resultDoc.addSearchField(field, valuesAsString.toArray(new String[valuesAsString.size()])); } result.addSearchDocument(dso, resultDoc); } //Resolve our facet field values List<FacetField> facetFields = solrQueryResponse.getFacetFields(); if(facetFields != null){ for (int i = 0; i < facetFields.size(); i++) { FacetField facetField = facetFields.get(i); DiscoverFacetField facetFieldConfig = query.getFacetFields().get(i); List<FacetField.Count> facetValues = facetField.getValues(); if (facetValues != null) { if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE) && facetFieldConfig.getSortOrder().equals(DiscoveryConfigurationParameters.SORT.VALUE)){ //If we have a date & are sorting by value, ensure that the results are flipped for a proper result Collections.reverse(facetValues); } for (FacetField.Count facetValue : facetValues) { String displayedValue = transformDisplayedValue(context, facetField.getName(), facetValue.getName()); String field = transformFacetField(facetFieldConfig, facetField.getName(), true); result.addFacetResult(field, new DiscoverResult.FacetResult(facetValue.getAsFilterQuery(), displayedValue, facetValue.getCount())); } } } } if(solrQueryResponse.getFacetQuery() != null){ //TODO: do not sort when not a date, just retrieve the facets in the order they where requested ! 
//At the moment facet queries are only used for dates so we need to sort our results TreeMap<String, Integer> sortedFacetQueries = new TreeMap<String, Integer>(solrQueryResponse.getFacetQuery()); for(String facetQuery : sortedFacetQueries.descendingKeySet()){ //TODO: do not assume this, people may want to use it for other ends, use a regex to make sure //We have a facet query, the values looks something like: dateissued.year:[1990 TO 2000] AND -2000 //Prepare the string from {facet.field.name}:[startyear TO endyear] to startyear - endyear String facetField = facetQuery.substring(0, facetQuery.indexOf(":")); String name = facetQuery.substring(facetQuery.indexOf('[') + 1); name = name.substring(0, name.lastIndexOf(']')).replaceAll("TO", "-"); Integer count = sortedFacetQueries.get(facetQuery); //No need to show empty years if(0 < count){ result.addFacetResult(facetField, new DiscoverResult.FacetResult(facetQuery, name, count)); } } } } return result; } private static DSpaceObject findDSpaceObject(Context context, SolrDocument doc) throws SQLException { Integer type = (Integer) doc.getFirstValue("search.resourcetype"); Integer id = (Integer) doc.getFirstValue("search.resourceid"); String handle = (String) doc.getFirstValue("handle"); if (type != null && id != null) { return DSpaceObject.find(context, type, id); } else if (handle != null) { return HandleManager.resolveToObject(context, handle); } return null; } /** Simple means to return the search result as an InputStream */ public java.io.InputStream searchAsInputStream(DiscoverQuery query) throws SearchServiceException, java.io.IOException { try { org.apache.commons.httpclient.methods.GetMethod method = new org.apache.commons.httpclient.methods.GetMethod(getSolr().getHttpClient().getHostConfiguration().getHostURL() + ""); method.setQueryString(query.toString()); getSolr().getHttpClient().executeMethod(method); return method.getResponseBodyAsStream(); } catch (org.apache.solr.client.solrj.SolrServerException e) { 
throw new SearchServiceException(e.getMessage(), e); } } public List<DSpaceObject> search(Context context, String query, int offset, int max, String... filterquery) { return search(context, query, null, true, offset, max, filterquery); } public List<DSpaceObject> search(Context context, String query, String orderfield, boolean ascending, int offset, int max, String... filterquery) { try { SolrQuery solrQuery = new SolrQuery(); solrQuery.setQuery(query); solrQuery.setFields("search.resourceid", "search.resourcetype"); solrQuery.setStart(offset); solrQuery.setRows(max); if (orderfield != null) { solrQuery.setSortField(orderfield, ascending ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc); } if (filterquery != null) { solrQuery.addFilterQuery(filterquery); } QueryResponse rsp = getSolr().query(solrQuery); SolrDocumentList docs = rsp.getResults(); Iterator iter = docs.iterator(); List<DSpaceObject> result = new ArrayList<DSpaceObject>(); while (iter.hasNext()) { SolrDocument doc = (SolrDocument) iter.next(); DSpaceObject o = DSpaceObject.find(context, (Integer) doc.getFirstValue("search.resourcetype"), (Integer) doc.getFirstValue("search.resourceid")); if (o != null) { result.add(o); } } return result; } catch (Exception e) { // Any acception that we get ignore it. 
// We do NOT want any crashed to shown by the user e.printStackTrace(); return new ArrayList<DSpaceObject>(0); } } public DiscoverFilterQuery toFilterQuery(Context context, String filterQuery) throws SQLException { DiscoverFilterQuery result = new DiscoverFilterQuery(); //TODO: what if user enters something with a ":" in it String field = filterQuery; String value = filterQuery; if(filterQuery.contains(":")) { field = filterQuery.substring(0, filterQuery.indexOf(":")); value = filterQuery.substring(filterQuery.indexOf(":") + 1, filterQuery.length()); }else{ //We have got no field, so we are using everything field = "*"; } value = value.replace("\\", ""); if("*".equals(field)) { field = "all"; } if(filterQuery.startsWith("*:") || filterQuery.startsWith(":")) { filterQuery = filterQuery.substring(filterQuery.indexOf(":") + 1, filterQuery.length()); } // "(tea coffee)".replaceFirst("\\((.*?)\\)", "") value = transformDisplayedValue(context, field, value); result.setField(field); result.setFilterQuery(filterQuery); result.setDisplayedValue(value); return result; } public DiscoverFilterQuery toFilterQuery(Context context, String field, String value) throws SQLException{ DiscoverFilterQuery result = new DiscoverFilterQuery(); result.setField(field); result.setDisplayedValue(transformDisplayedValue(context, field, value)); // TODO: solr escape of value ? result.setFilterQuery((field == null || field.equals("") ? 
"" : field + ":") + "(" + value + ")"); return result; } @Override public String toSortFieldIndex(String metadataField, String type) { if(type.equals(DiscoveryConfigurationParameters.TYPE_DATE)){ return metadataField + "_dt"; }else{ return metadataField + "_sort"; } } private String transformFacetField(DiscoverFacetField facetFieldConfig, String field, boolean removePostfix) { if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_TEXT)){ if(removePostfix){ return field.substring(0, field.lastIndexOf("_filter")); }else{ return field + "_filter"; } }else if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_DATE)){ if(removePostfix){ return field.substring(0, field.lastIndexOf(".year")); }else{ return field + ".year"; } }else if(facetFieldConfig.getType().equals(DiscoveryConfigurationParameters.TYPE_AC)){ if(removePostfix){ return field.substring(0, field.lastIndexOf("_ac")); }else{ return field + "_ac"; } }else{ return field; } } private String transformDisplayedValue(Context context, String field, String value) throws SQLException { if(field.equals("location.comm") || field.equals("location.coll")){ value = locationToName(context, field, value); }else if(field.endsWith("_filter")){ //We have a filter make sure we split ! String separator = new DSpace().getConfigurationService().getProperty("discovery.solr.facets.split.char"); if(separator == null){ separator = FILTER_SEPARATOR; } //Escape any regex chars separator = java.util.regex.Pattern.quote(separator); String[] fqParts = value.split(separator); StringBuffer valueBuffer = new StringBuffer(); int start = fqParts.length / 2; for(int i = start; i < fqParts.length; i++){ valueBuffer.append(fqParts[i]); } value = valueBuffer.toString(); }else if(value.matches("\\((.*?)\\)")) { //The brackets where added for better solr results, remove the first & last one value = value.substring(1, value.length() -1); } return value; } }