/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at * * http://www.dspace.org/license/ */ package org.dspace.discovery; import org.apache.log4j.Logger; import org.apache.commons.cli.*; import org.dspace.content.Collection; import org.dspace.content.Community; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.service.ItemService; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.handle.factory.HandleServiceFactory; import org.dspace.services.factory.DSpaceServicesFactory; import java.io.IOException; import java.sql.SQLException; import java.util.Iterator; /** * Class used to reindex dspace communities/collections/items into discovery * * @author Kevin Van de Velde (kevin at atmire dot com) * @author Mark Diggory (markd at atmire dot com) * @author Ben Bosman (ben at atmire dot com) */ public class IndexClient { private static final Logger log = Logger.getLogger(IndexClient.class); /** * When invoked as a command-line tool, creates, updates, removes content * from the whole index * * @param args the command-line arguments, none used * @throws SQLException * An exception that provides information on a database access error or other errors. * @throws IOException * A general class of exceptions produced by failed or interrupted I/O operations. * @throws SearchServiceException if something went wrong with querying the solr server */ public static void main(String[] args) throws SQLException, IOException, SearchServiceException { Context context = new Context(); context.turnOffAuthorisationSystem(); String usage = "org.dspace.discovery.IndexClient [-cbhf] | [-r <handle>] | [-i <handle>] or nothing to update/clean an existing index."; Options options = new Options(); HelpFormatter formatter = new HelpFormatter(); CommandLine line = null; options.addOption(OptionBuilder .withArgName("handle to remove") .hasArg(true) .withDescription( "remove an Item, Collection or Community from index based on its handle") .create("r")); options.addOption(OptionBuilder .withArgName("handle to add or update") .hasArg(true) .withDescription( "add or update an Item, Collection or Community based on its handle") .create("i")); options.addOption(OptionBuilder .isRequired(false) .withDescription( "clean existing index removing any documents that no longer exist in the db") .create("c")); options.addOption(OptionBuilder .isRequired(false) .withDescription( "(re)build index, wiping out current one if it exists") .create("b")); options.addOption(OptionBuilder .isRequired(false) .withDescription( "Rebuild the spellchecker, can be combined with -b and -f.") .create("s")); options.addOption(OptionBuilder .isRequired(false) .withDescription( "if updating existing index, force each handle to be reindexed even if uptodate") .create("f")); options.addOption(OptionBuilder .isRequired(false) .withDescription( "print this help message") .create("h")); options.addOption(OptionBuilder.isRequired(false).withDescription( "optimize search core").create("o")); try { line = new PosixParser().parse(options, args); } catch (Exception e) { // automatically generate the help statement formatter.printHelp(usage, e.getMessage(), options, ""); System.exit(1); } if (line.hasOption("h")) { // automatically generate the help statement formatter.printHelp(usage, options); System.exit(1); } /** Acquire from dspace-services in future */ /** * new DSpace.getServiceManager().getServiceByName("org.dspace.discovery.SolrIndexer"); */ IndexingService indexer = DSpaceServicesFactory.getInstance().getServiceManager().getServiceByName( IndexingService.class.getName(), IndexingService.class ); if (line.hasOption("r")) { log.info("Removing " + line.getOptionValue("r") + " from Index"); indexer.unIndexContent(context, line.getOptionValue("r")); } else if (line.hasOption("c")) { log.info("Cleaning Index"); indexer.cleanIndex(line.hasOption("f")); } else if (line.hasOption("b")) { log.info("(Re)building index from scratch."); indexer.createIndex(context); checkRebuildSpellCheck(line, indexer); } else if (line.hasOption("o")) { log.info("Optimizing search core."); indexer.optimize(); } else if (line.hasOption('s')) { checkRebuildSpellCheck(line, indexer); } else if (line.hasOption('i')) { final String handle = line.getOptionValue('i'); final DSpaceObject dso = HandleServiceFactory.getInstance().getHandleService().resolveToObject(context, handle); if (dso == null) { throw new IllegalArgumentException("Cannot resolve " + handle + " to a DSpace object"); } log.info("Forcibly Indexing " + handle); // Enable batch mode; we may be indexing a large number of items context.enableBatchMode(true); final long startTimeMillis = System.currentTimeMillis(); final long count = indexAll(indexer, ContentServiceFactory.getInstance().getItemService(), context, dso); final long seconds = (System.currentTimeMillis() - startTimeMillis ) / 1000; log.info("Indexed " + count + " DSpace object" + (count > 1 ? "s" : "") + " in " + seconds + " seconds"); } else { log.info("Updating and Cleaning Index"); indexer.cleanIndex(line.hasOption("f")); indexer.updateIndex(context, line.hasOption("f")); checkRebuildSpellCheck(line, indexer); } log.info("Done with indexing"); } /** * Indexes the given object and all children, if applicable. * * @param indexingService * * @param itemService * * @param context * The relevant DSpace Context. * @param dso * DSpace object to index recursively * @throws IOException * A general class of exceptions produced by failed or interrupted I/O operations. * @throws SearchServiceException in case of a solr exception * @throws SQLException * An exception that provides information on a database access error or other errors. */ private static long indexAll(final IndexingService indexingService, final ItemService itemService, final Context context, final DSpaceObject dso) throws IOException, SearchServiceException, SQLException { long count = 0; indexingService.indexContent(context, dso, true, true); count++; if (dso.getType() == Constants.COMMUNITY) { final Community community = (Community) dso; final String communityHandle = community.getHandle(); for (final Community subcommunity : community.getSubcommunities()) { count += indexAll(indexingService, itemService, context, subcommunity); //To prevent memory issues, discard an object from the cache after processing context.uncacheEntity(subcommunity); } final Community reloadedCommunity = (Community) HandleServiceFactory.getInstance().getHandleService().resolveToObject(context, communityHandle); for (final Collection collection : reloadedCommunity.getCollections()) { count++; indexingService.indexContent(context, collection, true, true); count += indexItems(indexingService, itemService, context, collection); //To prevent memory issues, discard an object from the cache after processing context.uncacheEntity(collection); } } else if (dso.getType() == Constants.COLLECTION) { count += indexItems(indexingService, itemService, context, (Collection) dso); } return count; } /** * Indexes all items in the given collection. * * @param indexingService * * @param itemService * * @param context * The relevant DSpace Context. * @param collection * collection to index * @throws IOException * A general class of exceptions produced by failed or interrupted I/O operations. * @throws SearchServiceException in case of a solr exception * @throws SQLException * An exception that provides information on a database access error or other errors. */ private static long indexItems(final IndexingService indexingService, final ItemService itemService, final Context context, final Collection collection) throws IOException, SearchServiceException, SQLException { long count = 0; final Iterator<Item> itemIterator = itemService.findByCollection(context, collection); while (itemIterator.hasNext()) { Item item = itemIterator.next(); indexingService.indexContent(context, item, true, false); count++; //To prevent memory issues, discard an object from the cache after processing context.uncacheEntity(item); } indexingService.commit(); return count; } /** * Check the command line options and rebuild the spell check if active. * @param line the command line options * @param indexer the solr indexer * @throws SearchServiceException in case of a solr exception */ protected static void checkRebuildSpellCheck(CommandLine line, IndexingService indexer) throws SearchServiceException { if (line.hasOption("s")) { log.info("Rebuilding spell checker."); indexer.buildSpellCheck(); } } }