/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.xoai.app;
import com.lyncode.xoai.dataprovider.exceptions.ConfigurationException;
import com.lyncode.xoai.dataprovider.exceptions.MetadataBindException;
import com.lyncode.xoai.dataprovider.exceptions.WritingXmlException;
import com.lyncode.xoai.dataprovider.xml.XmlOutputContext;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.ConnectException;
import java.sql.SQLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.ORDER;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.dspace.authorize.factory.AuthorizeServiceFactory;
import org.dspace.authorize.service.AuthorizeService;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.content.MetadataField;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.xoai.exceptions.CompilingException;
import org.dspace.xoai.services.api.cache.XOAICacheService;
import org.dspace.xoai.services.api.cache.XOAIItemCacheService;
import org.dspace.xoai.services.api.cache.XOAILastCompilationCacheService;
import org.dspace.xoai.services.api.config.ConfigurationService;
import org.dspace.xoai.services.api.CollectionsService;
import org.dspace.xoai.services.api.solr.SolrServerResolver;
import org.dspace.xoai.solr.DSpaceSolrSearch;
import org.dspace.xoai.solr.exceptions.DSpaceSolrException;
import org.dspace.xoai.solr.exceptions.DSpaceSolrIndexerException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import static com.lyncode.xoai.dataprovider.core.Granularity.Second;
import static org.dspace.xoai.util.ItemUtils.retrieveMetadata;
/**
 * Command-line manager for the DSpace OAI 2.0 (XOAI) data provider.
 *
 * <p>Depending on the configured {@code oai.storage} backend, the tool either
 * (re)builds the OAI Solr index ({@code import}), compiles items into the item
 * cache ({@code compile-items}), erases those compiled items
 * ({@code erase-compiled-items}) or purges cached OAI responses
 * ({@code clean-cache}). See {@link #main(String[])} for the entry point.
 *
 * <p>Instances are created by {@link #main(String[])}, which then asks the
 * Spring application context to autowire the {@code @Autowired} fields.
 *
 * @author Lyncode Development Team (dspace at lyncode dot com)
 */
@SuppressWarnings("deprecation")
public class XOAI {
    private static final Logger log = LogManager.getLogger(XOAI.class);

    /** Command: import DSpace items into the OAI Solr index. */
    private static final String COMMAND_IMPORT = "import";
    /** Command: purge the cached OAI responses. */
    private static final String COMMAND_CLEAN_CACHE = "clean-cache";
    /** Command: compile items into the item cache (database storage). */
    private static final String COMMAND_COMPILE_ITEMS = "compile-items";
    /** Command: erase the compiled items (database storage). */
    private static final String COMMAND_ERASE_COMPILED_ITEMS = "erase-compiled-items";

    /** DSpace context used for every database/service call made by this instance. */
    private final Context context;
    /** Whether the Solr index is optimized once indexing has finished. */
    private final boolean optimize;
    /** Whether per-item progress lines are printed. */
    private final boolean verbose;
    /** Whether the Solr index is wiped and fully rebuilt instead of updated incrementally. */
    private final boolean clean;

    // The following collaborators are injected by Spring (see main(), which
    // calls autowireBean on every XOAI instance it creates).
    @Autowired
    private SolrServerResolver solrServerResolver;
    @Autowired
    private XOAILastCompilationCacheService xoaiLastCompilationCacheService;
    @Autowired
    private XOAIItemCacheService xoaiItemCacheService;
    @Autowired
    private CollectionsService collectionsService;

    private final AuthorizeService authorizeService;
    private final ItemService itemService;

    /**
     * Creates an indexer.
     *
     * @param context  DSpace context used for all lookups
     * @param optimize {@code true} to optimize the Solr index after indexing
     * @param clean    {@code true} to clear the index and run a full import
     * @param verbose  {@code true} to print a line for every processed item
     */
    public XOAI(Context context, boolean optimize, boolean clean, boolean verbose) {
        this.context = context;
        this.optimize = optimize;
        this.clean = clean;
        this.verbose = verbose;
        // Load necessary DSpace services
        this.authorizeService = AuthorizeServiceFactory.getInstance().getAuthorizeService();
        this.itemService = ContentServiceFactory.getInstance().getItemService();
    }

    /**
     * Creates an instance with {@code optimize} and {@code clean} both disabled.
     *
     * @param ctx       DSpace context used for all lookups
     * @param hasOption {@code true} for verbose output
     */
    public XOAI(Context ctx, boolean hasOption) {
        this(ctx, false, false, hasOption);
    }

    /**
     * Collects the distinct MIME types of the bitstreams found in the item's
     * {@code ORIGINAL} bundles, in first-seen order.
     *
     * <p>An {@link SQLException} is logged and not rethrown; the MIME types
     * gathered up to that point are returned.
     *
     * @param item item whose bitstreams are inspected
     * @return the distinct MIME types, possibly empty
     */
    private List<String> getFileFormats(Item item) {
        List<String> formats = new ArrayList<>();
        try {
            for (Bundle b : itemService.getBundles(item, "ORIGINAL")) {
                for (Bitstream bs : b.getBitstreams()) {
                    // Resolve the format once per bitstream instead of twice.
                    String mimeType = bs.getFormat(context).getMIMEType();
                    if (!formats.contains(mimeType)) {
                        formats.add(mimeType);
                    }
                }
            }
        } catch (SQLException ex) {
            log.error(ex.getMessage(), ex);
        }
        return formats;
    }

    /** Prints a single line to standard output. */
    private void println(String line) {
        System.out.println(line);
    }

    /**
     * Updates the OAI Solr index.
     *
     * <p>When {@code clean} is set the index is cleared and fully rebuilt.
     * Otherwise the most recent {@code item.lastmodified} value already in the
     * index is looked up and only items modified since then are indexed; an
     * empty index falls back to a full import. Afterwards the index is
     * committed, optionally optimized, and the last-compilation date is set to
     * the current time.
     *
     * @return the number of items processed
     * @throws DSpaceSolrIndexerException if Solr or I/O access fails
     */
    public int index() throws DSpaceSolrIndexerException {
        int result = 0;
        try {
            if (clean) {
                clearIndex();
                System.out.println("Using full import.");
                result = this.indexAll();
            } else {
                // Ask Solr for the single most recently modified document.
                SolrQuery solrParams = new SolrQuery("*:*")
                        .addField("item.lastmodified")
                        .addSortField("item.lastmodified", ORDER.desc).setRows(1);
                SolrDocumentList results = DSpaceSolrSearch.query(solrServerResolver.getServer(), solrParams);
                if (results.getNumFound() == 0) {
                    System.out.println("There are no indexed documents, using full import.");
                    result = this.indexAll();
                } else {
                    result = this.index((Date) results.get(0).getFieldValue("item.lastmodified"));
                }
            }
            solrServerResolver.getServer().commit();
            if (optimize) {
                println("Optimizing Index");
                solrServerResolver.getServer().optimize();
                println("Index optimized");
            }
            // Set last compilation date
            xoaiLastCompilationCacheService.put(new Date());
            return result;
        } catch (DSpaceSolrException | SolrServerException | IOException ex) {
            throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
        }
    }

    /**
     * Indexes the items modified since the given date.
     *
     * @param last lower bound passed to the item lookup
     * @return the number of items processed
     * @throws DSpaceSolrIndexerException if the item lookup or Solr access fails
     */
    private int index(Date last) throws DSpaceSolrIndexerException {
        System.out
                .println("Incremental import. Searching for documents modified after: "
                        + last.toString());
        // Index both in_archive items AND withdrawn items. Withdrawn items will be flagged withdrawn
        // (in order to notify external OAI harvesters of their new status)
        try {
            Iterator<Item> iterator = itemService.findInArchiveOrWithdrawnDiscoverableModifiedSince(
                    context, last);
            return this.index(iterator);
        } catch (SQLException ex) {
            throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
        }
    }

    /**
     * Indexes every item returned by the item lookup when no date bound is given.
     *
     * @return the number of items processed
     * @throws DSpaceSolrIndexerException if the item lookup or Solr access fails
     */
    private int indexAll() throws DSpaceSolrIndexerException {
        System.out.println("Full import");
        try {
            // Index both in_archive items AND withdrawn items. Withdrawn items will be flagged withdrawn
            // (in order to notify external OAI harvesters of their new status)
            Iterator<Item> iterator = itemService.findInArchiveOrWithdrawnDiscoverableModifiedSince(
                    context, null);
            return this.index(iterator);
        } catch (SQLException ex) {
            throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
        }
    }

    /**
     * Builds a Solr document for each item and adds it to the index, then commits.
     *
     * <p>A failure to build one item's document is logged and the loop moves
     * on. Note that the returned counter is incremented for every item
     * visited, including those whose document could not be built.
     *
     * @param iterator items to index
     * @return the number of items visited
     * @throws DSpaceSolrIndexerException if adding to or committing the index fails
     */
    private int index(Iterator<Item> iterator)
            throws DSpaceSolrIndexerException {
        try {
            int i = 0;
            SolrServer server = solrServerResolver.getServer();
            while (iterator.hasNext()) {
                try {
                    server.add(this.index(iterator.next()));
                } catch (SQLException | MetadataBindException | ParseException
                        | XMLStreamException | WritingXmlException ex) {
                    log.error(ex.getMessage(), ex);
                }
                i++;
                if (i % 100 == 0) {
                    System.out.println(i + " items imported so far...");
                }
            }
            System.out.println("Total: " + i + " items");
            server.commit();
            return i;
        } catch (SolrServerException | IOException ex) {
            throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
        }
    }

    /**
     * Builds the Solr document for one item: identification fields, owning
     * collections and parent communities, every metadata value (plus authority
     * and confidence when an authority is present), bitstream MIME types, and
     * the item's metadata serialized as XML in {@code item.compile}.
     *
     * @param item item to convert
     * @return the populated Solr document
     */
    private SolrInputDocument index(Item item) throws SQLException, MetadataBindException, ParseException, XMLStreamException, WritingXmlException {
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("item.id", item.getID());
        boolean pub = this.isPublic(item);
        doc.addField("item.public", pub);
        String handle = item.getHandle();
        doc.addField("item.handle", handle);
        doc.addField("item.lastmodified", item.getLastModified());
        if (item.getSubmitter() != null) {
            doc.addField("item.submitter", item.getSubmitter().getEmail());
        }
        // Withdrawn items are indexed as deleted.
        doc.addField("item.deleted", item.isWithdrawn() ? "true" : "false");
        for (Collection col : item.getCollections()) {
            doc.addField("item.collections",
                    "col_" + col.getHandle().replace("/", "_"));
        }
        for (Community com : collectionsService.flatParentCommunities(context, item)) {
            doc.addField("item.communities",
                    "com_" + com.getHandle().replace("/", "_"));
        }

        List<MetadataValue> allData = itemService.getMetadata(item,
                Item.ANY, Item.ANY, Item.ANY, Item.ANY);
        for (MetadataValue dc : allData) {
            MetadataField field = dc.getMetadataField();
            // Field name: metadata.<schema>.<element>[.<qualifier>]
            String key = "metadata."
                    + field.getMetadataSchema().getName() + "."
                    + field.getElement();
            if (field.getQualifier() != null) {
                key += "." + field.getQualifier();
            }
            doc.addField(key, dc.getValue());
            if (dc.getAuthority() != null) {
                doc.addField(key + ".authority", dc.getAuthority());
                doc.addField(key + ".confidence", dc.getConfidence() + "");
            }
        }

        for (String f : getFileFormats(item)) {
            doc.addField("metadata.dc.format.mimetype", f);
        }

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        XmlOutputContext xmlContext = XmlOutputContext.emptyContext(out, Second);
        retrieveMetadata(context, item).write(xmlContext);
        xmlContext.getWriter().flush();
        xmlContext.getWriter().close();
        // NOTE(review): toString() decodes with the platform default charset;
        // confirm this matches the encoding used by the XML writer.
        doc.addField("item.compile", out.toString());

        if (verbose) {
            println("Item with handle " + handle + " indexed");
        }
        return doc;
    }

    /**
     * Tells whether READ access is allowed on the item for this instance's context.
     *
     * @param item item to check
     * @return the authorization result, or {@code false} if the check failed
     *         with an {@link SQLException} (which is logged)
     */
    private boolean isPublic(Item item) {
        boolean pub = false;
        try {
            //Check if READ access allowed on this Item
            pub = authorizeService.authorizeActionBoolean(context, item, Constants.READ);
        } catch (SQLException ex) {
            // Pass the throwable so the stack trace is logged, as elsewhere in this class.
            log.error(ex.getMessage(), ex);
        }
        return pub;
    }

    /**
     * Prints a hint to standard error when the throwable is a recognised
     * failure (currently only {@link ConnectException}).
     *
     * @param t throwable to inspect
     * @return {@code true} if a hint was printed
     */
    private static boolean getKnownExplanation(Throwable t) {
        if (t instanceof ConnectException) {
            System.err.println("Solr server ("
                    + ConfigurationManager.getProperty("oai", "solr.url")
                    + ") is down, turn it on.");
            return true;
        }
        return false;
    }

    /**
     * Walks the cause chain of the throwable looking for a recognised failure.
     *
     * @param t throwable whose cause chain is searched
     * @return {@code true} if any throwable in the chain was explained
     */
    private static boolean searchForReason(Throwable t) {
        if (getKnownExplanation(t)) {
            return true;
        }
        if (t.getCause() != null) {
            return searchForReason(t.getCause());
        }
        return false;
    }

    /**
     * Deletes every document from the Solr index and commits.
     *
     * @throws DSpaceSolrIndexerException if Solr or I/O access fails
     */
    private void clearIndex() throws DSpaceSolrIndexerException {
        try {
            System.out.println("Clearing index");
            solrServerResolver.getServer().deleteByQuery("*:*");
            solrServerResolver.getServer().commit();
            System.out.println("Index cleared");
        } catch (SolrServerException | IOException ex) {
            throw new DSpaceSolrIndexerException(ex.getMessage(), ex);
        }
    }

    /**
     * Empties both the item cache and the response cache.
     *
     * @param xoaiItemCacheService item cache to empty
     * @param xoaiCacheService     response cache to empty
     * @throws IOException if either cache cannot be emptied
     */
    private static void cleanCache(XOAIItemCacheService xoaiItemCacheService, XOAICacheService xoaiCacheService) throws IOException {
        System.out.println("Purging cached OAI responses.");
        xoaiItemCacheService.deleteAll();
        xoaiCacheService.deleteAll();
    }

    /**
     * Command-line entry point. The first non-option argument selects the
     * action; which actions are accepted depends on whether the
     * {@code oai.storage} property equals {@code database}. With {@code -h},
     * no action, or an action not valid for the storage mode, the usage text
     * is printed instead.
     *
     * <p>Errors raised while running an action are caught, logged and (unless
     * a known explanation was printed) dumped to standard error.
     *
     * @param argv command-line arguments
     */
    public static void main(String[] argv) throws IOException, ConfigurationException {
        AnnotationConfigApplicationContext applicationContext = new AnnotationConfigApplicationContext(new Class[]{
            BasicConfiguration.class
        });

        ConfigurationService configurationService = applicationContext.getBean(ConfigurationService.class);
        XOAICacheService cacheService = applicationContext.getBean(XOAICacheService.class);
        XOAIItemCacheService itemCacheService = applicationContext.getBean(XOAIItemCacheService.class);

        Context ctx = null;

        try {
            CommandLineParser parser = new PosixParser();
            Options options = new Options();
            options.addOption("c", "clear", false, "Clear index before indexing");
            options.addOption("o", "optimize", false,
                    "Optimize index at the end");
            options.addOption("v", "verbose", false, "Verbose output");
            options.addOption("h", "help", false, "Shows some help");
            // NOTE(review): leftover development option; it is parsed but never read.
            options.addOption("n", "number", true, "FOR DEVELOPMENT MUST DELETE");
            CommandLine line = parser.parse(options, argv);

            String[] validSolrCommands = {COMMAND_IMPORT, COMMAND_CLEAN_CACHE};
            String[] validDatabaseCommands = {COMMAND_CLEAN_CACHE, COMMAND_COMPILE_ITEMS, COMMAND_ERASE_COMPILED_ITEMS};

            // Solr is assumed unless the storage is explicitly "database".
            boolean solr = !("database").equals(configurationService.getProperty("oai", "storage"));

            String[] args = line.getArgs();
            boolean run = false;
            if (args.length > 0) {
                String[] validCommands = solr ? validSolrCommands : validDatabaseCommands;
                run = Arrays.asList(validCommands).contains(args[0]);
            }

            if (!line.hasOption('h') && run) {
                System.out.println("OAI 2.0 manager action started");
                long start = System.currentTimeMillis();

                String command = args[0];

                if (COMMAND_IMPORT.equals(command)) {
                    ctx = new Context();
                    XOAI indexer = new XOAI(ctx,
                            line.hasOption('o'),
                            line.hasOption('c'),
                            line.hasOption('v'));

                    applicationContext.getAutowireCapableBeanFactory().autowireBean(indexer);

                    int imported = indexer.index();
                    // Cached responses only go stale when something was (re)indexed.
                    if (imported > 0) {
                        cleanCache(itemCacheService, cacheService);
                    }
                } else if (COMMAND_CLEAN_CACHE.equals(command)) {
                    cleanCache(itemCacheService, cacheService);
                } else if (COMMAND_COMPILE_ITEMS.equals(command)) {
                    ctx = new Context();
                    XOAI indexer = new XOAI(ctx, line.hasOption('v'));
                    applicationContext.getAutowireCapableBeanFactory().autowireBean(indexer);

                    indexer.compile();

                    cleanCache(itemCacheService, cacheService);
                } else if (COMMAND_ERASE_COMPILED_ITEMS.equals(command)) {
                    cleanCompiledItems(itemCacheService);
                    cleanCache(itemCacheService, cacheService);
                }

                System.out.println("OAI 2.0 manager action ended. It took "
                        + ((System.currentTimeMillis() - start) / 1000)
                        + " seconds.");
            } else {
                usage();
            }
        } catch (Throwable ex) {
            if (!searchForReason(ex)) {
                ex.printStackTrace();
            }
            log.error(ex.getMessage(), ex);
        } finally {
            try {
                // Abort our context, if still open
                if (ctx != null && ctx.isValid()) {
                    ctx.abort();
                }
            } finally {
                // Release the Spring context even if aborting the DSpace context fails.
                applicationContext.close();
            }
        }
    }

    /**
     * Empties the item cache.
     *
     * @param itemCacheService item cache to empty
     * @throws IOException if the cache cannot be emptied
     */
    private static void cleanCompiledItems(XOAIItemCacheService itemCacheService) throws IOException {
        System.out.println("Purging compiled items");
        itemCacheService.deleteAll();
    }

    /**
     * Compiles items into the item cache. When no last-compilation date is
     * stored all items are compiled; otherwise only those modified since that
     * date. On success the last-compilation date is set to the current time.
     *
     * @throws CompilingException wrapping any {@link SQLException} or {@link IOException}
     */
    private void compile() throws CompilingException {
        Iterator<Item> iterator;
        try {
            Date last = xoaiLastCompilationCacheService.get();

            if (last == null) {
                System.out.println("Retrieving all items to be compiled");
                iterator = itemService.findAll(context);
            } else {
                System.out.println("Retrieving items modified after " + last + " to be compiled");
                iterator = itemService.findByLastModifiedSince(context, last);
            }

            while (iterator.hasNext()) {
                Item item = iterator.next();
                if (verbose) {
                    System.out.println("Compiling item with handle: " + item.getHandle());
                }
                xoaiItemCacheService.put(item, retrieveMetadata(context, item));
            }

            xoaiLastCompilationCacheService.put(new Date());
        } catch (SQLException | IOException e) {
            throw new CompilingException(e);
        }
        System.out.println("Items compiled");
    }

    /**
     * Prints the usage text for the configured storage mode (Solr unless the
     * {@code oai.storage} property equals {@code database}).
     */
    private static void usage() {
        // Solr is assumed unless the storage is explicitly "database".
        boolean solr = !("database").equals(ConfigurationManager.getProperty("oai", "storage"));

        System.out.println("OAI Manager Script");
        System.out.println("Syntax: oai <action> [parameters]");
        System.out.println("> Possible actions:");
        if (solr) {
            System.out.println(" " + COMMAND_IMPORT + " - To import DSpace items into OAI index and cache system");
            System.out.println(" " + COMMAND_CLEAN_CACHE + " - Cleans the OAI cached responses");
            System.out.println("> Parameters:");
            System.out.println(" -o Optimize index after indexing (" + COMMAND_IMPORT + " only)");
            System.out.println(" -c Clear index (" + COMMAND_IMPORT + " only)");
        } else {
            System.out.println(" " + COMMAND_CLEAN_CACHE + " - Cleans the OAI cached responses");
            System.out.println(" " + COMMAND_COMPILE_ITEMS + " - Compiles all DSpace items");
            System.out.println(" " + COMMAND_ERASE_COMPILED_ITEMS + " - Erase the OAI compiled items");
            System.out.println("> Parameters:");
        }
        System.out.println(" -v Verbose output");
        System.out.println(" -h Shows this text");
    }
}