/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.authority.indexer;
import org.apache.commons.collections.CollectionUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.dspace.authority.AuthorityValue;
import org.dspace.authority.AuthorityValueFinder;
import org.dspace.authority.AuthorityValueGenerator;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.dspace.content.Metadatum;
import org.dspace.content.Item;
import org.dspace.content.ItemIterator;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context;
import org.dspace.services.ConfigurationService;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import java.net.MalformedURLException;
import java.sql.SQLException;
import java.util.*;
/**
 * DSpaceAuthorityIndexer is used in IndexClient, which is called by the AuthorityConsumer and the indexing-script.
 * <p>
 * An instance of DSpaceAuthorityIndexer is bound to a list of items.
 * This can be a single item or all items, depending on which init() method is called.
 * <p>
 * DSpaceAuthorityIndexer lets you iterate over each metadata value
 * for each metadata field configured through the 'authority.author.indexer.field.N' properties
 * for each item in the list.
 * <p>
 * Iteration protocol: {@link #hasMore()} advances the cursor and prepares the value that a
 * subsequent {@link #nextValue()} returns; {@link #close()} releases the item iterator and the cache.
 *
 * @author Antoine Snyers (antoine at atmire.com)
 * @author Kevin Van de Velde (kevin at atmire dot com)
 * @author Ben Bosman (ben at atmire dot com)
 * @author Mark Diggory (markd at atmire dot com)
 */
public class DSpaceAuthorityIndexer implements AuthorityIndexerInterface, InitializingBean {

    private static final Logger log = Logger.getLogger(DSpaceAuthorityIndexer.class);

    /** Iterator over the items to index; assigned by the init() methods and released by close(). */
    private ItemIterator itemIterator;

    /** The item whose metadata is currently being walked; null when there is nothing (left) to index. */
    private Item currentItem;

    /**
     * The list of metadata fields which are to be indexed,
     * loaded from the 'authority.author.indexer.field.N' properties in afterPropertiesSet().
     */
    private List<String> metadataFields;

    /** Index into {@link #metadataFields} of the field currently being walked. */
    private int currentFieldIndex;

    /** Index of the next value to visit within the current field of the current item. */
    private int currentMetadataIndex;

    /** Whether values without an authority key may be reused from {@link #cache}. */
    private boolean useCache;

    /** Authority values prepared so far, keyed by metadata text value; only used when useCache is set. */
    private Map<String, AuthorityValue> cache;

    /** The value prepared by the most recent successful hasMore() call. */
    private AuthorityValue nextValue;

    private Context context;

    private AuthorityValueFinder authorityValueFinder;

    @Autowired(required = true)
    protected ConfigurationService configurationService;

    /**
     * Loads the metadata fields to index from the configuration.
     * Properties 'authority.author.indexer.field.1', '.2', ... are read in sequence
     * until the first one that is not defined.
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        int counter = 1;
        String field;
        metadataFields = new ArrayList<String>();
        while ((field = configurationService.getProperty("authority.author.indexer.field." + counter)) != null) {
            metadataFields.add(field);
            counter++;
        }
    }

    /**
     * Binds this indexer to a single item. The cache is disabled in this mode.
     *
     * @param context the DSpace context used for lookups and value generation
     * @param item    the only item whose metadata will be iterated
     */
    public void init(Context context, Item item) {
        ArrayList<Integer> itemList = new ArrayList<Integer>();
        itemList.add(item.getID());
        this.itemIterator = new ItemIterator(context, itemList);
        try {
            currentItem = this.itemIterator.next();
        } catch (SQLException e) {
            // Do not keep iterating over an item left behind by an earlier init().
            currentItem = null;
            log.error("Error while retrieving an item in the metadata indexer", e);
        }
        initialize(context);
    }

    /**
     * Binds this indexer to all items, without caching.
     *
     * @param context the DSpace context used for lookups and value generation
     */
    public void init(Context context) {
        init(context, false);
    }

    /**
     * Binds this indexer to all items returned by {@code Item.findAll(context)}.
     *
     * @param context  the DSpace context used for lookups and value generation
     * @param useCache when true, values without an authority key that have the same text
     *                 reuse the authority value prepared earlier in this run
     */
    public void init(Context context, boolean useCache) {
        try {
            this.itemIterator = Item.findAll(context);
            currentItem = this.itemIterator.next();
        } catch (SQLException e) {
            // Do not keep iterating over an item left behind by an earlier init().
            currentItem = null;
            log.error("Error while retrieving all items in the metadata indexer", e);
        }
        initialize(context);
        // Must come after initialize(), which resets the flag to false.
        this.useCache = useCache;
    }

    /**
     * Resets the iteration cursor and per-run state shared by all init() variants.
     *
     * @param context the DSpace context to remember for this run
     */
    private void initialize(Context context) {
        this.context = context;
        this.authorityValueFinder = new AuthorityValueFinder();
        currentFieldIndex = 0;
        currentMetadataIndex = 0;
        useCache = false;
        cache = new HashMap<>();
    }

    /**
     * @return the authority value prepared by the last call to {@link #hasMore()} that returned true;
     *         may be null if no value could be found or generated
     */
    public AuthorityValue nextValue() {
        return nextValue;
    }

    /**
     * Advances to the next metadata value and prepares the matching authority value.
     * <p>
     * Traversal order is: values of the current field, then the next configured field,
     * then the next item. Preparing a value may update and save the current item
     * (see prepareNextValue).
     * <p>
     * This is implemented as a loop rather than recursively so that a long run of items
     * without any matching metadata cannot exhaust the call stack.
     *
     * @return true if a value was prepared and can be read with {@link #nextValue()};
     *         false when all items are exhausted, no fields are configured,
     *         or the next item could not be retrieved
     */
    public boolean hasMore() {
        if (CollectionUtils.isEmpty(metadataFields)) {
            // Nothing to iterate over; isConfiguredProperly() reports this misconfiguration.
            return false;
        }

        while (currentItem != null) {
            // 1. iterate over the metadata values
            String metadataField = metadataFields.get(currentFieldIndex);
            Metadatum[] values = currentItem.getMetadataByMetadataString(metadataField);
            if (currentMetadataIndex < values.length) {
                prepareNextValue(metadataField, values[currentMetadataIndex]);
                currentMetadataIndex++;
                return true;
            }

            // 2. iterate over the metadata fields
            if ((currentFieldIndex + 1) < metadataFields.size()) {
                currentFieldIndex++;
                // Reset our current metadata index since we are moving to another field
                currentMetadataIndex = 0;
                continue;
            }

            // 3. iterate over the items
            try {
                if (itemIterator != null && itemIterator.hasNext()) {
                    currentItem = itemIterator.next();
                    // Reset our current field index
                    currentFieldIndex = 0;
                    // Reset our current metadata index
                    currentMetadataIndex = 0;
                } else {
                    currentItem = null;
                }
            } catch (SQLException e) {
                currentItem = null;
                log.error("Error while retrieving next item in the author indexer", e);
                return false;
            }
        }
        return false;
    }

    /**
     * This method looks at the authority of a metadata.
     * If the authority can be found in solr, that value is reused.
     * Otherwise a new authority value will be generated that will be indexed in solr.
     * If the authority starts with AuthorityValueGenerator.GENERATE, a specific type of AuthorityValue will be generated.
     * Depending on the type this may involve querying an external REST service
     * <p>
     * The result is stored in the field read by {@link #nextValue()}. When the metadata had no
     * authority key, or one that requested generation, the current item is updated with the
     * resulting authority and saved.
     *
     * @param metadataField Is one of the fields defined in dspace.cfg to be indexed.
     * @param value         Is one of the values of the given metadataField in one of the items being indexed.
     */
    private void prepareNextValue(String metadataField, Metadatum value) {
        nextValue = null;

        String content = value.value;
        String authorityKey = value.authority;
        // We only want to update our item IF our UUID is not present or if we need to generate one.
        boolean requiresItemUpdate = StringUtils.isBlank(authorityKey)
                || StringUtils.startsWith(authorityKey, AuthorityValueGenerator.GENERATE);

        if (!requiresItemUpdate) {
            // A real authority key is present: try to reuse the existing value.
            // Keys starting with AuthorityValueGenerator.GENERATE are deliberately not looked up;
            // it is not strictly necessary but it prevents exceptions in solr.
            nextValue = authorityValueFinder.findByUID(context, authorityKey);
        } else if (useCache && StringUtils.isBlank(authorityKey)) {
            // A metadata without authority is being indexed.
            // If there is an exact match in the cache, reuse it rather than adding a new one.
            nextValue = cache.get(content);
        }

        if (nextValue == null) {
            // The generator is given the field name with dots replaced by underscores.
            nextValue = AuthorityValueGenerator.generate(context, authorityKey, content,
                    metadataField.replace('.', '_'));
        }

        if (nextValue != null && requiresItemUpdate) {
            nextValue.updateItem(currentItem, value);
            try {
                currentItem.update();
            } catch (Exception e) {
                // Best effort: a failed item update must not abort the indexing run.
                log.error("Error creating a metadatavalue's authority", e);
            }
        }

        if (useCache && nextValue != null) {
            // Never store null: it would evict a usable value cached earlier for the same text.
            cache.put(content, nextValue);
        }
    }

    /**
     * Releases the item iterator and clears the cache.
     * Safe to call when initialisation failed or when the indexer was already closed.
     */
    public void close() {
        if (itemIterator != null) {
            itemIterator.close();
            itemIterator = null;
        }
        // With the iterator gone there is nothing left to iterate; make hasMore() return false.
        currentItem = null;
        if (cache != null) {
            cache.clear();
        }
    }

    /**
     * Checks that at least one metadata field is configured for indexing, logging a warning if not.
     *
     * @return true if one or more metadata fields were loaded from the configuration
     */
    public boolean isConfiguredProperly() {
        if (CollectionUtils.isEmpty(metadataFields)) {
            log.warn("Authority indexer not properly configured, no metadata fields configured for indexing. Check the \"authority.author.indexer.field\" properties.");
            return false;
        }
        return true;
    }
}