package dk.statsbiblioteket.medieplatform.autonomous;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.slf4j.Logger;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/**
 * This is the solr proxy iterator. It handles paged solr searches without exposing this behaviour.
 * It implements Iterator and can be treated as just a normal iterator. When the current page of hits
 * has been exhausted, it performs a new search to get the next page of hits. It will continue to do so
 * while there are hits in solr.
 *
 * All hits are sorted by item creation time. This way, the sorting is stable, and any changes or additions
 * will always happen at the end of the list. Thus, we can use an offset into the list to do paging.
 *
 * @param <T> the type of items
 */
public class SolrProxyIterator<T extends Item> implements Iterator<T> {

    public static final String PREMIS_NO_DETAILS = "premis_no_details";
    public static final String LAST_MODIFIED = "lastmodified_date";
    public static final String SORT_DATE = "initial_date";

    private static Logger log = org.slf4j.LoggerFactory.getLogger(SolrProxyIterator.class);

    protected Iterator<T> items = null;
    protected final String queryString;
    protected final boolean details;
    protected final HttpSolrServer summaSearch;
    protected final PremisManipulatorFactory<T> premisManipulatorFactory;
    protected final DomsEventStorage<T> domsEventStorage;
    protected final int rows;
    protected int start = 0;
    protected int position = 0;

    /**
     * Create a new solr proxy iterator.
     *
     * @param queryString              the query string for solr
     * @param details                  should details be fetched from DOMS or Solr? True means that details are
     *                                 fetched from DOMS. False means use only what is in the SBOI index, which
     *                                 lacks certain fields
     * @param summaSearch              the http solr server to query
     * @param premisManipulatorFactory the premis factory to parse the premis into items
     * @param domsEventStorage         the doms event storage to use, if details is true. Can be null if details
     *                                 are false
     * @param pageSize                 the number of hits to fetch per solr request
     */
    public SolrProxyIterator(String queryString, boolean details, HttpSolrServer summaSearch,
                             PremisManipulatorFactory<T> premisManipulatorFactory,
                             DomsEventStorage<T> domsEventStorage, int pageSize) {
        this.queryString = queryString;
        this.details = details;
        this.summaSearch = summaSearch;
        this.premisManipulatorFactory = premisManipulatorFactory;
        this.domsEventStorage = domsEventStorage;
        rows = pageSize;
        search();
    }

    /**
     * If at least one item remains in the cache, return true. Otherwise, do a search in SBOI for more hits.
     * If any more hits are found, put them in the cache and return true. Otherwise return false.
     */
    @Override
    public synchronized boolean hasNext() {
        if (position >= rows) {
            start += rows;
            position = 0;
            search();
        }
        return items.hasNext();
    }

    /**
     * Perform a search in SBOI and replace the field items with the result of this search.
     *
     * @see #items
     */
    protected void search() {
        try {
            SolrQuery query = new SolrQuery();
            query.setQuery(queryString);
            // Fetch size. Do not go over 1000 unless you specify fields to fetch which do not include content_text.
            query.setRows(rows);
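            // Offset-based paging: each call to search() fetches the window [start, start + rows).
            // The stable sort on SORT_DATE (item creation time) means later changes or additions only
            // append to the end of the result list, so the offset stays valid between searches.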
            query.setStart(start);
            // IMPORTANT! Only use facets if needed. Must be overwritten to false; facets are very slow and expensive.
            query.set("facet", "false");
            query.setFields(SBOIEventIndex.UUID, LAST_MODIFIED);
            if (!details) {
                query.addField(PREMIS_NO_DETAILS);
            }
            query.addSort(SORT_DATE, SolrQuery.ORDER.asc);

            QueryResponse response = summaSearch.query(query, SolrRequest.METHOD.POST);
            SolrDocumentList results = response.getResults();
            List<T> hits = new ArrayList<>();
            for (SolrDocument result : results) {
                T hit;
                String uuid = result.getFirstValue(SBOIEventIndex.UUID).toString();
                String lastModified = result.getFirstValue(LAST_MODIFIED).toString();
                if (!details) { // No details requested, so we can retrieve everything from Summa
                    if (result.getFirstValue(PREMIS_NO_DETAILS) == null) {
                        hit = premisManipulatorFactory.createInitialPremisBlob(uuid).toItem();
                    } else {
                        String blob = result.getFirstValue(PREMIS_NO_DETAILS).toString();
                        hit = premisManipulatorFactory.createFromStringBlob(blob).toItem();
                    }
                } else { // Details requested, so go to DOMS
                    try {
                        hit = domsEventStorage.getItemFromDomsID(uuid);
                    } catch (NotFoundException e) {
                        continue;
                    }
                }
                hit.setDomsID(uuid);
                hit.setLastModified(parseDate(lastModified));
                hits.add(hit);
            }
            items = hits.iterator();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Parse an annoying Fedora date.
     *
     * @param lastModified the date as a string
     * @return the parsed date, or null if the string could not be parsed
     */
    private Date parseDate(String lastModified) {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSX");
        try {
            return format.parse(lastModified);
        } catch (ParseException e) {
            log.warn("Failed to parse date {}", lastModified, e);
            return null;
        }
    }

    /**
     * Get the next hit. If there is no cached next hit, perform a search for more hits. If there are no more hits,
     * throw NoSuchElementException, otherwise return the next hit.
     *
     * @return next hit
     * @throws java.util.NoSuchElementException if there are no more cached hits and no more hits in SBOI
     */
    @Override
    public synchronized T next() {
        if (hasNext()) {
            position++;
            return items.next();
        } else {
            throw new NoSuchElementException();
        }
    }

    /**
     * Unsupported operation.
     *
     * @throws java.lang.UnsupportedOperationException when called
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
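/*
 * Hypothetical usage sketch, not part of this class. The Solr URL, the query string, the page size and the
 * factory instance below are assumptions made purely for illustration; details is false here, so the
 * DomsEventStorage argument may be null as documented on the constructor:
 *
 *   HttpSolrServer solr = new HttpSolrServer("http://example.org/solr/sboi");
 *   Iterator<Item> iterator =
 *           new SolrProxyIterator<>("*:*", false, solr, premisManipulatorFactory, null, 100);
 *   while (iterator.hasNext()) {
 *       Item item = iterator.next();
 *       // process item
 *   }
 */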