// SearchService.java example
//
// Explorer
// DataHubSystem-master
/*
 * Data Hub Service (DHuS) - For Space data distribution.
 * Copyright (C) 2013,2014,2015,2016 GAEL Systems
 *
 * This file is part of DHuS software sources.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package fr.gael.dhus.service;

import java.io.IOException;
import java.util.AbstractList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.Suggestion;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.stereotype.Service;

import fr.gael.dhus.database.object.Collection;
import fr.gael.dhus.database.object.MetadataIndex;
import fr.gael.dhus.database.object.Product;
import fr.gael.dhus.search.DHusSearchException;
import fr.gael.dhus.search.SolrDao;
import fr.gael.dhus.service.metadata.MetadataType;
import fr.gael.dhus.service.metadata.SolrField;

@Service
public class SearchService extends WebService
{
   /** Class-wide logger, used to report indexing and search failures. */
   private static final Logger LOGGER = LogManager.getLogger(SearchService.class);

   /**
    * Access to the Solr index. This class uses it to index, remove, search, scroll,
    * fetch suggestions, optimize and reconfigure the index.
    */
   @Autowired
   private SolrDao solrDao;

   /** Used to look up the collections a product belongs to when building its Solr document. */
   @Autowired
   private CollectionService collectionService;

   /** Used to load, list and delete the products referenced by index entries. */
   @Autowired
   private ProductService productService;

   /** Used to resolve a metadata index to its metadata type (and Solr field) by name. */
   @Autowired
   private MetadataTypeService metadataTypeService;


   /**
    * Indexes or reindexes a product.
    * <p>
    * The product is converted to a Solr document whose {@code id} field is
    * {@link Product#getId()}, then handed to the Solr DAO.
    * Any failure is logged and never propagated to the caller.
    *
    * @param product the product to (re)index.
    */
   public void index(Product product)
   {
      try
      {
         final long begin = System.currentTimeMillis();

         final SolrInputDocument document = toInputDocument(product);
         LOGGER.debug("Indexing product '" + product.getPath() + "'");

         solrDao.index(document);

         final long elapsed = System.currentTimeMillis() - begin;
         LOGGER.info("Indexed product in  " + elapsed + "ms");
      }
      catch (Exception e)
      {
         // Covers both Solr/IO failures and any error raised while building the document.
         LOGGER.error("Cannot index product", e);
      }
   }

   /**
    * Removes the given product from the index.
    * <p>
    * The index entry is addressed by {@link Product#getId()}.
    * Any failure is logged and never propagated to the caller.
    *
    * @param product the product to remove.
    */
   public void remove(Product product)
   {
      try
      {
         solrDao.remove(product.getId());
      }
      catch (Exception ex)
      {
         // The leading space before "from" keeps the identifier and the word apart in the log.
         LOGGER.error("Cannot remove product " + product.getIdentifier() + " from index", ex);
      }
   }

   /**
    * Refreshes the index entry of the given product.
    * <p>
    * Simply delegates to {@link #index(Product)}.
    *
    * @param product the product whose index entry must be refreshed.
    */
   public void update(Product product)
   {
      this.index(product);
   }

   /**
    * Paginated search for system operations.
    * <p>
    * The returned iterator walks the Solr documents matching the query and resolves each of
    * them to a {@link Product} through the product service, using the document's {@code id}
    * field. Its {@code remove()} method deletes, through the product service, the product that
    * was last returned by {@code next()}.
    * <p>
    * If the Solr query cannot be started, the error is logged and an empty iterator is returned.
    *
    * @param query Solr query {@code q} parameter.
    * @return an iterator of found products.
    */
   public Iterator<Product> search(String query)
   {
      final Iterator<SolrDocument> it;
      try
      {
         it = solrDao.scroll(new SolrQuery(query));
      }
      catch (IOException|SolrServerException ex)
      {
         LOGGER.error("An exception occured while searching", ex);
         return Collections.<Product>emptyIterator();
      }

      return new Iterator<Product>()
      {
         /** Id of the document last returned by next(). */
         private Long lastId;

         /** True when next() has been called and remove() has not been called since. */
         private boolean removable = false;

         @Override
         public boolean hasNext()
         {
            return it.hasNext();
         }

         @Override
         public Product next()
         {
            lastId = (Long) it.next().get("id");
            removable = true;
            return productService.getProduct(lastId);
         }

         @Override
         public void remove()
         {
            // Iterator contract: remove() acts on the element last returned by next().
            // It must not advance the underlying cursor (which would delete the NEXT product).
            if (!removable)
            {
               throw new IllegalStateException(
                     "next() must be called before remove(), and only once per element");
            }
            productService.deleteProduct(lastId);
            removable = false;
         }
      };
   }

   /**
    * Runs a Solr search and returns the matching documents.
    * <p>
    * Pagination is driven by the {@code start} and {@code rows} values of the given SolrQuery.<br>
    * <strong>When {@code rows} is not set, Solr returns 10 documents, no more.</strong>
    * <p>
    * The total number of documents matching the query is available from
    * {@code res.getNumFound()} on the returned list.
    * <p>
    * Note that the {@code q} parameter of the given query object is overwritten with the
    * value returned by the Solr DAO's {@code updateQuery}.
    *
    * @param query a SolrQuery with at least a 'q' parameter set, must not be null.
    * @return the list of Solr documents matching the given query.
    * @throws NullPointerException if {@code query} is null.
    * @throws DHusSearchException if Solr cannot be queried.
    */
   @PreAuthorize("hasRole('ROLE_SEARCH')")
   public SolrDocumentList search(SolrQuery query)
   {
      Objects.requireNonNull(query);

      final String rewritten = solrDao.updateQuery(query.getQuery());
      query.setQuery(rewritten);

      final SolrDocumentList found;
      try
      {
         found = solrDao.search(query).getResults();
      }
      catch (SolrServerException | IOException ex)
      {
         LOGGER.error(ex);
         throw new DHusSearchException("An exception occured while searching", ex);
      }
      return found;
   }

   /**
    * Resolves a Solr document to its product.
    * <p>
    * The document's {@code id} field is read as a {@code Long} and passed to the product service.
    *
    * @param doc index entry for a product, as returned by {@link #search(SolrQuery)}.
    * @return the product (database object) returned by the product service for that id.
    */
   public Product asProduct(SolrDocument doc)
   {
      final Object rawId = doc.get("id");
      return productService.getProduct(Long.class.cast(rawId));
   }

   /**
    * Returns how many Solr documents match the given query.
    * <p>
    * Only the count is needed, so the query is sent with {@code rows=0} and no document is
    * fetched. Counts greater than {@link Integer#MAX_VALUE} are reported as
    * {@link Integer#MAX_VALUE} instead of overflowing.
    *
    * @param query a Solr {@code q} query.
    * @return Solr document count, capped at {@link Integer#MAX_VALUE}.
    * @throws DHusSearchException if Solr cannot be queried.
    * @deprecated use {@link #search(SolrQuery)}{@code .getNumFound()}.
    */
   @Deprecated
   @PreAuthorize("hasRole('ROLE_SEARCH')")
   public int getResultCount(String query)
   {
      try
      {
         SolrQuery countQuery = new SolrQuery(solrDao.updateQuery(query));
         // numFound does not depend on the page size: do not transfer any document.
         countQuery.setRows(0);

         long found = solrDao.search(countQuery).getResults().getNumFound();
         // numFound is a long: clamp rather than silently wrap around with a plain cast.
         return (int) Math.min(found, (long) Integer.MAX_VALUE);
      }
      catch (SolrServerException | IOException ex)
      {
         LOGGER.error(ex);
         throw new DHusSearchException("An exception occured while searching", ex);
      }
   }

   /**
    * Returns a list of suggestions for the given input.
    * <p>
    * The suggestions are read from the {@code "suggest"} entry of the Solr response and
    * exposed as a read-only view of their terms.
    * An empty list is returned when Solr cannot be queried (the error is logged) or when the
    * response holds no {@code "suggest"} entry.
    *
    * @param input search input.
    * @return list of suggested terms, never null.
    */
   @PreAuthorize("hasRole('ROLE_SEARCH')")
   public List<String> getSuggestions(String input)
   {
      final List<Suggestion> sggs;
      try
      {
         sggs = solrDao.getSuggestions(input).getSuggestions().get("suggest");
      }
      catch (IOException|SolrServerException ex)
      {
         LOGGER.error("Cannot get suggestions from Solr", ex);
         return Collections.emptyList();
      }

      // The lookup may yield nothing; without this guard the view below would only fail
      // later, in the caller, with a NullPointerException on size()/get().
      if (sggs == null)
      {
         return Collections.emptyList();
      }

      // Lazy view: terms are extracted on access, no copy is made.
      return new AbstractList<String>()
      {
         @Override
         public String get(int index)
         {
            return sggs.get(index).getTerm();
         }

         @Override
         public int size()
         {
            return sggs.size();
         }
      };
   }

   /**
    * Integrity check of the index.
    * <p>
    * Scrolls over every document of the index and removes the entries whose {@code id} is
    * not known to the product service. Failures are logged, never propagated.
    */
   public void checkIndex()
   {
      try
      {
         SolrQuery query = new SolrQuery("*:*");
         query.setFilterQueries("*");
         query.setStart(0);

         for (Iterator<SolrDocument> docs = solrDao.scroll(query); docs.hasNext();)
         {
            SolrDocument doc = docs.next();
            Long id = (Long) doc.get("id");
            if (productService.systemGetProduct(id) != null)
            {
               // Entry is backed by a product, nothing to do.
               continue;
            }

            LOGGER.warn("Removing unknown product " + id + " from solr index");
            try
            {
               solrDao.remove(id);
               // decrease the offset, because a product has been removed
               // NOTE(review): presumably the scroll iterator re-reads the query's start
               // offset when it fetches further results -- confirm against SolrDao#scroll.
               query.setStart(query.getStart() - 1);
            }
            catch (IOException e)
            {
               LOGGER.error("Cannot remove Solr entry " + id, e);
            }
         }
      }
      catch (IOException|SolrServerException ex)
      {
         LOGGER.error("Cannot check the index", ex);
      }
   }

   /**
    * Optimizes the index: every segment of the index is merged into one monolithic file.
    * <p>
    * Optimizing is very expensive. On an index that changes constantly the slight performance
    * boost does not last long, so the tradeoff is rarely worth it for a non static index.
    * <p>
    * This method blocks until optimization is complete, and Solr won't respond to search
    * queries until then. Failures are logged, never propagated.
    */
   public void optimizeIndex()
   {
      try
      {
         this.solrDao.optimize();
      }
      catch (IOException|SolrServerException e)
      {
         LOGGER.error("Cannot optimize index", e);
      }
   }

   /**
    * Wipes the current index and reindexes everything from the database.
    * <p>
    * The index is emptied first; if the product service then returns no product, a warning is
    * logged and the method returns. Otherwise Solr is temporarily reconfigured for bulk
    * indexing, every product is indexed in batch, and the index is optimized.
    * The temporary configuration is always reverted, even when indexing or optimizing fails.
    * Solr and IO failures are logged, never propagated.
    */
   public void fullReindex()
   {
      try
      {
         solrDao.removeAll();

         long start = System.currentTimeMillis();

         final Iterator<Product> products = productService.systemGetProducts(null, null, 0);

         if (!products.hasNext())
         {
            LOGGER.warn("Reindex: table PRODUCTS is empty, aborting...");
            return;
         }

         // Makes an adaptor for SolrDao#batchIndex(...)
         Iterator<SolrInputDocument> it = new Iterator<SolrInputDocument>()
         {
            @Override
            public boolean hasNext()
            {
               return products.hasNext();
            }

            @Override
            public SolrInputDocument next()
            {
               Product product = products.next();
               // Metadata indexes are loaded explicitly before the conversion reads them.
               product.setIndexes(productService.getIndexes(product.getId()));
               return toInputDocument(product);
            }

            @Override
            public void remove()
            {
               throw new UnsupportedOperationException("Do not use remove().");
            }
         };

         // Best config for bulk reindex
         // see: http://lucidworks.com/blog/2013/08/23/understanding-transaction-logs-softcommit-and-commit-in-sorlcloud/
         Map<String, String> config = new HashMap<>();
         config.put("updateHandler.autoSoftCommit.maxDocs", "-1");     // Opens a new searcher (the slowest operation).
         config.put("updateHandler.autoSoftCommit.maxTime", "-1");     // Opens a new searcher (the slowest operation).
         config.put("updateHandler.autoCommit.maxDocs", "-1");         // Time based autocommit is better.
         config.put("updateHandler.autoCommit.maxTime", "60000");      // 1 minute, controls the size of tlog files.
         config.put("updateHandler.autoCommit.openSearcher", "false"); // Opens a new searcher (the slowest operation).
         solrDao.setProperties(config);

         try
         {
            solrDao.batchIndex(it);
            solrDao.optimize();
         }
         finally
         {
            // Always revert the bulk settings, otherwise a failed reindex would leave
            // Solr running with the bulk-indexing commit configuration.
            solrDao.unsetProperties(config.keySet());
         }

         LOGGER.info("Full reindex done in " + (System.currentTimeMillis() - start) + "ms");
      }
      catch (IOException | SolrServerException ex)
      {
         LOGGER.error("Failed to reindex", ex);
      }
   }

   /**
    * Makes a SolrInputDocument from a Product database object.
    * The returned document can be indexed as is.
    * <p>
    * The document contains:
    * <ul>
    * <li>one field per metadata index that either maps to a Solr field (through its metadata
    *     type) or declares a queryable name; the queryable name is lower-cased to form the
    *     field name,</li>
    * <li>the {@code contents} field, fed with the value of every metadata index whose type is
    *     null, empty or {@code text/plain},</li>
    * <li>the {@code id}, {@code uuid} and {@code path} attributes of the product,</li>
    * <li>one {@code collection} value per collection containing the product.</li>
    * </ul>
    *
    * @param product to convert.
    * @return an indexable solr document.
    */
   private SolrInputDocument toInputDocument(Product product)
   {
      String path = product.getPath().toString();
      if (path.startsWith("/")) // FIXME: should be done by the ingestion process!
      {
         // Note: as the path already starts with '/', the result starts with "file://".
         path = "file:/" + path;
      }

      SolrInputDocument doc = new SolrInputDocument();

      // Metadata
      List<MetadataIndex> indices = product.getIndexes();
      if (indices == null || indices.isEmpty())
      {
         LOGGER.warn("Product '" + product.getIdentifier() + "' contains no metadata");
      }
      else
      {
         for (MetadataIndex index : indices)
         {
            String type = index.getType();

            // Only textual information stored in field contents (full-text search).
            // The queryable name is deliberately not added to contents.
            if ((type == null) || type.isEmpty() || "text/plain".equals(type))
            {
               doc.addField("contents", index.getValue());
            }

            MetadataType mt = metadataTypeService
                  .getMetadataTypeByName(product.getItemClass(), index.getName());
            SolrField sf = (mt != null) ? mt.getSolrField() : null;

            if (sf == null && index.getQueryable() == null)
            {
               // Neither a declared Solr field nor a queryable name: not searchable by field.
               continue;
            }

            // Field names must not depend on the JVM default locale (e.g. Turkish dotless i),
            // hence the explicit Locale.ROOT.
            String field_name = (sf != null)
                  ? sf.getName()
                  : index.getQueryable().toLowerCase(Locale.ROOT);

            // Only a field explicitly declared multi-valued accumulates values,
            // any other field keeps the last value set.
            if (sf != null && Boolean.TRUE.equals(sf.isMultiValued()))
            {
               doc.addField(field_name, index.getValue());
            }
            else
            {
               doc.setField(field_name, index.getValue());
            }
         }
      }

      // DHuS Attributes
      doc.setField("id", product.getId());
      doc.setField("uuid", product.getUuid());
      doc.setField("path", path);

      // Collections
      List<Collection> collections = collectionService.getCollectionsOfProduct(product);
      if (collections != null)
      {
         for (Collection collection : collections)
         {
            doc.addField("collection", collection.getName());
         }
      }

      return doc;
   }
}