/* * Data Hub Service (DHuS) - For Space data distribution. * Copyright (C) 2013,2014,2015,2016 GAEL Systems * * This file is part of DHuS software sources. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package fr.gael.dhus.service; import java.io.IOException; import java.util.AbstractList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.Suggestion; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.stereotype.Service; import fr.gael.dhus.database.object.Collection; import fr.gael.dhus.database.object.MetadataIndex; import fr.gael.dhus.database.object.Product; import fr.gael.dhus.search.DHusSearchException; import fr.gael.dhus.search.SolrDao; import fr.gael.dhus.service.metadata.MetadataType; import fr.gael.dhus.service.metadata.SolrField; @Service public class SearchService extends WebService { /** Logger. */ private static final Logger LOGGER = LogManager.getLogger(SearchService.class); /** Autowired dependency. */ @Autowired private SolrDao solrDao; /** Autowired dependency. */ @Autowired private CollectionService collectionService; /** Autowired dependency. */ @Autowired private ProductService productService; /** Autowired dependency. */ @Autowired private MetadataTypeService metadataTypeService; /** * Indexes or Reindexes a product. * {@link Product#getId()} is the unique key in the index. * @param product a product. */ public void index(Product product) { try { long start = System.currentTimeMillis(); SolrInputDocument doc = toInputDocument(product); LOGGER.debug("Indexing product '" + product.getPath() + "'"); try { solrDao.index(doc); } catch (IOException | SolrServerException e) { LOGGER.error("Cannot index product", e); return; } long end = System.currentTimeMillis(); LOGGER.info("Indexed product in " + (end - start) + "ms"); } catch (Exception e) { LOGGER.error("Cannot index product", e); } } /** * Removes the given product from the index. * @param product to remove. */ public void remove(Product product) { try { solrDao.remove(product.getId()); } catch (Exception ex) { LOGGER.error("Cannot remove product " + product.getIdentifier() + "from index", ex); } } /** * Updates the given product from the index. * @param product to update. */ public void update(Product product) { index(product); } /** * Paginated search for system operations. * @param query Solr query `q` parameter. * @return an iterator of found products. */ public Iterator<Product> search(String query) { try { final Iterator<SolrDocument> it = solrDao.scroll(new SolrQuery(query)); return new Iterator<Product>() { @Override public boolean hasNext() { return it.hasNext(); } @Override public Product next() { return productService.getProduct((Long) it.next().get("id")); } @Override public void remove() { productService.deleteProduct((Long) it.next().get("id")); } }; } catch (IOException|SolrServerException ex) { LOGGER.error("An exception occured while searching", ex); } return Collections.EMPTY_LIST.iterator(); } /** * Search. * <p> * Set `start` and `rows` values in the SolrQuery parameter to paginate the results.<br> * <strong>If no `rows` have been set, solr will only return 10 documents, no more.</strong> * <p> * To get the total number of document matching the given query, use {@code res.getNumFound()}. * * @param query a SolrQuery with at least a 'q' parameter set. * @return A list of solr document matching the given query. */ @PreAuthorize("hasRole('ROLE_SEARCH')") public SolrDocumentList search(SolrQuery query) { Objects.requireNonNull(query); query.setQuery(solrDao.updateQuery(query.getQuery())); try { return solrDao.search(query).getResults(); } catch (SolrServerException | IOException ex) { LOGGER.error(ex); throw new DHusSearchException("An exception occured while searching", ex); } } /** * Returns the product associated with the given solr document. * @param doc Index entry for a product, are returned by {@link #search(SolrQuery)}. * @return A product (database object). */ public Product asProduct(SolrDocument doc) { Long pid = Long.class.cast(doc.get("id")); return productService.getProduct(pid); } /** * Returns how many solr documents match the given query. * @param query a solr `q` query. * @return solr document count. * @deprecated use {@link #search(SolrQuery)}{@code .getNumFound()}. */ @Deprecated @PreAuthorize("hasRole('ROLE_SEARCH')") public int getResultCount(String query) { try { query = solrDao.updateQuery(query); return (int) solrDao.search(new SolrQuery(query)).getResults().getNumFound(); } catch (SolrServerException | IOException ex) { LOGGER.error(ex); throw new DHusSearchException("An exception occured while searching", ex); } } /** * Returns a list of suggestions for the given input. * @param input search input. * @return list of suggestions. */ @PreAuthorize("hasRole('ROLE_SEARCH')") public List<String> getSuggestions(String input) { try { final List<Suggestion> sggs = solrDao.getSuggestions(input).getSuggestions().get("suggest"); return new AbstractList<String>() { @Override public String get(int index) { return sggs.get(index).getTerm(); } @Override public int size() { return sggs.size(); } }; } catch (IOException|SolrServerException ex) { LOGGER.error("Cannot get suggestions from Solr", ex); } return Collections.emptyList(); } /** * Integrity check. */ public void checkIndex() { try { SolrQuery query = new SolrQuery("*:*"); query.setFilterQueries("*"); query.setStart(0); Iterator<SolrDocument> it = solrDao.scroll(query); while (it.hasNext()) { SolrDocument doc = it.next(); Long pid = (Long) doc.get("id"); Product product = productService.systemGetProduct(pid); if (product == null) { Long id = (Long) doc.getFieldValue("id"); LOGGER.warn("Removing unknown product " + id + " from solr index"); try { solrDao.remove(id); // decrease the offset, because a product has been removed query.setStart(query.getStart() - 1); } catch (IOException e) { LOGGER.error("Cannot remove Solr entry " + id, e); } } } } catch (IOException|SolrServerException ex) { LOGGER.error("Cannot check the index", ex); } } /** * Optimize the index, merges every segment of the index into one monolithic file. * Optimizing is very expensive, and if the index is constantly changing, * the slight performance boost will not last long... * The tradeoff is not often worth it for a non static index. * <p> * Blocking method, will block until optimization is complete. Solr won't respond to * search queries until optimization is done. */ public void optimizeIndex() { try { solrDao.optimize(); } catch (IOException|SolrServerException ex) { LOGGER.error("Cannot optimize index", ex); } } /** * Wipes the current index and reindex everything from the DataBase. */ public void fullReindex() { try { solrDao.removeAll(); long start = System.currentTimeMillis(); final Iterator<Product> products = productService.systemGetProducts(null, null, 0); if (!products.hasNext()) { LOGGER.warn("Reindex: table PRODUCTS is empty, aborting..."); return; } // Makes an adaptor for SolrDao#batchIndex(...) Iterator<SolrInputDocument> it = new Iterator<SolrInputDocument>() { @Override public boolean hasNext() { return products.hasNext(); } @Override public SolrInputDocument next() { Product product = products.next(); product.setIndexes(productService.getIndexes(product.getId())); return toInputDocument(product); } @Override public void remove() { throw new UnsupportedOperationException("Do not use remove()."); } }; // Best config for bulk reindex // see: http://lucidworks.com/blog/2013/08/23/understanding-transaction-logs-softcommit-and-commit-in-sorlcloud/ Map<String, String> config = new HashMap<>(); config.put("updateHandler.autoSoftCommit.maxDocs", "-1"); // Opens a new searcher (the slowest operation). config.put("updateHandler.autoSoftCommit.maxTime", "-1"); // Opens a new searcher (the slowest operation). config.put("updateHandler.autoCommit.maxDocs", "-1"); // Time based autocommit is better. config.put("updateHandler.autoCommit.maxTime", "60000"); // 1 minute, controls the size of tlog files. config.put("updateHandler.autoCommit.openSearcher", "false"); // Opens a new searcher (the slowest operation). solrDao.setProperties(config); solrDao.batchIndex(it); solrDao.optimize(); solrDao.unsetProperties(config.keySet()); LOGGER.info("Full reindex done in " + (System.currentTimeMillis() - start) + "ms"); } catch (IOException | SolrServerException ex) { LOGGER.error("Failed to reindex", ex); } } /** * Makes a SolrInputDocument from a Product database object. * The returned document can be indexed as is. * @param product to convert. * @return an indexable solr document. */ private SolrInputDocument toInputDocument(Product product) { String path = product.getPath().toString(); if (path.startsWith("/")) // FIXME: should be done by the ingestion process! { path="file:/" + path; } SolrInputDocument doc = new SolrInputDocument(); // Metadatas List<MetadataIndex> indices = product.getIndexes(); if (indices != null && !indices.isEmpty()) { for (MetadataIndex index : indices) { String type = index.getType(); // Only textual information stored in field contents (full-text search) if ((type == null) || type.isEmpty() || "text/plain".equals(type)) { doc.addField("contents", index.getValue()); } // next line is considered bad practice: //doc.addField("contents", index.getQueryable()); MetadataType mt = metadataTypeService .getMetadataTypeByName(product.getItemClass(), index.getName()); SolrField sf = (mt != null)? mt.getSolrField(): null; if (sf != null || index.getQueryable() != null) { Boolean is_multivalued = (sf != null)? sf.isMultiValued(): null; String field_name = (sf != null)? sf.getName(): index.getQueryable().toLowerCase(); if (is_multivalued != null && is_multivalued) { doc.addField(field_name, index.getValue()); } else { doc.setField(field_name, index.getValue()); } //LOGGER.debug("Added " + field_name + ":" + index.getValue()); } } } else { LOGGER.warn("Product '" + product.getIdentifier() + "' contains no metadata"); } // DHuS Attributes doc.setField("id", product.getId()); doc.setField("uuid", product.getUuid()); doc.setField("path", path); // Collections List<Collection> collections = collectionService.getCollectionsOfProduct(product); if (collections != null && !collections.isEmpty()) { for (Collection collection : collections) { doc.addField("collection", collection.getName()); } } return doc; } }