/* * This is eMonocot, a global online biodiversity information resource. * * Copyright © 2011–2015 The Board of Trustees of the Royal Botanic Gardens, Kew and The University of Oxford * * eMonocot is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * eMonocot is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * The complete text of the GNU Affero General Public License is in the source repository as the file * ‘COPYING’. It is also available from <http://www.gnu.org/licenses/>. */ package org.emonocot.persistence.dao.hibernate; import java.io.BufferedReader; import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.FacetField; import org.apache.solr.client.solrj.response.Group; import org.apache.solr.client.solrj.response.GroupCommand; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.params.FacetParams; import org.emonocot.api.autocomplete.Match; import org.emonocot.model.Base; import org.emonocot.pager.CellSet; import org.emonocot.pager.Cube; import org.emonocot.pager.DefaultPageImpl; import org.emonocot.pager.FacetName; import org.emonocot.pager.Level; import org.emonocot.pager.Page; import org.emonocot.persistence.dao.SearchableDao; import org.hibernate.ObjectNotFoundException; import org.springframework.beans.factory.annotation.Autowired; /** * * @author ben * * @param <T> */ public abstract class SearchableDaoImpl<T extends Base> extends DaoImpl<T> implements SearchableDao<T> { private SolrServer solrServer = null; @Autowired public void setSolrServer(SolrServer solrServer) { this.solrServer = solrServer; } /** * Does this DAO search for SearchableObjects? * @return */ protected boolean isSearchableObject() { return true; } /** * * @param newType * Set the type of object handled by this class * @param searchTypes * Set the subclasses of T to be searched for */ public SearchableDaoImpl(final Class<T> newType) { super(newType); } /** * @param query * A lucene query * @param spatialQuery * A spatial query to filter the results by * @param pageSize * The maximum number of results to return * @param pageNumber * The offset (in pageSize chunks, 0-based) from the beginning of * the recordset * @param facets * The names of the facets you want to calculate * @param selectedFacets * A map of facets which you would like to restrict the search by * @param sort * A representation for the order results should be returned in * @param fetch * Set the fetch profile * @return a Page from the resultset * @throws SolrServerException */ public final Page<T> search(final String query, final String spatialQuery, final Integer pageSize, final Integer pageNumber, final String[] facets, Map<String, String> facetPrefixes, final Map<String, String> selectedFacets, final String sort, final String fetch) throws SolrServerException { SolrQuery solrQuery = prepareQuery(query, sort, pageSize, pageNumber, selectedFacets); solrQuery.set("spellcheck", "true"); solrQuery.set("spellcheck.collate", "true"); solrQuery.set("spellcheck.count", "1"); solrQuery.set("bq", "base.class_s:org.emonocot.model.Taxon^2.0"); // Filter the searchable objects out solrQuery.addFilterQuery("base.class_searchable_b:" + isSearchableObject()); if (spatialQuery != null && spatialQuery.trim().length() != 0) { solrQuery.addFilterQuery(spatialQuery); } if (facets != null && facets.length != 0) { solrQuery.setFacet(true); solrQuery.setFacetMinCount(1); solrQuery.setFacetSort(FacetParams.FACET_SORT_INDEX); for(String facet : facets) { if(facet.equals("base.class_s")) { solrQuery.setParam("f.base.class_s.facet.sort", FacetParams.FACET_SORT_COUNT); } if(facet.endsWith("_dt")) { /** * Is a date facet. Once Solr 4.2 is released, we can implement variable length buckets, but for now * stick with fixed buckets https://issues.apache.org/jira/browse/SOLR-2366 */ solrQuery.add("facet.range",facet); solrQuery.add("f." + facet + ".facet.range.start","NOW/DAY-1YEARS"); solrQuery.add("f." + facet + ".facet.range.end","NOW/DAY"); solrQuery.add("f." + facet + ".facet.range.gap","+1MONTH"); } else { solrQuery.addFacetField(facet); } includeMissing(solrQuery, facet); } if(facetPrefixes != null) { for(String facet : facetPrefixes.keySet()) { solrQuery.add("f." + facet + ".facet.prefix",facetPrefixes.get(facet)); } } } QueryResponse queryResponse = solrServer.query(solrQuery); List<T> results = new ArrayList<T>(); for(SolrDocument solrDocument : queryResponse.getResults()) { T object = loadObjectForDocument(solrDocument); enableProfilePostQuery(object, fetch); results.add(object); } Long totalResults = new Long(queryResponse.getResults().getNumFound()); Page<T> page = new DefaultPageImpl<T>(totalResults.intValue(), pageNumber, pageSize, results, queryResponse); if(selectedFacets != null) { page.setSelectedFacets(selectedFacets); } page.setSort(sort); return page; } private void includeMissing(SolrQuery solrQuery, String facet) { try { FacetName fn = FacetName.fromString(facet); if(fn != null && fn.isIncludeMissing()) { solrQuery.set("f." + fn.getSolrField() + ".facet.missing", true); } } catch (IllegalArgumentException e) { logger.debug("Unable to find a facet for " + facet); } } public List<Match> autocomplete(String query, Integer pageSize, Map<String, String> selectedFacets) throws SolrServerException { SolrQuery solrQuery = new SolrQuery(); if (query != null && !query.trim().equals("")) { //String searchString = query.trim().replace(" ", "+"); solrQuery.setQuery(query); } else { return new ArrayList<Match>(); } // Filter the searchable objects out solrQuery.addFilterQuery("base.class_searchable_b:" + isSearchableObject()); // Set additional result parameters solrQuery.setRows(pageSize); if(selectedFacets != null && !selectedFacets.isEmpty()) { for(String facetName : selectedFacets.keySet()) { solrQuery.addFilterQuery(facetName + ":" + selectedFacets.get(facetName)); } } solrQuery.set("defType","edismax"); solrQuery.set("qf", "autocomplete^3 autocompleteng"); solrQuery.set("pf", "autocompletenge"); solrQuery.set("fl","autocomplete,id"); solrQuery.setHighlight(true); solrQuery.set("hl.fl", "autocomplete"); solrQuery.set("hl.snippets",3); solrQuery.setHighlightSimplePre("<b>"); solrQuery.setHighlightSimplePost("</b>"); solrQuery.set("group","true"); solrQuery.set("group.field", "autocomplete"); QueryResponse queryResponse = solrServer.query(solrQuery); List<Match> results = new ArrayList<Match>(); Map<String,Match> matchMap = new HashMap<String,Match>(); for(GroupCommand groupCommand : queryResponse.getGroupResponse().getValues()) { for (Group group : groupCommand.getValues()) { for (SolrDocument solrDocument : group.getResult()) { Match match = new Match(); String label = filter((String) solrDocument.get("autocomplete")); match.setLabel(label); match.setValue(label); matchMap.put((String) solrDocument.get("id"), match); results.add(match); } } } for(String documentId : matchMap.keySet()) { if(queryResponse.getHighlighting().containsKey(documentId)) { Map<String, List<String>> highlightedTerms = queryResponse.getHighlighting().get(documentId); if(highlightedTerms.containsKey("autocomplete")) { matchMap.get(documentId).setLabel(highlightedTerms.get("autocomplete").get(0)); } } } return results; } private String filter(String value) { StringBuilder out = new StringBuilder(); StringReader strReader = new StringReader(value); try { HTMLStripCharFilter html = new HTMLStripCharFilter(new BufferedReader(strReader)); char[] cbuf = new char[1024 * 10]; while (true) { int count = html.read(cbuf); if (count == -1) break; // end of stream mark is -1 if (count > 0) out.append(cbuf, 0, count); } html.close(); } catch (IOException e) { throw new RuntimeException("Failed stripping HTML for value: " + value, e); } return out.toString(); } @Override public Page<SolrDocument> searchForDocuments(String query, Integer pageSize, Integer pageNumber, Map<String, String> selectedFacets, String sort) throws SolrServerException { SolrQuery solrQuery = prepareQuery(query, sort, pageSize, pageNumber, selectedFacets); QueryResponse queryResponse = solrServer.query(solrQuery); Long totalResults = new Long(queryResponse.getResults().getNumFound()); Page<SolrDocument> page = new DefaultPageImpl<SolrDocument>(totalResults.intValue(), pageNumber, pageSize, queryResponse.getResults(), queryResponse); if(selectedFacets != null) { page.setSelectedFacets(selectedFacets); } page.setSort(sort); return page; } @Override public T loadObjectForDocument(SolrDocument solrDocument) { try { Class clazz = Class.forName((String)solrDocument.getFieldValue("base.class_s")); Long id = (Long) solrDocument.getFieldValue("base.id_l"); T t = (T) getSession().load(clazz, id); t.getIdentifier(); return t; } catch (ClassNotFoundException cnfe) { throw new RuntimeException("Could not instantiate search result", cnfe); } catch (ObjectNotFoundException onfe) { return null; } } public CellSet analyse(String rows, String cols, Integer firstCol, Integer maxCols, Integer firstRow, Integer maxRows, Map<String, String> selectedFacets, String[] facets, Cube cube) throws SolrServerException { SolrQuery query = new SolrQuery(); query.setQuery("*:*"); SolrQuery totalQuery = new SolrQuery(); totalQuery.setQuery("*:*"); // We're not interested in the results themselves query.setRows(1); query.setStart(0); totalQuery.setRows(1); totalQuery.setStart(0); if(rows == null) { query.setFacet(true); query.setFacetMinCount(1); query.setFacetSort(FacetParams.FACET_SORT_INDEX); query.addFacetField(cube.getDefaultLevel()); includeMissing(query,cube.getDefaultLevel()); includeMissing(totalQuery,cube.getDefaultLevel()); if (maxRows != null) { totalQuery.setFacet(true); totalQuery.setFacetMinCount(1); totalQuery.addFacetField("{!key=totalRows}" + cube.getDefaultLevel()); query.add("f." + cube.getDefaultLevel() + ".facet.limit", maxRows.toString()); query.add("f." + cube.getDefaultLevel() + ".facet.mincount", "1"); if (firstRow != null) { query.add("f." + cube.getDefaultLevel() + ".facet.offset", firstRow.toString()); } } } else if(cols == null) { query.setFacet(true); query.setFacetMinCount(1); query.setFacetSort(FacetParams.FACET_SORT_INDEX); query.addFacetField(rows); includeMissing(query,rows); includeMissing(totalQuery,rows); if (maxRows != null) { totalQuery.setFacet(true); totalQuery.setFacetMinCount(1); totalQuery.addFacetField("{!key=totalRows}"+rows); query.add("f." + rows + ".facet.limit", maxRows.toString()); query.add("f." + rows + ".facet.mincount", "1"); if (firstRow != null) { query.add("f." + rows + ".facet.offset", firstRow.toString()); } } if(cube.getLevel(rows).isMultiValued() && cube.getLevel(rows).getHigher() != null) { Level higher = cube.getLevel(rows).getHigher(); totalQuery.add("f." + rows + ".facet.prefix",selectedFacets.get(higher.getFacet()) + "_"); query.add("f." + rows + ".facet.prefix",selectedFacets.get(higher.getFacet()) + "_"); } } else { query.setFacet(true); query.setFacetMinCount(1); query.setFacetSort(FacetParams.FACET_SORT_INDEX); query.addFacetField(rows); includeMissing(query,rows); includeMissing(totalQuery,rows); if (maxRows != null) { totalQuery.setFacet(true); totalQuery.setFacetMinCount(1); totalQuery.addFacetField("{!key=totalRows}"+rows); query.add("f." + rows + ".facet.limit", maxRows.toString()); query.add("f." + rows + ".facet.mincount", "1"); if (firstRow != null) { query.add("f." + rows + ".facet.offset", firstRow.toString()); } } if(cube.getLevel(rows).isMultiValued() && cube.getLevel(rows).getHigher() != null) { Level higher = cube.getLevel(rows).getHigher(); totalQuery.add("f." + rows + ".facet.prefix",selectedFacets.get(higher.getFacet()) + "_"); query.add("f." + rows + ".facet.prefix",selectedFacets.get(higher.getFacet()) + "_"); } query.addFacetField(cols); includeMissing(query,cols); if (maxCols != null) { totalQuery.setFacet(true); totalQuery.setFacetMinCount(1); totalQuery.addFacetField("{!key=totalCols}"+cols); /** * Facet pivot does not behave the same way on columns - the limit is */ //query.add("f." + cols + ".facet.limit", maxCols.toString()); //query.add("f." + cols + ".facet.mincount", "1"); //if (firstCol != null) { // query.add("f." + cols + ".facet.offset", firstCol.toString()); //} } if(cube.getLevel(cols).isMultiValued() && cube.getLevel(cols).getHigher() != null) { Level higher = cube.getLevel(cols).getHigher(); totalQuery.add("f." + cols + ".facet.prefix",selectedFacets.get(higher.getFacet()) + "_"); query.add("f." + cols + ".facet.prefix",selectedFacets.get(higher.getFacet()) + "_"); } query.addFacetPivotField(rows + "," + cols); } if (selectedFacets != null && !selectedFacets.isEmpty()) { for (String facetName : selectedFacets.keySet()) { String facetValue = selectedFacets.get(facetName); if(StringUtils.isNotEmpty(facetValue)) { totalQuery.addFilterQuery(facetName + ":" + selectedFacets.get(facetName)); query.addFilterQuery(facetName + ":" + selectedFacets.get(facetName)); } else {//Subtract/Exclude documents with any value for the facet totalQuery.addFilterQuery("-" + facetName + ":[* TO *]"); query.addFilterQuery("-" + facetName + ":[* TO *]"); } } } if (facets != null && facets.length != 0) { query.setFacet(true); query.setFacetMinCount(1); query.setFacetSort(FacetParams.FACET_SORT_INDEX); for (String facetName : facets) { if(rows != null && rows.equals(facetName)) { } else if(cols != null && cols.equals(facetName)) { } else if(rows == null && facetName.equals(cube.getDefaultLevel())) { } else { includeMissing(query,facetName); query.addFacetField(facetName); } } } QueryResponse response = solrServer.query(query); QueryResponse totalResponse = solrServer.query(totalQuery); FacetField totalRows = null; FacetField totalCols = null; if (totalResponse.getFacetField("totalRows") != null) { totalRows = totalResponse.getFacetField("totalRows"); } if (totalResponse.getFacetField("totalCols") != null) { totalCols = totalResponse.getFacetField("totalCols"); } CellSet cellSet = new CellSet(response, selectedFacets, query, rows, cols, firstRow, maxRows, firstCol, maxCols, totalRows, totalCols, cube); return cellSet; } /** * Prepares a {@link SolrQuery} with the parameters passed in * @param query * @param sort * @param pageSize * @param pageNumber * @param selectedFacets * @return A {@link SolrQuery} that can be customised before passing to a {@link SolrServer} */ protected SolrQuery prepareQuery(String query, String sort, Integer pageSize, Integer pageNumber, Map<String,String> selectedFacets){ SolrQuery solrQuery = new SolrQuery(); if (query != null && !query.trim().equals("")) { String searchString = null; if (query.indexOf(":") != -1) { searchString = query; } else { // replace spaces with '+' so that we search on terms searchString = query.trim().replace(" ", "+"); solrQuery.set("defType","edismax"); solrQuery.set("qf", "searchable.label_sort searchable.solrsummary_t"); } solrQuery.setQuery(searchString); } else { solrQuery.set("defType","edismax"); solrQuery.set("qf", "searchable.label_sort searchable.solrsummary_t"); solrQuery.setQuery("*:*"); } if (sort != null && sort.length() != 0) { for(String singleSort : sort.split(",")) { if(singleSort.equals("_asc")) { //Do nothing } else if(singleSort.endsWith("_asc")) { String sortField = singleSort.substring(0,singleSort.length() - 4); solrQuery.addSortField(sortField, SolrQuery.ORDER.asc); } else if(singleSort.endsWith("_desc")) { String sortField = singleSort.substring(0,singleSort.length() - 5); solrQuery.addSortField(sortField, SolrQuery.ORDER.desc); } } } if (pageSize != null) { solrQuery.setRows(pageSize); if (pageNumber != null) { solrQuery.setStart(pageSize * pageNumber); } } if(selectedFacets != null && !selectedFacets.isEmpty()) { for(String facetName : selectedFacets.keySet()) { String facetValue = selectedFacets.get(facetName); if(StringUtils.isNotEmpty(facetValue)) { solrQuery.addFilterQuery(facetName + ":" + selectedFacets.get(facetName)); } else {//Subtract/Exclude documents with any value for the facet solrQuery.addFilterQuery("-" + facetName + ":[* TO *]"); } } } return solrQuery; } }