/*
* Copyright 2015-Present Entando Inc. (http://www.entando.com) All rights reserved.
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*/
package com.agiletec.plugins.jacms.aps.system.services.searchengine;
import com.agiletec.aps.system.common.tree.ITreeNode;
import org.entando.entando.aps.system.services.searchengine.FacetedContentsResult;
import com.agiletec.aps.system.exception.ApsSystemException;
import com.agiletec.aps.system.services.group.Group;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexNotFoundException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.BytesRef;
import org.entando.entando.aps.system.services.searchengine.SearchEngineFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Data Access Object dedita alle operazioni di ricerca
* ad uso del motore di ricerca interno.
* @author E.Santoboni
*/
public class SearcherDAO implements ISearcherDAO {
private static final Logger _logger = LoggerFactory.getLogger(SearcherDAO.class);
/**
* Inizializzazione del searcher.
* @param dir La cartella locale contenitore dei dati persistenti.
* @throws ApsSystemException In caso di errore
*/
@Override
public void init(File dir) throws ApsSystemException {
this._indexDir = dir;
}
private IndexSearcher getSearcher() throws IOException {
FSDirectory directory = new SimpleFSDirectory(_indexDir);
IndexReader reader = DirectoryReader.open(directory);
IndexSearcher searcher = new IndexSearcher(reader);
return searcher;
}
private void releaseResources(IndexSearcher searcher) throws ApsSystemException {
try {
if (searcher != null) {
searcher.getIndexReader().close();
}
} catch (IOException e) {
throw new ApsSystemException("Error closing searcher", e);
}
}
@Override
public FacetedContentsResult searchFacetedContents(SearchEngineFilter[] filters,
Collection<ITreeNode> categories, Collection<String> allowedGroups) throws ApsSystemException {
return searchContents(filters, categories, allowedGroups, true);
}
/**
* Ricerca una lista di identificativi di contenuto in base
* ai filtri immessi.
* @param filters i filtri da applicare alla ricerca.
* @param categories Le categorie da applicare alla ricerca.
* @param allowedGroups I gruppi autorizzati alla visualizzazione. Nel caso
* che la collezione sia nulla o vuota, la ricerca sarĂ effettuata su contenuti
* referenziati con il gruppo "Ad accesso libero". Nel caso che nella collezione
* sia presente il gruppo degli "Amministratori", la ricerca produrrĂ un'insieme
* di identificativi di contenuto non filtrati per gruppo.
* @return La lista di identificativi contenuto.
* @throws ApsSystemException
*/
@Override
public List<String> searchContentsId(SearchEngineFilter[] filters,
Collection<ITreeNode> categories, Collection<String> allowedGroups) throws ApsSystemException {
return this.searchContents(filters, categories, allowedGroups, false).getContentsId();
}
protected FacetedContentsResult searchContents(SearchEngineFilter[] filters,
Collection<ITreeNode> categories, Collection<String> allowedGroups, boolean faceted) throws ApsSystemException {
FacetedContentsResult result = new FacetedContentsResult();
List<String> contentsId = new ArrayList<String>();
IndexSearcher searcher = null;
try {
searcher = this.getSearcher();
Query query = null;
if ((null == filters || filters.length == 0)
&& (null == categories || categories.isEmpty())
&& (allowedGroups != null && allowedGroups.contains(Group.ADMINS_GROUP_NAME))) {
query = new MatchAllDocsQuery();
} else {
query = this.createQuery(filters, categories, allowedGroups);
}
TopDocs topDocs = searcher.search(query, null, 1000);
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
Map<String, Integer> occurrences = new HashMap<String, Integer>();
if (scoreDocs.length > 0) {
for (int index=0; index<scoreDocs.length; index++) {
Document doc = searcher.doc(scoreDocs[index].doc);
contentsId.add(doc.get(IIndexerDAO.CONTENT_ID_FIELD_NAME));
if (faceted) {
Set<String> codes = new HashSet<String>();
String[] categoryPaths = doc.getValues(IIndexerDAO.CONTENT_CATEGORY_FIELD_NAME);
for (int i = 0; i < categoryPaths.length; i++) {
String categoryPath = categoryPaths[i];
String[] paths = categoryPath.split(IIndexerDAO.CONTENT_CATEGORY_SEPARATOR);
codes.addAll(Arrays.asList(paths));
}
Iterator<String> iter = codes.iterator();
while (iter.hasNext()) {
String code = iter.next();
Integer value = occurrences.get(code);
if (null == value) {
value = 0;
}
occurrences.put(code, (value+1));
}
}
}
}
result.setOccurrences(occurrences);
result.setContentsId(contentsId);
} catch (IndexNotFoundException inf) {
_logger.error("no index was found in the Directory", inf);
} catch (Throwable t) {
_logger.error("Error extracting documents", t);
throw new ApsSystemException("Error extracting documents", t);
} finally {
this.releaseResources(searcher);
}
return result;
}
protected Query createQuery(SearchEngineFilter[] filters,
Collection<ITreeNode> categories, Collection<String> allowedGroups) {
BooleanQuery mainQuery = new BooleanQuery();
if (filters != null && filters.length > 0) {
for (int i = 0; i < filters.length; i++) {
SearchEngineFilter filter = filters[i];
Query fieldQuery = this.createQuery(filter);
mainQuery.add(fieldQuery, BooleanClause.Occur.MUST);
}
}
if (allowedGroups == null) {
allowedGroups = new HashSet<String>();
}
if (!allowedGroups.contains(Group.ADMINS_GROUP_NAME)) {
if (!allowedGroups.contains(Group.FREE_GROUP_NAME)) {
allowedGroups.add(Group.FREE_GROUP_NAME);
}
BooleanQuery groupsQuery = new BooleanQuery();
Iterator<String> iterGroups = allowedGroups.iterator();
while (iterGroups.hasNext()) {
String group = iterGroups.next();
TermQuery groupQuery = new TermQuery(new Term(IIndexerDAO.CONTENT_GROUP_FIELD_NAME, group));
groupsQuery.add(groupQuery, BooleanClause.Occur.SHOULD);
}
mainQuery.add(groupsQuery, BooleanClause.Occur.MUST);
}
if (null != categories && !categories.isEmpty()) {
BooleanQuery categoriesQuery = new BooleanQuery();
Iterator<ITreeNode> cateIter = categories.iterator();
while (cateIter.hasNext()) {
ITreeNode category = cateIter.next();
String path = category.getPath(IIndexerDAO.CONTENT_CATEGORY_SEPARATOR, false);
TermQuery categoryQuery = new TermQuery(new Term(IIndexerDAO.CONTENT_CATEGORY_FIELD_NAME, path));
categoriesQuery.add(categoryQuery, BooleanClause.Occur.MUST);
}
mainQuery.add(categoriesQuery, BooleanClause.Occur.MUST);
}
return mainQuery;
}
private Query createQuery(SearchEngineFilter filter) {
BooleanQuery fieldQuery = new BooleanQuery();
String key = filter.getKey();
String attachmentKey = key + IIndexerDAO.ATTACHMENT_FIELD_SUFFIX;
Object value = filter.getValue();
if (null != value) {
if (value instanceof String) {
SearchEngineFilter.TextSearchOption option = filter.getTextSearchOption();
if (null == option) {
option = SearchEngineFilter.TextSearchOption.AT_LEAST_ONE_WORD;
}
String stringValue = value.toString();
String[] values = stringValue.split("\\s+");
if (!option.equals(SearchEngineFilter.TextSearchOption.EXACT)) {
BooleanClause.Occur bc = BooleanClause.Occur.SHOULD;
if (option.equals(SearchEngineFilter.TextSearchOption.ALL_WORDS)) {
bc = BooleanClause.Occur.MUST;
} else if (option.equals(SearchEngineFilter.TextSearchOption.ANY_WORD)) {
bc = BooleanClause.Occur.MUST_NOT;
}
for (int i = 0; i < values.length; i++) {
TermQuery term = new TermQuery(new Term(key, values[i].toLowerCase()));
//NOTE: search lower case....
if (filter.isIncludeAttachments()) {
BooleanQuery compositeQuery = new BooleanQuery();
compositeQuery.add(term, BooleanClause.Occur.SHOULD);
TermQuery termAttachment = new TermQuery(new Term(attachmentKey, values[i].toLowerCase()));
compositeQuery.add(termAttachment, BooleanClause.Occur.SHOULD);
fieldQuery.add(compositeQuery, bc);
} else {
fieldQuery.add(term, bc);
}
}
} else {
PhraseQuery phraseQuery = new PhraseQuery();
for (int i = 0; i < values.length; i++) {
//NOTE: search lower case....
phraseQuery.add(new Term(key, values[i].toLowerCase()));
}
if (filter.isIncludeAttachments()) {
fieldQuery.add(phraseQuery, BooleanClause.Occur.SHOULD);
PhraseQuery phraseQuery2 = new PhraseQuery();
for (int i = 0; i < values.length; i++) {
//NOTE: search lower case....
phraseQuery2.add(new Term(attachmentKey, values[i].toLowerCase()));
}
fieldQuery.add(phraseQuery2, BooleanClause.Occur.SHOULD);
} else {
return phraseQuery;
}
}
} else if (value instanceof Date) {
String toString = DateTools.timeToString(((Date) value).getTime(), DateTools.Resolution.MINUTE);
TermQuery term = new TermQuery(new Term(filter.getKey(), toString));
fieldQuery.add(term, BooleanClause.Occur.MUST);
} else if (value instanceof Number) {
TermQuery term = new TermQuery(new Term(filter.getKey(), value.toString()));
fieldQuery.add(term, BooleanClause.Occur.MUST);
}
} else {
if (filter.getStart() instanceof Number || filter.getEnd() instanceof Number) {
//.............................. TODO
} else {
String start = null;
String end = null;
if (filter.getStart() instanceof Date || filter.getEnd() instanceof Date) {
if (null != filter.getStart()) {
start = DateTools.timeToString(((Date) filter.getStart()).getTime(), DateTools.Resolution.MINUTE);
}
if (null != filter.getEnd()) {
end = DateTools.timeToString(((Date) filter.getEnd()).getTime(), DateTools.Resolution.MINUTE);
}
} else {
start = (null != filter.getStart()) ? filter.getStart().toString().toLowerCase() : null;
end = (null != filter.getEnd()) ? filter.getEnd().toString().toLowerCase() : null;
}
BytesRef byteStart = (null != start) ? new BytesRef(start.getBytes()) : null;
BytesRef byteEnd = (null != end) ? new BytesRef(end.getBytes()) : null;
TermRangeQuery range = new TermRangeQuery(filter.getKey(), byteStart, byteEnd, true, true);
fieldQuery.add(range, BooleanClause.Occur.MUST);
}
}
return fieldQuery;
}
@Override
public void close() {
// nothing to do
}
private File _indexDir;
}