/*
* Constellation - An open source and standard compliant SDI
* http://www.constellation-sdi.org
*
* Copyright 2014 Geomatys.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.constellation.sos.io.lucene;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.geotoolkit.lucene.IndexingException;
import org.geotoolkit.lucene.SearchingException;
import org.geotoolkit.lucene.filter.SerialChainFilter;
import org.geotoolkit.lucene.filter.SpatialQuery;
import org.geotoolkit.lucene.index.LuceneIndexSearcher;
import org.geotoolkit.observation.ObservationResult;
import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import static org.constellation.sos.ws.SOSUtils.unLuceneTimeValue;
// Apache Lucene dependencies
// constellation dependencies
// Geotoolkit dependencies
/**
* A Lucene searcher for an index connected to an O&M DataSource.
*
* @author Guilhem Legal (Geomatys)
*/
public class LuceneObservationSearcher extends LuceneIndexSearcher {
/**
* Build a new index searcher with the index located in the specified directory.
* The index directory path must be :
* <configDir path>/<serviceID>index-<some timestamp number>
*
* @param configDir A directory containing the lucene index directory.
* @param serviceID The identifier of the index/service
* @throws IndexingException
*/
public LuceneObservationSearcher(final File configDir, final String serviceID) throws IndexingException {
super(configDir, serviceID, new WhitespaceAnalyzer(Version.LUCENE_46), false);
}
/**
* This method proceed a lucene search and returns a list of ObservationResult.
*
* @param spatialQuery The lucene query string with spatial filters.
*
* @return A List of Observation result..
*/
public List<ObservationResult> doResultSearch(SpatialQuery spatialQuery) throws SearchingException {
final Query simpleQuery = new TermQuery(new Term("metafile", "doc"));
try {
final long start = System.currentTimeMillis();
final List<ObservationResult> results = new ArrayList<>();
int maxRecords = (int) searcher.collectionStatistics("id").maxDoc();
if (maxRecords == 0) {
LOGGER.severe("The index seems to be empty.");
maxRecords = 1;
}
final String field = "Title";
final QueryParser parser = new QueryParser(Version.LUCENE_46, field, analyzer);
parser.setDefaultOperator(Operator.AND);
// we enable the leading wildcard mode if the first character of the query is a '*'
if (spatialQuery.getQuery().indexOf(":*") != -1 || spatialQuery.getQuery().indexOf(":?") != -1 || spatialQuery.getQuery().indexOf(":(*") != -1
|| spatialQuery.getQuery().indexOf(":(+*") != -1 || spatialQuery.getQuery().indexOf(":+*") != -1) {
parser.setAllowLeadingWildcard(true);
BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
}
LOGGER.log(Level.FINER, "before parse:{0}", spatialQuery.getQuery());
final Query query = parser.parse(spatialQuery.getQuery());
final Filter filter = spatialQuery.getSpatialFilter();
final int operator = spatialQuery.getLogicalOperator();
final Sort sort = spatialQuery.getSort();
String sorted = "no Sorted";
if (sort != null) {
sorted = "order by: " + sort.toString();
}
String f = "no Filter";
if (filter != null) {
f = filter.toString();
}
LOGGER.log(logLevel, "Searching for result: " + query.toString(field) + '\n' + SerialChainFilter.valueOf(operator) + '\n' + f + '\n' + sorted + '\n' + "max records: " + maxRecords);
// simple query with an AND
if (operator == SerialChainFilter.AND || (operator == SerialChainFilter.OR && filter == null)) {
final TopDocs docs;
if (sort != null) {
docs = searcher.search(query, filter, maxRecords, sort);
} else {
docs = searcher.search(query, filter, maxRecords);
}
for (ScoreDoc doc : docs.scoreDocs) {
final ObservationResult or = getObservationResult(searcher.doc(doc.doc));
results.add(or);
}
// for a OR we need to perform many request
} else if (operator == SerialChainFilter.OR) {
final TopDocs hits1;
final TopDocs hits2;
if (sort != null) {
hits1 = searcher.search(query, null, maxRecords, sort);
hits2 = searcher.search(simpleQuery, spatialQuery.getSpatialFilter(), maxRecords, sort);
} else {
hits1 = searcher.search(query, maxRecords);
hits2 = searcher.search(simpleQuery, spatialQuery.getSpatialFilter(), maxRecords);
}
for (ScoreDoc doc : hits1.scoreDocs) {
final ObservationResult or = getObservationResult(searcher.doc(doc.doc));
results.add(or);
}
for (ScoreDoc doc : hits2.scoreDocs) {
final ObservationResult or = getObservationResult(searcher.doc(doc.doc));
if (!results.contains(or)) {
results.add(or);
}
}
// for a NOT we need to perform many request
} else if (operator == SerialChainFilter.NOT) {
final TopDocs hits1;
if (sort != null) {
hits1 = searcher.search(query, filter, maxRecords, sort);
} else {
hits1 = searcher.search(query, filter, maxRecords);
}
final List<ObservationResult> unWanteds = new ArrayList<>();
for (ScoreDoc doc : hits1.scoreDocs) {
final ObservationResult or = getObservationResult(searcher.doc(doc.doc));
unWanteds.add(or);
}
final TopDocs hits2;
if (sort != null) {
hits2 = searcher.search(simpleQuery, null, maxRecords, sort);
} else {
hits2 = searcher.search(simpleQuery, maxRecords);
}
for (ScoreDoc doc : hits2.scoreDocs) {
final ObservationResult or = getObservationResult(searcher.doc(doc.doc));
if (!unWanteds.contains(or)) {
results.add(or);
}
}
} else {
throw new IllegalArgumentException("unsupported logical Operator");
}
// if we have some subQueries we execute it separely and merge the result
if (spatialQuery.getSubQueries().size() > 0) {
final SpatialQuery sub = spatialQuery.getSubQueries().get(0);
final List<ObservationResult> subResults = doResultSearch(sub);
for (ObservationResult r : results) {
if (!subResults.contains(r)) {
results.remove(r);
}
}
}
LOGGER.log(logLevel, results.size() + " total matching documents (" + (System.currentTimeMillis() - start) + "ms)");
return results;
} catch (ParseException ex) {
throw new SearchingException("Parse Exception while performing lucene request", ex);
} catch (IOException ex) {
throw new SearchingException("IO Exception while performing lucene request", ex);
}
}
/**
* Return an observationResult from a Lucene document.
*
* @param d A lucene document.
*
* @return an observationResult containing the id and the time period of the observation.
*/
private ObservationResult getObservationResult(final Document d) {
Timestamp begin = null;
Timestamp end = null;
try {
final String timeBegin = d.get("sampling_time_begin");
if (timeBegin != null) {
begin = Timestamp.valueOf(unLuceneTimeValue(timeBegin));
}
} catch (IllegalArgumentException ex) {
LOGGER.log(logLevel, "unable to parse the timestamp");
}
try {
final String timeEnd = d.get("sampling_time_end");
if (timeEnd != null) {
end = Timestamp.valueOf(unLuceneTimeValue(timeEnd));
}
} catch (IllegalArgumentException ex) {
LOGGER.log(logLevel, "unable to parse the timestamp");
}
return new ObservationResult(d.get("id"), begin, end);
}
}