/* See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * Esri Inc. licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.esri.gpt.catalog.lucene; import com.esri.gpt.catalog.discovery.Discoverables; import com.esri.gpt.catalog.discovery.Discoverable; import com.esri.gpt.catalog.discovery.DiscoveredRecord; import com.esri.gpt.catalog.discovery.DiscoveredRecords; import com.esri.gpt.catalog.discovery.DiscoveryException; import com.esri.gpt.catalog.discovery.DiscoveryFilter; import com.esri.gpt.catalog.discovery.DiscoveryQuery; import com.esri.gpt.catalog.discovery.DiscoveryQueryAdapter; import com.esri.gpt.catalog.discovery.DiscoveryResult; import com.esri.gpt.catalog.discovery.LogicalClause; import com.esri.gpt.catalog.discovery.PropertyMeaningType; import com.esri.gpt.catalog.discovery.Sortable; import com.esri.gpt.catalog.discovery.Sortables; import com.esri.gpt.framework.context.RequestContext; import com.esri.gpt.framework.security.identity.AuthenticationStatus; import com.esri.gpt.framework.security.metadata.MetadataAcl; import com.esri.gpt.framework.util.Val; import com.esri.gpt.server.csw.provider.components.QueryOptions; import java.io.IOException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.logging.Logger; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; /** * Adapts a catalog discovery query to the Lucene model. */ public class LuceneQueryAdapter extends DiscoveryQueryAdapter { /** class variables ========================================================= */ /** The Logger. */ private static Logger LOGGER = Logger.getLogger(LuceneQueryAdapter.class.getName()); /** If the request filter is empty, we'll query all records if the * request max record number is <= this threshold. */ private static int QUERYALL_THRESHOLD = 10000; /** Log a warning if the number of records to process is >= this value. */ private static int TOOMANY_WARNING_THRESHOLD = 1000; /** instance variables ====================================================== */ private boolean hasScoredExpression = false; private LuceneIndexAdapter indexAdpter; private int maxDoc = -1; /** constructors ============================================================ */ /** Default constructor. */ public LuceneQueryAdapter() {} /** properties ============================================================== */ /** * Gets the flag indicating whether or not a scored expression exists within the query. * <p/>This flag supports relevance sorting. * @return true if a scored expression exists */ protected boolean getHasScoredExpression() { return hasScoredExpression; } /** * Sets the flag indicating whether or not a scored expression exists within the query. * <p/>This flag supports relevance sorting. * @param hasScoredExpression true if a scored expression exists */ protected void setHasScoredExpression(boolean hasScoredExpression) { this.hasScoredExpression = hasScoredExpression; } /** * Gets the index adapter. * @return the index adapter */ protected LuceneIndexAdapter getIndexAdapter() { return this.indexAdpter; } /** * Gets the maxDoc() count returned by the Lucene IndexSearcher. * @return the index adapter */ protected int getMaxDoc() { return this.maxDoc; } /** methods ================================================================= */ /** * Executes a query for metadata documents. * @param context the active request context * @param discoveryQuery the query to execute * @throws DiscoveryException if an exception occurs */ public void execute(RequestContext context, DiscoveryQuery discoveryQuery) throws DiscoveryException { LOGGER.finer("Executing DiscoveryQuery..."); this.indexAdpter = new LuceneIndexAdapter(context); try { executeQuery(discoveryQuery); } catch (ParseException e) { String sMsg = "Error querying documents:\n "+Val.chkStr(e.getMessage()); throw new DiscoveryException(sMsg,e); } catch (CorruptIndexException e) { String sMsg = "Error querying documents:\n "+Val.chkStr(e.getMessage()); throw new DiscoveryException(sMsg,e); } catch (IOException e) { String sMsg = "Error querying documents:\n "+Val.chkStr(e.getMessage()); throw new DiscoveryException(sMsg,e); } } /** * Executes a query against a Lucene index. * @param discoveryQuery the query to execute */ protected void executeQuery(DiscoveryQuery discoveryQuery) throws DiscoveryException, ParseException, CorruptIndexException, IOException { IndexSearcher searcher = null; try { // initialize searcher = getIndexAdapter().newSearcher(); this.maxDoc = searcher.maxDoc(); boolean bExecuteQuery = true; boolean bProcessHits = true; RequestContext reqContext = this.getIndexAdapter().getRequestContext(); BooleanQuery rootQuery = new BooleanQuery(); DiscoveryFilter discoveryFilter = discoveryQuery.getFilter(); DiscoveryResult discoveryResult = discoveryQuery.getResult(); Discoverables returnables = discoveryQuery.getReturnables(); if ((returnables == null) || (returnables.size() == 0) || (discoveryFilter.getMaxRecords() <= 0)) { bProcessHits = false; } // CSW query provider options boolean isDublinCoreResponse = true; boolean isBriefResponse = false; boolean isSummaryResponse = false; QueryOptions cswQueryOptions = (QueryOptions)reqContext.getObjectMap().get( "com.esri.gpt.server.csw.provider.components.QueryOptions"); // build the query (if no query was supplied, we'll query everything) LogicalClauseAdapter logicalAdapter = new LogicalClauseAdapter(this); LogicalClause rootClause = discoveryFilter.getRootClause(); if ((rootClause == null) || (rootClause.getClauses().size() == 0)) { if (discoveryFilter.getMaxRecords() <= QUERYALL_THRESHOLD) { LOGGER.finer("No filter was supplied, querying all..."); logicalAdapter.appendSelectAll(rootQuery); } else { LOGGER.finer("No filter was supplied, query will not be executed."); bExecuteQuery = false; } } else { logicalAdapter.adaptLogicalClause(rootQuery,rootClause); if ((rootQuery.clauses() == null) && (rootQuery.clauses().size() > 0)) { bExecuteQuery = false; } } if (!bExecuteQuery) return; // execute the query and process the hits if required // set the sort option Sort sortOption = null; if (bProcessHits && (searcher.maxDoc() > 0)) { sortOption = makeSortOption(discoveryQuery); } // filters Filter filter = null; // make the access control filter MetadataAcl acl = new MetadataAcl(reqContext); AuthenticationStatus auth = reqContext.getUser().getAuthenticationStatus(); boolean bAdmin = auth.getAuthenticatedRoles().hasRole("gptAdministrator"); if (!bAdmin && !acl.isPolicyUnrestricted()) { String[] aclValues = acl.makeUserAcl(); filter = new AclFilter(Storeables.FIELD_ACL,aclValues); } // isPartOf filter filter = IsPartOfFilter.make(reqContext,filter); // make the schema filter if (cswQueryOptions != null) { String schemaName = Val.chkStr(cswQueryOptions.getSchemaFilter()); if (schemaName.length() > 0) { filter = new SchemaFilter(schemaName,filter); isDublinCoreResponse = cswQueryOptions.isDublinCoreResponse(); if (!isDublinCoreResponse) { String elementSetType = Val.chkStr(cswQueryOptions.getElementSetType()); if (elementSetType.equalsIgnoreCase("brief")) { isBriefResponse = true; } else if (elementSetType.equalsIgnoreCase("summary")) { isSummaryResponse = true; } } } } // determine the start/end positions int startRecord = discoveryFilter.getStartRecord() - 1; int maxRecords = discoveryFilter.getMaxRecords(); if (startRecord < 0) startRecord = 0; int recordsPerPage = maxRecords; if (recordsPerPage <= 0) recordsPerPage = 1; int hitsToReturn = startRecord + recordsPerPage; int nextRecord = 0; int numDocs = 0; // execute the query LOGGER.finer("Executing Lucene Query:\n"+rootQuery); TopDocs topDocs = null; if (filter != null) { if (sortOption != null) { topDocs = searcher.search(rootQuery,filter,hitsToReturn,sortOption); } else { topDocs = searcher.search(rootQuery,filter,hitsToReturn); } } else { if (sortOption != null) { topDocs = searcher.search(rootQuery,filter,hitsToReturn,sortOption); } else { topDocs = searcher.search(rootQuery,hitsToReturn); } } // determine the hit count int totalHits = topDocs.totalHits; ScoreDoc[] scoreDocs = topDocs.scoreDocs; if ((scoreDocs != null) && (scoreDocs.length) > 0) { numDocs = scoreDocs.length; if (totalHits > numDocs) { nextRecord = numDocs + 1; } } discoveryResult.setNumberOfHits(totalHits); LOGGER.finer("Total query hits: "+totalHits); if (startRecord > (totalHits - 1)) bProcessHits = false; if (maxRecords <= 0) bProcessHits = false; int nTotal = numDocs - startRecord; if (!bProcessHits) return; // warn if many records were requested if (nTotal >= TOOMANY_WARNING_THRESHOLD) { LOGGER.warning("A request to process "+nTotal+ " discovery records was recieved and will be exceuted.\n"+discoveryQuery.toString()); } // process the hits, build the results LOGGER.finer("Processing "+nTotal+" records from: "+(startRecord+1)+" to: "+numDocs); Storeable storeable; DiscoveredRecords records = discoveryResult.getRecords(); IndexReader reader = searcher.getIndexReader(); for (int i=startRecord; i<numDocs; i++) { ScoreDoc scoreDoc = scoreDocs[i]; Document document = reader.document(scoreDoc.doc); DiscoveredRecord record = new DiscoveredRecord(); // Dublin Core based responses if (isDublinCoreResponse) { for (Discoverable target: returnables) { ArrayList<Object> values = new ArrayList<Object>(); storeable = (Storeable)target.getStorable(); if (storeable instanceof AnyTextProperty) { values = null; } else if (storeable instanceof GeometryProperty) { GeometryProperty geom = (GeometryProperty)storeable; values.add(geom.readEnvelope(document)); } else if (target.getMeaning().getMeaningType().equals(PropertyMeaningType.XMLURL)) { String uuid = document.get(Storeables.FIELD_UUID); uuid = URLEncoder.encode(uuid,"UTF-8"); values.add("?getxml="+uuid); } else { DatastoreField retrievalField = storeable.getRetrievalField(); Field[] fields = document.getFields(retrievalField.getName()); if (fields != null) { for (Field f: fields) { Object value = retrievalField.makeValueToReturn(f.stringValue()); values.add(value); } } } if (values != null) { Object [] oValues = null; if (values.size() >= 0) oValues = values.toArray(); record.addField(target,oValues); } } // non Dublin Core based responses } else { String responseXml = null; if (isBriefResponse && (responseXml == null)) { Field field = document.getField(Storeables.FIELD_XML_BRIEF); if (field != null) { responseXml = field.stringValue(); } } else if (isSummaryResponse && (responseXml == null)) { Field field = document.getField(Storeables.FIELD_XML_SUMMARY); if (field != null) { responseXml = field.stringValue(); } } else if (responseXml == null) { Field field = document.getField(Storeables.FIELD_XML); if (field != null) { responseXml = field.stringValue(); } } record.setResponseXml(responseXml); } onRecord(record, document); records.add(record); } int nPopulated = records.size(); LOGGER.finer("Populated "+nPopulated+" records."); } finally { getIndexAdapter().closeSearcher(searcher); } } /** * Called before a record is being added to the collection * @param record discovered record * @param document Lucene document */ protected void onRecord(DiscoveredRecord record, Document document) { // TODO: override to customize behavior } /** * Makes the sort option for the query. * @param discoveryQuery the active query */ private Sort makeSortOption(DiscoveryQuery discoveryQuery) { Sort sortOption = null; Sortables sortables = discoveryQuery.getSortables(); ArrayList<SortField> sortFields = new ArrayList<SortField>(); if ((sortables != null) && (sortables.size() > 0)) { Storeable storable; for (Sortable sortable: sortables) { storable = (Storeable)sortable.getStorable(); if (storable != null) { DatastoreField comparisonField = storable.getComparisonField(); if (comparisonField != null) { int sortFieldType = comparisonField.sortFieldType(); LOGGER.finer("Sorting on "+comparisonField.getName()+" "+sortable.getDirection()); if (sortable.getDirection().equals(Sortable.SortDirection.ASC)) { sortFields.add(new SortField(comparisonField.getName(),sortFieldType,false)); } else { sortFields.add(new SortField(comparisonField.getName(),sortFieldType,true)); } } } } } // if sort fields have not been supplied and the query does not contain // a scored expression then sort by descending date if (sortFields.size() == 0) { if (!this.getHasScoredExpression()) { String sModifiedDate = Storeables.FIELD_DATEMODIFIED; sortFields.add(new SortField(sModifiedDate,SortField.LONG,true)); LOGGER.finer("Auto-sorting on "+sModifiedDate+" DESC"); } else { LOGGER.finer("Sorting on relevance."); } } if (sortFields.size() > 0) { sortOption = new Sort(sortFields.toArray(new SortField[0])); } return sortOption; } }