/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.searcher; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.nutch.plugin.*; import org.apache.nutch.searcher.Query.Clause; import org.apache.nutch.util.ObjectCache; import org.apache.hadoop.conf.Configuration; import java.util.*; import org.apache.lucene.search.BooleanQuery; /** Creates and caches {@link QueryFilter} implementing plugins. QueryFilter * implementations should define either the "fields" or "raw-fields" attributes * for any fields that they process, otherwise these will be ignored by the * query parser. Raw fields are parsed as a single Query.Term, including * internal punctuation, while non-raw fields are parsed containing punctuation * are parsed as multi-token Query.Phrase's. */ public class QueryFilters { private static final Log LOG = LogFactory.getLog(QueryFilters.class); private QueryFilter[] queryFilters; private HashSet<String> FIELD_NAMES ; private HashSet<String> RAW_FIELD_NAMES; private static List<String> parseFieldNames(Extension extension, String attribute) { String fields = extension.getAttribute(attribute); if (fields == null) fields = ""; return Arrays.asList(fields.split("[,\\s]")); } public QueryFilters(Configuration conf) { ObjectCache objectCache = ObjectCache.get(conf); this.queryFilters = (QueryFilter[]) objectCache.getObject(QueryFilter.class .getName()); if (this.queryFilters == null) { try { ExtensionPoint point = PluginRepository.get(conf) .getExtensionPoint(QueryFilter.X_POINT_ID); if (point == null) throw new RuntimeException(QueryFilter.X_POINT_ID + " not found."); Extension[] extensions = point.getExtensions(); FIELD_NAMES = new HashSet<String>(); RAW_FIELD_NAMES = new HashSet<String>(); QueryFilter[] filters = new QueryFilter[extensions.length]; for (int i = 0; i < extensions.length; i++) { Extension extension = extensions[i]; List<String> fieldNames = parseFieldNames(extension, "fields"); List<String> rawFieldNames = parseFieldNames(extension, "raw-fields"); if (fieldNames.size() == 0 && rawFieldNames.size() == 0) { if (LOG.isWarnEnabled()) { LOG.warn("QueryFilter: " + extension.getId() + " names no fields."); } continue; } filters[i] = (QueryFilter) extension.getExtensionInstance(); FIELD_NAMES.addAll(fieldNames); FIELD_NAMES.addAll(rawFieldNames); objectCache.setObject("FIELD_NAMES", FIELD_NAMES); RAW_FIELD_NAMES.addAll(rawFieldNames); objectCache.setObject("RAW_FIELD_NAMES", RAW_FIELD_NAMES); } objectCache.setObject(QueryFilter.class.getName(), filters); } catch (PluginRuntimeException e) { throw new RuntimeException(e); } this.queryFilters = (QueryFilter[]) objectCache.getObject(QueryFilter.class .getName()); } else { // cache already filled FIELD_NAMES = (HashSet<String>) objectCache.getObject("FIELD_NAMES"); RAW_FIELD_NAMES = (HashSet<String>) objectCache.getObject("RAW_FIELD_NAMES"); } } /** Run all defined filters. */ public BooleanQuery filter(Query input) throws QueryException { // first check that all field names are claimed by some plugin Clause[] clauses = input.getClauses(); for (int i = 0; i < clauses.length; i++) { Clause c = clauses[i]; if (!isField(c.getField())) throw new QueryException("Not a known field name:"+c.getField()); } // then run each plugin BooleanQuery output = new BooleanQuery(); for (int i = 0; i < this.queryFilters.length; i++) { output = this.queryFilters[i].filter(input, output); } return output; } public boolean isField(String name) { return FIELD_NAMES.contains(name); } public boolean isRawField(String name) { return RAW_FIELD_NAMES.contains(name); } }