/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.searcher;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.PluginDescriptor;
import org.apache.nutch.plugin.PluginRepository;
import org.apache.nutch.searcher.Query.Clause;
import org.apache.nutch.searcher.Query.Phrase;
import org.apache.nutch.searcher.Query.Term;
/**
 * Query filter that translates clauses on meta-tag fields into Lucene queries.
 *
 * <p>The set of handled field names is read from the {@code fields} and
 * {@code raw-fields} attributes of the extensions declared by the
 * {@code parse-metatags} plugin. Clauses on any other field are left
 * untouched by this filter.</p>
 */
public class MetaTagsQueryFilter extends Configured implements QueryFilter {

  private static final Log LOG = LogFactory.getLog(MetaTagsQueryFilter.class);

  /** Id of the plugin whose extension attributes list the handled fields. */
  private static final String METATAGS_PLUGIN_ID = "parse-metatags";

  /** Separator between field names: any run of whitespace and/or commas. */
  private static final String FIELD_SEPARATOR_REGEX = "[\\s,]+";

  /** Field names this filter translates; rebuilt on every {@link #setConf}. */
  private Set<String> fields = new HashSet<String>();

  /**
   * Stores the configuration and (re)builds the set of handled field names
   * from the {@code parse-metatags} plugin descriptor.
   *
   * <p>A {@code null} configuration is stored but leaves the current field set
   * unchanged. If the plugin descriptor cannot be found, a warning is logged
   * and the field set becomes empty, so {@link #filter} adds nothing.</p>
   *
   * @param conf the configuration to use, may be {@code null}
   */
  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf == null) {
      return;
    }

    // Collect into a fresh set so that reconfiguring replaces, rather than
    // accumulates on top of, the field names of a previous configuration.
    Set<String> collected = new HashSet<String>();

    PluginRepository pr = PluginRepository.get(conf);
    PluginDescriptor pd = pr.getPluginDescriptor(METATAGS_PLUGIN_ID);
    if (pd == null) {
      // Without this guard a missing plugin would surface as a
      // NullPointerException while the filter is being configured.
      LOG.warn("Plugin '" + METATAGS_PLUGIN_ID
          + "' not found; no meta tag fields will be searchable");
    } else {
      for (Extension e : pd.getExtensions()) {
        addFieldNames(e.getAttribute("fields"), collected);
        addFieldNames(e.getAttribute("raw-fields"), collected);
      }
    }

    fields = collected;
    LOG.info("Query meta fields: " + fields.toString());
  }

  /**
   * Splits a whitespace/comma separated list of field names and adds every
   * non-empty name to {@code target}.
   *
   * @param list the raw attribute value, may be {@code null}
   * @param target the set receiving the parsed names
   */
  private static void addFieldNames(String list, Set<String> target) {
    if (list == null) {
      return;
    }
    for (String name : list.split(FIELD_SEPARATOR_REGEX)) {
      // A leading separator still produces one empty token; skip it so the
      // empty string never ends up registered as a field name.
      if (name.length() > 0) {
        target.add(name);
      }
    }
  }

  /**
   * Adds a Lucene query to {@code translation} for every clause of
   * {@code input} whose field is one of the configured meta-tag fields.
   *
   * <p>Phrase clauses become a {@link PhraseQuery} with one term per phrase
   * term; all other clauses become a {@link TermQuery}. Required clauses are
   * added as {@code MUST}, prohibited ones as {@code MUST_NOT}, and the rest
   * as {@code SHOULD}.</p>
   *
   * @param input the parsed user query
   * @param translation the Lucene query being built
   * @return {@code translation}, with the matching clauses added
   * @throws QueryException declared by the {@link QueryFilter} contract
   */
  @Override
  public BooleanQuery filter(Query input, BooleanQuery translation)
      throws QueryException {
    for (Clause c : input.getClauses()) {
      String field = c.getField();
      if (!fields.contains(field)) {
        continue;
      }

      org.apache.lucene.search.Query q;
      if (c.isPhrase()) {
        PhraseQuery pq = new PhraseQuery();
        Phrase p = c.getPhrase();
        for (Term t : p.getTerms()) {
          pq.add(new org.apache.lucene.index.Term(field, t.toString()));
        }
        q = pq;
      } else {
        Term t = c.getTerm();
        q = new TermQuery(new org.apache.lucene.index.Term(field, t.toString()));
      }

      translation.add(q, occurrenceOf(c));
    }
    return translation;
  }

  /**
   * Maps a clause's required/prohibited flags to the Lucene occurrence,
   * checking the required flag first.
   *
   * @param c the clause to inspect
   * @return {@code MUST}, {@code MUST_NOT} or {@code SHOULD}
   */
  private static Occur occurrenceOf(Clause c) {
    if (c.isRequired()) {
      return Occur.MUST;
    }
    if (c.isProhibited()) {
      return Occur.MUST_NOT;
    }
    return Occur.SHOULD;
  }
}