/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.searcher.more;

import org.apache.nutch.searcher.Query;
import org.apache.nutch.searcher.Query.Clause;
import org.apache.nutch.searcher.QueryFilter;
import org.apache.nutch.searcher.QueryException;

import org.apache.hadoop.conf.Configuration;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.index.Term;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Handles "date:" query clauses, causing them to search the field "date"
 * indexed by MoreIndexingFilter.java
 *
 * <p>Each matching clause must have the form
 * <code>date:yyyymmdd-yyyymmdd</code>; it is turned into an inclusive Lucene
 * {@link RangeQuery} over the "date" field and appended to the output query.
 *
 * @author John Xing
 */
public class DateQueryFilter implements QueryFilter {

  public static final Log LOG = LogFactory.getLog(DateQueryFilter.class);

  /** Name of both the query field handled here and the index field searched. */
  private static final String FIELD_NAME = "date";

  // query syntax is defined as date:yyyymmdd-yyyymmdd
  private static final Pattern DATE_RANGE_PATTERN =
      Pattern.compile("^(\\d{8})-(\\d{8})$");

  /** Configuration handed in through {@link #setConf(Configuration)}. */
  private Configuration conf;

  /**
   * Translates every "date" clause of the Nutch query into a Lucene range
   * query and adds it to <code>output</code>. Clauses on other fields are
   * left untouched.
   *
   * @param input the Nutch query whose clauses are examined
   * @param output the Lucene query the generated range queries are added to
   * @return the same <code>output</code> instance that was passed in
   * @throws QueryException if a "date" clause does not match
   *         <code>yyyymmdd-yyyymmdd</code>
   */
  public BooleanQuery filter(Query input, BooleanQuery output)
      throws QueryException {

    // examine each clause in the Nutch query
    Clause[] clauses = input.getClauses();

    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];

      // skip if not a date clause; constant-first equals() also skips a
      // clause whose field is null instead of failing on it
      if (!FIELD_NAME.equals(c.getField())) {
        continue;
      }

      // the range query is built (and validated) before the clause's
      // required/prohibited flags are consulted
      output.add(toRangeQuery(c.getTerm().toString()), toOccur(c));
    }

    return output;
  }

  /**
   * Parses a <code>yyyymmdd-yyyymmdd</code> expression into an inclusive
   * range query on the "date" field.
   *
   * @param expression the raw text of the clause's term
   * @return the range query, with its boost set to zero
   * @throws QueryException if the expression does not match the date syntax
   */
  private static RangeQuery toRangeQuery(String expression)
      throws QueryException {
    Matcher matcher = DATE_RANGE_PATTERN.matcher(expression);
    if (!matcher.matches()) {
      throw new QueryException("Wrong query syntax " + FIELD_NAME + ":"
          + expression);
    }

    // do it as lucene RangeQuery
    Term lower = new Term(FIELD_NAME, matcher.group(1));
    Term upper = new Term(FIELD_NAME, matcher.group(2));

    // inclusive
    RangeQuery rangeQuery = new RangeQuery(lower, upper, true);

    // trigger filterization (original author's note on the zero boost)
    rangeQuery.setBoost(0.0f);
    return rangeQuery;
  }

  /**
   * Maps a clause's flags to a Lucene occurrence: prohibited wins over
   * required, and a clause that is neither is optional.
   *
   * @param c the Nutch clause being translated
   * @return <code>MUST_NOT</code>, <code>MUST</code> or <code>SHOULD</code>
   */
  private static BooleanClause.Occur toOccur(Clause c) {
    if (c.isProhibited()) {
      return BooleanClause.Occur.MUST_NOT;
    }
    if (c.isRequired()) {
      return BooleanClause.Occur.MUST;
    }
    return BooleanClause.Occur.SHOULD;
  }

  /**
   * Stores the configuration for later retrieval.
   *
   * @param conf the configuration to keep
   */
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /**
   * Returns the configuration last passed to
   * {@link #setConf(Configuration)}.
   *
   * @return the stored configuration, or <code>null</code> if none was set
   */
  public Configuration getConf() {
    return this.conf;
  }
}