/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.searcher;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.nutch.indexer.FsDirectory;
import org.apache.nutch.indexer.NutchSimilarity;
/**
 * Implements {@link Searcher} and {@link HitDetailer} for either a single
 * merged index, or a set of indexes.
 *
 * <p>Hits produced by this class carry the Lucene document number, rendered
 * as a decimal string, as their unique key; {@link #getDetails(Hit)} and
 * {@link #getExplanation(Query, Hit)} parse that key back into a document
 * number.</p>
 */
public class IndexSearcher implements Searcher, HitDetailer {

  private org.apache.lucene.search.Searcher luceneSearcher;
  private org.apache.lucene.index.IndexReader reader;

  private LuceneQueryOptimizer optimizer;
  private FileSystem fs;
  private Configuration conf;
  private QueryFilters queryFilters;

  /**
   * Construct given a number of indexes.
   *
   * @param indexDirs the index directories; one reader is opened per entry
   *        and all of them are combined into a single {@link MultiReader}
   * @param conf configuration used to obtain the file system and to set up
   *        the query optimizer and query filters
   * @throws IOException if an index cannot be opened
   */
  public IndexSearcher(Path[] indexDirs, Configuration conf) throws IOException {
    IndexReader[] readers = new IndexReader[indexDirs.length];
    this.conf = conf;
    this.fs = FileSystem.get(conf);
    boolean initialized = false;
    try {
      for (int i = 0; i < indexDirs.length; i++) {
        readers[i] = IndexReader.open(getDirectory(indexDirs[i]));
      }
      init(new MultiReader(readers), conf);
      initialized = true;
    } finally {
      if (!initialized) {
        // Construction failed part-way: release the readers that were
        // already opened so their underlying files are not leaked.
        for (int i = 0; i < readers.length; i++) {
          closeQuietly(readers[i]);
        }
      }
    }
  }

  /**
   * Construct given a single merged index.
   *
   * @param index the directory of the merged index
   * @param conf configuration used to obtain the file system and to set up
   *        the query optimizer and query filters
   * @throws IOException if the index cannot be opened
   */
  public IndexSearcher(Path index, Configuration conf)
    throws IOException {
    this.conf = conf;
    this.fs = FileSystem.get(conf);
    IndexReader indexReader = IndexReader.open(getDirectory(index));
    boolean initialized = false;
    try {
      init(indexReader, conf);
      initialized = true;
    } finally {
      if (!initialized) {
        // init() failed after the reader was opened; do not leak it.
        closeQuietly(indexReader);
      }
    }
  }

  /**
   * Stores the reader and builds the Lucene searcher (with
   * {@link NutchSimilarity}), the query optimizer and the query filters.
   */
  private void init(IndexReader reader, Configuration conf) throws IOException {
    this.reader = reader;
    this.luceneSearcher = new org.apache.lucene.search.IndexSearcher(reader);
    this.luceneSearcher.setSimilarity(new NutchSimilarity());
    this.optimizer = new LuceneQueryOptimizer(conf);
    this.queryFilters = new QueryFilters(conf);
  }

  /**
   * Closes a reader while cleaning up after a failed construction. A failure
   * to close is deliberately ignored so that the exception which caused the
   * cleanup is the one that reaches the caller.
   */
  private static void closeQuietly(IndexReader openedReader) {
    if (openedReader == null) {
      return;
    }
    try {
      openedReader.close();
    } catch (IOException ignored) {
      // Best-effort cleanup only; the original failure is more informative.
    }
  }

  /**
   * Returns a Lucene {@link Directory} for the given index path. When the
   * configured file system's URI scheme is <code>file</code>, the path is
   * qualified against the local file system and opened with
   * {@link FSDirectory}; otherwise it is wrapped in an {@link FsDirectory}
   * on top of the configured file system.
   */
  private Directory getDirectory(Path file) throws IOException {
    if ("file".equals(this.fs.getUri().getScheme())) {
      Path qualified = file.makeQualified(FileSystem.getLocal(conf));
      File fsLocal = new File(qualified.toUri());
      return FSDirectory.getDirectory(fsLocal.getAbsolutePath());
    } else {
      return new FsDirectory(this.fs, file, false, this.conf);
    }
  }

  /**
   * Runs a query against the index.
   *
   * @param query the query; it is converted to a Lucene query by the
   *        configured query filters
   * @param numHits number of hits requested, passed to the query optimizer
   * @param dedupField field whose per-document value is attached to each hit,
   *        or <code>null</code> for none
   * @param sortField field to sort by, or <code>null</code> to use the score
   * @param reverse passed through to the query optimizer together with
   *        <code>sortField</code>
   * @return the matching hits
   * @throws IOException if the search fails
   */
  public Hits search(Query query, int numHits,
                     String dedupField, String sortField, boolean reverse)
    throws IOException {
    org.apache.lucene.search.BooleanQuery luceneQuery =
      this.queryFilters.filter(query);
    return translateHits
      (optimizer.optimize(luceneQuery, luceneSearcher, numHits,
                          sortField, reverse),
       dedupField, sortField);
  }

  /**
   * Returns an HTML explanation of how the given hit scored for the query.
   *
   * @throws NumberFormatException if the hit's unique key is not a decimal
   *         document number
   */
  public String getExplanation(Query query, Hit hit) throws IOException {
    // parseInt yields the primitive doc id directly instead of boxing an
    // Integer that would be unboxed again straight away.
    return luceneSearcher.explain(this.queryFilters.filter(query),
                                  Integer.parseInt(hit.getUniqueKey())).toHtml();
  }

  /**
   * Loads the document behind a hit and returns the names and string values
   * of its fields, in the order the document reports them.
   *
   * @throws NumberFormatException if the hit's unique key is not a decimal
   *         document number
   */
  public HitDetails getDetails(Hit hit) throws IOException {
    Document doc = luceneSearcher.doc(Integer.parseInt(hit.getUniqueKey()));

    List docFields = doc.getFields();
    int fieldCount = docFields.size();
    String[] fields = new String[fieldCount];
    String[] values = new String[fieldCount];
    for (int i = 0; i < fieldCount; i++) {
      Field field = (Field)docFields.get(i);
      fields[i] = field.name();
      values[i] = field.stringValue();
    }

    return new HitDetails(fields, values);
  }

  /**
   * Returns the details of each hit, in the same order as the input array.
   */
  public HitDetails[] getDetails(Hit[] hits) throws IOException {
    HitDetails[] results = new HitDetails[hits.length];
    for (int i = 0; i < hits.length; i++) {
      results[i] = getDetails(hits[i]);
    }
    return results;
  }

  /**
   * Converts Lucene results into {@link Hits}.
   *
   * <p>Each hit's sort value is the score when <code>sortField</code> is
   * <code>null</code>; otherwise it is the first sort field value of the
   * {@link FieldDoc}, wrapped in the matching writable type.</p>
   *
   * @throws RuntimeException if the sort field value is not an
   *         <code>Integer</code>, <code>Float</code> or <code>String</code>
   */
  private Hits translateHits(TopDocs topDocs,
                             String dedupField, String sortField)
    throws IOException {

    // Per-document values of the dedup field, indexed by document number.
    String[] dedupValues = null;
    if (dedupField != null) {
      dedupValues = FieldCache.DEFAULT.getStrings(reader, dedupField);
    }

    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    int length = scoreDocs.length;
    Hit[] hits = new Hit[length];
    for (int i = 0; i < length; i++) {

      int doc = scoreDocs[i].doc;

      WritableComparable sortValue;               // convert value to writable
      if (sortField == null) {
        sortValue = new FloatWritable(scoreDocs[i].score);
      } else {
        Object raw = ((FieldDoc)scoreDocs[i]).fields[0];
        if (raw instanceof Integer) {
          sortValue = new IntWritable(((Integer)raw).intValue());
        } else if (raw instanceof Float) {
          sortValue = new FloatWritable(((Float)raw).floatValue());
        } else if (raw instanceof String) {
          sortValue = new Text((String)raw);
        } else {
          throw new RuntimeException("Unknown sort value type!");
        }
      }

      String dedupValue = dedupValues == null ? null : dedupValues[doc];

      hits[i] = new Hit(Integer.toString(doc), sortValue, dedupValue);
    }
    return new Hits(topDocs.totalHits, hits);
  }

  /**
   * Closes the Lucene searcher and then the index reader. The reader is
   * closed even when closing the searcher throws, so a failure in the first
   * step cannot leak the reader.
   *
   * @throws IOException if closing the searcher or the reader fails
   */
  public void close() throws IOException {
    try {
      if (luceneSearcher != null) { luceneSearcher.close(); }
    } finally {
      if (reader != null) { reader.close(); }
    }
  }

}