package org.apache.lucene.demo;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LanguageModelQuery;
import org.apache.lucene.search.LanguageModelSimilarityProvider;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/** Simple command-line based search demo. */
public class SearchFiles {
/**
 * Reader wrapper that answers every norms request with the norms of one fixed field.
 *
 * <p>Norms are read into memory, using a byte of memory per document per searched field.
 * This can cause search of large collections with a large number of fields to run out of
 * memory. If all of the fields contain only a single token, the norms are all identical,
 * so a single norm vector may be shared.
 */
private static class OneNormsReader extends FilterIndexReader {
  /** Name of the field whose norms are returned regardless of the field requested. */
  private String normsField;

  /**
   * @param in reader to wrap
   * @param field field whose norms are served for all fields
   */
  public OneNormsReader(IndexReader in, String field) {
    super(in);
    normsField = field;
  }

  /** Ignores the requested field and returns the wrapped reader's norms for the fixed field. */
  @Override
  public byte[] norms(String requestedField) throws IOException {
    final byte[] sharedNorms = in.norms(normsField);
    return sharedNorms;
  }
}
/** Private so the class cannot be instantiated from outside; its entry points are static. */
private SearchFiles() {}
/**
 * Simple command-line based search demo.
 *
 * <p>Opens the index under {@code ../LMIndex/}, builds a {@link LanguageModelQuery} on the
 * {@code text} field for the terms "crime" and "organized", and pages through the hits
 * interactively (10 per page, raw output), reading commands from standard input.
 *
 * @param args ignored; index location, field and query terms are hard-coded
 * @throws Exception if the index cannot be opened or the search fails
 */
public static void main(String[] args) throws Exception {
  String index = "../LMIndex/";
  String field = "text";
  int hitsPerPage = 10;

  BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
  SimilarityProvider sp = new LanguageModelSimilarityProvider();

  // IndexReader.open(Directory) takes no writer configuration, so no IndexWriterConfig or
  // codec registration is set up here; only the similarity provider is handed to the searcher.
  FSDirectory directory = FSDirectory.open(new File(index));
  try {
    IndexReader reader = IndexReader.open(directory);
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      searcher.setSimilarityProvider(sp);

      LanguageModelQuery query = new LanguageModelQuery();
      query.addTerm(new Term(field, "crime"));
      query.addTerm(new Term(field, "organized"));

      // Alternative: doStreamingSearch(searcher, query) prints every hit instead of paging.
      doPagingSearch(in, searcher, query, hitsPerPage, true, true);
    } finally {
      // Release the reader even when the search throws.
      reader.close();
    }
  } finally {
    // The directory was opened here, so it is closed here, after the reader.
    directory.close();
  }
}
/**
 * Runs the query with a custom {@link Collector} that simply prints the document id
 * and score of every matching document.
 *
 * <p>This simulates the streaming search use case, where all hits are supposed to
 * be processed, regardless of their relevance.
 *
 * @param searcher searcher the query is executed against
 * @param query query to run
 * @throws IOException if searching or scoring fails
 */
public static void doStreamingSearch(final IndexSearcher searcher, Query query) throws IOException {
  Collector streamingHitCollector = new Collector() {
    private Scorer scorer;
    // docBase of the reader context most recently passed to setNextReader
    private int docBase;

    // simply print docId and score of every matching document
    @Override
    public void collect(int doc) throws IOException {
      // Parenthesised so the two ints are added; without the parentheses they were
      // concatenated as text (e.g. doc 5, base 100 printed "doc=5100").
      System.out.println("doc=" + (doc + docBase) + " score=" + scorer.score());
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
      return true;
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      this.scorer = scorer;
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
      // Remember the new reader's base so collect() can print index-wide ids.
      this.docBase = context.docBase;
    }
  };

  searcher.search(query, streamingHitCollector);
}
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * <p>When the query is executed for the first time, only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, the query
 * is executed another time and all hits are collected.
 *
 * <p>End of input on {@code in} is treated like an empty line: it declines collecting more
 * hits and quits the page prompt. A page entry that is not a number, or is less than 1, is
 * answered with "No such page" and the prompt is shown again.
 *
 * @param in source of the user's paging commands
 * @param searcher searcher the query is executed against
 * @param query query to run
 * @param hitsPerPage number of hits shown on one page
 * @param raw if true, print "doc=&lt;id&gt; score=&lt;score&gt;" per hit; otherwise print the
 *     hit's stored {@code DOCNO} field
 * @param interactive if false, print the first page and return without prompting
 * @throws IOException if searching, loading a document, or reading from {@code in} fails
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
                                  int hitsPerPage, boolean raw, boolean interactive) throws IOException {
  // Collect enough docs to show 5 pages
  TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
  searcher.search(query, collector);
  ScoreDoc[] hits = collector.topDocs().scoreDocs;

  int numTotalHits = collector.getTotalHits();
  System.out.println(numTotalHits + " total matching documents");

  int start = 0;
  int end = Math.min(numTotalHits, hitsPerPage);

  while (true) {
    if (end > hits.length) {
      System.out.println("Only results 1 - " + hits.length +" of " + numTotalHits + " total matching documents collected.");
      System.out.println("Collect more (y/n) ?");
      String line = in.readLine();
      // readLine() returns null at end of input; handle it like an empty answer.
      if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
        break;
      }

      // Re-run the query, this time keeping every hit.
      collector = TopScoreDocCollector.create(numTotalHits, false);
      searcher.search(query, collector);
      hits = collector.topDocs().scoreDocs;
    }

    end = Math.min(hits.length, start + hitsPerPage);

    for (int i = start; i < end; i++) {
      if (raw) { // output raw format
        System.out.println("doc="+hits[i].doc+" score="+hits[i].score);
        continue;
      }

      Document doc = searcher.doc(hits[i].doc);
      String docno = doc.get("DOCNO");
      if (docno != null) {
        System.out.println((i+1) + ". " + docno);
      } else {
        System.out.println((i+1) + ". " + "No path for this document");
      }
    }

    if (!interactive) {
      break;
    }

    if (numTotalHits >= end) {
      boolean quit = false;
      while (true) {
        System.out.print("Press ");
        if (start - hitsPerPage >= 0) {
          System.out.print("(p)revious page, ");
        }
        if (start + hitsPerPage < numTotalHits) {
          System.out.print("(n)ext page, ");
        }
        System.out.println("(q)uit or enter number to jump to a page.");

        String line = in.readLine();
        // null means end of input: nothing more can be read, so quit.
        if (line == null || line.length() == 0 || line.charAt(0)=='q') {
          quit = true;
          break;
        }
        if (line.charAt(0) == 'p') {
          start = Math.max(0, start - hitsPerPage);
          break;
        } else if (line.charAt(0) == 'n') {
          if (start + hitsPerPage < numTotalHits) {
            start+=hitsPerPage;
          }
          break;
        } else {
          int page;
          try {
            page = Integer.parseInt(line.trim());
          } catch (NumberFormatException ignored) {
            // Not a number: fall through to the "No such page" branch and ask again.
            page = 0;
          }
          // long arithmetic so a huge page number cannot overflow into a negative offset
          long offset = (long) (page - 1) * hitsPerPage;
          if (page >= 1 && offset < numTotalHits) {
            start = (int) offset;
            break;
          } else {
            System.out.println("No such page");
          }
        }
      }
      if (quit) break;
      end = Math.min(numTotalHits, start + hitsPerPage);
    }
  }
}
}