package lia.advsearching; /** * Copyright Manning Publications Co. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific lan */ import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import lia.common.TestUtil; import java.io.IOException; import java.io.File; // From chapter 5 public class BooksLikeThis { public static void main(String[] args) throws IOException { Directory dir = TestUtil.getBookIndexDirectory(); IndexReader reader = IndexReader.open(dir); int numDocs = reader.maxDoc(); BooksLikeThis blt = new BooksLikeThis(reader); for (int i = 0; i < numDocs; i++) { // #1 System.out.println(); Document doc = reader.document(i); System.out.println(doc.get("title")); Document[] docs = blt.docsLike(i, 10); // #2 if (docs.length == 0) { System.out.println(" None like this"); } for (Document likeThisDoc : docs) { System.out.println(" -> " + likeThisDoc.get("title")); } } reader.close(); dir.close(); } private IndexReader reader; private IndexSearcher searcher; public BooksLikeThis(IndexReader reader) { this.reader = reader; searcher = new IndexSearcher(reader); } public Document[] docsLike(int id, int max) throws IOException { Document doc = reader.document(id); String[] authors = doc.getValues("author"); BooleanQuery authorQuery = new BooleanQuery(); // #3 for (String author : authors) { // #3 authorQuery.add(new TermQuery(new Term("author", author)), // #3 BooleanClause.Occur.SHOULD); // #3 } authorQuery.setBoost(2.0f); TermFreqVector vector = // #4 reader.getTermFreqVector(id, "subject"); // #4 BooleanQuery subjectQuery = new BooleanQuery(); // #4 for (String vecTerm : vector.getTerms()) { // #4 TermQuery tq = new TermQuery( // #4 new Term("subject", vecTerm)); // #4 subjectQuery.add(tq, BooleanClause.Occur.SHOULD); // #4 } BooleanQuery likeThisQuery = new BooleanQuery(); // #5 likeThisQuery.add(authorQuery, BooleanClause.Occur.SHOULD); // #5 likeThisQuery.add(subjectQuery, BooleanClause.Occur.SHOULD); // #5 likeThisQuery.add(new TermQuery( // #6 new Term("isbn", doc.get("isbn"))), BooleanClause.Occur.MUST_NOT); // #6 // System.out.println(" Query: " + // likeThisQuery.toString("contents")); TopDocs hits = searcher.search(likeThisQuery, 10); int size = max; if (max > hits.scoreDocs.length) size = hits.scoreDocs.length; Document[] docs = new Document[size]; for (int i = 0; i < size; i++) { docs[i] = reader.document(hits.scoreDocs[i].doc); } return docs; } } /* #1 Iterate over every book #2 Look up books like this #3 Boosts books by same author #4 Use terms from "subject" term vectors #5 Create final query #6 Exclude current book */