/* * Copyright 2013 SciFY NPO <info@scify.org>. * * This product is part of the NewSum Free Software. * For more information about NewSum visit * * http://www.scify.gr/site/en/our-projects/completed-projects/newsum-menu-en * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * If this code or its output is used, extended, re-engineered, integrated, * or embedded to any extent in another software or hardware, there MUST be * an explicit attribution to this work in the resulting source code, * the packaging (where such packaging exists), or user interface * (where such an interface exists). * The attribution must be of the form "Powered by NewSum, SciFY" */ package org.scify.NewSumServer.Server.Searching; import gr.demokritos.iit.jinsect.utils; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.LimitTokenCountAnalyzer; import org.apache.lucene.analysis.el.GreekAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.scify.NewSumServer.Server.Utils.Main; /** * The Class used for searching in an indexed directory. * @author George K. <gkiom@scify.org> */ public class Searcher { private final static Logger LOGGER = Main.getLogger(); private Analyzer anal; /** * The map containing the (docId, filename) data */ private HashMap<Integer, String> docFiles = new HashMap<Integer, String>(); /** * Searches the index directory for the specified query. * @param fIndexDir The directory where the indexed files are stored * @param lLoc The locale that the indexed is created in * @param sQuery The search term * @param iMaxHits The max number of results to be returned. * @return A list of scoredocs which correspond to the search entry */ public List<ScoreDoc> searchIndex(File fIndexDir, Locale lLoc, String sQuery, int iMaxHits) { try { // Open the Directory of the Indexed Files, using // the FSDirectory class Directory FSDir = FSDirectory.open(fIndexDir); // Create the reader class on the Dir IndexReader reader = IndexReader.open(FSDir); IndexSearcher searcher = new IndexSearcher(reader); String dField = "text"; // Pass this from the Indexer Class? // Must Use the Same Analyzer as the index Class, otherwise // results will be awkward. So it get's analyzer from Indexer class // Create the query Parser on the Field that we want to parse if (lLoc.toString().equals("el")) { anal = new GreekAnalyzer(Version.LUCENE_36); } else if (lLoc.toString().equals("en")) { // The standard analyzer Analyzer stdAnal = new StandardAnalyzer(Version.LUCENE_36); anal = new LimitTokenCountAnalyzer(stdAnal, Integer.MAX_VALUE); } QueryParser parser = new QueryParser(Version.LUCENE_36, dField, anal); try { Query q = parser.parse(sQuery); // Search the Index with the Query TopDocs hits = searcher.search(q, iMaxHits); ScoreDoc[] scoreDocs = hits.scoreDocs; //debug start System.out.println("files found: " + scoreDocs.length); //debug end // Iterate over the scoredocs for (int n = 0; n < scoreDocs.length; n++) { ScoreDoc sd = scoreDocs[n]; float score = sd.score; int docId = sd.doc; Document d = searcher.doc(docId); String filename = d.get("file"); // System.out.println //debug // (filename+": "+"Score: "+score+" - "+ "DocID: "+ docId); //Save the <docID, filename> data to the map this.docFiles.put(docId, filename); } // Sort the Docs according to their scores List<ScoreDoc> returnList = sortScoreDocs(scoreDocs); Collections.reverse(returnList); return returnList; } catch (ParseException ex) { LOGGER.log(Level.SEVERE, "Could not parse query {0}", sQuery); } catch (NullPointerException ex) { LOGGER.log(Level.WARNING, ex.getMessage()); return null; } } catch (IOException ex) { LOGGER.log(Level.SEVERE, "Could not open Directory {0}", fIndexDir.getPath()); } return null; } /** * Sorts The ScoreDocs according to their score values. * @param scoreDocs The Documents returned by the searcher * @return A sorted List of the scoreDocs * @throws CorruptIndexException * @throws IOException */ private List<ScoreDoc> sortScoreDocs(ScoreDoc[] scoreDocs) throws CorruptIndexException, IOException { List<ScoreDoc> scoreDocsList = Arrays.asList(scoreDocs); Collections.sort(scoreDocsList, new Comparator<ScoreDoc>() { @Override public int compare(ScoreDoc o1, ScoreDoc o2) { return (o1.score >= o2.score) ? ((o1.score > o2.score) ? 1:0 ) : -1; } }); // debug // for (ScoreDoc i : scoreDocsList ) { // System.out.println(i.toString() + " :: "); // } // debug end return scoreDocsList; } /** * * @return The Map containing the (docId, filename) info */ public HashMap<Integer, String> getDocFiles() { return this.docFiles; } }