package org.sakaiproject.search.util;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

/**
 * Parses user query strings and can build a "did you mean" variant of a query
 * in which terms unknown to the spell index are replaced by the closest
 * suggestion from a Lucene {@link SpellChecker}.
 */
public class DidYouMeanParser {

    private static final Log log = LogFactory.getLog(DidYouMeanParser.class);

    /** Field used for query terms that do not name a field explicitly. */
    private final String defaultField;

    /** Directory holding the spell-checker index. */
    private final Directory spellIndexDirectory;

    /**
     * Optional reader on the main index, passed to the spell checker when
     * asking for suggestions; {@code null} when no reader was supplied.
     */
    private final IndexReader origionalIndex;

    /**
     * Creates a parser whose suggestions come from the spell index alone.
     *
     * @param defaultField        field used for unqualified query terms
     * @param spellIndexDirectory directory holding the spell-checker index
     */
    public DidYouMeanParser(String defaultField, Directory spellIndexDirectory) {
        this(defaultField, spellIndexDirectory, null);
    }

    /**
     * Creates a parser that also hands the main index to the spell checker
     * when asking for suggestions.
     *
     * @param defaultField        field used for unqualified query terms
     * @param spellIndexDirectory directory holding the spell-checker index
     * @param origionalIndex      reader on the main index; may be {@code null}
     */
    public DidYouMeanParser(String defaultField, Directory spellIndexDirectory, IndexReader origionalIndex) {
        this.defaultField = defaultField;
        this.spellIndexDirectory = spellIndexDirectory;
        this.origionalIndex = origionalIndex;
    }

    /**
     * Parses the query string as-is, with AND as the default operator.
     *
     * @param queryString the raw user query
     * @return the parsed query
     * @throws ParseException if the query string cannot be parsed
     */
    public Query parse(String queryString) throws ParseException {
        QueryParser queryParser =
                new QueryParser(Version.LUCENE_29, defaultField, new StandardAnalyzer(Version.LUCENE_29));
        queryParser.setDefaultOperator(QueryParser.Operator.AND);
        return queryParser.parse(queryString);
    }

    /**
     * Parses the query string, replacing every term the spell checker does
     * not know with its best suggestion.
     *
     * @param queryString the raw user query
     * @return the rewritten query, or {@code null} when no term was replaced
     * @throws ParseException if the query string cannot be parsed or the
     *                        spell index cannot be read
     */
    public Query suggest(String queryString) throws ParseException {
        QuerySuggester querySuggester =
                new QuerySuggester(defaultField, new StandardAnalyzer(Version.LUCENE_29));
        querySuggester.setDefaultOperator(QueryParser.Operator.AND);
        Query query = querySuggester.parse(queryString);
        if (querySuggester.hasSuggestedQuery()) {
            log.info("got a suggestion: " + query.toString());
            return query;
        }
        return null;
    }

    /**
     * Query parser that routes every analyzed term through the spell checker
     * and records whether at least one term was replaced by a suggestion.
     */
    private class QuerySuggester extends QueryParser {

        /** Set once any term has been swapped for a spell-checker suggestion. */
        private boolean suggestedQuery = false;

        /**
         * Created on first use and shared by all terms of the query, so a new
         * SpellChecker is not constructed for every single term.
         */
        private SpellChecker spellChecker;

        public QuerySuggester(String field, Analyzer analyzer) {
            super(Version.LUCENE_29, field, analyzer);
        }

        /**
         * Builds a term query (one token) or phrase query (several tokens)
         * from the analyzed text, passing each token through
         * {@link #getTerm(String, String)} so close matches can be substituted.
         * Derived from org.apache.lucene.queryParser.QueryParser.
         *
         * @return the query, or {@code null} when analysis yields no tokens
         */
        @Override
        protected Query getFieldQuery(String field, String queryText) throws ParseException {
            List<String> terms = analyze(field, queryText);
            if (terms.isEmpty()) {
                return null;
            }
            if (terms.size() == 1) {
                return new TermQuery(getTerm(field, terms.get(0)));
            }
            PhraseQuery phrase = new PhraseQuery();
            phrase.setSlop(getPhraseSlop());
            for (String term : terms) {
                phrase.add(getTerm(field, term));
            }
            return phrase;
        }

        /**
         * Runs the analyzer over the text and collects the resulting terms.
         * Tokenising is best-effort: an I/O failure ends the stream early and
         * the terms gathered so far are returned.
         */
        private List<String> analyze(String field, String queryText) {
            TokenStream source = getAnalyzer().tokenStream(field, new StringReader(queryText));
            List<String> terms = new ArrayList<String>();
            try {
                for (Token token = source.next(); token != null; token = source.next()) {
                    terms.add(token.term());
                }
            } catch (IOException e) {
                // Keep the original best-effort behaviour, but leave a trace.
                log.warn("Failed to read tokens for query text: " + queryText, e);
            } finally {
                try {
                    source.close();
                } catch (IOException e) {
                    log.debug("Failed to close token stream", e);
                }
            }
            return terms;
        }

        /**
         * Returns the term to search for: the original text when the spell
         * index contains it or offers no alternative, otherwise the best
         * suggestion (which also flags this parser as having a suggestion).
         *
         * @throws ParseException if the spell index cannot be read; the
         *                        underlying IOException is attached as cause
         */
        private Term getTerm(String field, String queryText) throws ParseException {
            log.info("getting similar terms for: " + queryText);
            try {
                if (spellChecker == null) {
                    spellChecker = new SpellChecker(spellIndexDirectory);
                }
                if (spellChecker.exist(queryText)) {
                    return new Term(field, queryText);
                }
                String[] similarWords;
                if (origionalIndex == null) {
                    similarWords = spellChecker.suggestSimilar(queryText, 1);
                } else {
                    // NOTE(review): restricts suggestions using defaultField rather than
                    // the field being queried - confirm this is intended.
                    similarWords = spellChecker.suggestSimilar(queryText, 1, origionalIndex, defaultField, true);
                }
                if (similarWords == null || similarWords.length == 0) {
                    log.info("no suggestions found");
                    return new Term(field, queryText);
                }
                suggestedQuery = true;
                log.info("got a suggestion!");
                return new Term(field, similarWords[0]);
            } catch (IOException e) {
                // ParseException has no (message, cause) constructor; attach the cause explicitly.
                ParseException parseException = new ParseException(e.getMessage());
                parseException.initCause(e);
                throw parseException;
            }
        }

        /** @return {@code true} if at least one term was replaced by a suggestion */
        public boolean hasSuggestedQuery() {
            return suggestedQuery;
        }
    }
}