/**
* Copyright (c) 2008--2015 Red Hat, Inc.
*
* This software is licensed to you under the GNU General Public License,
* version 2 (GPLv2). There is NO WARRANTY for this software, express or
* implied, including the implied warranties of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
* along with this software; if not, see
* http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
*
* Red Hat trademarks are not licensed under GPLv2. No permission is
* granted to use or replicate Red Hat trademarks that are incorporated
* in this software or its documentation.
*/
package com.redhat.satellite.search.index.ngram.tests;
import com.redhat.satellite.search.index.ngram.NGramAnalyzer;
import com.redhat.satellite.search.index.ngram.NGramQueryParser;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
/**
 * Tests for {@link NGramQueryParser}: how query strings are parsed into Lucene
 * queries, and how many hits those queries produce against the n-gram index
 * referenced by {@code ngramDir}.
 * <p>
 * {@code min_ngram}, {@code max_ngram}, {@code ngramDir}, {@code displayHits}
 * and {@code thresholdHits} are not declared in this class; they are expected
 * to be supplied by {@link NGramTestSetup}.
 */
public class NGramQueryParserTest extends NGramTestSetup {

    private static final Logger log = Logger.getLogger(NGramQueryParserTest.class);

    /** Field used by the parsers when a query term does not name a field. */
    private static final String DEFAULT_FIELD = "name";

    public NGramQueryParserTest() {
        super();
    }

    /**
     * A single bare term must be expanded into one clause per n-gram of that
     * term, all on the default field.
     */
    public void testBasicQueryParse() throws Exception {
        NGramQueryParser parser = newParser();
        Query q = parser.parse("spell");
        log.info("testBasicQueryParse() query = " + q.toString());
        // assertEquals reports both strings on failure, unlike compareTo() == 0.
        assertEquals("name:s name:p name:e name:l name:l " +
                "name:sp name:pe name:el name:ll name:spe name:pel name:ell " +
                "name:spel name:pell name:spell", q.toString());
    }

    /**
     * We want to make sure that when searching for multiple terms, each term
     * will become its own BooleanQuery.
     */
    public void testMultiPhraseQueryParse() throws Exception {
        NGramQueryParser parser = newParser();
        Query q = parser.parse("spell* virt manager");
        assertTrue("expected a BooleanQuery but got: " + q, q instanceof BooleanQuery);
        BooleanQuery bq = (BooleanQuery) q;
        assertEquals("one clause per search term", 3, bq.getClauses().length);
    }

    /**
     * A trailing wildcard term must be kept as a wildcard query rather than
     * being split into n-grams.
     */
    public void testWildcardQueryParse() throws Exception {
        NGramQueryParser parser = newParser();
        Query q = parser.parse("spell*");
        log.info("Wildcard query = " + q.toString());
        assertEquals("name:spell*", q.toString());
    }

    /**
     * Smoke test: parses a query containing the special characters
     * {@code *}, {@code -} and {@code +} with both the n-gram parser and the
     * stock Lucene parser and logs the results. It only fails if parsing
     * throws; the parsed output is not asserted.
     */
    public void testQueryParseWithSpecialChars() throws Exception {
        String queryString = "spell* virt- manager+";
        log.info("testQueryParserWithSpecialChars(): query string is: " + queryString);

        NGramQueryParser parser = newParser();
        Query q = parser.parse(queryString);
        log.info("Using NGramQueryParser query = " + q.toString());

        QueryParser origParser = new QueryParser(DEFAULT_FIELD, new StandardAnalyzer());
        q = origParser.parse(queryString);
        log.info("Using QueryParser query = " + q.toString());
    }

    /**
     * Parses {@code query} with an {@link NGramQueryParser} on the default
     * field, runs it against {@code dir} and logs the outcome.
     *
     * @param dir     index directory to search
     * @param query   raw query string
     * @param useMust passed straight through to the {@link NGramQueryParser}
     *                constructor
     * @return the hits produced by the parsed query
     * @throws Exception if parsing or searching fails
     */
    public Hits performSearch(Directory dir, String query, boolean useMust)
            throws Exception {
        NGramQueryParser parser = new NGramQueryParser(DEFAULT_FIELD,
                new NGramAnalyzer(min_ngram, max_ngram), useMust);
        // NOTE(review): the searcher is not closed here. Callers read documents
        // from the returned Hits afterwards (e.g. hits.doc(0)), which presumably
        // needs the searcher to stay open - confirm before adding a close().
        IndexSearcher searcher = new IndexSearcher(dir);
        Query q = parser.parse(query);
        Hits hits = searcher.search(q);

        log.info("Original Query = " + query);
        log.info("Parsed Query = " + q.toString());
        log.info("Hits.length() = " + hits.length());

        // Only fetch and stringify every matching document when the output
        // will actually be logged.
        if (log.isDebugEnabled()) {
            for (int i = 0; i < hits.length(); i++) {
                log.debug("Document<" + hits.id(i) + "> = " + hits.doc(i));
            }
        }
        return hits;
    }

    /**
     * Smoke test: parses a free-form query that names fields explicitly and
     * uses a prohibited ({@code -}) clause, with both the n-gram parser and the
     * stock Lucene parser, and logs the results. The parsed output is not
     * asserted.
     */
    public void testFreeFormQueryParse() throws Exception {
        String queryString = "name:spell -description:another";
        log.info("Original query: " + queryString);

        NGramQueryParser parser = new NGramQueryParser(DEFAULT_FIELD,
                new NGramAnalyzer(min_ngram, max_ngram), true);
        Query q = parser.parse(queryString);
        log.info("NGramQueryParser parsed query: " + q.toString());

        QueryParser origParser = new QueryParser(DEFAULT_FIELD, new StandardAnalyzer());
        q = origParser.parse(queryString);
        log.info("QueryParser parsed query = " + q.toString());
    }

    /**
     * Runs free-form, field-qualified queries (with {@code useMust} enabled)
     * and checks the total number of hits for each.
     */
    public void testFreeFormSearch() throws Exception {
        boolean useMust = true;
        String query;

        // Grab all packages with name "spell" AND
        // description does NOT contain "another"
        query = "name:spell -description:another";
        assertTotalHits(query, 2, searchAndDisplay(query, useMust));

        // Grab all packages with name "virt" AND
        // description MUST have "factory" in it
        query = "name:virt +description:factory";
        assertTotalHits(query, 2, searchAndDisplay(query, useMust));

        // Grab all packages with name "virt"
        query = "name:virt description:factory";
        assertTotalHits(query, 4, searchAndDisplay(query, useMust));

        query = "name:virt OR description:factory";
        assertTotalHits(query, 4, searchAndDisplay(query, useMust));

        query = "name:virt AND description:factory";
        assertTotalHits(query, 1, searchAndDisplay(query, useMust));

        query = "name:virt -description:factory";
        assertTotalHits(query, 2, searchAndDisplay(query, useMust));
    }

    /**
     * Runs single-term searches (with {@code useMust} disabled) and checks,
     * for each, both the count reported by {@code thresholdHits} and the total
     * number of hits.
     */
    public void testBasicSearch() throws Exception {
        boolean useMust = false;
        Hits hits;
        String query;

        query = "spell";
        hits = searchAndDisplay(query, useMust);
        assertThresholdHits(query, 5, hits);
        assertTotalHits(query, 16, hits);

        query = "aspelll";
        hits = searchAndDisplay(query, useMust);
        assertThresholdHits(query, 4, hits);
        assertTotalHits(query, 17, hits);

        query = "aspell";
        hits = searchAndDisplay(query, useMust);
        assertThresholdHits(query, 4, hits);
        assertTotalHits(query, 17, hits);

        query = "pel";
        hits = searchAndDisplay(query, useMust);
        assertThresholdHits(query, 8, hits);
        assertTotalHits(query, 16, hits);

        query = "gtk";
        hits = searchAndDisplay(query, useMust);
        assertThresholdHits(query, 7, hits);
        assertTotalHits(query, 17, hits);

        // We want a search for kernel-hugemem to return kernel-hugemem as top hit
        // but currently, kernel-hugemem-devel is matching instead. This test
        // is a placeholder as we explore ways to fix this.
        query = "((name:kernel-hugemem)^2 (description:kernel-hugemem) " +
                "(filename:kernel-hugemem))";
        hits = searchAndDisplay(query, useMust);
        assertThresholdHits(query, 3, hits);
        assertTotalHits(query, 20, hits);
        String firstHitName = hits.doc(0).get("name");
        assertTrue("unexpected top hit: " + firstHitName,
                "kernel-hugemem-devel".equalsIgnoreCase(firstHitName));
    }

    /** Builds a parser on the default field using the configured n-gram sizes. */
    private NGramQueryParser newParser() {
        return new NGramQueryParser(DEFAULT_FIELD, new NGramAnalyzer(min_ngram, max_ngram));
    }

    /** Searches the n-gram index for {@code query} and displays the hits. */
    private Hits searchAndDisplay(String query, boolean useMust) throws Exception {
        Hits hits = performSearch(this.ngramDir, query, useMust);
        displayHits(hits);
        return hits;
    }

    /** Asserts the total hit count, naming the query in the failure message. */
    private void assertTotalHits(String query, int expected, Hits hits) {
        assertEquals("total hits for query <" + query + ">", expected, hits.length());
    }

    /**
     * Asserts the value reported by {@code thresholdHits}, naming the query in
     * the failure message.
     */
    private void assertThresholdHits(String query, int expected, Hits hits)
            throws Exception {
        // Kept as a boolean comparison because the return type of
        // thresholdHits is not visible from this class.
        assertTrue("expected " + expected + " threshold hits for query <" + query + ">",
                thresholdHits(hits) == expected);
    }
}