/**
 * Copyright (c) 2008--2015 Red Hat, Inc.
 *
 * This software is licensed to you under the GNU General Public License,
 * version 2 (GPLv2). There is NO WARRANTY for this software, express or
 * implied, including the implied warranties of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
 * along with this software; if not, see
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
 *
 * Red Hat trademarks are not licensed under GPLv2. No permission is
 * granted to use or replicate Red Hat trademarks that are incorporated
 * in this software or its documentation.
 */
package com.redhat.satellite.search.index.ngram.tests;

import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import com.redhat.satellite.search.index.ngram.NGramAnalyzer;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import junit.framework.TestCase;

/**
 * Base fixture for n-gram search tests.
 *
 * Before each test, {@link #setUp()} indexes a fixed list of sample items
 * into two in-memory indexes: one written with {@link StandardAnalyzer} and
 * one written with {@link NGramAnalyzer}. Helper methods are provided to run
 * a query against either index, count hits above a score threshold, and log
 * the hits.
 */
public class NGramTestSetup extends TestCase {

    private static final Logger log = Logger.getLogger(NGramTestSetup.class);

    /** In-memory index written with {@link NGramAnalyzer}. */
    protected RAMDirectory ngramDir;

    /** In-memory index written with {@link StandardAnalyzer}. */
    protected RAMDirectory stanDir;

    /** Minimum score a hit must have to be counted by {@link #thresholdHits(Hits)}. */
    protected double score_threshold = .10;

    /** First argument passed to the {@link NGramAnalyzer} constructor. */
    protected int min_ngram = 1;

    /** Second argument passed to the {@link NGramAnalyzer} constructor. */
    protected int max_ngram = 5;

    /** Sample items to index; each map holds "name", "description" and "filename". */
    protected List<Map<String, String>> items =
            new LinkedList<Map<String, String>>();

    public NGramTestSetup() {
        super();
    }

    /**
     * Appends one sample item to {@link #items}.
     *
     * @param name value stored under the "name" key
     * @param description value stored under the "description" key
     * @param filename value stored under the "filename" key
     */
    protected void addItem(String name, String description, String filename) {
        Map<String, String> item = new HashMap<String, String>();
        item.put("name", name);
        item.put("description", description);
        item.put("filename", filename);
        items.add(item);
    }

    /**
     * Populates {@link #items} with the default sample data set.
     *
     * Items are appended, not replaced, so calling this more than once on the
     * same instance produces duplicates.
     */
    protected void initItems() {
        addItem("spellingbee", "spelling application", "spellingbee-1.0.rpm");
        addItem("aspelling", "another spelling program alternate spell",
                "aspelling-1.0.rpm");
        addItem("aspell", "another spelling program", "aspell-0.3.4.rpm");
        addItem("spell", "spelling program", "spell-4943.rpm");
        addItem("slelp", "application with spelling error", "slelp1-43.rpm");
        addItem("libvirt", "virtualization library", "virt-pkg-1.rpm");
        addItem("virt-manager", "blah blah application", "virt-pkg-1.rpm");
        addItem("virtualfactory", "virtual factory something",
                "virtfact-04.rpm");
        addItem("newFactory", "factory test application",
                "newFactory-1.9.rpm");
        addItem("gtk+-devel", "development library for gtk",
                "gtk+-devel-10.rpm");
        addItem("gtk+", "runtime library", "gtk+-30.rpm");
        addItem("gtk-doc", "documentation for gtk", "gtk-doc-393.rpm");
        addItem("authconfig-gtk", "authentication related gtk",
                "authconfig-gtk-039.rpm");
        addItem("mtr-gtk", "blah blah mtr gtk", "mtr-gtk-039.rpm");
        addItem("ghostscript-gtk", "printting support application gtk",
                "ghostscript-gtk-30.rpm");
        addItem("gnome-bluetooth-libs", "library for bluetooth support",
                "gnome-bluetooth-libs-3.4.rpm");
        addItem("scim-bridge-gtk", "blah blah scim gtk",
                "scim-bridge-gtk-494.rpm");
        addItem("kernel", "linux kernel package", "kernel-094.rpm");
        addItem("kernel-hugemem", "This package includes an SMP version of " +
                "the Linux kernel which supports systems with 16 Gigabytes " +
                "of memory or more.", "kernel-hugemem-2.6.9-84.EL.i686");
        addItem("kernel-hugemem-devel", "This package provides kernel " +
                "headers +and makefiles sufficient to build modules against " +
                "the hugemem kernel package.",
                "kernel-hugemem-devel-2.6.9-84.EL.i686");
    }

    /**
     * Creates the two indexes in RAM and fills them with the sample items.
     *
     * Each item becomes one document with stored, tokenized "name" and
     * "description" fields; the same document is added to both indexes.
     * The "filename" value is kept in {@link #items} but is not indexed.
     *
     * @throws Exception if the indexes cannot be created or written
     */
    @Override
    public void setUp() throws Exception {
        super.setUp();
        initItems();

        this.stanDir = new RAMDirectory();
        this.ngramDir = new RAMDirectory();

        // Nested try/finally so both writers are closed even when opening
        // the second writer or adding a document fails.
        IndexWriter stanWriter =
                new IndexWriter(this.stanDir, new StandardAnalyzer(), true);
        try {
            IndexWriter ngramWriter = new IndexWriter(this.ngramDir,
                    new NGramAnalyzer(min_ngram, max_ngram), true);
            try {
                for (Map<String, String> item : items) {
                    String name = item.get("name");
                    String descp = item.get("description");
                    Document doc = new Document();
                    doc.add(new Field("name", name,
                            Field.Store.YES, Field.Index.TOKENIZED));
                    doc.add(new Field("description", descp,
                            Field.Store.YES, Field.Index.TOKENIZED));
                    stanWriter.addDocument(doc);
                    ngramWriter.addDocument(doc);
                }
            }
            finally {
                ngramWriter.close();
            }
        }
        finally {
            stanWriter.close();
        }
    }

    /**
     * Parses {@code query} with "name" as the default field and runs it
     * against the given index.
     *
     * NOTE(review): the searcher opened here is never closed. This looks
     * deliberate, since the returned {@link Hits} is expected to fetch
     * documents through the searcher on demand -- confirm against the
     * Lucene version in use before adding a close().
     *
     * @param dir index to search
     * @param alyz analyzer used to parse the query text
     * @param query query string in QueryParser syntax
     * @return the hits produced by the search
     * @throws Exception if the query cannot be parsed or the search fails
     */
    public Hits performSearch(Directory dir, Analyzer alyz, String query)
        throws Exception {
        QueryParser parser = new QueryParser("name", alyz);
        IndexSearcher searcher = new IndexSearcher(dir);
        Query q = parser.parse(query);
        return searcher.search(q);
    }

    /**
     * Counts the leading hits whose score is at least
     * {@link #score_threshold}, stopping at the first hit below it.
     *
     * The early exit presumes hits are ordered by descending score.
     *
     * @param hits search result to inspect
     * @return number of consecutive hits, from the first, that meet the threshold
     * @throws IOException if a score cannot be read
     */
    protected int thresholdHits(Hits hits) throws IOException {
        // We could consider doing thresholding as a relative thing:
        // instead of checking against an absolute value, grab the top score
        // and filter based on the difference from that.
        int counter = 0;
        for (int i = 0; i < hits.length(); i++) {
            if (hits.score(i) < score_threshold) {
                break;
            }
            counter++;
        }
        return counter;
    }

    /**
     * Logs every hit at INFO level with its position, score, name and
     * description.
     *
     * @param hits search result to log
     * @throws IOException if a document or score cannot be read
     */
    protected void displayHits(Hits hits) throws IOException {
        for (int i = 0; i < hits.length(); i++) {
            Document doc = hits.doc(i);
            String name = doc.get("name");
            String description = doc.get("description");
            log.info("Hit<" + i + "> Score< " + hits.score(i) + "> name = <" +
                    name + "> description = <" + description + ">");
        }
    }
}