/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Oct 1, 2008
*/
package com.bigdata.search;
import java.io.StringReader;
import java.util.concurrent.TimeUnit;
import com.bigdata.rdf.lexicon.ITextIndexer.FullTextQuery;
/**
* Unit test for prefix and exact match searches. Prefix search allows a query
* "bro" to match "brown" rather than requiring an exact match on the search
* term(s). Exact match searches should only visit tuples which match the full
* length of the token (once encoded as a Unicode sort key).
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
public class TestPrefixSearch extends AbstractSearchTest {

    /** Language code used both when indexing and when querying. */
    private static final String LANGUAGE_CODE = "EN";

    /** Value passed as the <code>minCosine</code> argument of each fully specified query. */
    private static final double MIN_COSINE = .4;

    /** Value passed as the <code>maxCosine</code> argument of each fully specified query. */
    private static final double MAX_COSINE = 1.0d;

    /** Value passed as the <code>minRank</code> argument of each fully specified query. */
    private static final int MIN_RANK = 1;

    /** Value passed as the <code>maxRank</code> argument of each fully specified query (was 10000). */
    private static final int MAX_RANK = Integer.MAX_VALUE;

    /** Value passed as the <code>matchAllTerms</code> argument of each fully specified query. */
    private static final boolean MATCH_ALL_TERMS = false;

    /** Value passed as the <code>matchExact</code> argument of each fully specified query. */
    private static final boolean MATCH_EXACT = false;

    /** Query timeout, expressed in {@link #UNIT}. */
    private static final long TIMEOUT = Long.MAX_VALUE;

    /** Unit in which {@link #TIMEOUT} is expressed. */
    private static final TimeUnit UNIT = TimeUnit.MILLISECONDS;

    /** No regex is supplied to any of the queries. */
    private static final String REGEX = null;

    /**
     * Creates an unnamed test instance.
     */
    public TestPrefixSearch() {
    }

    /**
     * Creates a named test instance.
     *
     * @param name
     *            The test name, handed to the superclass constructor.
     */
    public TestPrefixSearch(String name) {
        super(name);
    }

    /**
     * Builds a fully specified {@link FullTextQuery} in which only the query
     * text and the prefix-match flag vary; every other argument comes from the
     * constants declared on this class.
     *
     * @param query
     *            The query text.
     * @param prefixMatch
     *            The value passed as the <code>prefixMatch</code> argument.
     *
     * @return The new query object.
     */
    private static FullTextQuery newFullTextQuery(final String query,
            final boolean prefixMatch) {

        return new FullTextQuery(query, LANGUAGE_CODE, prefixMatch, REGEX,
                MATCH_ALL_TERMS, MATCH_EXACT, MIN_COSINE, MAX_COSINE,
                MIN_RANK, MAX_RANK, TIMEOUT, UNIT);

    }

    /**
     * Indexes two small documents ("The quick brown dog" and "The slow brown
     * cow") and then checks the number of hits reported for a series of
     * queries issued with the prefix-match flag both enabled and disabled.
     *
     * @throws InterruptedException
     *             declared by this test; propagated to the test runner.
     */
    public void test_prefixSearch() throws InterruptedException {

        init();

        /*
         * Index document(s).
         */
        final long docId = 12L;
        final int fieldId = 3;
        {

            final TokenBuffer<Long> buffer = new TokenBuffer<Long>(2, getNdx());

            // index a document. ("The" is a stopword).
            getNdx().index(buffer, docId, fieldId, LANGUAGE_CODE,
                    new StringReader("The quick brown dog"));

            // index a document. ("The" is a stopword).
            getNdx().index(buffer, docId + 1, fieldId, LANGUAGE_CODE,
                    new StringReader("The slow brown cow"));

            buffer.flush();

        }

        /* Search (exact match on one document, partial match on the other) */
        {

            final Hiterator<?> itr = getNdx().search(
                    newFullTextQuery("The quick brown dog", false/* prefixMatch */));

            if (log.isInfoEnabled()) {
                log.info("hits:" + itr);
            }

            // Note: the count uses the three argument query constructor.
            assertEquals(2, getNdx().count(new FullTextQuery(
                    "The quick brown dog", LANGUAGE_CODE, false/* prefixMatch */)));

            assertTrue(itr.hasNext());

            final IHit<?> hit1 = itr.next();

            assertEquals(12L, hit1.getDocId());

            /*
             * Note: with cosine computation only the first hit is visited.
             */
            assertFalse(itr.hasNext());

        }

        /*
         * Search (prefix matches on one document, partial prefix matches on
         * the other)
         */
        {

            final Hiterator<?> itr = getNdx().search(
                    newFullTextQuery("The qui bro do", true/* prefixMatch */));

            if (log.isInfoEnabled()) {
                log.info("hits:" + itr);
            }

            // Note: the count uses the three argument query constructor.
            assertEquals(2, getNdx().count(new FullTextQuery(
                    "The qui bro do", LANGUAGE_CODE, true/* prefixMatch */)));

            assertTrue(itr.hasNext());

            final IHit<?> hit1 = itr.next();

            assertEquals(12L, hit1.getDocId());

            /*
             * Note: with cosine computation only the first hit is visited.
             */
            assertFalse(itr.hasNext());

        }

        /*
         * Search (one term, exact match on that term in both documents).
         */
        {

            final Hiterator<?> itr = getNdx().search(
                    newFullTextQuery("brown", false/* prefixMatch */));

            if (log.isInfoEnabled()) {
                log.info("hits:" + itr);
            }

            assertEquals(2, getNdx().count(
                    newFullTextQuery("brown", false/* prefixMatch */)));

        }

        /*
         * Search (one term, prefix match on that term in both documents
         * (the prefix match is an exact match in this case)).
         */
        {

            final Hiterator<?> itr = getNdx().search(
                    newFullTextQuery("brown", true/* prefixMatch */));

            if (log.isInfoEnabled()) {
                log.info("hits:" + itr);
            }

            assertEquals(2, getNdx().count(
                    newFullTextQuery("brown", true/* prefixMatch */)));

        }

        /*
         * Search (one term, prefix match on that term in both documents).
         */
        {

            final Hiterator<?> itr = getNdx().search(
                    newFullTextQuery("bro", true/* prefixMatch */));

            if (log.isInfoEnabled()) {
                log.info("hits:" + itr);
            }

            assertEquals(2, getNdx().count(
                    newFullTextQuery("bro", true/* prefixMatch */)));

        }

        /*
         * Search (one term, no exact match on that term).
         */
        {

            final Hiterator<?> itr = getNdx().search(
                    newFullTextQuery("bro", false/* prefixMatch */));

            if (log.isInfoEnabled()) {
                log.info("hits:" + itr);
            }

            assertEquals(0, itr.size());

        }

        /*
         * Search (one term, prefix match on that term in one document).
         */
        {

            final Hiterator<?> itr = getNdx().search(
                    newFullTextQuery("qui", true/* prefixMatch */));

            if (log.isInfoEnabled()) {
                log.info("hits:" + itr);
            }

            assertEquals(1, itr.size());

        }

        /*
         * Search (one term, no exact match on that term).
         */
        {

            final Hiterator<?> itr = getNdx().search(
                    newFullTextQuery("qui", false/* prefixMatch */));

            if (log.isInfoEnabled()) {
                log.info("hits:" + itr);
            }

            assertEquals(0, itr.size());

        }

        /*
         * Search (one term, exact match on that term in one document).
         */
        {

            final Hiterator<?> itr = getNdx().search(
                    newFullTextQuery("quick", false/* prefixMatch */));

            if (log.isInfoEnabled()) {
                log.info("hits:" + itr);
            }

            assertEquals(1, itr.size());

        }

    }

}