/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.concordance.windowvisitor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.corpus.stats.IDFIndexCalc;
import org.apache.lucene.corpus.stats.TermIDF;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.concordance.ConcordanceTestBase;
import org.apache.lucene.search.concordance.classic.impl.IndexIdDocIdBuilder;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Tests for {@link ConcordanceArrayWindowSearcher} driven by a {@link CooccurVisitor}.
 *
 * <p>Each test indexes a handful of tiny documents, runs a span query for a target,
 * and checks the per-term frequencies the visitor reports for the terms that
 * co-occur with the target inside the configured window.
 *
 * <p>Readers and directories are closed in {@code finally} blocks so that a failing
 * assertion does not leave them open.
 */
public class TestConcordanceArrayWindowSearcher extends ConcordanceTestBase {

  @BeforeClass
  public static void beforeClass() throws Exception {
    // NOOP for now
  }

  @AfterClass
  public static void afterClass() throws Exception {
    // NOOP for now
  }

  /**
   * Two single-valued documents, no stop words: checks co-occurrence counts around
   * the term "d" for a wide window and for a window of one token.
   */
  @Test
  public void testSimple() throws Exception {
    String[] docs = new String[]{"a b c d e f", "b c d g f e"};
    // NOTE(review): the meaning of the two numeric arguments is defined by
    // ConcordanceTestBase.getAnalyzer, which is not visible here.
    Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET, 50, 100);
    Directory directory = getDirectory(analyzer, docs);
    try {
      IndexReader reader = DirectoryReader.open(directory);
      try {
        assertWideAndNarrowWindowsAroundD(reader, analyzer);
      } finally {
        reader.close();
      }
    } finally {
      directory.close();
    }
  }

  /**
   * Same shape as {@link #testSimple()} but analyzed with the English stop set, so
   * stop words ("a", "the") must not show up among the co-occurring terms.
   */
  @Test
  public void testWithStops() throws Exception {
    String[] docs = new String[]{"a b the d the f", "b c the d the e"};
    Analyzer analyzer = getAnalyzer(MockTokenFilter.ENGLISH_STOPSET, 50, 100);
    Directory directory = getDirectory(analyzer, docs);
    try {
      IndexReader reader = DirectoryReader.open(directory);
      try {
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        IDFIndexCalc idfer = new IDFIndexCalc(reader);
        SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

        Map<String, Integer> truth = new HashMap<String, Integer>();
        truth.put("b", 2);
        truth.put("c", 1);
        truth.put("e", 1);
        truth.put("f", 1);
        assertTermFreqsMatch(truth,
            collectCooccurrences(indexSearcher, analyzer, idfer, q, 10));
      } finally {
        reader.close();
      }
    } finally {
      directory.close();
    }
  }

  /**
   * Same content and expectations as {@link #testSimple()}, but the two strings are
   * indexed as the values of one multi-valued field in a single document.
   */
  @Test
  public void testSimpleMultiValuedField() throws Exception {
    String[] vals = new String[]{"a b c d e f", "b c d g f e"};
    List<String[]> docs = new ArrayList<String[]>();
    docs.add(vals);
    Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET, 50, 100);
    Directory directory = getDirectory(analyzer, docs);
    try {
      IndexReader reader = DirectoryReader.open(directory);
      try {
        assertWideAndNarrowWindowsAroundD(reader, analyzer);
      } finally {
        reader.close();
      }
    } finally {
      directory.close();
    }
  }

  /**
   * Test handling of a target spread out over several indices in a multivalued
   * field array: the ordered span "the clockwork orange" crosses three field values.
   */
  @Test
  public void testClockworkOrangeMultiValuedFieldProblem() throws Exception {
    String[] doc = new String[]{"a b c the", "clockwork", "orange d e f "};
    List<String[]> docs = new ArrayList<String[]>();
    docs.add(doc);
    Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET, 0, 0);
    Directory directory = getDirectory(analyzer, docs);
    try {
      IndexReader reader = DirectoryReader.open(directory);
      try {
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        IDFIndexCalc idfer = new IDFIndexCalc(reader);

        SpanQuery q1 = new SpanTermQuery(new Term(FIELD, "the"));
        SpanQuery q2 = new SpanTermQuery(new Term(FIELD, "clockwork"));
        SpanQuery q3 = new SpanTermQuery(new Term(FIELD, "orange"));
        SpanQuery q = new SpanNearQuery(new SpanQuery[]{q1, q2, q3}, 3, true);

        // Only the terms surrounding the target are expected; the target's own
        // terms are not part of the expected results.
        Map<String, Integer> truth = new HashMap<String, Integer>();
        truth.put("a", 1);
        truth.put("b", 1);
        truth.put("c", 1);
        truth.put("d", 1);
        truth.put("e", 1);
        truth.put("f", 1);
        assertTermFreqsMatch(truth,
            collectCooccurrences(indexSearcher, analyzer, idfer, q, 10));
      } finally {
        reader.close();
      }
    } finally {
      directory.close();
    }
  }

  //TODO: add tests for ignore duplicates, TargetVisitor

  /**
   * Shared assertions for the "a b c d e f" / "b c d g f e" corpus: searches for
   * the term "d" with a window of 10 and then with a window of 1, and checks the
   * co-occurring term frequencies for each.
   *
   * @param reader open reader over the test index; the caller remains responsible
   *               for closing it
   * @param analyzer analyzer that was used to build the index
   */
  private void assertWideAndNarrowWindowsAroundD(IndexReader reader, Analyzer analyzer)
      throws Exception {
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    // One IDF calculator is shared by both searches.
    IDFIndexCalc idfCalc = new IDFIndexCalc(reader);
    SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

    // Window of 10: every other term of both strings falls inside the window.
    Map<String, Integer> wideTruth = new HashMap<String, Integer>();
    wideTruth.put("a", 1);
    wideTruth.put("g", 1);
    wideTruth.put("b", 2);
    wideTruth.put("c", 2);
    wideTruth.put("e", 2);
    wideTruth.put("f", 2);
    assertTermFreqsMatch(wideTruth,
        collectCooccurrences(indexSearcher, analyzer, idfCalc, q, 10));

    // Window of 1: only the immediate neighbours of "d" are counted.
    Map<String, Integer> narrowTruth = new HashMap<String, Integer>();
    narrowTruth.put("c", 2);
    narrowTruth.put("e", 1);
    narrowTruth.put("g", 1);
    assertTermFreqsMatch(narrowTruth,
        collectCooccurrences(indexSearcher, analyzer, idfCalc, q, 1));
  }

  /**
   * Runs {@code query} through a fresh {@link ConcordanceArrayWindowSearcher} with a
   * fresh {@link CooccurVisitor} (unigrams, minimum term frequency 0) and returns
   * the visitor's results.
   *
   * @param window value passed as both window-size arguments of the visitor
   *               constructor
   */
  private List<TermIDF> collectCooccurrences(IndexSearcher indexSearcher, Analyzer analyzer,
      IDFIndexCalc idfCalc, SpanQuery query, int window) throws Exception {
    // NOTE(review): the trailing "100, true" arguments are copied from the original
    // tests; their meaning is defined by CooccurVisitor, which is not visible here.
    CooccurVisitor visitor = new CooccurVisitor(
        FIELD, window, window, new WGrammer(1, 1, false), idfCalc, 100, true);
    visitor.setMinTermFreq(0);
    ConcordanceArrayWindowSearcher searcher = new ConcordanceArrayWindowSearcher();
    searcher.search(indexSearcher, FIELD, query, null, analyzer, visitor,
        new IndexIdDocIdBuilder());
    return visitor.getResults();
  }

  /**
   * Asserts that {@code results} has as many entries as {@code truth} and that each
   * reported term has the expected term frequency.
   *
   * <p>A term that is missing from {@code truth} fails with a message naming the
   * term rather than with a {@link NullPointerException} from unboxing.
   */
  private void assertTermFreqsMatch(Map<String, Integer> truth, List<TermIDF> results) {
    assertEquals(truth.size(), results.size());
    for (TermIDF r : results) {
      Integer expected = truth.get(r.getTerm());
      if (expected == null) {
        throw new AssertionError("unexpected co-occurring term: " + r.getTerm());
      }
      assertEquals(r.getTerm(), expected.intValue(), r.getTermFreq());
    }
  }
}