package org.apache.lucene.xmlparser; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Version; import org.apache.lucene.util.LuceneTestCase; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ public class TestParser extends LuceneTestCase { CoreParser builder; static Directory dir; // TODO: rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT): Analyzer analyzer=new MockAnalyzer(MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false); IndexReader reader; private IndexSearcher searcher; /* * @see TestCase#setUp() */ @Override protected void setUp() throws Exception { super.setUp(); //initialize the parser builder=new CorePlusExtensionsParser("contents",analyzer); Random random = newRandom(); BufferedReader d = new BufferedReader(new InputStreamReader(TestParser.class.getResourceAsStream("reuters21578.txt"))); dir=newDirectory(random); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random, Version.LUCENE_24, analyzer)); String line = d.readLine(); while(line!=null) { int endOfDate=line.indexOf('\t'); String date=line.substring(0,endOfDate).trim(); String content=line.substring(endOfDate).trim(); org.apache.lucene.document.Document doc =new org.apache.lucene.document.Document(); doc.add(new Field("date",date,Field.Store.YES,Field.Index.ANALYZED)); doc.add(new Field("contents",content,Field.Store.YES,Field.Index.ANALYZED)); NumericField numericField = new NumericField("date2"); numericField.setIntValue(Integer.valueOf(date)); doc.add(numericField); writer.addDocument(doc); line=d.readLine(); } d.close(); writer.close(); reader=IndexReader.open(dir, true); searcher=new IndexSearcher(reader); } @Override protected void tearDown() throws Exception { reader.close(); searcher.close(); dir.close(); super.tearDown(); } public void testSimpleXML() throws ParserException, IOException { Query q=parse("TermQuery.xml"); dumpResults("TermQuery", q, 5); } public void testSimpleTermsQueryXML() throws ParserException, IOException { Query q=parse("TermsQuery.xml"); dumpResults("TermsQuery", q, 5); } public void testBooleanQueryXML() throws ParserException, IOException { Query q=parse("BooleanQuery.xml"); dumpResults("BooleanQuery", q, 5); } public void testRangeFilterQueryXML() throws ParserException, IOException { Query q=parse("RangeFilterQuery.xml"); dumpResults("RangeFilter", q, 5); } public void testUserQueryXML() throws ParserException, IOException { Query q=parse("UserInputQuery.xml"); dumpResults("UserInput with Filter", q, 5); } public void testCustomFieldUserQueryXML() throws ParserException, IOException { Query q=parse("UserInputQueryCustomField.xml"); int h = searcher.search(q, null, 1000).totalHits; assertEquals("UserInputQueryCustomField should produce 0 result ", 0,h); } public void testLikeThisQueryXML() throws Exception { Query q=parse("LikeThisQuery.xml"); dumpResults("like this", q, 5); } public void testBoostingQueryXML() throws Exception { Query q=parse("BoostingQuery.xml"); dumpResults("boosting ",q, 5); } public void testFuzzyLikeThisQueryXML() throws Exception { Query q=parse("FuzzyLikeThisQuery.xml"); //show rewritten fuzzyLikeThisQuery - see what is being matched on if(VERBOSE) { System.out.println(q.rewrite(reader)); } dumpResults("FuzzyLikeThis", q, 5); } public void testTermsFilterXML() throws Exception { Query q=parse("TermsFilterQuery.xml"); dumpResults("Terms Filter",q, 5); } public void testBoostingTermQueryXML() throws Exception { Query q=parse("BoostingTermQuery.xml"); dumpResults("BoostingTermQuery",q, 5); } public void testSpanTermXML() throws Exception { Query q=parse("SpanQuery.xml"); dumpResults("Span Query",q, 5); } public void testConstantScoreQueryXML() throws Exception { Query q=parse("ConstantScoreQuery.xml"); dumpResults("ConstantScoreQuery",q, 5); } public void testMatchAllDocsPlusFilterXML() throws ParserException, IOException { Query q=parse("MatchAllDocsQuery.xml"); dumpResults("MatchAllDocsQuery with range filter", q, 5); } public void testBooleanFilterXML() throws ParserException, IOException { Query q=parse("BooleanFilter.xml"); dumpResults("Boolean filter", q, 5); } public void testNestedBooleanQuery() throws ParserException, IOException { Query q=parse("NestedBooleanQuery.xml"); dumpResults("Nested Boolean query", q, 5); } public void testCachedFilterXML() throws ParserException, IOException { Query q=parse("CachedFilter.xml"); dumpResults("Cached filter", q, 5); } public void testDuplicateFilterQueryXML() throws ParserException, IOException { Query q=parse("DuplicateFilterQuery.xml"); int h = searcher.search(q, null, 1000).totalHits; assertEquals("DuplicateFilterQuery should produce 1 result ", 1,h); } public void testNumericRangeFilterQueryXML() throws ParserException, IOException { Query q=parse("NumericRangeFilterQuery.xml"); dumpResults("NumericRangeFilter", q, 5); } public void testNumericRangeQueryQueryXML() throws ParserException, IOException { Query q=parse("NumericRangeQueryQuery.xml"); dumpResults("NumericRangeQuery", q, 5); } //================= Helper methods =================================== private Query parse(String xmlFileName) throws ParserException, IOException { InputStream xmlStream=TestParser.class.getResourceAsStream(xmlFileName); Query result=builder.parse(xmlStream); xmlStream.close(); return result; } private void dumpResults(String qType,Query q, int numDocs) throws IOException { TopDocs hits = searcher.search(q, null, numDocs); assertTrue(qType +" should produce results ", hits.totalHits>0); if(VERBOSE) { System.out.println("========="+qType+"============"); ScoreDoc[] scoreDocs = hits.scoreDocs; for(int i=0;i<Math.min(numDocs,hits.totalHits);i++) { org.apache.lucene.document.Document ldoc=searcher.doc(scoreDocs[i].doc); System.out.println("["+ldoc.get("date")+"]"+ldoc.get("contents")); } System.out.println(); } } }