package org.apache.lucene.spelt; /* * Copyright 2007 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; /** * Test the {@link LuceneIndexToDict} class. Performs the same tests as * {@link SpellReadWriteTest} but makes and converts a Lucene index instead of * making the dictionary directly. * * @author Martin Haye */ public class LuceneIndexToDictTest extends SpellReadWriteTest { /** Create the temporary spelling dictionary */ protected @Override void setUp() throws Exception { // Make the Lucene index using paragraphs from Call of the Wild. Directory luceneDir = new RAMDirectory(); IndexWriter luceneWriter = new IndexWriter(luceneDir, new StandardAnalyzer()); addParagraphDocs(luceneWriter); luceneWriter.close(); // Set up to create the spelling dictionary createDictDir("LuceneIndexToDictTest"); SpellWriter spellWriter = SpellWriter.open(dictDir); spellWriter.setStopwords(STOP_SET); spellWriter.setMinWordFreq(1); try { // Convert the Lucene index to a spelling dictionary. IndexReader luceneReader = IndexReader.open(luceneDir); LuceneIndexToDict.createDict(luceneReader, new MinimalAnalyzer(), spellWriter, null); // Ready to test. reader = SpellReader.open(dictDir); reader.setStopwords(STOP_SET); } finally { spellWriter.close(); } } /** * Divide the Call of the Wild text into paragraphs. Add them as various * fields in several documents to a Lucene index writer. * * @param luceneWriter destination for the new docs */ protected void addParagraphDocs(IndexWriter luceneWriter) throws IOException { // Divide the text into paragraphs. String[] paras = CALL_OF_THE_WILD.split("\n\n"); // Stick in some accented chars to test end-to-end accent preservation paras[0] = paras[0] + " europ\u00e4ische europ\u00e4ische europ\u00e4ische"; // Put the paragraphs into the Lucene index, splitting them up into a few // fields and documents. // Document doc = new Document(); for (int i = 0; i < paras.length; i++) { int nFields = doc.getFields().size(); doc.add(new Field("field" + nFields, paras[i], Field.Store.YES, Field.Index.TOKENIZED)); if (nFields + 1 == 3) { luceneWriter.addDocument(doc); doc = new Document(); } } if (doc.getFields().size() > 0) luceneWriter.addDocument(doc); } }