/*
 * Copyright 2011-2013 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.kr.test;

import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.analysis.kr.KoreanAnalyzer;
import org.apache.lucene.analysis.kr.freq.HighFreqTerms;
import org.apache.lucene.analysis.kr.freq.TermFreq;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

/**
 * Exercises indexing and term-frequency lookups with {@link KoreanAnalyzer}
 * against a file-system index located at {@code .lucene/index} (resolved
 * relative to the working directory).
 */
@Slf4j
public class IndexingTest {

    /** Index directory shared by the writer and reader of a single test; opened in {@link #setUp()}. */
    private Directory directory;

    /**
     * Opens the file-system directory used by every test.
     *
     * @throws Exception if the directory cannot be opened
     */
    @Before
    public void setUp() throws Exception {
        directory = FSDirectory.open(new File(".lucene/index"));
    }

    /**
     * Closes the directory opened in {@link #setUp()} so it is not left open
     * after each test.
     *
     * @throws Exception if closing the directory fails
     */
    @After
    public void tearDown() throws Exception {
        // setUp() may have failed before assigning the field.
        if (directory != null) {
            directory.close();
            directory = null;
        }
    }

    /**
     * Creates a writer on {@link #directory} that analyzes text with
     * {@link KoreanAnalyzer} and has no field-length limit.
     *
     * @return a new writer; the caller is responsible for closing it
     * @throws IOException if the writer cannot be created
     */
    private IndexWriter getWriter() throws IOException {
        return new IndexWriter(directory,
                               new KoreanAnalyzer(),
                               IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Adds one document with {@code description}, {@code publisher} and
     * {@code title} fields (all stored and analyzed) to the index.
     *
     * @throws IOException if the document cannot be written
     */
    @Test
    public void testIndexWriter() throws IOException {
        // Kept as a single literal: a raw line break inside a string literal does not compile.
        String description =
                "Approved for entry into archive by p pant (momo31@gmail.com) on 2011-11-18T05:08:46Z (GMT) No. of bitstreams: 0";
        String publisher = "漢陽大學校";
        String title = "硏究開發費 會計에 關한 硏究";

        Document doc = new Document();
        doc.add(new Field("description", description, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("publisher", publisher, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));

        IndexWriter writer = getWriter();
        try {
            writer.addDocument(doc);
        } finally {
            // Close even when addDocument fails so the writer is not leaked.
            writer.close();
        }
    }

    /**
     * Opens a reader on {@link #directory}.
     *
     * @return a new reader; the caller is responsible for closing it
     * @throws IOException if the index cannot be opened
     */
    private IndexReader getReader() throws IOException {
        return IndexReader.open(directory);
    }

    /**
     * Writes the sample document, then logs the document frequency of the
     * term {@code entry} in the {@code description} field.
     *
     * @throws Exception if indexing or reading fails
     */
    @Test
    public void testIndexReader() throws Exception {
        testIndexWriter();

        try (IndexReader reader = getReader()) {
            int freq = reader.docFreq(new Term("description", "entry"));
            log.debug("freq=[{}]", freq);
        }
    }

    /**
     * Writes the sample document, then logs up to 100 high-frequency terms of
     * the {@code description} field.
     *
     * @throws Exception if indexing or reading fails
     */
    // The @Ignore message says: the index must be verified to exist before running this test.
    @Test
    @Ignore( "테스트 전에 인덱스가 만들어 졌는지 확인해야 합니다." )
    public void termHighFreqTerms() throws Exception {
        testIndexWriter();

        try (IndexReader reader = getReader()) {
            TermFreq[] termFreqs = HighFreqTerms.getHighFreqTerms(reader, 100, "description");
            for (TermFreq termFreq : termFreqs) {
                log.debug("term=[{}]", termFreq);
            }
        }
    }

    /**
     * Logs up to 100 high-frequency terms of the {@code text} field across
     * four index shards whose directories are named {@code <prefix>.0} to
     * {@code <prefix>.3}.
     *
     * @throws Exception if a shard cannot be opened or read
     */
    // The @Ignore message says: the index must be verified to exist before running this test.
    @Test
    @Ignore( "테스트 전에 인덱스가 만들어 졌는지 확인해야 합니다." )
    public void termHighFreqTermsWithSharding() throws Exception {
        final String prefix = "debop4j-search/.lucene/indexes/kr.debop4j.search.twitter.Twit";
        final int numShard = 4;

        IndexReader[] readers = new IndexReader[numShard];
        try {
            // Opened inside the try so that a failure on a later shard still
            // closes the readers that were already opened.
            for (int i = 0; i < numShard; i++) {
                readers[i] = IndexReader.open(FSDirectory.open(new File(prefix + "." + i)));
            }

            TermFreq[] termFreqs = HighFreqTerms.getHighFreqTerms(readers, 100, "text");
            for (TermFreq termFreq : termFreqs) {
                log.debug("term=[{}]", termFreq);
            }
        } finally {
            closeAll(readers);
        }
    }

    /**
     * Closes every non-null reader in the array, continuing past individual
     * failures.
     *
     * @param readers readers to close; entries may be {@code null}
     * @throws IOException the first close failure, with later ones attached as suppressed
     */
    private static void closeAll(IndexReader[] readers) throws IOException {
        IOException firstFailure = null;
        for (IndexReader reader : readers) {
            if (reader == null) {
                continue;
            }
            try {
                reader.close();
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    firstFailure.addSuppressed(e);
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}