/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.destination.solryard.fst;
import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.opensextant.solrtexttagger.TaggerFstCorpus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Task that builds the FST corpus described by a {@link CorpusCreationInfo}
 * from the index of a {@link SolrCore} and stores it to the FST file
 * referenced by that info.
 * <p>
 * Runtime creation of FST corpora is done as a {@link Runnable} (the task
 * does not return a result, so it is not a {@link Callable}). This allows
 * users to control, by the configuration of the {@link ExecutorService} the
 * tasks are submitted to, how corpora are built (e.g. how many can be built
 * at a time).
 *
 * @author Rupert Westenthaler
 */
public class CorpusCreationTask implements Runnable {

    private static final Logger log = LoggerFactory.getLogger(CorpusCreationTask.class);

    /** Describes the corpus to build (fields, analyzer, target FST file). */
    final CorpusCreationInfo corpusInfo;
    /** The SolrCore whose index is read to build the corpus. */
    final SolrCore core;

    /**
     * Creates a task building the parsed corpus from the index of the parsed core.
     *
     * @param core the SolrCore providing the searcher used to read the index
     * @param corpus the information about the FST corpus to build
     */
    public CorpusCreationTask(SolrCore core, CorpusCreationInfo corpus) {
        this.core = core;
        this.corpusInfo = corpus;
    }

    /**
     * Builds the corpus and replaces the FST file with the newly built one.
     * An existing FST file is deleted even if the new corpus turns out to be
     * empty (in which case no new file is written).
     *
     * @throws IllegalStateException if reading the Solr index fails with an
     *         {@link IOException}
     */
    @Override
    public void run() {
        TaggerFstCorpus corpus = buildCorpus();
        storeCorpus(corpus);
    }

    /**
     * Builds the FST corpus from the current searcher of the core. The
     * searcher reference is released regardless of the outcome.
     */
    private TaggerFstCorpus buildCorpus() {
        RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
        try {
            SolrIndexSearcher searcher = searcherRef.get();
            // we do get the AtomicReader, because TaggerFstCorpus will need it
            // anyway. This avoids creating another SlowCompositeReaderWrapper.
            IndexReader reader = searcher.getAtomicReader();
            log.info(" ... build {}", corpusInfo);
            // NOTE(review): the trailing 1 and 200 are presumably the min/max
            // phrase length accepted by TaggerFstCorpus - confirm against its API
            return new TaggerFstCorpus(reader, searcher.getIndexReader().getVersion(),
                    null, corpusInfo.indexedField, corpusInfo.storedField, corpusInfo.analyzer,
                    corpusInfo.partialMatches, 1, 200);
        } catch (IOException e) {
            throw new IllegalStateException("Unable to read Information to build "
                    + corpusInfo + " from SolrIndex '" + core.getName() + "'!", e);
        } finally {
            searcherRef.decref(); // ensure that we dereference the searcher
        }
    }

    /**
     * Deletes an existing FST file and, if the corpus is not empty, saves the
     * corpus to that file. Failures are logged as warnings and not propagated.
     */
    private void storeCorpus(TaggerFstCorpus corpus) {
        if (corpusInfo.fst.exists() && !FileUtils.deleteQuietly(corpusInfo.fst)) {
            log.warn("Unable to delete existing FST file for {}", corpusInfo);
        }
        if (corpus.getPhrases() == null) {
            log.info("FST for {} is empty ... no FST will be stored", corpusInfo);
            return;
        }
        try { // NOTE saving an empty corpus results in a NPE (hence the check above)
            corpus.save(corpusInfo.fst);
        } catch (IOException e) {
            log.warn("Unable to store FST corpus " + corpusInfo + " to "
                    + corpusInfo.fst.getAbsolutePath() + "!", e);
            // a failed save may leave an incomplete file behind; remove it
            // (best effort) so that it is not mistaken for a valid FST later
            if (corpusInfo.fst.exists() && !FileUtils.deleteQuietly(corpusInfo.fst)) {
                log.warn("Unable to delete incomplete FST file {}",
                        corpusInfo.fst.getAbsolutePath());
            }
        }
    }

    @Override
    public String toString() {
        return new StringBuilder("Task: building ").append(corpusInfo)
                .append(" for SolrCore ").append(core.getName()).toString();
    }
}