package uk.ac.shef.dcs.jate.app;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.JATEProperties;
import uk.ac.shef.dcs.jate.algorithm.CValue;
import uk.ac.shef.dcs.jate.feature.*;
import uk.ac.shef.dcs.jate.model.JATETerm;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Application that ranks candidate terms of a Solr index with the
 * {@link CValue} algorithm. It can be run from the command line via
 * {@link #main(String[])} or called programmatically through the
 * {@code extract} methods.
 */
public class AppCValue extends App {
    private static final Logger LOG = LoggerFactory.getLogger(AppCValue.class);

    /**
     * Command-line entry point. The last two arguments are always the solr
     * home path and the solr core name, in that order; anything before them is
     * parsed as optional run-time parameters.
     *
     * <p>The process exits with status 0 on success and status 1 when too few
     * arguments are given or when indexing/extraction fails.
     *
     * @param args
     *            command-line params accepting solr home path, solr core name
     *            and more optional run-time parameters
     * @see uk.ac.shef.dcs.jate.app.AppParams
     */
    public static void main(String[] args) {
        // Both the solr home path and the core name are mandatory, so at least
        // two arguments are required before indexing from the end of the array.
        if (args.length < 2) {
            printHelp();
            System.exit(1);
        }
        String solrHomePath = args[args.length - 2];
        String solrCoreName = args[args.length - 1];

        Map<String, String> params = getParams(args);
        String jatePropertyFile = getJATEProperties(params);
        String corpusDir = getCorpusDir(params);

        try {
            App app = new AppCValue(params);
            if (isCorpusProvided(corpusDir)) {
                app.index(Paths.get(corpusDir), Paths.get(solrHomePath), solrCoreName, jatePropertyFile);
            }

            List<JATETerm> terms = app.extract(solrHomePath, solrCoreName, jatePropertyFile);
            if (isExport(params)) {
                app.write(terms);
            }
            System.exit(0);
        } catch (IOException | JATEException e) {
            // Kept on stderr so command-line users see the failure even when no
            // logging backend is configured.
            e.printStackTrace();
            // Signal the failure to the calling shell/script instead of
            // terminating with a success status.
            System.exit(1);
        }
    }

    /**
     * @param initParams
     *            initial parameters including pre-filtering and post-filtering
     *            parameters
     * @throws JATEException
     *             if the parent {@link App} rejects the parameters
     * @see uk.ac.shef.dcs.jate.app.AppParams
     */
    public AppCValue(Map<String, String> initParams) throws JATEException {
        super(initParams);
    }

    /**
     * Loads the JATE properties from the given file and runs C-Value
     * extraction on the whole index of the given core.
     *
     * @param core
     *            solr core whose index is analysed
     * @param jatePropertyFile
     *            location of the JATE property file, resolved by
     *            {@code getJateProperties}
     * @return the terms produced by
     *         {@link #extract(SolrCore, JATEProperties)}
     * @throws IOException
     *             declared by the overridden method
     * @throws JATEException
     *             if the properties cannot be loaded or extraction fails
     */
    @Override
    public List<JATETerm> extract(SolrCore core, String jatePropertyFile) throws IOException, JATEException {
        LOG.info("Start CValue term ranking and filtering for whole index ...");
        JATEProperties properties = getJateProperties(jatePropertyFile);
        return extract(core, properties);
    }

    /**
     * Builds the frequency and containment features from the core's current
     * searcher, scores the candidate terms with {@link CValue}, and applies the
     * inherited TTF filter and cut-off steps.
     *
     * @param core
     *            solr core whose index is analysed
     * @param properties
     *            JATE configuration (solr field names etc.)
     * @return the scored terms after cut-off, with additional term info added
     * @throws JATEException
     *             if feature building or scoring fails
     */
    public List<JATETerm> extract(SolrCore core, JATEProperties properties) throws JATEException {
        // getSearcher() hands out a reference-counted handle. The searcher
        // itself is owned by the core, so it must be released with decref()
        // rather than closed directly.
        RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
        try {
            SolrIndexSearcher searcher = searcherRef.get();

            this.freqFeatureBuilder = new FrequencyTermBasedFBMaster(searcher, properties, 0);
            this.freqFeature = (FrequencyTermBased) freqFeatureBuilder.build();

            Set<String> uniqueCandidateTerms = freqFeature.getMapTerm2TTF().keySet();

            TermComponentIndexFBMaster termCompIndexFeatureBuilder = new TermComponentIndexFBMaster(properties,
                    new ArrayList<>(uniqueCandidateTerms));
            TermComponentIndex termComponentIndexFeature = (TermComponentIndex) termCompIndexFeatureBuilder.build();

            ContainmentFBMaster containmentBuilder = new ContainmentFBMaster(searcher, properties,
                    termComponentIndexFeature, uniqueCandidateTerms);
            Containment containmentFeature = (Containment) containmentBuilder.build();

            CValue cvalue = new CValue();
            cvalue.registerFeature(FrequencyTermBased.class.getName(), this.freqFeature);
            cvalue.registerFeature(Containment.class.getName(), containmentFeature);

            // Separate mutable copy: filterByTTF receives this list and must
            // not affect the key set used by the feature builders above.
            List<String> candidates = new ArrayList<>(uniqueCandidateTerms);
            filterByTTF(candidates);

            List<JATETerm> terms = cvalue.execute(candidates);
            terms = cutoff(terms);

            addAdditionalTermInfo(terms, searcher, properties.getSolrFieldNameJATENGramInfo(),
                    properties.getSolrFieldNameID());
            LOG.info("Complete CValue term extraction.");
            return terms;
        } finally {
            searcherRef.decref();
        }
    }
}