package uk.ac.shef.dcs.jate.app;

import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.common.SolrException;
import org.junit.Assert;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.JATEProperties;
import uk.ac.shef.dcs.jate.model.JATETerm;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Driver that indexes the corpus (when required) and then runs and evaluates each
 * term-ranking algorithm test defined in this file, one after another.
 * <p>
 * To run on external/remote Solr server, it needs jate-2.0Alpha-SNAPSHOT-jar-with-dependencies.jar
 * and lib\dragontool.jar
 */
public class AppATEACLRDTECTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppATEACLRDTECTest.class.getName());

    /**
     * Creates the driver and delegates set-up to {@code initialise}.
     *
     * @param solrHomeDir  Solr home directory, passed through to {@code initialise}
     * @param solrCoreName Solr core name, passed through to {@code initialise}
     * @throws JATEException propagated from {@code initialise}
     * @throws IOException   propagated from {@code initialise}
     */
    public AppATEACLRDTECTest(String solrHomeDir, String solrCoreName) throws JATEException, IOException {
        initialise(solrHomeDir, solrCoreName);
    }

    /**
     * The driver itself ranks nothing; it always returns a new, empty, mutable list.
     *
     * @param server       unused
     * @param solrCoreName unused
     * @param jateProp     unused
     * @return an empty list
     * @throws JATEException never thrown by this implementation
     */
    @Override
    public List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        return new ArrayList<>();
    }

    /**
     * To run the test class via
     * <p>
     * mvn exec:java -Dexec.mainClass="uk.ac.shef.dcs.jate.app.AppATEACLRDTECTest" -Dexec.classpathScope="test"
     * <p>
     * Any exception raised while indexing, ranking or evaluating is printed and the JVM still
     * exits with status 0 once the Solr server has been shut down.
     *
     * @param args optional; {@code args[0]} is parsed as a boolean ("true", case-insensitive,
     *             enables re-indexing; anything else disables it). Defaults to re-indexing.
     * @throws JATEException declared for compatibility; failures are caught and printed
     */
    public static void main(String[] args) throws JATEException {
        try {
            AppATEACLRDTECTest appATETest = new AppATEACLRDTECTest(solrHome.toString(), solrCoreName);

            // Boolean.parseBoolean never throws, so no error handling is needed here.
            boolean reindex = args.length == 0 || Boolean.parseBoolean(args[0]);

            long numOfDocs = validate_indexing();
            LOG.info("start to indexing and candidate extraction...");
            if (numOfDocs == 0 || reindex) {
                long startTime = System.currentTimeMillis();
                appATETest.indexAndExtract(corpusDir);
                long endTime = System.currentTimeMillis();
                LOG.info(String.format("Indexing and Candidate Extraction took [%s] milliseconds",
                        (endTime - startTime)));
            }
            LOG.info("complete indexing and candidate extraction.");

            JATEProperties jateProp = appATETest.jateProp;
            // Each test object is created immediately before it is used, in a fixed order.
            rankAndEvaluate(new AppATTFTest(), AppATTF.class.getSimpleName(), jateProp);
            rankAndEvaluate(new AppChiSquareTest(), AppChiSquare.class.getSimpleName(), jateProp);
            rankAndEvaluate(new AppCValueTest(), AppCValue.class.getSimpleName(), jateProp);
            rankAndEvaluate(new AppGlossExTest(), AppGlossEx.class.getSimpleName(), jateProp);
            rankAndEvaluate(new AppRAKETest(), AppRAKE.class.getSimpleName(), jateProp);
            rankAndEvaluate(new AppRIDFTest(), AppRIDF.class.getSimpleName(), jateProp);
            rankAndEvaluate(new AppTermExTest(), AppTermEx.class.getSimpleName(), jateProp);
            rankAndEvaluate(new AppTFIDFTest(), AppTFIDF.class.getSimpleName(), jateProp);
            rankAndEvaluate(new AppTTFTest(), AppTTF.class.getSimpleName(), jateProp);
            rankAndEvaluate(new AppWeirdnessTest(), AppWeirdness.class.getSimpleName(), jateProp);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                // Guard against a failure that happened before the server was available.
                if (server != null) {
                    server.getCoreContainer().getCore(solrCoreName).close();
                    server.getCoreContainer().shutdown();
                    server.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            } catch (SolrException solrEx) {
                solrEx.printStackTrace();
            } finally {
                // The lock clean-up must happen before System.exit, otherwise it can never run.
                try {
                    unlock();
                } finally {
                    System.exit(0);
                }
            }
        }
    }

    /**
     * Runs one algorithm test against the shared server and core, then evaluates its output.
     *
     * @param algorithmTest the per-algorithm test to run
     * @param algorithmName name handed to {@code evaluate} alongside the ranked terms
     * @param jateProp      properties forwarded to {@code rankAndFilter}
     * @throws Exception anything raised by ranking or evaluation
     */
    private static void rankAndEvaluate(ACLRDTECTest algorithmTest, String algorithmName, JATEProperties jateProp)
            throws Exception {
        List<JATETerm> terms = algorithmTest.rankAndFilter(server, solrCoreName, jateProp);
        algorithmTest.evaluate(terms, algorithmName);
    }

    /**
     * Builds the exception thrown when exporting results fails, keeping the I/O error as cause.
     *
     * @param cause the I/O failure raised while writing the results
     * @return a {@link JATEException} with the message "Fail to export results."
     */
    static JATEException exportFailure(IOException cause) {
        JATEException failure = new JATEException("Fail to export results.");
        // NOTE(review): assumes JATEException(String) leaves the cause unset, as
        // Exception(String) does; initCause would throw IllegalStateException otherwise.
        failure.initCause(cause);
        return failure;
    }

    /**
     * Deletes the {@code write.lock} file under {@code <solrHome>/<core>/data/index} if it exists,
     * reporting on stderr when the deletion fails.
     */
    private static void unlock() {
        File lock = Paths.get(solrHome.toString(), solrCoreName, "data", "index", "write.lock").toFile();
        if (lock.exists()) {
            System.err.println("Previous solr did not shut down cleanly. Unlock it ...");
            if (!lock.delete()) {
                System.err.println("Unable to delete lock file: " + lock);
            }
        }
    }
}

/** Runs {@code AppATTF} and exports the ranked terms to {@code attf_acltdtec.json}. */
class AppATTFTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppATTFTest.class.getName());

    /**
     * Ranks terms with {@code AppATTF}, logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppATTF ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        AppATTF appATTF = new AppATTF(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appATTF.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("AppATTF ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appATTF.outputFile = "attf_acltdtec.json";
            appATTF.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}

/** Runs {@code AppChiSquare} and exports the ranked terms to {@code chi_square_acltdtec.json}. */
class AppChiSquareTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppChiSquareTest.class.getName());

    /**
     * Ranks terms with {@code AppChiSquare}, logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppChiSquare ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        initParam.put(AppParams.PREFILTER_MIN_TERM_CONTEXT_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CHISQUERE_FREQ_TERM_CUTOFF_PERCENTAGE.getParamKey(), "0.1");
        AppChiSquare appChiSquare = new AppChiSquare(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appChiSquare.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("AppChiSquare ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appChiSquare.outputFile = "chi_square_acltdtec.json";
            appChiSquare.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}

/** Runs {@code AppCValue} and exports the ranked terms to {@code cvalue_acltdtec.json}. */
class AppCValueTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppCValueTest.class.getName());

    /**
     * Ranks terms with {@code AppCValue}, logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppCValue ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        AppCValue appCValue = new AppCValue(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appCValue.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("AppCValue ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appCValue.outputFile = "cvalue_acltdtec.json";
            appCValue.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}

/** Runs {@code AppGlossEx} and exports the ranked terms to {@code glossEx_acltdtec.json}. */
class AppGlossExTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppGlossExTest.class.getName());

    /**
     * Ranks terms with {@code AppGlossEx} (using {@code FREQ_GENIC_FILE} as the reference
     * frequency file), logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppGlossEx ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        initParam.put(AppParams.REFERENCE_FREQUENCY_FILE.getParamKey(), FREQ_GENIC_FILE.toString());
        AppGlossEx appGlossEx = new AppGlossEx(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appGlossEx.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("appGlossEx ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appGlossEx.outputFile = "glossEx_acltdtec.json";
            appGlossEx.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}

/** Runs {@code AppRAKE} and exports the ranked terms to {@code rake_acltdtec.json}. */
class AppRAKETest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppRAKETest.class.getName());

    /**
     * Ranks terms with {@code AppRAKE}, logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppRAKE ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        AppRAKE appRAKE = new AppRAKE(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appRAKE.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("appRAKE ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appRAKE.outputFile = "rake_acltdtec.json";
            appRAKE.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}

/** Runs {@code AppRIDF} and exports the ranked terms to {@code ridf_acltdtec.json}. */
class AppRIDFTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppRIDFTest.class.getName());

    /**
     * Ranks terms with {@code AppRIDF}, logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppRIDF ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        AppRIDF appRIDF = new AppRIDF(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appRIDF.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("AppRIDF ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appRIDF.outputFile = "ridf_acltdtec.json";
            appRIDF.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}

/** Runs {@code AppTermEx} and exports the ranked terms to {@code termEx_acltdtec.json}. */
class AppTermExTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppTermExTest.class.getName());

    /**
     * Ranks terms with {@code AppTermEx} (using {@code FREQ_GENIC_FILE} as the reference
     * frequency file), logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppTermEx ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        initParam.put(AppParams.REFERENCE_FREQUENCY_FILE.getParamKey(), FREQ_GENIC_FILE.toString());
        AppTermEx appTermEx = new AppTermEx(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appTermEx.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("AppTermEx ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appTermEx.outputFile = "termEx_acltdtec.json";
            appTermEx.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}

/** Runs {@code AppTFIDF} and exports the ranked terms to {@code tfidf_acltdtec.json}. */
class AppTFIDFTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppTFIDFTest.class.getName());

    /**
     * Ranks terms with {@code AppTFIDF}, logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppTFIDF ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        AppTFIDF appTFIDF = new AppTFIDF(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appTFIDF.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("AppTFIDF ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appTFIDF.outputFile = "tfidf_acltdtec.json";
            appTFIDF.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}

/** Runs {@code AppTTF} and exports the ranked terms to {@code ttf_acltdtec.json}. */
class AppTTFTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppTTFTest.class.getName());

    /**
     * Ranks terms with {@code AppTTF}, logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppTTF ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        AppTTF appTTF = new AppTTF(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appTTF.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("AppTTF ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appTTF.outputFile = "ttf_acltdtec.json";
            appTTF.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}

/** Runs {@code AppWeirdness} and exports the ranked terms to {@code weirdness_acltdtec.json}. */
class AppWeirdnessTest extends ACLRDTECTest {
    private static final Logger LOG = Logger.getLogger(AppWeirdnessTest.class.getName());

    /**
     * Ranks terms with {@code AppWeirdness} (using {@code FREQ_GENIC_FILE} as the reference
     * frequency file), logs the elapsed time and writes the result file.
     *
     * @param server       embedded Solr server holding the core
     * @param solrCoreName name of the core to extract from
     * @param jateProp     properties passed to {@code extract}
     * @return the terms returned by {@code extract}
     * @throws JATEException if extraction fails or the results cannot be written
     */
    @Override
    List<JATETerm> rankAndFilter(EmbeddedSolrServer server, String solrCoreName, JATEProperties jateProp)
            throws JATEException {
        LOG.info("AppWeirdness ranking and filtering ... ");

        Map<String, String> initParam = new HashMap<>();
        initParam.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), "2");
        initParam.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), "0.99999");
        initParam.put(AppParams.REFERENCE_FREQUENCY_FILE.getParamKey(), FREQ_GENIC_FILE.toString());
        AppWeirdness appWeirdness = new AppWeirdness(initParam);

        long startTime = System.currentTimeMillis();
        List<JATETerm> terms = appWeirdness.extract(server.getCoreContainer().getCore(solrCoreName), jateProp);
        long endTime = System.currentTimeMillis();
        LOG.info(String.format("AppWeirdness ranking took [%s] milliseconds", (endTime - startTime)));
        LOG.info("complete ranking and filtering.");

        LOG.info("Export results for evaluation ...");
        try {
            appWeirdness.outputFile = "weirdness_acltdtec.json";
            appWeirdness.write(terms);
        } catch (IOException e) {
            throw AppATEACLRDTECTest.exportFailure(e);
        }
        return terms;
    }
}