package org.wikibrain.loader; import org.apache.commons.cli.*; import org.wikibrain.conf.ConfigurationException; import org.wikibrain.conf.DefaultOptionBuilder; import org.wikibrain.core.WikiBrainException; import org.wikibrain.core.cmd.Env; import org.wikibrain.core.cmd.EnvBuilder; import org.wikibrain.core.dao.DaoException; import org.wikibrain.core.dao.MetaInfoDao; import org.wikibrain.core.lang.Language; import org.wikibrain.phrases.PhraseAnalyzer; import org.wikibrain.phrases.PrunedCounts; import org.wikibrain.phrases.StanfordPhraseAnalyzer; import java.io.IOException; import java.sql.SQLException; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** */ public class PhraseLoader { private static final Logger LOG = LoggerFactory.getLogger(PhraseLoader.class); public static void main(String args[]) throws ClassNotFoundException, SQLException, IOException, ConfigurationException, WikiBrainException, DaoException, InterruptedException { Options options = new Options(); options.addOption( new DefaultOptionBuilder() .hasArgs() .withLongOpt("analyzer") .withDescription("the name of the phrase analyzer to use") .create("p")); EnvBuilder.addStandardOptions(options); CommandLineParser parser = new PosixParser(); CommandLine cmd; try { cmd = parser.parse(options, args); } catch (ParseException e) { System.err.println( "Invalid option usage: " + e.getMessage()); new HelpFormatter().printHelp("ConceptLoader", options); return; } Map<String, String> confOverrides = new HashMap<String, String>(); confOverrides.put("phrases.loading", "true"); Env env = new EnvBuilder(cmd).setProperty("phrases.loading", true).build(); List<String> toLoad = env.getConfiguration().get().getStringList("phrases.toLoad"); if (cmd.hasOption("p")) { toLoad = Arrays.asList(cmd.getOptionValues("p")); } if (toLoad.contains("stanford")) { StanfordPhraseAnalyzer.downloadDictionaryIfNecessary(env.getConfiguration()); } int n = 0; for (String name : toLoad) { PhraseAnalyzer analyzer = env.getConfigurator().get(PhraseAnalyzer.class, name); LOG.info("LOADING PHRASE CORPUS FOR " + name); n += analyzer.loadCorpus(env.getLanguages()); LOG.info("DONE"); } MetaInfoDao metaDao = env.getConfigurator().get(MetaInfoDao.class); for (Language lang : env.getLanguages()) { metaDao.incrementRecords(PrunedCounts.class, lang, n); } // For some reasons this appears to hang without this line. System.exit(0); } }