package ivory.bloomir.preprocessing; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import ivory.bloomir.data.CompressedPostingsIO; import ivory.bloomir.util.OptionManager; import ivory.core.RetrievalEnvironment; public class GenerateCompressedPostings { public static void main(String[] args) throws Exception { OptionManager options = new OptionManager(GenerateCompressedPostings.class.getName()); options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true); options.addOption(OptionManager.OUTPUT_PATH, "path", "output root", true); options.addOption(OptionManager.SPAM_PATH, "path", "spam percentile score", true); try { options.parse(args); } catch(Exception exp) { return; } final String input = options.getOptionValue(OptionManager.INDEX_ROOT_PATH); final String output = options.getOptionValue(OptionManager.OUTPUT_PATH); final String spam = options.getOptionValue(OptionManager.SPAM_PATH); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); RetrievalEnvironment env = new RetrievalEnvironment(input, fs); env.initialize(false); CompressedPostingsIO.writePostings(output, fs, env, spam); } }