/* * #! * Ontopia Classify * #- * Copyright (C) 2001 - 2013 The Ontopia Project * #- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * !# */ package net.ontopia.topicmaps.classify; import net.ontopia.utils.CmdlineOptions; import net.ontopia.utils.CmdlineUtils; import net.ontopia.topicmaps.core.TopicMapIF; import net.ontopia.topicmaps.utils.ImportExportUtils; /** * PUBLIC: Command-line tool for extracting keywords from a document. */ public class Chew { public static void main(String[] argv) throws Exception { // Initialize logging CmdlineUtils.initializeLogging(); // Register logging options CmdlineOptions options = new CmdlineOptions("Chew", argv); CmdlineUtils.registerLoggingOptions(options); OptionsListener ohandler = new OptionsListener(); options.addLong(ohandler, "terms", 't', true); // Parse command line options try { options.parse(); } catch (CmdlineOptions.OptionsException e) { System.err.println("Error: " + e.getMessage()); System.exit(1); } // Get command line arguments String[] args = options.getArguments(); if (args.length == 0 || args.length > 2) { usage(); System.exit(3); } String infile = (args.length == 1 ? args[0] : args[1]); // load the topic maps TopicMapIF topicmap = (args.length == 2 ? ImportExportUtils.getReader(args[0]).read() : null); // rank and dump TermDatabase tdb = SimpleClassifier.classify(infile, topicmap); tdb.dump(ohandler.terms); } private static void usage() { System.out.println("java net.ontopia.topicmaps.classify.Chew [options] <topicmapuri> <inputfile>"); System.out.println(""); System.out.println(" Performs auto-classification of a document against a topic map."); System.out.println(""); System.out.println(" Options:"); CmdlineUtils.printLoggingOptionsUsage(System.out); System.out.println(" --terms=<number> : number of terms to output (default: 30)"); System.out.println(""); System.out.println(" <topicmapuri>: the topic map to classify against (optional)"); System.out.println(" <inputfile>: the document to classify"); System.out.println(""); } private static class OptionsListener implements CmdlineOptions.ListenerIF { int terms = 30; public void processOption(char option, String value) { if (option == 't') terms = Integer.valueOf(value).intValue(); } } }