/** * Copyright (C) 2012 cogroo <cogroo@cogroo.org> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * This code derived from Apache OpenNLP. Please keep the header. */ package org.cogroo.cmdline; import java.util.Collections; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import org.cogroo.cmdline.chunker2.Chunker2ConverterTool; import org.cogroo.cmdline.chunker2.Chunker2CrossValidatorTool; import org.cogroo.cmdline.chunker2.Chunker2EvaluatorTool; import org.cogroo.cmdline.chunker2.Chunker2Tool; import org.cogroo.cmdline.chunker2.Chunker2TrainerTool; import org.cogroo.cmdline.dictionary.AbbreviationDictionaryBuilderTool; import org.cogroo.cmdline.featurizer.FeaturizerConverterTool; import org.cogroo.cmdline.featurizer.FeaturizerCrossValidatorTool; import org.cogroo.cmdline.featurizer.FeaturizerEvaluatorTool; import org.cogroo.cmdline.featurizer.FeaturizerMETool; import org.cogroo.cmdline.featurizer.FeaturizerTrainerTool; import org.cogroo.formats.FeatureSampleStreamFactory; import org.cogroo.formats.ad.ADChunkBasedHeadFinderSampleStreamFactory; import org.cogroo.formats.ad.ADChunkBasedShallowParserSampleStreamFactory; import org.cogroo.formats.ad.ADChunkSampleStreamFactory; import org.cogroo.formats.ad.ADContractionNameSampleStreamFactory; import org.cogroo.formats.ad.ADExPOSSampleStreamFactory; import org.cogroo.formats.ad.ADExpNameSampleStreamFactory; import org.cogroo.formats.ad.ADFeatureSampleStreamFactory; import opennlp.tools.cmdline.BasicCmdLineTool; import opennlp.tools.cmdline.CmdLineTool; import opennlp.tools.cmdline.StreamFactoryRegistry; import opennlp.tools.cmdline.TerminateToolException; import opennlp.tools.cmdline.TypedCmdLineTool; import opennlp.tools.cmdline.namefind.TokenNameFinderCrossValidatorTool; import opennlp.tools.cmdline.namefind.TokenNameFinderEvaluatorTool; import opennlp.tools.cmdline.namefind.TokenNameFinderTool; import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool; import opennlp.tools.cmdline.postag.POSTaggerCrossValidatorTool; import opennlp.tools.cmdline.postag.POSTaggerTrainerTool; import opennlp.tools.util.Version; public final class CLI { public static final String CMD = "cogroo-nlp"; private static Map<String, CmdLineTool> toolLookupMap; static { // Register other types FeatureSampleStreamFactory.registerFactory(); ADFeatureSampleStreamFactory.registerFactory(); ADContractionNameSampleStreamFactory.registerFactory(); ADExpNameSampleStreamFactory.registerFactory(); ADExPOSSampleStreamFactory.registerFactory(); ADChunkBasedHeadFinderSampleStreamFactory.registerFactory(); ADChunkBasedShallowParserSampleStreamFactory.registerFactory(); ADChunkSampleStreamFactory.registerFactory(); toolLookupMap = new LinkedHashMap<String, CmdLineTool>(); List<CmdLineTool> tools = new LinkedList<CmdLineTool>(); // Featurizer tools.add(new FeaturizerMETool()); tools.add(new FeaturizerTrainerTool()); tools.add(new FeaturizerEvaluatorTool()); tools.add(new FeaturizerCrossValidatorTool()); tools.add(new FeaturizerConverterTool()); // Chunker2 tools.add(new Chunker2Tool()); tools.add(new Chunker2TrainerTool()); tools.add(new Chunker2EvaluatorTool()); tools.add(new Chunker2CrossValidatorTool()); tools.add(new Chunker2ConverterTool()); // Contraction tools.add(new AbbreviationDictionaryBuilderTool()); tools.add(new TokenNameFinderTool()); tools.add(new TokenNameFinderTrainerTool()); tools.add(new TokenNameFinderEvaluatorTool()); tools.add(new TokenNameFinderCrossValidatorTool()); // tagger tools.add(new POSTaggerTrainerTool()); tools.add(new POSTaggerCrossValidatorTool()); // Chunker // tools.add(new Chunker2TrainerTool()); // tools.add(new Chunker2CrossValidatorTool()); for (CmdLineTool tool : tools) { toolLookupMap.put(tool.getName(), tool); } toolLookupMap = Collections.unmodifiableMap(toolLookupMap); } /** * @return a set which contains all tool names */ public static Set<String> getToolNames() { return toolLookupMap.keySet(); } private static void usage() { System.out.print("CoGrOO Common " + Version.currentVersion().toString() + ". "); System.out.println("Usage: " + CMD + " TOOL"); System.out.println("where TOOL is one of:"); // distance of tool name from line start int numberOfSpaces = -1; for (String toolName : toolLookupMap.keySet()) { if (toolName.length() > numberOfSpaces) { numberOfSpaces = toolName.length(); } } numberOfSpaces = numberOfSpaces + 4; for (CmdLineTool tool : toolLookupMap.values()) { System.out.print(" " + tool.getName()); for (int i = 0; i < Math.abs(tool.getName().length() - numberOfSpaces); i++) { System.out.print(" "); } System.out.println(tool.getShortDescription()); } System.out.println("All tools print help when invoked with help parameter"); System.out.println("Example: opennlp SimpleTokenizer help"); } public static void main(String[] args) { if (args.length == 0) { usage(); System.exit(0); } String toolArguments[] = new String[args.length - 1]; System.arraycopy(args, 1, toolArguments, 0, toolArguments.length); String toolName = args[0]; // check for format String formatName = StreamFactoryRegistry.DEFAULT_FORMAT; int idx = toolName.indexOf("."); if (-1 < idx) { formatName = toolName.substring(idx + 1); toolName = toolName.substring(0, idx); } CmdLineTool tool = toolLookupMap.get(toolName); try { if (null == tool) { throw new TerminateToolException(1, "Tool " + toolName + " is not found."); } if (0 == toolArguments.length || 0 < toolArguments.length && "help".equals(toolArguments[0])) { if (tool instanceof TypedCmdLineTool) { System.out.println(((TypedCmdLineTool) tool).getHelp(formatName)); } else if (tool instanceof BasicCmdLineTool) { System.out.println(tool.getHelp()); } System.exit(0); } if (tool instanceof TypedCmdLineTool) { ((TypedCmdLineTool) tool).run(formatName, toolArguments); } else if (tool instanceof BasicCmdLineTool) { if (-1 == idx) { ((BasicCmdLineTool) tool).run(toolArguments); } else { throw new TerminateToolException(1, "Tool " + toolName + " does not support formats."); } } else { throw new TerminateToolException(1, "Tool " + toolName + " is not supported."); } } catch (TerminateToolException e) { if (e.getMessage() != null) System.err.println(e.getMessage()); System.exit(e.getCode()); } } }