/* LanguageTool, a natural language style checker * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 * USA */ package org.languagetool.commandline; import java.io.File; import java.io.PrintStream; import java.util.Arrays; import org.languagetool.Language; import org.languagetool.Languages; /** * Parser for the command line arguments. */ public class CommandLineParser { public CommandLineOptions parseOptions(String[] args) { if (args.length < 1 || args.length > 12) { throw new WrongParameterNumberException(); } CommandLineOptions options = new CommandLineOptions(); for (int i = 0; i < args.length; i++) { if (args[i].equals("--version")) { options.setPrintVersion(true); } else if (args[i].equals("--list")) { options.setPrintLanguages(true); } else if (args[i].equals("-h") || args[i].equals("-help") || args[i].equals("--help") || args[i].equals("--?")) { options.setPrintUsage(true); } else if (args[i].equals("-adl") || args[i].equals("--autoDetect")) { // set autoDetect flag options.setAutoDetect(true); } else if (args[i].equals("-v") || args[i].equals("--verbose")) { options.setVerbose(true); } else if (args[i].equals("--line-by-line")) { options.setLineByLine(true); } else if (args[i].equals("-t") || args[i].equals("--taggeronly")) { options.setTaggerOnly(true); if (options.isListUnknown()) { throw new IllegalArgumentException("You cannot list unknown words when tagging only"); } if (options.isApplySuggestions()) { throw new IllegalArgumentException("You cannot apply suggestions when tagging only"); } } else if (args[i].equals("-r") || args[i].equals("--recursive")) { options.setRecursive(true); } else if (args[i].equals("-b2") || args[i].equals("--bitext")) { options.setBitext(true); } else if (args[i].equals("-eo") || args[i].equals("--enabledonly")) { if (options.getDisabledRules().size() > 0) { throw new IllegalArgumentException("You cannot specify both disabled rules and enabledonly"); } options.setUseEnabledOnly(); } else if (args[i].equals("-d") || args[i].equals("--disable")) { if (options.isUseEnabledOnly()) { throw new IllegalArgumentException("You cannot specify both disabled rules and enabledonly"); } checkArguments("-d/--disable", i, args); String rules = args[++i]; options.setDisabledRules(Arrays.asList(rules.split(","))); } else if (args[i].equals("-e") || args[i].equals("--enable")) { checkArguments("-e/--enable", i, args); String rules = args[++i]; options.setEnabledRules(Arrays.asList(rules.split(","))); } else if (args[i].equals("--enablecategories")) { checkArguments("--enablecategories", i, args); String categories = args[++i]; options.setEnabledCategories(Arrays.asList(categories.split(","))); } else if (args[i].equals("--disablecategories")) { checkArguments("--disablecategories", i, args); String categories = args[++i]; options.setDisabledCategories(Arrays.asList(categories.split(","))); } else if (args[i].equals("-l") || args[i].equals("--language")) { checkArguments("-l/--language", i, args); options.setLanguage(getLanguage(args[++i])); } else if (args[i].equals("-m") || args[i].equals("--mothertongue")) { checkArguments("-m/--mothertongue", i, args); options.setMotherTongue(getLanguage(args[++i])); } else if (args[i].equals("--languagemodel")) { checkArguments("--languagemodel", i, args); options.setLanguageModel(new File(args[++i])); } else if (args[i].equals("--rulefile")) { checkArguments("--rulefile", i, args); options.setRuleFile(args[++i]); } else if (args[i].equals("--falsefriends")) { checkArguments("--falsefriends", i, args); options.setFalseFriendFile(args[++i]); } else if (args[i].equals("--bitextrules")) { checkArguments("--bitextrules", i, args); options.setBitextRuleFile(args[++i]); } else if (args[i].equals("-c") || args[i].equals("--encoding")) { checkArguments("-c/--encoding", i, args); options.setEncoding(args[++i]); } else if (args[i].equals("-u") || args[i].equals("--list-unknown")) { options.setListUnknown(true); if (options.isTaggerOnly()) { throw new IllegalArgumentException("You cannot list unknown words when tagging only"); } } else if (args[i].equals("-b")) { options.setSingleLineBreakMarksParagraph(true); } else if (args[i].equals("--json")) { options.setJsonFormat(); if (options.isApplySuggestions()) { throw new IllegalArgumentException("JSON output format makes no sense for automatic application of suggestions"); } if (options.isLineByLine()) { throw new IllegalArgumentException("JSON output format is not implemented for \"line by line\" analysis"); } if (options.isBitext()) { throw new IllegalArgumentException("JSON output format is not implemented for Bitext"); } if (options.isListUnknown()) { throw new IllegalArgumentException("You cannot list unknown words in JSON output format"); } } else if (args[i].equals("--api")) { options.setXmlFormat(); if (options.isApplySuggestions()) { throw new IllegalArgumentException("XML API format makes no sense for automatic application of suggestions"); } } else if (args[i].equals("-a") || args[i].equals("--apply")) { options.setApplySuggestions(true); if (options.isTaggerOnly()) { throw new IllegalArgumentException("You cannot apply suggestions when tagging only"); } if (options.isXmlFormat()) { throw new IllegalArgumentException("XML API format makes no sense for automatic application of suggestions"); } if (options.isJsonFormat()) { throw new IllegalArgumentException("JSON output format makes no sense for automatic application of suggestions"); } } else if (args[i].equals("-p") || args[i].equals("--profile")) { options.setProfile(true); if (options.isXmlFormat()) { throw new IllegalArgumentException("XML API format makes no sense for profiling"); } if (options.isJsonFormat()) { throw new IllegalArgumentException("JSON output format makes no sense for profiling"); } if (options.isApplySuggestions()) { throw new IllegalArgumentException("Applying suggestions makes no sense for profiling"); } if (options.isTaggerOnly()) { throw new IllegalArgumentException("Tagging makes no sense for profiling"); } } else if (args[i].equals("--xmlfilter")) { options.setXmlFiltering(true); } else if (i == args.length - 1) { options.setFilename(args[i]); } else { throw new UnknownParameterException("Unknown parameter: " + args[i]); } } return options; } public void printUsage() { printUsage(System.out); } /* * NOTE: please keep http://wiki.languagetool.org/command-line-options * up-to-date if you add an option */ public void printUsage(PrintStream stream) { stream.println("Usage: java -jar languagetool-commandline.jar [OPTION]... FILE\n" + " FILE plain text file to be checked\n" + " Available options:\n" + " -r, --recursive work recursively on directory, not on a single file\n" + " -c, --encoding ENC character set of the input text, e.g. utf-8 or latin1\n" + " -b assume that a single line break marks the end of a paragraph\n" + " -l, --language LANG the language code of the text, e.g. en for English, en-GB for British English\n" + " --list print all available languages and exit\n" + " -adl, --autoDetect auto-detect the language of the input text - note this will not detect\n" + " variants like 'English (US)', so you will not get spell checking for\n" + " languages with variants\n" + " -m, --mothertongue LANG the language code of your first language, used to activate false-friend checking\n" + " -d, --disable RULES a comma-separated list of rule ids to be disabled (use no spaces between ids)\n" + " -e, --enable RULES a comma-separated list of rule ids to be enabled (use no spaces between ids)\n" + " -eo, --enabledonly disable all rules except those enabled explicitly in -e\n" + " --enablecategories CAT a comma-separated list of category ids to be enabled (use no spaces between ids)\n" + " --disablecategories CAT a comma-separated list of category ids to be disabled (use no spaces between ids)\n" + " -t, --taggeronly don't check, but only print text analysis (sentences, part-of-speech tags)\n" + " -u, --list-unknown also print a summary of words from the input that LanguageTool doesn't know\n" + " -b2, --bitext check bilingual texts with a tab-separated input file,\n" + " see http://languagetool.wikidot.com/checking-translations-bilingual-texts\n" + " --api print results as XML (deprecated, please use --json or the JSON API in server mode)\n" + " --json print results as JSON\n" + " -p, --profile print performance measurements\n" + " -v, --verbose print text analysis (sentences, part-of-speech tags) to STDERR\n" + " --version print LanguageTool version number and exit\n" + " -a, --apply automatically apply suggestions if available, printing result to STDOUT\n" + " NOTE: only use with very robust rules, as this will otherwise introduce new errors\n" + " --rulefile FILE use an additional grammar file; if the filename contains a known language code,\n" + " it is used in addition of standard rules\n" + " --falsefriends FILE use external false friend file to be used along with the built-in rules\n" + " --bitextrules FILE use external bitext XML rule file (useful only in bitext mode)\n" + " --languagemodel DIR a directory with e.g. 'en' sub directory (i.e. a language code) that contains\n" + " '1grams'...'3grams' sub directories with Lucene indexes with\n" + " ngram occurrence counts; activates the confusion rule if supported\n" + " --xmlfilter remove XML/HTML elements from input before checking (deprecated)\n" + " --line-by-line work on file line by line (for development, e.g. inside an IDE)" ); } private void checkArguments(String option, int argParsingPos, String[] args) { if (argParsingPos + 1 >= args.length) { throw new IllegalArgumentException("Missing argument to " + option + " command line option."); } } private Language getLanguage(String userSuppliedLangCode) { return Languages.getLanguageForShortCode(userSuppliedLangCode); } }