/* * #%L * NICTA t3as MetaMap Tagger * %% * Copyright (C) 2014 NICTA * %% * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program. If not, see * <http://www.gnu.org/licenses/gpl-3.0.html>. * * Additional permission under GNU GPL version 3 section 7 * * If you modify this Program, or any covered work, by linking or combining * it with H2, GWT, or JavaBeans Activation Framework (JAF) (or a * modified version of those libraries), containing parts covered by the * terms of the H2 License, the GWT Terms, or the Common Development and * Distribution License (CDDL) version 1.0 ,the licensors of this Program * grant you additional permission to convey the resulting work. 
* #L% */
package org.t3as.metamap.options;

import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.t3as.metamap.SemanticTypes;

import java.util.List;
import java.util.regex.Pattern;

import static com.google.common.base.CharMatcher.JAVA_LETTER_OR_DIGIT;
import static com.google.common.base.CharMatcher.anyOf;
import static org.t3as.metamap.options.RestrictToSources.SNOMEDCT_US;

/**
 * Registry of the known {@link Option} types, the default option set, and static helpers for
 * turning option strings into {@link Option} instances.
 */
public abstract class MetaMapOptions {

    /** The default set of options, built once in the static initialiser below. */
    @SuppressWarnings("PublicStaticCollectionField")
    public static final ImmutableCollection<Option> DEFAULT_MM_OPTIONS;

    /**
     * One registered instance of every known option, keyed by that option's {@code NAME} constant.
     * {@link #strToOpt(String)} looks the name up here and calls {@code newInstance} on the match.
     */
    private static final ImmutableMap<String, Option> OPTS;

    /** Separator between an option name and its parameter. */
    private static final Pattern SPACE = Pattern.compile(" ");

    static {
        // PUT NEW OPTIONS IN THIS LIST
        OPTS = ImmutableMap.<String, Option>builder()
                .put(WordSenseDisambiguation.NAME, new WordSenseDisambiguation())
                .put(CompositePhrases.NAME, new CompositePhrases())
                .put(NoDerivationalVariants.NAME, new NoDerivationalVariants())
                .put(StrictModel.NAME, new StrictModel())
                .put(IgnoreWordOrder.NAME, new IgnoreWordOrder())
                .put(AllowLargeN.NAME, new AllowLargeN())
                .put(IgnoreStopPhrases.NAME, new IgnoreStopPhrases())
                .put(AllAcrosAbbrs.NAME, new AllAcrosAbbrs())
                .put(RestrictToSources.NAME, new RestrictToSources())
                .put(RestrictToSts.NAME, new RestrictToSts())
                .build();

        DEFAULT_MM_OPTIONS = ImmutableList.of(
                new WordSenseDisambiguation(),
                new CompositePhrases(8),
                new NoDerivationalVariants(),
                new StrictModel(),
                new IgnoreWordOrder(),
                new AllowLargeN(),
                new RestrictToSources(ImmutableList.of(SNOMEDCT_US)),
                new RestrictToSts(SemanticTypes.DEFAULT_MM_SEMANTIC_TYPES));
    }

    /** Not instantiable; this class only exposes static members. */
    private MetaMapOptions() {}

    /**
     * Parse an option string (without the leading double hyphens) into an option to pass to MetaMap,
     * e.g. "word_sense_disambiguation".
     * <p>
     * Leading and trailing whitespace is ignored. The remaining text is split at the first space:
     * the part before it is the option name, and whatever follows (if anything) is handed to the
     * option as its parameter.
     *
     * @param optStr the option name, optionally followed by a space and a parameter; may be null
     * @return the result of calling {@code newInstance(param)} on the registered option with that
     *         name, or {@code null} if {@code optStr} is null or does not name a registered option
     */
    @SuppressWarnings("ReturnOfNull")
    public static Option strToOpt(final String optStr) {
        // A missing string is reported the same way as an unknown option name, rather than as an NPE.
        if (optStr == null) return null;

        // Trim first: a leading space would otherwise produce an empty name and drop a valid option.
        final String[] parts = SPACE.split(optStr.trim(), 2);
        final String name = parts[0];
        final String param = 1 < parts.length ? parts[1] : null;

        final Option opt = OPTS.get(name);
        return opt == null ? null : opt.newInstance(param);
    }

    /**
     * Strip disallowed characters from a comma separated parameter and split it into its elements.
     *
     * @param param the raw parameter text; may be null or empty
     * @return the non-empty, comma separated pieces of the sanitised text, in order; an empty list
     *         if {@code param} is null or empty
     */
    /*package-private*/
    static List<String> sanitiseAndSplit(final String param) {
        if (Strings.isNullOrEmpty(param)) return ImmutableList.of();
        // sanitize the user input: keep only letters, digits, and the characters . , _ - [ ]
        final String sanitized = JAVA_LETTER_OR_DIGIT.or(anyOf(".,_-[]")).retainFrom(param);
        return Splitter.on(',').trimResults().omitEmptyStrings().splitToList(sanitized);
    }
}