/* * Copyright 2011 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.api.parameter; public final class ComponentParameters { /** * For analysis engines: Use this language instead of the document language to resolve the model * and tag set mapping. * * For readers: Set this as the language of the produced documents. */ public static final String PARAM_LANGUAGE = "language"; /** * Variant of the model. Used to address a specific model if here are multiple models * for one language. */ public static final String PARAM_PATTERNS = "patterns"; /** * Variant of a model the model. Used to address a specific model if here are multiple models * for one language. */ public static final String PARAM_VARIANT = "modelVariant"; /** * Location from which the model is read. */ public static final String PARAM_MODEL_LOCATION = "modelLocation"; /** * Location from which the segmentation model is read. */ public static final String PARAM_SEGMENTATION_MODEL_LOCATION = "segmentationModelLocation"; /** * Location from which the tokenization model is read. */ public static final String PARAM_TOKENIZATION_MODEL_LOCATION = "tokenizationModelLocation"; /** * The character encoding used by the model. */ public static final String PARAM_MODEL_ENCODING = "modelEncoding"; /** * Location from which the input is read. */ public static final String PARAM_SOURCE_LOCATION = "sourceLocation"; /** * Character encoding of the input data. */ public static final String PARAM_SOURCE_ENCODING = "sourceEncoding"; /** * Location to which the output is written. */ public static final String PARAM_TARGET_LOCATION = "targetLocation"; /** * Character encoding of the output data. */ public static final String PARAM_TARGET_ENCODING = "targetEncoding"; /** * Use this filename extension. */ public static final String PARAM_FILENAME_EXTENSION = "filenameExtension"; /** * Remove the original extension. */ public static final String PARAM_STRIP_EXTENSION = "stripExtension"; /** * Log the tag set(s) when a model is loaded. */ public static final String PARAM_PRINT_TAGSET = "printTagSet"; /** * Use the {@link String#intern()} method on tags. This is usually a good idea to avoid * spamming the heap with thousands of strings representing only a few different tags. */ public static final String PARAM_INTERN_TAGS = "internTags"; /** * When splitting an annotation into multiple parts, e.g. when splitting a token that is a * compound word into multiple tokens, each representing a part of the word, this parameter * controls if the original annotation is kept or removed. */ public static final String PARAM_DELETE_COVER = "deleteCover"; /** * Maximal sentence length in tokens that is still being processed. */ public static final String PARAM_MAX_SENTENCE_LENGTH = "maxSentenceLength"; /** * The number of threads to use for components that implement multi-threading */ public static final String PARAM_NUM_THREADS = "numThreads"; /** * Use smart number of threads if PARAM_NUM_THREADS is set to this value */ public static final String AUTO_NUM_THREADS = "0"; /** * Compute the number of threads to use for components that can make use of multi-threading. * <ul> * <li>for positive values: use the given number of threads, with the number of available CPUs maximum.</li> * <li>for negative value: use the number of available CPUs minus the given value, minimum 1.</li> * <li>for {@link #AUTO_NUM_THREADS} (0): use the number of available CPUs minus one.</li> * </ul> * * @param value the user-proposed number of threads (positive, negative, or 0) * @return the actual number of threads to use. */ public static int computeNumThreads(int value) { int cpus = Runtime.getRuntime().availableProcessors(); if (value > 0) { return Math.min(cpus, value); } else if (value < 0) { return Math.max(1, cpus + value); } else { return Math.max(1, cpus - 1); } } // ============================================================================================= // Annotation types // ============================================================================================= private static final String PARAGRAPH = "Paragraph"; private static final String SENTENCE = "Sentence"; private static final String FORM = "Form"; private static final String TOKEN = "Token"; private static final String LEMMA = "Lemma"; private static final String POS = "POS"; private static final String CPOS = "CPOS"; private static final String MORPH = "Morph"; private static final String CHUNK = "Chunk"; private static final String CONSTITUENT = "Constituent"; private static final String COREFERENCE = "Coreference"; private static final String PENN_TREE = "PennTree"; private static final String DEPENDENCY = "Dependency"; private static final String NAMED_ENTITY = "NamedEntity"; // ============================================================================================= // Verbs for parameters // ============================================================================================= private static final String READ = "read"; private static final String WRITE = "write"; // ============================================================================================= // Nouns for parameters // ============================================================================================= private static final String TAG_SET = "TagSet"; private static final String MAPPING_LOCATION = "MappingLocation"; // ============================================================================================= // Enable / disable reading or writing of particular annotation types. // ============================================================================================= public static final String PARAM_READ_PARAGRAPH = READ + PARAGRAPH; public static final String PARAM_READ_SENTENCE = READ + SENTENCE; public static final String PARAM_READ_TOKEN = READ + TOKEN; public static final String PARAM_READ_FORM = READ + FORM; public static final String PARAM_READ_LEMMA = READ + LEMMA; public static final String PARAM_READ_POS = READ + POS; public static final String PARAM_READ_CPOS = READ + CPOS; public static final String PARAM_READ_CHUNK = READ + CHUNK; public static final String PARAM_READ_MORPH = READ + MORPH; public static final String PARAM_READ_CONSTITUENT = READ + CONSTITUENT; public static final String PARAM_READ_COREFERENCE = READ + COREFERENCE; public static final String PARAM_READ_PENN_TREE = READ + PENN_TREE; public static final String PARAM_READ_DEPENDENCY = READ + DEPENDENCY; public static final String PARAM_READ_NAMED_ENTITY = READ + NAMED_ENTITY; public static final String PARAM_WRITE_PARAGRAPH = WRITE + PARAGRAPH; public static final String PARAM_WRITE_SENTENCE = WRITE + SENTENCE; public static final String PARAM_WRITE_TOKEN = WRITE + TOKEN; public static final String PARAM_WRITE_FORM = WRITE + FORM; public static final String PARAM_WRITE_LEMMA = WRITE + LEMMA; public static final String PARAM_WRITE_POS = WRITE + POS; public static final String PARAM_WRITE_CPOS = WRITE + CPOS; public static final String PARAM_WRITE_CHUNK = WRITE + CHUNK; public static final String PARAM_WRITE_MORPH = WRITE + MORPH; public static final String PARAM_WRITE_CONSTITUENT = WRITE + CONSTITUENT; public static final String PARAM_WRITE_COREFERENCE = WRITE + COREFERENCE; public static final String PARAM_WRITE_PENN_TREE = WRITE + PENN_TREE; public static final String PARAM_WRITE_DEPENDENCY = WRITE + DEPENDENCY; public static final String PARAM_WRITE_NAMED_ENTITY = WRITE + NAMED_ENTITY; // ============================================================================================= // Configure tag sets for different kinds of annotations. // // Not using the type constants here because they are capitalized for use with verbs // ============================================================================================= /** * Use this part-of-speech tag set to use to resolve the tag set mapping instead of using the * tag set defined as part of the model meta data. This can be useful if a custom model is * specified which does not have such meta data, or it can be used in readers. */ public static final String PARAM_POS_TAG_SET = POS + TAG_SET; /** * Use this chunk tag set to use to resolve the tag set mapping instead of using the * tag set defined as part of the model meta data. This can be useful if a custom model is * specified which does not have such meta data, or it can be used in readers. */ public static final String PARAM_CHUNK_TAG_SET = CHUNK + TAG_SET; public static final String PARAM_CONSTITUENT_TAG_SET = CONSTITUENT + TAG_SET; public static final String PARAM_MORPH_TAG_SET = MORPH + TAG_SET; // ============================================================================================= // Configure mapping of tags to annotation types for different kinds of annotations. // // Not using the type constants here because they are capitalized for use with verbs // ============================================================================================= /** * Location of the mapping file for part-of-speech tags to UIMA types. */ public static final String PARAM_POS_MAPPING_LOCATION = POS + MAPPING_LOCATION; /** * Location of the mapping file for constituent tags to UIMA types. */ public static final String PARAM_CONSTITUENT_MAPPING_LOCATION = CONSTITUENT + MAPPING_LOCATION; /** * Location of the mapping file for chunk tags to UIMA types. */ public static final String PARAM_CHUNK_MAPPING_LOCATION = CHUNK + MAPPING_LOCATION; /** * Location of the mapping file for named entity tags to UIMA types. */ public static final String PARAM_NAMED_ENTITY_MAPPING_LOCATION = NAMED_ENTITY + MAPPING_LOCATION; /** * Location of the mapping file for morphological analysis strings to features. */ public static final String PARAM_MORPH_MAPPING_LOCATION = MORPH + MAPPING_LOCATION; /** * Location of the mapping file for dependency tags to UIMA types. */ public static final String PARAM_DEPENDENCY_MAPPING_LOCATION = DEPENDENCY + MAPPING_LOCATION; private ComponentParameters() { // No instances of this class } }