/*******************************************************************************
* Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*******************************************************************************/
package eu.project.ttc.tools;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Locale;

import org.apache.uima.resource.RelativePathResolver;
import org.apache.uima.resource.impl.RelativePathResolver_impl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;

import eu.project.ttc.engines.desc.Lang;
import eu.project.ttc.engines.desc.TermSuiteResourceException;
import eu.project.ttc.utils.TermSuiteConstants;
import eu.project.ttc.utils.URLUtils;
/**
 * A meta-type for TermSuite linguistic resources.
 *
 * <p>Each constant carries a relative path pattern in which the following
 * placeholders are substituted by {@link #getPath(Lang, Tagger)}:</p>
 * <ul>
 * <li>{@code [LANG_SHORT]}: the value of {@code Lang.getCode()},</li>
 * <li>{@code [LANG]}: the lower-cased value of {@code Lang.getName()},</li>
 * <li>{@code [TAGGER]}: the value of {@code Tagger.getName()},</li>
 * <li>{@code [TAGGER_SHORT]}: the value of {@code Tagger.getShortName()}.</li>
 * </ul>
 *
 * <p>A resolved path can then be turned into a {@link URL}, either from the
 * classpath ({@code fromClasspath*} methods) or relative to an arbitrary URL
 * prefix ({@code fromUrlPrefix} methods).</p>
 *
 * @author Damien Cram
 *
 */
public enum TermSuiteResource {
    GENERAL_LANGUAGE("[LANG_SHORT]/[LANG]-general-language.txt", "", ""),
    PREFIX_BANK("[LANG_SHORT]/morphology/[LANG]-prefix-bank.txt", "", ""),
    PREFIX_EXCEPTIONS("[LANG_SHORT]/morphology/[LANG]-prefix-exceptions.txt", "", ""),
    MANUAL_COMPOSITIONS("[LANG_SHORT]/morphology/[LANG]-manual-composition.txt", "", ""),
    ROOT_BANK("[LANG_SHORT]/morphology/[LANG]-root-bank.txt", "", ""),
    ALLOWED_CHARS("[LANG_SHORT]/[LANG]-allowed-chars.txt", "", ""),
    SUFFIX_DERIVATIONS("[LANG_SHORT]/morphology/[LANG]-suffix-derivation-bank.txt", "", ""),
    SUFFIX_DERIVATION_EXCEPTIONS("[LANG_SHORT]/morphology/[LANG]-suffix-derivation-exceptions.txt", "", ""),
    COMPOST_INFLECTION_RULES("[LANG_SHORT]/morphology/[LANG]-compost-inflection-rules.txt", "", ""),
    COMPOST_STOP_LIST("[LANG_SHORT]/morphology/[LANG]-compost-stop-list.txt", "", ""),
    COMPOST_TRANSFORMATION_RULES("[LANG_SHORT]/morphology/[LANG]-compost-transformation-rules.txt", "", ""),
    DICO("[LANG_SHORT]/[LANG]-dico.txt", "", ""),
    FIXED_EXPRESSIONS("[LANG_SHORT]/[LANG]-fixed-expressions.txt", "", ""),
    TAGGER_CASE_MAPPING("[LANG_SHORT]/tagging/[TAGGER]/[LANG]-[TAGGER_SHORT]-case-mapping.xml", "", ""),
    TAGGER_CATEGORY_MAPPING("[LANG_SHORT]/tagging/[TAGGER]/[LANG]-[TAGGER_SHORT]-category-mapping.xml", "", ""),
    TAGGER_GENDER_MAPPING("[LANG_SHORT]/tagging/[TAGGER]/[LANG]-[TAGGER_SHORT]-gender-mapping.xml", "", ""),
    TAGGER_MOOD_MAPPING("[LANG_SHORT]/tagging/[TAGGER]/[LANG]-[TAGGER_SHORT]-mood-mapping.xml", "", ""),
    TAGGER_NUMBER_MAPPING("[LANG_SHORT]/tagging/[TAGGER]/[LANG]-[TAGGER_SHORT]-number-mapping.xml", "", ""),
    TAGGER_SUBCATEGORY_MAPPING("[LANG_SHORT]/tagging/[TAGGER]/[LANG]-[TAGGER_SHORT]-subcategory-mapping.xml", "", ""),
    TAGGER_TENSE_MAPPING("[LANG_SHORT]/tagging/[TAGGER]/[LANG]-[TAGGER_SHORT]-tense-mapping.xml", "", ""),
    MWT_RULES("[LANG_SHORT]/[LANG]-multi-word-rule-system.regex", "", ""),
    NEOCLASSICAL_PREFIXES("[LANG_SHORT]/morphology/[LANG]-neoclassical-prefixes.txt", "", ""),
    SEGMENT_BANK("[LANG_SHORT]/[LANG]-segment-bank.xml", "", ""),
    STOP_WORDS_FILTER("[LANG_SHORT]/[LANG]-stop-word-filter.xml", "", ""),
    TREETAGGER_CONFIG("[LANG_SHORT]/tagging/[TAGGER]/[LANG]-treetagger.xml", "", ""),
    VARIANTS("[LANG_SHORT]/[LANG]-variants.yaml", "", ""),
    ;

    // NOTE(review): not referenced anywhere in this enum; kept as-is.
    private static final String MSG_ERR_RESOURCE_NOT_FOUND = "Resource %s does not exist for resource %s (resolved URL is %s)";
    private static final Logger LOGGER = LoggerFactory.getLogger(TermSuiteResource.class);

    /* Placeholders recognized in path patterns. */
    private static final String TAGGER_SHORT_PATTERN = "[TAGGER_SHORT]";
    private static final String TAGGER_PATTERN = "[TAGGER]";
    private static final String LANG_PATTERN = "[LANG]";
    private static final String LANG_SHORT_PATTERN = "[LANG_SHORT]";

    private final String pathPattern;
    private final String title;
    private final String description;

    private TermSuiteResource(String pathPattern, String title, String description) {
        this.pathPattern = pathPattern;
        this.title = title;
        this.description = description;
    }

    /**
     * @return the raw path pattern of this resource, placeholders included
     */
    public String getPathPattern() {
        return pathPattern;
    }

    /**
     * @return the title of this resource (the empty string for every
     *         constant currently declared)
     */
    public String getTitle() {
        return title;
    }

    /**
     * @return the description of this resource (the empty string for every
     *         constant currently declared)
     */
    public String getDescription() {
        return description;
    }

    /**
     * Verifies that a stream can be opened on the given URL. The stream is
     * closed immediately; only the ability to open it is checked.
     *
     * @param url
     *            the URL to check
     * @return the same URL, when a stream could be opened on it
     * @throws NullPointerException
     *             if <code>url</code> is <code>null</code>
     * @throws TermSuiteResourceException
     *             if opening (or closing) the stream raises an {@link IOException}
     */
    private URL checkUrl(URL url) {
        Preconditions.checkNotNull(url, "Failed to load resource %s. Url is null.", this);
        try (InputStream is = url.openStream()) {
            Preconditions.checkNotNull(is, "Failed to load resource %s. Got a null input stream for url %s",
                    this, url);
            return url;
        } catch (IOException e) {
            throw new TermSuiteResourceException(
                    String.format("Cannot open stream for resource %s and url %s", this, url),
                    e);
        }
    }

    // NOTE(review): resolver/getResolver() are not referenced anywhere in this
    // enum, and the lazy initialization below is not synchronized.
    private static RelativePathResolver resolver = null;

    private static RelativePathResolver getResolver() {
        if (resolver == null) {
            resolver = new RelativePathResolver_impl(TermSuiteResource.class.getClassLoader());
        }
        return resolver;
    }

    /**
     * Resolves this resource from the classpath for the given language and
     * checks that it can be opened.
     *
     * @param lang
     *            the language of the resource
     * @return the URL of the resource
     * @throws NullPointerException
     *             if the resource is not found on the classpath
     * @throws TermSuiteResourceException
     *             if the resource cannot be opened
     * @throws IllegalArgumentException
     *             if the path pattern of this resource requires a tagger
     */
    public URL fromClasspath(Lang lang) {
        return fromClasspath(lang, null);
    }

    /**
     * Looks this resource up on the classpath for the given language, without
     * checking that it exists.
     *
     * @param lang
     *            the language of the resource
     * @return the URL of the resource, or <code>null</code> if not found
     * @throws IllegalArgumentException
     *             if the path pattern of this resource requires a tagger
     */
    public URL fromClassPathUnchecked(Lang lang) {
        return fromClassPathUnchecked(lang, null);
    }

    /**
     * @param lang
     *            the language of the resource
     * @param tagger
     *            the tagger of the resource
     * @return <code>true</code> if this resource is found on the classpath
     *         for the given language and tagger
     */
    public boolean existsInClasspath(Lang lang, Tagger tagger) {
        return fromClassPathUnchecked(lang, tagger) != null;
    }

    /**
     * @param lang
     *            the language of the resource
     * @return <code>true</code> if this resource is found on the classpath
     *         for the given language
     * @throws IllegalArgumentException
     *             if the path pattern of this resource requires a tagger
     */
    public boolean exists(Lang lang) {
        return fromClassPathUnchecked(lang) != null;
    }

    /**
     * Resolves this resource from the classpath for the given language and
     * tagger and checks that it can be opened.
     *
     * @param lang
     *            the language of the resource
     * @param tagger
     *            the tagger of the resource; may be <code>null</code> only if
     *            the path pattern contains no tagger placeholder
     * @return the URL of the resource
     * @throws NullPointerException
     *             if the resource is not found on the classpath
     * @throws TermSuiteResourceException
     *             if the resource cannot be opened
     */
    public URL fromClasspath(Lang lang, Tagger tagger) {
        return checkUrl(fromClassPathUnchecked(lang, tagger));
    }

    /**
     * Looks this resource up on the classpath for the given language and
     * tagger, without checking that it exists. The looked-up name is
     * {@link TermSuiteConstants#DEFAULT_RESOURCE_URL_PREFIX} followed by
     * {@link #getPath(Lang, Tagger)}.
     *
     * @param lang
     *            the language of the resource
     * @param tagger
     *            the tagger of the resource; may be <code>null</code> only if
     *            the path pattern contains no tagger placeholder
     * @return the URL of the resource, or <code>null</code> if not found
     */
    public URL fromClassPathUnchecked(Lang lang, Tagger tagger) {
        String classpathPath = TermSuiteConstants.DEFAULT_RESOURCE_URL_PREFIX + getPath(lang, tagger);
        return getClass().getResource(classpathPath);
    }

    /**
     * Resolves this resource relative to the given URL prefix and checks
     * that it can be opened.
     *
     * @param prefix
     *            the URL under which resources are located
     * @param lang
     *            the language of the resource
     * @return the URL of the resource
     * @throws TermSuiteResourceException
     *             if the resource cannot be opened
     * @throws IllegalArgumentException
     *             if the path pattern of this resource requires a tagger
     */
    public URL fromUrlPrefix(URL prefix, Lang lang) {
        return fromUrlPrefix(prefix, lang, null);
    }

    /**
     * Resolves this resource relative to the given URL prefix and checks
     * that it can be opened.
     *
     * @param prefix
     *            the URL under which resources are located
     * @param lang
     *            the language of the resource
     * @param tagger
     *            the tagger of the resource; may be <code>null</code> only if
     *            the path pattern contains no tagger placeholder
     * @return the URL of the resource
     * @throws TermSuiteResourceException
     *             if the resource cannot be opened
     */
    public URL fromUrlPrefix(URL prefix, Lang lang, Tagger tagger) {
        return checkUrl(resolve(prefix, lang, tagger));
    }

    /**
     * Joins the URL prefix with the resolved path of this resource.
     *
     * @throws RuntimeException
     *             wrapping the {@link MalformedURLException} if the URL
     *             cannot be built
     */
    private URL resolve(URL prefix, Lang lang, Tagger tagger) {
        // Resolved once, outside the try: it is needed both for the join and
        // for the error message.
        String path = getPath(lang, tagger);
        try {
            return URLUtils.join(prefix, path);
        } catch (MalformedURLException e) {
            LOGGER.error("failed to build url: " + prefix + path, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Resolves the path of this resource for the given language. Equivalent
     * to {@link #getPath(Lang, Tagger)} with a <code>null</code> tagger.
     *
     * @param lang
     *            the language of the resource
     * @return the path with all placeholders substituted
     * @throws IllegalArgumentException
     *             if the path pattern of this resource requires a tagger
     */
    public String getPath(Lang lang) {
        return getPath(lang, null);
    }

    /**
     * Resolves the path of this resource by substituting the language and
     * tagger placeholders of the path pattern.
     *
     * @param lang
     *            the language of the resource
     * @param tagger
     *            the tagger of the resource; may be <code>null</code> only if
     *            the path pattern contains no tagger placeholder
     * @return the path with all placeholders substituted
     * @throws NullPointerException
     *             if <code>lang</code> is <code>null</code>
     * @throws IllegalArgumentException
     *             if the path pattern contains a tagger placeholder and
     *             <code>tagger</code> is <code>null</code>
     */
    public String getPath(Lang lang, Tagger tagger) {
        Preconditions.checkNotNull(lang);
        String pattern = getPathPattern();
        // Locale.ROOT keeps the generated file name independent of the JVM
        // default locale (e.g. the Turkish dotless i).
        String path = pattern
                .replace(LANG_SHORT_PATTERN, lang.getCode())
                .replace(LANG_PATTERN, lang.getName().toLowerCase(Locale.ROOT));
        if (pattern.contains(TAGGER_PATTERN) || pattern.contains(TAGGER_SHORT_PATTERN)) {
            Preconditions.checkArgument(
                    tagger != null,
                    "Tagger should not be nil for resource %s.",
                    this.toString().toLowerCase(Locale.ROOT));
            path = path
                    .replace(TAGGER_SHORT_PATTERN, tagger.getShortName())
                    .replace(TAGGER_PATTERN, tagger.getName());
        }
        return path;
    }

    /**
     * Finds the resource whose resolved path, for some language and tagger,
     * equals the given file name.
     *
     * @param fileName
     *            the relative path to look for
     * @return the first matching resource, or <code>null</code> if no
     *         resource resolves to <code>fileName</code>
     */
    public static final TermSuiteResource forFileName(String fileName) {
        for (Lang l : Lang.values()) {
            for (Tagger t : Tagger.values()) {
                for (TermSuiteResource r : TermSuiteResource.values()) {
                    if (r.getPath(l, t).equals(fileName)) {
                        return r;
                    }
                }
            }
        }
        return null;
    }
}