/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.stanbol.enhancer.nlp.pos.olia; import org.apache.clerezza.commons.rdf.IRI; import org.apache.stanbol.enhancer.nlp.model.tag.TagSet; import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory; import org.apache.stanbol.enhancer.nlp.pos.Pos; import org.apache.stanbol.enhancer.nlp.pos.PosTag; /** * Defines {@link TagSet}s for the English language.<p> * TODO: this is currently done manually but it should be able to generate this * based on the <a herf="http://nlp2rdf.lod2.eu/olia/">OLIA</a> Ontologies * @author Rupert Westenthaler * */ public final class English { private English(){} public static final TagSet<PosTag> PENN_TREEBANK = new TagSet<PosTag>( "Penn Treebank", "en"); static { //TODO: define constants for annotation model and linking model PENN_TREEBANK.getProperties().put("olia.annotationModel", new IRI("http://purl.org/olia/penn.owl")); PENN_TREEBANK.getProperties().put("olia.linkingModel", new IRI("http://purl.org/olia/penn-link.rdf")); PENN_TREEBANK.addTag(new PosTag("CC", Pos.CoordinatingConjunction)); PENN_TREEBANK.addTag(new PosTag("CD",Pos.CardinalNumber)); PENN_TREEBANK.addTag(new PosTag("DT",Pos.Determiner)); PENN_TREEBANK.addTag(new PosTag("EX",Pos.ExistentialParticle)); //TODO: unsure mapping PENN_TREEBANK.addTag(new PosTag("FW",Pos.Foreign)); PENN_TREEBANK.addTag(new PosTag("IN",Pos.Preposition, Pos.SubordinatingConjunction)); PENN_TREEBANK.addTag(new PosTag("JJ",LexicalCategory.Adjective)); PENN_TREEBANK.addTag(new PosTag("JJR",LexicalCategory.Adjective, Pos.ComparativeParticle)); PENN_TREEBANK.addTag(new PosTag("JJS",LexicalCategory.Adjective, Pos.SuperlativeParticle)); PENN_TREEBANK.addTag(new PosTag("LS",Pos.ListMarker)); PENN_TREEBANK.addTag(new PosTag("MD",Pos.ModalVerb)); PENN_TREEBANK.addTag(new PosTag("NN",Pos.CommonNoun, Pos.SingularQuantifier)); PENN_TREEBANK.addTag(new PosTag("NNP",Pos.ProperNoun, Pos.SingularQuantifier)); PENN_TREEBANK.addTag(new PosTag("NNPS",Pos.ProperNoun, Pos.PluralQuantifier)); PENN_TREEBANK.addTag(new PosTag("NNS",Pos.CommonNoun, Pos.PluralQuantifier)); PENN_TREEBANK.addTag(new PosTag("PDT",Pos.Determiner)); //TODO should be Pre-Determiner PENN_TREEBANK.addTag(new PosTag("POS")); //TODO: map Possessive Ending (e.g., Nouns ending in 's) PENN_TREEBANK.addTag(new PosTag("PP",Pos.PersonalPronoun)); PENN_TREEBANK.addTag(new PosTag("PP$",Pos.PossessivePronoun)); PENN_TREEBANK.addTag(new PosTag("PRP",Pos.PersonalPronoun)); PENN_TREEBANK.addTag(new PosTag("PRP$",Pos.PossessivePronoun)); PENN_TREEBANK.addTag(new PosTag("RB",LexicalCategory.Adverb)); PENN_TREEBANK.addTag(new PosTag("RBR",LexicalCategory.Adverb,Pos.ComparativeParticle)); PENN_TREEBANK.addTag(new PosTag("RBS",LexicalCategory.Adverb,Pos.SuperlativeParticle)); PENN_TREEBANK.addTag(new PosTag("RP",Pos.Participle)); PENN_TREEBANK.addTag(new PosTag("SYM",Pos.Symbol)); PENN_TREEBANK.addTag(new PosTag("TO",LexicalCategory.Adposition)); PENN_TREEBANK.addTag(new PosTag("UH",LexicalCategory.Interjection)); PENN_TREEBANK.addTag(new PosTag("VB",Pos.Infinitive)); //TODO check a Verb in the base form should be Pos.Infinitive PENN_TREEBANK.addTag(new PosTag("VBD",Pos.PastParticiple)); //TODO check PENN_TREEBANK.addTag(new PosTag("VBG",Pos.PresentParticiple,Pos.Gerund)); PENN_TREEBANK.addTag(new PosTag("VBN",Pos.PastParticiple)); PENN_TREEBANK.addTag(new PosTag("VBP",Pos.PresentParticiple)); PENN_TREEBANK.addTag(new PosTag("VBZ",Pos.PresentParticiple)); PENN_TREEBANK.addTag(new PosTag("WDT",Pos.WHDeterminer)); PENN_TREEBANK.addTag(new PosTag("WP",Pos.WHPronoun)); PENN_TREEBANK.addTag(new PosTag("WP$",Pos.PossessivePronoun, Pos.WHPronoun)); PENN_TREEBANK.addTag(new PosTag("WRB",Pos.WHTypeAdverbs)); PENN_TREEBANK.addTag(new PosTag("ยดยด",Pos.CloseQuote)); PENN_TREEBANK.addTag(new PosTag(":",Pos.Colon)); PENN_TREEBANK.addTag(new PosTag(",",Pos.Comma)); PENN_TREEBANK.addTag(new PosTag("$",LexicalCategory.Residual)); PENN_TREEBANK.addTag(new PosTag("\"",Pos.Quote)); PENN_TREEBANK.addTag(new PosTag("``",Pos.OpenQuote)); PENN_TREEBANK.addTag(new PosTag(".",Pos.Point)); PENN_TREEBANK.addTag(new PosTag("{",Pos.OpenCurlyBracket)); PENN_TREEBANK.addTag(new PosTag("}",Pos.CloseCurlyBracket)); PENN_TREEBANK.addTag(new PosTag("[",Pos.OpenSquareBracket)); PENN_TREEBANK.addTag(new PosTag("]",Pos.CloseSquareBracket)); PENN_TREEBANK.addTag(new PosTag("(",Pos.OpenParenthesis)); PENN_TREEBANK.addTag(new PosTag(")",Pos.CloseParenthesis)); } }