/**
* Copyright (C) 2012 cogroo <cogroo@cogroo.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.cogroo.tools.checker.rules.dictionary;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.log4j.Logger;
import org.cogroo.entities.impl.MorphologicalTag;
import org.cogroo.interpreters.TagInterpreter;
import org.cogroo.tools.checker.rules.model.TagMask;
import org.cogroo.util.PairWordPOSTag;
/**
 * Provides access to the FSA tag dictionary.
 * <p>
 * Every lookup is delegated to a {@link LexicalDictionary}; the raw tag strings
 * it returns are converted to {@link MorphologicalTag}s by the
 * {@link TagInterpreter} supplied at construction time.
 * <p>
 * Case-insensitive lookups lower-case the query with {@link Locale#ROOT}, so
 * the result does not depend on the default locale of the JVM.
 *
 * @author William Colen
 */
public class TagDictionary implements CogrooTagDictionary {

  /**
   * Logger.
   */
  private static final Logger LOGGER = Logger.getLogger(TagDictionary.class);

  /** Backing dictionary all queries are delegated to (visibility kept package-private). */
  LexicalDictionary access;

  /** Case sensitivity used by {@link #getTags(String)}. */
  private final boolean caseSensitive;

  /** Interpreter used to parse the tag strings stored in the dictionary. */
  private final TagInterpreter dicTI;

  /**
   * Creates a tag dictionary on top of a lexical dictionary.
   *
   * @param access the dictionary to query
   * @param caseSensitive whether {@link #getTags(String)} performs a case-sensitive lookup
   * @param tagInterpreter the interpreter used to parse the dictionary tag strings
   */
  public TagDictionary(LexicalDictionary access, boolean caseSensitive, TagInterpreter tagInterpreter)
  {
    this.dicTI = tagInterpreter;
    this.caseSensitive = caseSensitive;
    this.access = access;
  }

  /**
   * Tells whether the dictionary contains a word.
   *
   * @param word the word to look up
   * @param cs if {@code false}, the word is lower-cased before the lookup
   * @return the result of {@code wordExists} on the backing dictionary
   */
  public boolean exists(String word, boolean cs) {
    return this.access.wordExists(normalize(word, cs));
  }

  /**
   * Collects the inflected forms of a lemma whose tag matches a mask.
   *
   * @param primitive the lemma to look up
   * @param tagMask the mask each candidate tag must match
   * @param cs if {@code false}, the lemma is lower-cased before the lookup
   * @return the distinct matching words, in no particular order; an empty
   *         array if none matches
   */
  public String[] getInflectedPrimitive(String primitive, TagMask tagMask, boolean cs) {
    List<PairWordPOSTag> lemmaTag =
        this.access.getWordsAndPosTagsForLemma(normalize(primitive, cs));
    if (lemmaTag == null) { // Defensive programming.
      lemmaTag = new ArrayList<PairWordPOSTag>();
    }

    Set<String> inflectedLexemes = new HashSet<String>(lemmaTag.size());
    for (PairWordPOSTag pair : lemmaTag) {
      MorphologicalTag t = dicTI.parseMorphologicalTag(pair.getPosTag());
      if (t.match(tagMask)) {
        inflectedLexemes.add(pair.getWord());
      }
    }
    return inflectedLexemes.toArray(new String[inflectedLexemes.size()]);
  }

  /**
   * Collects the lemmas of a word whose tag matches a mask.
   *
   * @param lexeme the word to look up
   * @param tagMask the mask each candidate tag must match
   * @param cs if {@code false}, the word is lower-cased before the lookup
   * @return the distinct matching lemmas, in no particular order; never
   *         {@code null} - when nothing matches, an array holding a single
   *         empty string is returned
   */
  public String[] getPrimitive(String lexeme, TagMask tagMask, boolean cs) {
    Set<String> primitiveSet = new HashSet<String>();
    for (PairWordPOSTag lemmaPOSTag : lookupLemmas(lexeme, cs)) {
      if (dicTI.parseMorphologicalTag(lemmaPOSTag.getPosTag()).match(tagMask)) {
        primitiveSet.add(lemmaPOSTag.getWord());
      }
    }
    if (primitiveSet.isEmpty()) { // Do not return null, it is better to return an empty string.
      primitiveSet.add("");
    }
    return primitiveSet.toArray(new String[primitiveSet.size()]);
  }

  /**
   * Collects the lemmas of a word whose tag matches a morphological tag.
   *
   * @param lexeme the word to look up
   * @param morphologicalTag the tag each candidate tag must match
   * @param cs if {@code false}, the word is lower-cased before the lookup
   * @return the distinct matching lemmas, in no particular order, or
   *         {@code null} when nothing matches (note that the {@link TagMask}
   *         overload returns an array with an empty string instead)
   */
  public String[] getPrimitive(String lexeme, MorphologicalTag morphologicalTag, boolean cs) {
    Set<String> primitiveSet = new HashSet<String>();
    for (PairWordPOSTag lemmaPOSTag : lookupLemmas(lexeme, cs)) {
      if (dicTI.parseMorphologicalTag(lemmaPOSTag.getPosTag()).match(morphologicalTag)) {
        primitiveSet.add(lemmaPOSTag.getWord());
      }
    }
    if (primitiveSet.isEmpty()) {
      // This overload has always signalled "no match" with null; kept as is
      // because existing callers may test for it.
      return null;
    }
    return primitiveSet.toArray(new String[primitiveSet.size()]);
  }

  /**
   * Tells whether any dictionary tag of a word matches a mask.
   *
   * @param lexeme the word to look up
   * @param tagMask the mask to match against
   * @param cs if {@code false}, the word is lower-cased before the lookup
   * @return {@code true} as soon as one tag matches, {@code false} otherwise
   */
  public boolean match(String lexeme, TagMask tagMask, boolean cs) {
    for (PairWordPOSTag lemmaPOSTag : lookupLemmas(lexeme, cs)) {
      MorphologicalTag m = dicTI.parseMorphologicalTag(lemmaPOSTag.getPosTag());
      if (m.match(tagMask)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the tags the dictionary holds for a word.
   * <p>
   * In case-insensitive mode, a word containing upper-case characters is
   * looked up both in lower case and as given, and the two results are merged.
   *
   * @param word the word to look up
   * @param cs whether the lookup is case sensitive
   * @return the parsed tags; {@code null} if a single lookup was performed and
   *         the backing dictionary returned {@code null}
   */
  public MorphologicalTag[] getTags(String word, boolean cs) {
    if (cs) {
      return convertToTargetConvention(this.access.getPOSTagsForWord(word));
    }

    String lowerCaseWord = word.toLowerCase(Locale.ROOT);
    if (lowerCaseWord.equals(word)) {
      // Already lower case: a single lookup is enough.
      return convertToTargetConvention(this.access.getPOSTagsForWord(word));
    }

    // Merge the tags of both spellings; the set removes duplicates.
    Set<String> tags = new HashSet<String>();
    List<String> lc = this.access.getPOSTagsForWord(lowerCaseWord);
    if (lc != null) {
      tags.addAll(lc);
    }
    List<String> t = this.access.getPOSTagsForWord(word);
    if (t != null) {
      tags.addAll(t);
    }
    return convertToTargetConvention(tags);
  }

  /**
   * Returns the tags of a word using the case sensitivity given at
   * construction time.
   *
   * @param word the word to look up
   * @return see {@link #getTags(String, boolean)}
   */
  public MorphologicalTag[] getTags(String word) {
    return getTags(word, this.caseSensitive);
  }

  /**
   * Parses a dictionary tag string with the configured interpreter.
   *
   * @param ori the tag string as stored in the dictionary
   * @return the tag produced by the interpreter
   */
  public MorphologicalTag convertToTargetConvention(String ori) {
    return this.dicTI.parseMorphologicalTag(ori);
  }

  /**
   * Parses every tag string of a collection.
   *
   * @param ori the tag strings, may be {@code null}
   * @return the parsed tags in iteration order, or {@code null} if
   *         {@code ori} is {@code null}
   */
  private MorphologicalTag[] convertToTargetConvention(Collection<String> ori) {
    if (ori == null) {
      return null;
    }
    List<MorphologicalTag> tag = new ArrayList<MorphologicalTag>(ori.size());
    for (String morphologicalTag : ori) {
      tag.add(convertToTargetConvention(morphologicalTag));
    }
    return tag.toArray(new MorphologicalTag[tag.size()]);
  }

  /**
   * @return the interpreter used to parse the dictionary tags
   */
  public TagInterpreter getTagInterpreter() {
    return dicTI;
  }

  /**
   * Returns the form of a word to use as dictionary key.
   *
   * @param word the word as received from the caller
   * @param cs whether the lookup is case sensitive
   * @return {@code word} itself if {@code cs} is set, otherwise its
   *         locale-independent lower-case form
   */
  private static String normalize(String word, boolean cs) {
    return cs ? word : word.toLowerCase(Locale.ROOT);
  }

  /**
   * Fetches the lemma/tag pairs of a word, treating a {@code null} answer
   * from the backing dictionary as "no entries" so callers can iterate safely.
   *
   * @param lexeme the word to look up
   * @param cs if {@code false}, the word is lower-cased before the lookup
   * @return the pairs returned by the dictionary, never {@code null}
   */
  private List<PairWordPOSTag> lookupLemmas(String lexeme, boolean cs) {
    List<PairWordPOSTag> pairs =
        this.access.getLemmasAndPosTagsForWord(normalize(lexeme, cs));
    if (pairs == null) { // Defensive programming, same policy as getInflectedPrimitive.
      pairs = new ArrayList<PairWordPOSTag>();
    }
    return pairs;
  }
}