/**
* Copyright (C) 2012 cogroo <cogroo@cogroo.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.cogroo.tools.checker.rules.util;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.cogroo.entities.Token;
import org.cogroo.tools.checker.rules.model.TagMask;
/**
* Utility methods to work with {@link TagMask}s: cloning a {@link TagMask},
* parsing one from its textual form and building one from the attributes of
* a {@link Token}.
*
* @author colen
*
*/
public class TagMaskUtils {

  // Matches "name=value" pairs: a word, optional whitespace around '=', and a
  // value made of word characters and hyphens. Any word is accepted as the
  // name here; parse(String) rejects the ones it does not know.
  private static final Pattern ATTRIBUTE_ASSIGNMENT = Pattern
      .compile("(\\w+)\\s*=\\s*([\\w-]+)");

  // Matches each individual word. Any word is accepted here;
  // createTagMaskFromToken(Token, String) skips the ones it does not know.
  private static final Pattern ATTRIBUTE_NAME = Pattern.compile("(\\w+)");

  /**
   * Builds a new {@link TagMask} carrying the same attribute values as the
   * given one. The attributes copied are case, chunk function, class, gender,
   * mood, number, person, punctuation, syntactic function and tense.
   *
   * @param tagMask
   *          the tag mask to be copied
   * @return a new tag mask with the copied attributes
   */
  public static TagMask clone(TagMask tagMask) {
    TagMask copy = new TagMask();

    copy.setCase(tagMask.getCase());
    copy.setChunkFunction(tagMask.getChunkFunction());
    copy.setClazz(tagMask.getClazz());
    copy.setGender(tagMask.getGender());
    copy.setMood(tagMask.getMood());
    copy.setNumber(tagMask.getNumber());
    copy.setPerson(tagMask.getPerson());
    copy.setPunctuation(tagMask.getPunctuation());
    copy.setSyntacticFunction(tagMask.getSyntacticFunction());
    copy.setTense(tagMask.getTense());

    return copy;
  }

  /**
   * Builds a {@link TagMask} from a textual list of attribute assignments.
   * The recognized attributes are <code>number</code>, <code>gender</code>,
   * <code>class</code>, <code>person</code>, <code>tense</code> and
   * <code>mood</code>. Hyphens in a value are replaced by spaces before the
   * value is converted.
   *
   * @param text
   *          a string containing the attributes and their values, e.g.,
   *          "number=plural gender=male"
   * @return a TagMask object with the attributes and their values
   * @throws IllegalArgumentException
   *           if an assignment names an attribute that is not recognized
   */
  public static TagMask parse(String text) {
    TagMask mask = new TagMask();
    Matcher assignments = ATTRIBUTE_ASSIGNMENT.matcher(text);

    while (assignments.find()) {
      String name = assignments.group(1);
      // Multi-word values are written with hyphens in the text form.
      String value = assignments.group(2).replace('-', ' ');

      if ("number".equals(name)) {
        mask.setNumber(TagMask.Number.fromValue(value));
      } else if ("gender".equals(name)) {
        mask.setGender(TagMask.Gender.fromValue(value));
      } else if ("class".equals(name)) {
        mask.setClazz(TagMask.Class.fromValue(value));
      } else if ("person".equals(name)) {
        mask.setPerson(TagMask.Person.fromValue(value));
      } else if ("tense".equals(name)) {
        mask.setTense(TagMask.Tense.fromValue(value));
      } else if ("mood".equals(name)) {
        mask.setMood(TagMask.Mood.fromValue(value));
      } else {
        throw new IllegalArgumentException("Invalid property: '"
            + name + "'");
      }
    }

    return mask;
  }

  /**
   * Builds a {@link TagMask} holding the requested attributes of a token,
   * read from the token's morphological tag. The recognized attribute names
   * are <code>number</code>, <code>gender</code>, <code>class</code>,
   * <code>person</code>, <code>tense</code> and <code>mood</code>; any other
   * word in <code>text</code> is skipped.
   *
   * @param token
   *          the token whose attributes will be collected
   * @param text
   *          a string containing the names of the attributes to get from the
   *          token, e.g., "number gender"
   * @return a TagMask object with the attributes collected
   */
  public static TagMask createTagMaskFromToken(Token token, String text) {
    TagMask mask = new TagMask();
    Matcher names = ATTRIBUTE_NAME.matcher(text);

    while (names.find()) {
      String name = names.group(1);

      // The morphological tag is fetched only for recognized names.
      if ("number".equals(name)) {
        mask.setNumber(token.getMorphologicalTag().getNumberE());
      } else if ("gender".equals(name)) {
        mask.setGender(token.getMorphologicalTag().getGenderE());
      } else if ("class".equals(name)) {
        mask.setClazz(token.getMorphologicalTag().getClazzE());
      } else if ("person".equals(name)) {
        mask.setPerson(token.getMorphologicalTag().getPersonE());
      } else if ("tense".equals(name)) {
        mask.setTense(token.getMorphologicalTag().getTense());
      } else if ("mood".equals(name)) {
        mask.setMood(token.getMorphologicalTag().getMood());
      }
      // Unrecognized names are ignored.
    }

    return mask;
  }
}