/** * Copyright (C) 2012 cogroo <cogroo@cogroo.org> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.cogroo.tools.checker.rules.util; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.cogroo.entities.Sentence; import org.cogroo.entities.impl.ChunkTag; import org.cogroo.entities.impl.MorphologicalTag; import org.cogroo.entities.impl.SyntacticTag; import org.cogroo.tools.checker.rules.model.Composition; import org.cogroo.tools.checker.rules.model.Element; import org.cogroo.tools.checker.rules.model.Mask; import org.cogroo.tools.checker.rules.model.Operator; import org.cogroo.tools.checker.rules.model.PatternElement; import org.cogroo.tools.checker.rules.model.Reference; import org.cogroo.tools.checker.rules.model.Reference.Property; import org.cogroo.tools.checker.rules.model.Rule; import org.cogroo.tools.checker.rules.model.Suggestion; import org.cogroo.tools.checker.rules.model.Suggestion.Replace; import org.cogroo.tools.checker.rules.model.Suggestion.ReplaceMapping; import org.cogroo.tools.checker.rules.model.Suggestion.Swap; import org.cogroo.tools.checker.rules.model.TagMask; import org.cogroo.tools.checker.rules.model.TagMask.Gender; import org.cogroo.tools.checker.rules.model.TagMask.Number; /** * Set of utility methods, mostly to convert a rule element to a human readable string. * * @author Marcelo Suzumura * @author William Colen */ public class RuleUtils { public static Map<RuleInfo, String> getRuleAsString(Rule rule) { Map<RuleInfo, String> map = new HashMap<RuleInfo, String>(); map.put(RuleInfo.METHOD, getMethodAsString(rule)); map.put(RuleInfo.TYPE, getTypeAsString(rule)); map.put(RuleInfo.GROUP, getGroupAsString(rule)); map.put(RuleInfo.MESSAGE, getMessageAsString(rule)); map.put(RuleInfo.SHORTMESSAGE, getShortMessageAsString(rule)); map.put(RuleInfo.PATTERN, getPatternAsString(rule)); map.put(RuleInfo.BOUNDARIES, getBoundariesAsString(rule)); map.put(RuleInfo.SUGGESTIONS, getSuggestionsAsString(rule)); return map; } public static String getMethodAsString(Rule rule) { return rule.getMethod().value(); } public static String getTypeAsString(Rule rule) { return rule.getType(); } public static String getGroupAsString(Rule rule) { return rule.getGroup(); } public static String getMessageAsString(Rule rule) { return rule.getMessage(); } public static String getShortMessageAsString(Rule rule) { return rule.getShortMessage(); } public static String getPatternAsString(Rule rule) { StringBuilder sb = new StringBuilder(); for (PatternElement patternElement : rule.getPattern() .getPatternElement()) { sb.append(getPatternElementAsString(patternElement)); sb.append(" "); } sb.deleteCharAt(sb.length() - 1); return sb.toString(); } public static String getPatternElementAsString(PatternElement patternElement) { if (patternElement.getElement() != null) return getElementAsString(patternElement.getElement()); else if (patternElement.getComposition() != null) return getCompositionAsString(patternElement.getComposition()); return "NULL"; } private static String getCompositionAsString(Composition composition) { if (composition.getAnd() != null) return getOperatorAsString(composition.getAnd(), " & "); else if (composition.getOr() != null) return getOperatorAsString(composition.getOr(), " | "); return "NULL"; } private static String getOperatorAsString(Operator operator, String op) { List<PatternElement> peList = operator.getPatternElement(); StringBuilder sb = new StringBuilder(); sb.append("( "); int i = 0; for (; i < peList.size() - 1; i++) { sb.append(getPatternElementAsString(peList.get(i)) + op); } sb.append(getPatternElementAsString(peList.get(i)) + ") "); return sb.toString(); } /** * Gets the string representation of an element. * * @param element * the element to be planified to a string * @return the element as a string */ public static String getElementAsString(Element element) { StringBuilder sb = new StringBuilder(); if (element.isNegated() != null && element.isNegated().booleanValue()) { sb.append("~"); } int masks = element.getMask().size(); if (masks > 1) { sb.append("("); } int maskCounter = 0; for (Mask mask : element.getMask()) { // Encloses lexemes between quotes. if (mask.getLexemeMask() != null) { sb.append("\"").append(mask.getLexemeMask()).append("\""); } else if (mask.getPrimitiveMask() != null) { // Primitives are enclosed between curly brackets. sb.append("{").append(mask.getPrimitiveMask()).append("}"); } else if (mask.getTagMask() != null) { sb.append(getTagMaskAsString(mask.getTagMask())); } else if (mask.getTagReference() != null) { sb.append(getTagReferenceAsString(mask.getTagReference())); } if (maskCounter < masks - 1) { sb.append("|"); } maskCounter++; } if (masks > 1) { sb.append(")"); } return sb.toString(); } public static String getTagReferenceAsString(Reference tagRef) { StringBuilder sb = new StringBuilder(); String index = Long.toString(tagRef.getIndex()); sb.append("( ref[" + index + "] "); tagRef.getProperty(); for (Property prop : tagRef.getProperty()) { sb.append(prop + " "); } sb.append(")"); return sb.toString(); } public static String getTagMaskAsString(TagMask tagMask) { StringBuilder sb = new StringBuilder(); if (tagMask.getSyntacticFunction() != null) { sb.append(tagMask.getSyntacticFunction().value()).append("_"); } if (tagMask.getClazz() != null) { sb.append(tagMask.getClazz().value()).append("_"); } if (tagMask.getGender() != null) { sb.append(tagMask.getGender().value()).append("_"); } if (tagMask.getNumber() != null) { sb.append(tagMask.getNumber().value()).append("_"); } if (tagMask.getCase() != null) { sb.append(tagMask.getCase().value()).append("_"); } if (tagMask.getPerson() != null) { sb.append(tagMask.getPerson().value()).append("_"); } if (tagMask.getTense() != null) { sb.append(tagMask.getTense().value()).append("_"); } if (tagMask.getMood() != null) { sb.append(tagMask.getMood().value()).append("_"); } if (tagMask.getPunctuation() != null) { sb.append(tagMask.getPunctuation().value()).append("_"); } return sb.toString(); } public static String getBoundariesAsString(Rule rule) { return rule.getBoundaries().getLower() + " " + rule.getBoundaries().getUpper(); } public static String getSuggestionsAsString(Rule rule) { StringBuilder sb = new StringBuilder(); if (rule.getSuggestion().isEmpty()) { sb.append("none"); } for (Suggestion suggestion : rule.getSuggestion()) { // Replaces. if (!suggestion.getReplace().isEmpty()) { sb.append("Replace: "); } for (Replace replace : suggestion.getReplace()) { sb.append(replace.getIndex()); sb.append(" <=> "); if (replace.getLexeme() != null) { sb.append("\""); sb.append(replace.getLexeme()); sb.append("\""); } else if (replace.getTagReference() != null) { sb.append(replace.getTagReference().getIndex()); sb.append("_"); sb.append(getTagMaskAsString(replace.getTagReference() .getTagMask())); } sb.append("|"); } sb = removeLastVerticalBar(sb); // Replace mappings. if (!suggestion.getReplaceMapping().isEmpty()) { sb.append("Replace Mapping: "); } for (ReplaceMapping replaceMapping : suggestion.getReplaceMapping()) { sb.append(replaceMapping.getIndex()); sb.append(" "); sb.append(replaceMapping.getKey()); sb.append(" => "); sb.append(replaceMapping.getValue()); sb.append("|"); } sb = removeLastVerticalBar(sb); // Swaps. if (!suggestion.getSwap().isEmpty()) { sb.append("Swap: "); } for (Swap swap : suggestion.getSwap()) { sb.append(swap.getA()); sb.append(" <=> "); sb.append(swap.getB()); sb.append("|"); } sb = removeLastVerticalBar(sb); sb.append("\n"); } return sb.toString(); } private static StringBuilder removeLastVerticalBar(StringBuilder sb) { if (sb.length() > 0 && sb.charAt(sb.length() - 1) == '|') { return sb.deleteCharAt(sb.length() - 1); } return sb; } public enum RuleInfo { METHOD, TYPE, GROUP, MESSAGE, SHORTMESSAGE, PATTERN, BOUNDARIES, SUGGESTIONS } public static TagMask createTagMaskFromReference(Reference ref, MorphologicalTag mTag, ChunkTag cTag, SyntacticTag sTag) { TagMask t = new TagMask(); if (mTag == null) { return t; } for (Property p : ref.getProperty()) { switch (p) { case CLASS : t.setClazz(mTag.getClazzE()); break; case GENDER : if(Gender.NEUTRAL != mTag.getGenderE()) t.setGender(mTag.getGenderE()); break; case NUMBER : if(Number.NEUTRAL != mTag.getNumberE()) t.setNumber(mTag.getNumberE()); break; case PERSON : t.setPerson(mTag.getPersonE()); break; case SYNTACTIC_FUNCTION : t.setSyntacticFunction(TagMask.SyntacticFunction .fromValue(sTag.toVerboseString())); break; case CHUNK_FUNCTION : t.setChunkFunction(TagMask.ChunkFunction.fromValue(cTag .toVerboseString())); break; default : break; } } return t; } public static TagMask createTagMaskFromReference(Reference ref, Sentence sent, int refPos) { int pos = refPos + (int)ref.getIndex(); if(pos >= 0 && pos < sent.getTokens().size()) { MorphologicalTag mTag = sent.getTokens().get(pos).getMorphologicalTag(); ChunkTag cTag = sent.getTokens().get(pos).getChunkTag(); SyntacticTag sTag = sent.getTokens().get(pos).getSyntacticTag(); return createTagMaskFromReference(ref, mTag, cTag, sTag); } else { return new TagMask(); } } public static TagMask createTagMaskFromReferenceSyntatic(Reference ref, Sentence sent, int refPos) { int pos = refPos + (int)ref.getIndex(); if(pos >= 0 && pos < sent.getSyntacticChunks().size()) { MorphologicalTag mTag = sent.getSyntacticChunks().get(pos).getMorphologicalTag(); // ChunkTag cTag = sent.getSyntacticChunks().get(pos).getChunkTag(); SyntacticTag sTag = sent.getSyntacticChunks().get(pos).getSyntacticTag(); return createTagMaskFromReference(ref, mTag, null, sTag); } else { return new TagMask(); } } public static void completeMissingParts(TagMask tagMask, MorphologicalTag tag) { if(tagMask.getCase() == null) { tagMask.setCase(tag.getCase()); } if(tagMask.getClazz() == null) { tagMask.setClazz(tag.getClazzE()); } if(tagMask.getGender() == null) { tagMask.setGender(tag.getGenderE()); } if(tagMask.getMood() == null) { tagMask.setMood(tag.getMood()); } if(tagMask.getNumber() == null) { tagMask.setNumber(tag.getNumberE()); } if(tagMask.getPerson() == null) { tagMask.setPerson(tag.getPersonE()); } if(tagMask.getPunctuation() == null) { tagMask.setPunctuation(tag.getPunctuation()); } if(tagMask.getTense() == null) { tagMask.setTense(tag.getTense()); } if(tagMask.getPunctuation() == null) { tagMask.setPunctuation(tag.getPunctuation()); } } private static final Map<String, String> TAGS; static { Map<String, String> tags = new HashMap<String, String>(); tags.put("n", "substantivo"); tags.put("prop", "substantivo próprio"); tags.put("adj", "adjetivo"); tags.put("n-adj", "substantivo"); tags.put("v-fin", "verbo"); tags.put("v-inf", "verbo"); tags.put("v-pcp", "verbo"); tags.put("v-ger", "verbo"); tags.put("art", "artigo"); tags.put("pron-det", "pronome"); tags.put("pron-pers", "pronome"); tags.put("pron-indp", "pronome"); tags.put("adv", "advérbio"); tags.put("num", "numeral"); tags.put("prp", "preposição"); tags.put("intj", "interjeição"); tags.put("conj-s", "conjunção"); tags.put("conj-c", "conjunção"); TAGS = Collections.unmodifiableMap(tags); } public static String translate(String ori) { if(TAGS.containsKey(ori)) { return TAGS.get(ori); } return ori; } /** * Checks the case of the first char from <code>replaceable</code> and changes the first char from the * <code>replacement</code> accordingly. * * @param replaceable * the string that will be replaced * @param replacement * the string that will be used to replace the <code>replaceable</code> * @return the replacement, beginning with upper case if the <code>replaceable</code> begins too or * lower case, if not */ public static String useCasedString(String replaceable, String replacement) { String replacementCased = replacement; if (replacement.length() > 1) { // If the first char of the replaceable lexeme is upper case... if (Character.isUpperCase(replaceable.charAt(0))) { // ... so must be its replacement. replacementCased = Character.toUpperCase(replacement.charAt(0)) + replacement.substring(1); } else { // ... the replacement must be lower case. replacementCased = Character.toLowerCase(replacement.charAt(0)) + replacement.substring(1); } } else if (replacement.length() == 1) { // If the first char of the replaceable lexeme is upper case... if (Character.isUpperCase(replaceable.charAt(0))) { // ... so must be its replacement. replacementCased = String.valueOf(Character.toUpperCase(replacement.charAt(0))); } else { // ... the replacement must be lower case. replacementCased = String.valueOf(Character.toLowerCase(replacement.charAt(0))); } } return replacementCased; } // public static void main(String[] args) { // // Rule rule = RulesService.getInstance().getRule(69, true); // // System.out.println(patternAsString(rule.getPattern())); // Rules rules = new RulesContainerHelper().getContainerForXMLAccess().getComponent(RulesProvider.class).getRules(); // // for (Rule rule : rules.getRule()) { // // System.out.println(rule.getId() + "\t" + // // getPatternAsString(rule)); // System.out.println(rule.getId() + "\t" // + getSuggestionsAsString(rule)); // } // } }