/** * Copyright (C) 2012 cogroo <cogroo@cogroo.org> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.cogroo.tools.checker.checkers; import static org.cogroo.tools.checker.rules.util.RuleUtils.translate; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.apache.log4j.Logger; import org.cogroo.analyzer.Analyzer; import org.cogroo.entities.Mistake; import org.cogroo.text.Document; import org.cogroo.text.Sentence; import org.cogroo.text.Token; import org.cogroo.text.impl.DocumentImpl; import org.cogroo.tools.checker.AbstractChecker; import org.cogroo.tools.checker.JavaRuleDefinition; import org.cogroo.tools.checker.RuleDefinition; import org.cogroo.tools.checker.rules.model.Example; import org.cogroo.tools.checker.rules.paronym.ParonymList; import org.cogroo.tools.checker.rules.util.RuleUtils; public class ParonymChecker extends AbstractChecker { private static final String ID_PREFIX = "probs:"; static final String ID = ID_PREFIX + "paronyms"; static final String CATEGORY = "Enganos ortográficos"; static final String GROUP = "Ortografia"; static final String DESCRIPTION = "Procura por enganos em parônimos."; static final String MESSAGE = "Se a classe de %s for %s, use %s."; static final String SHORT = "Possível confusão entre %s e %s."; private static final Logger LOGGER = Logger.getLogger(ParonymChecker.class); private Analyzer analyzer; private final ParonymList dictionary; private Map<String, String> map; public ParonymChecker(Analyzer analyzer) { this.analyzer = analyzer; List<Example> examples = new ArrayList<Example>(); examples.add(createExample("Eu tenho uma duvida.", "Eu tenho uma dúvida.")); RuleDefinition definition = new JavaRuleDefinition(ID, CATEGORY, GROUP, DESCRIPTION, MESSAGE, SHORT, examples); add(definition); dictionary = new ParonymList(); map = dictionary.getParonymsMap(); } public String getIdPrefix() { return ID_PREFIX; } public int getPriority() { return 311; } public List<Mistake> check(Sentence sentence) { List<Mistake> mistakes= new ArrayList<Mistake>(); if(sentence.getTokens().size() < 2) { return mistakes; } for(int i = 0; i < sentence.getTokens().size(); i++) { Token originalToken = sentence.getTokens().get(i); String wanted = originalToken.getLexeme(); String wantedLowerCase = wanted.toLowerCase(); if(map.containsKey(wantedLowerCase)){ String candidate = RuleUtils.useCasedString(wanted, map.get(wantedLowerCase)); String sentenceText = sentence.getText(); String alternativeText = sentenceText.substring(0, originalToken.getStart()) + candidate + sentenceText.substring(originalToken.getEnd()); if (LOGGER.isDebugEnabled()) { LOGGER.debug("\n****** Sentença alternativa **********:\n" + alternativeText); } Document alternative = new DocumentImpl(alternativeText); this.analyzer.analyze(alternative); Sentence alternativeSentence = alternative.getSentences().get(0); if(alternativeSentence.getTokensProb() - sentence.getTokensProb() > 0.1){ if (LOGGER.isDebugEnabled()) { LOGGER.debug("Prob original: " + sentence.getTokensProb()); LOGGER.debug("Prob alternat: " + alternativeSentence.getTokensProb()); LOGGER.debug("\n****** Possível correção **********:\n" + sentenceText + " -> " + alternativeText); } Token alternativeToken = alternativeSentence.getTokens().get(i); String [] suggestions = {candidate}; String [] longMsgArgs = {wanted, translate(alternativeToken.getPOSTag()), candidate}; String [] shortMsgArgs = {wanted, candidate}; mistakes.add(createMistake(ID, longMsgArgs, shortMsgArgs, suggestions, originalToken.getStart(), originalToken.getEnd(), sentence.getText())); } } } return mistakes; } }