/**
 * Copyright (C) 2012 cogroo <cogroo@cogroo.org>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cogroo.uima.ae;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.AnalysisComponent;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.cogroo.analyzer.ComponentFactory;
import org.cogroo.analyzer.Pipe;
import org.cogroo.checker.CheckDocument;
import org.cogroo.checker.GrammarChecker;
import org.cogroo.entities.Mistake;

import cogroo.uima.GoldenSentence;
import cogroo.uima.GrammarError;

import com.google.common.io.Closeables;

/**
 * UIMA annotator that runs the CoGrOO grammar checker over every
 * {@link GoldenSentence} annotation of a CAS and records each reported
 * {@link Mistake} as a {@link GrammarError} annotation.
 */
public class NewTagsetBaselineCogrooAE extends JCasAnnotator_ImplBase {

  /**
   * Work on sentences instead of analyzing the full text.
   */
  public static final String PARAM_BYSENTENCES = "BySentences";

  /**
   * Directory with the resources.
   */
  public static final String PARAM_RESOURCESPATH = "ResourcesDir";

  /**
   * Name of the configuration parameter listing rules to ignore.
   * NOTE(review): this engine declares the parameter but does not apply it
   * to the checker - confirm whether that is intended.
   */
  public static final String PARAM_RULESTOIGNORE = "RulesToIgnore";

  /** Value of {@link #PARAM_BYSENTENCES}; never null after initialization. */
  private Boolean mIsBySentences;

  /** Grammar checker used to analyze each sentence. */
  private GrammarChecker mCogroo;

  /** Logger obtained from the UIMA context. */
  private Logger mLogger;

  /**
   * Creates the grammar checker and reads the configuration parameters.
   *
   * @param aContext
   *          the UIMA context supplying configuration and the logger
   * @throws ResourceInitializationException
   *           if the grammar checker cannot be created
   * @see AnalysisComponent#initialize(UimaContext)
   */
  @Override
  public void initialize(UimaContext aContext)
      throws ResourceInitializationException {
    // Let the base class record the context before anything else happens.
    super.initialize(aContext);

    mLogger = aContext.getLogger();
    mCogroo = createCogroo();

    mIsBySentences = (Boolean) aContext
        .getConfigParameterValue(PARAM_BYSENTENCES);
    if (null == mIsBySentences) {
      // could be null if not set, it is optional
      mIsBySentences = Boolean.FALSE;
    }
  }

  /**
   * Builds a {@link GrammarChecker} from the pipe described by the
   * {@code /models.xml} classpath resource and resets its ignored rules.
   * The resource stream is closed whether or not construction succeeds.
   *
   * @return a newly created grammar checker
   * @throws ResourceInitializationException
   *           if {@code /models.xml} is not on the classpath, or if building
   *           the checker fails with an {@link IllegalArgumentException} or
   *           an {@link IOException}
   */
  public static GrammarChecker createCogroo()
      throws ResourceInitializationException {
    InputStream in = ComponentFactory.class.getResourceAsStream("/models.xml");
    if (in == null) {
      throw new ResourceInitializationException(new IllegalStateException(
          "Classpath resource /models.xml not found"));
    }

    try {
      ComponentFactory factory = ComponentFactory.create(in);
      GrammarChecker cogroo = new GrammarChecker((Pipe) factory.createPipe());
      cogroo.resetIgnoredRules();
      return cogroo;
    } catch (IllegalArgumentException e) {
      throw new ResourceInitializationException(e);
    } catch (IOException e) {
      throw new ResourceInitializationException(e);
    } finally {
      // Always release the stream, including on the failure paths.
      Closeables.closeQuietly(in);
    }
  }

  /**
   * Checks the covered text of each {@link GoldenSentence} and adds one
   * {@link GrammarError} annotation per mistake, with offsets translated from
   * sentence-relative to document-relative positions. A failure while
   * handling one sentence is logged and the remaining sentences are still
   * processed.
   *
   * @param jcas
   *          the CAS holding the sentences and receiving the error annotations
   * @throws AnalysisEngineProcessException
   *           declared by the overridden method; not thrown by this code
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    AnnotationIndex<Annotation> sentIndex = jcas
        .getAnnotationIndex(GoldenSentence.type);

    for (Annotation annotation : sentIndex) {
      GoldenSentence sentence = (GoldenSentence) annotation;
      int start = sentence.getBegin();
      String text = sentence.getCoveredText();

      try {
        CheckDocument doc = new CheckDocument();
        doc.setText(text);
        mCogroo.analyze(doc);

        List<Mistake> mistakes = doc.getMistakes();
        for (Mistake mistake : mistakes) {
          String ruleId = mistake.getRuleIdentifier();
          String[] suggestions = mistake.getSuggestions();

          GrammarError ge = new GrammarError(jcas);
          // Mistake offsets are relative to the sentence text, so shift them
          // by the sentence start to get document offsets.
          ge.setBegin(start + mistake.getStart());
          ge.setEnd(start + mistake.getEnd());
          ge.setRuleId(ruleId);
          ge.setCategory(Categories.getCat(ruleId));
          ge.setError(text.substring(mistake.getStart(), mistake.getEnd()));
          if (suggestions != null && suggestions.length > 0) {
            ge.setReplace(Arrays.toString(suggestions));
          }
          ge.addToIndexes();
        }
      } catch (Throwable e) {
        // Broad catch kept so that one failing sentence does not abort the
        // whole CAS; the failure is reported once through the UIMA logger.
        mLogger.log(Level.SEVERE, "Failed: " + text, e);
      }
    }
  }
}