/* * Copyright (C) 2013, VistaTEC or third-party contributors as indicated * by the @author tags or express copyright attribution statements applied by * the authors. All third-party contributions are distributed under license by * VistaTEC. * * This file is part of Ocelot. * * Ocelot is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Ocelot is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, write to: * * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 * USA * * Also, see the full LGPL text here: <http://www.gnu.org/copyleft/lesser.html> */ package com.vistatec.ocelot.rules; import com.google.common.collect.Lists; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import net.sf.okapi.common.annotation.GenericAnnotation; import net.sf.okapi.common.annotation.GenericAnnotationType; import org.junit.*; import com.vistatec.ocelot.its.model.LanguageQualityIssue; import com.vistatec.ocelot.its.model.OtherITSMetadata; import com.vistatec.ocelot.its.model.Provenance; import com.vistatec.ocelot.rules.DataCategoryField.Matcher; import com.vistatec.ocelot.segment.model.OcelotSegment; import com.vistatec.ocelot.its.model.okapi.OkapiProvenance; import static org.junit.Assert.*; import com.vistatec.ocelot.segment.model.SimpleSegment; public class TestRules { @Test public void testEmptyRulesMatchNothing() { Rule rule = new Rule(new ArrayList<RuleMatcher>()); assertFalse(rule.matches(lqi("omission", 100))); assertFalse(rule.matches(lqi("non-conformance", 100))); assertFalse(rule.matches(new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_ORG, "S", GenericAnnotationType.PROV_PERSON, "T", GenericAnnotationType.PROV_TOOL, "U")))); } OcelotSegment emptySegment() { return new SimpleSegment.Builder() .segmentNumber(1) .source("") .target("") .build(); } @Test public void testMtConfidence() throws Exception { List<RuleMatcher> ruleMatchers = new ArrayList<RuleMatcher>(); // Look for MT confidence of 75 and below ruleMatchers.add(new RuleMatcher(DataCategoryField.MT_CONFIDENCE, numericMatcher(0, 75))); Rule filter = new Rule(ruleMatchers); OcelotSegment segment = emptySegment(); segment.addAllOtherITSMetadata(Collections.singletonList( new OtherITSMetadata(DataCategoryField.MT_CONFIDENCE, new Double(50)))); assertTrue(filter.matches(segment)); segment = emptySegment(); segment.addAllOtherITSMetadata(Collections.singletonList( new OtherITSMetadata(DataCategoryField.MT_CONFIDENCE, new Double(80)))); assertFalse(filter.matches(segment)); } @Test public void testLQIMatching() throws Exception { List<RuleMatcher> ruleMatchers = new ArrayList<RuleMatcher>(); // look for omissions with severity 85 and up ruleMatchers.add(new RuleMatcher(DataCategoryField.LQI_TYPE, regexMatcher("omission"))); ruleMatchers.add(new RuleMatcher(DataCategoryField.LQI_SEVERITY, numericMatcher(85, 100))); Rule filter = new Rule(ruleMatchers); // This one should match LanguageQualityIssue lqi1 = lqi("omission", 85); // This one should not match - incorrect type LanguageQualityIssue lqi2 = lqi("terminology", 85); // This one should not match - incorrect severity LanguageQualityIssue lqi3 = lqi("omission", 60); OcelotSegment segment = emptySegment(); segment.addAllLQI(Lists.newArrayList(lqi1, lqi2, lqi3)); assertTrue(filter.matches(segment)); segment = emptySegment(); segment.addAllLQI(Collections.singletonList(lqi1)); assertTrue(filter.matches(segment)); segment = emptySegment(); segment.addAllLQI(Collections.singletonList(lqi2)); assertFalse(filter.matches(segment)); segment = emptySegment(); segment.addAllLQI(Collections.singletonList(lqi3)); assertFalse(filter.matches(segment)); segment = emptySegment(); segment.addAllLQI(Lists.newArrayList(lqi1, lqi2)); assertTrue(filter.matches(segment)); // Tricky! Make sure we don't get a false positive // because we have an omission AND a valid severity! // (We do have each, but not on the same issue.) segment = new SimpleSegment.Builder() .segmentNumber(6) .source("") .target("") .build(); segment.addAllLQI(Lists.newArrayList(lqi2, lqi3)); assertFalse(filter.matches(segment)); } @Test public void testProvenanceBasicFieldsMatches() throws Exception { Provenance matchingProv = new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_ORG, "S", GenericAnnotationType.PROV_PERSON, "T", GenericAnnotationType.PROV_TOOL, "U")); testBasicProvenance(matchingProv, true); } @Test public void testProvenanceRuleShouldntMatchNonProvenance() throws Exception { Rule filter = ruleFilter( new RuleMatcher(DataCategoryField.PROV_TOOL, regexMatcher("Google Translator's Toolkit"))); assertFalse(filter.matches(lqi("non-conformance", 85))); } @Test public void testProvenanceBasicFieldsFailsOrg() throws Exception { Provenance matchingProv = new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_ORG, "X", GenericAnnotationType.PROV_PERSON, "T", GenericAnnotationType.PROV_TOOL, "U")); testBasicProvenance(matchingProv, false); } @Test public void testProvenanceBasicFieldsFailsPerson() throws Exception { Provenance matchingProv = new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_ORG, "S", GenericAnnotationType.PROV_PERSON, "X", GenericAnnotationType.PROV_TOOL, "U")); testBasicProvenance(matchingProv, false); } @Test public void testProvenanceBasicFieldsFailsTool() throws Exception { Provenance matchingProv = new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_ORG, "S", GenericAnnotationType.PROV_PERSON, "T", GenericAnnotationType.PROV_TOOL, "X")); testBasicProvenance(matchingProv, false); } private void testBasicProvenance(Provenance prov, boolean expectedMatchResult) { // Match provenance that: // - has an organization starting with 'S' // - has a person starting with 'T' // - has a tool starting with 'U' Rule filter = ruleFilter( new RuleMatcher(DataCategoryField.PROV_ORG, regexMatcher("^S.*")), new RuleMatcher(DataCategoryField.PROV_PERSON, regexMatcher("^T.*")), new RuleMatcher(DataCategoryField.PROV_TOOL, regexMatcher("^U.*"))); assertEquals(expectedMatchResult, filter.matches(provSegment(prov))); } @Test public void testProvenanceRevFieldsMatches() throws Exception { Provenance matchingProv = new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_REVORG, "S", GenericAnnotationType.PROV_REVPERSON, "T", GenericAnnotationType.PROV_REVTOOL, "U")); testRevisionProvenance(matchingProv, true); } @Test public void testProvenanceRevFieldsFailsOrg() throws Exception { Provenance matchingProv = new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_REVORG, "X", GenericAnnotationType.PROV_REVPERSON, "T", GenericAnnotationType.PROV_REVTOOL, "U")); testRevisionProvenance(matchingProv, false); } @Test public void testProvenanceRevFieldsFailsPerson() throws Exception { Provenance matchingProv = new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_REVORG, "S", GenericAnnotationType.PROV_REVPERSON, "X", GenericAnnotationType.PROV_REVTOOL, "U")); testRevisionProvenance(matchingProv, false); } @Test public void testProvenanceRevFieldsFailsTool() throws Exception { Provenance matchingProv = new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_REVORG, "S", GenericAnnotationType.PROV_REVPERSON, "T", GenericAnnotationType.PROV_REVTOOL, "X")); testRevisionProvenance(matchingProv, false); } private void testRevisionProvenance(Provenance prov, boolean expectedMatchResult) { // Match provenance that: // - has a revision organization starting with 'S' // - has a revision person starting with 'T' // - has a revision tool starting with 'U' Rule filter = ruleFilter( new RuleMatcher(DataCategoryField.PROV_REVORG, regexMatcher("^S.*")), new RuleMatcher(DataCategoryField.PROV_REVPERSON, regexMatcher("^T.*")), new RuleMatcher(DataCategoryField.PROV_REVTOOL, regexMatcher("^U.*"))); assertEquals(expectedMatchResult, filter.matches(provSegment(prov))); } @Test public void testProvenanceRevRef() throws Exception { Rule filter = ruleFilter( new RuleMatcher(DataCategoryField.PROV_PROVREF, regexMatcher("^S.*"))); assertTrue(filter.matches(provSegment(new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_PROVREF, "S"))))); assertFalse(filter.matches(provSegment(new OkapiProvenance(new GenericAnnotation(GenericAnnotationType.PROV, GenericAnnotationType.PROV_PROVREF, "T"))))); } private OcelotSegment provSegment(Provenance prov) { OcelotSegment segment = emptySegment(); segment.addProvenance(prov); return segment; } private Rule ruleFilter(RuleMatcher... matchers) { return new Rule(Arrays.asList(matchers)); } private Matcher regexMatcher(String regex) { Matcher m = new Matchers.RegexMatcher(); assertTrue(m.validatePattern(regex)); m.setPattern(regex); return m; } private Matcher numericMatcher(int min, int max) { Matcher m = new Matchers.NumericMatcher(); String s = "" + min + "-" + max; // Hacky..... assertTrue(m.validatePattern(s)); m.setPattern(s); return m; } private LanguageQualityIssue lqi(String type, int severity) { LanguageQualityIssue lqi = new LanguageQualityIssue(); lqi.setType(type); lqi.setSeverity(severity); return lqi; } }