package org.molgenis.ontology.roc; import com.google.common.collect.Sets; import org.mockito.Mockito; import org.molgenis.data.DataService; import org.molgenis.data.Entity; import org.molgenis.data.QueryRule; import org.molgenis.data.QueryRule.Operator; import org.molgenis.data.support.QueryImpl; import org.molgenis.ontology.core.meta.Ontology; import org.molgenis.ontology.core.meta.OntologyFactory; import org.molgenis.ontology.core.meta.OntologyMetaData; import org.molgenis.ontology.core.meta.OntologyTermMetaData; import org.molgenis.test.data.AbstractMolgenisSpringTest; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; import org.testng.Assert; import org.testng.annotations.Test; import java.util.Map; import java.util.Set; import static java.util.Arrays.asList; import static java.util.Collections.singletonList; import static org.mockito.Mockito.when; import static org.molgenis.ontology.core.meta.OntologyMetaData.ONTOLOGY; import static org.molgenis.ontology.core.meta.OntologyTermMetaData.ONTOLOGY_TERM; @ContextConfiguration(classes = { InformationContentServiceTest.Config.class }) public class InformationContentServiceTest extends AbstractMolgenisSpringTest { @Autowired private OntologyFactory ontologyFactory; private DataService dataService = Mockito.mock(DataService.class); private InformationContentService informationContentService = new InformationContentService(dataService); @Test public void createStemmedWordSet() { Set<String> actualStemmedWordSet = informationContentService .createStemmedWordSet("hearing-impairment_eye ball"); Set<String> expectedStemmedWordSet = Sets.newHashSet("hear", "impair", "ey", "ball"); Assert.assertEquals(actualStemmedWordSet.size(), expectedStemmedWordSet.size()); Assert.assertTrue(expectedStemmedWordSet.containsAll(actualStemmedWordSet)); } @Test public void createWordIDF() { String ontologyIri = "http://www.molgenis.org"; Ontology ontology = ontologyFactory.create(); ontology.setOntologyIri(ontologyIri); when(dataService.findOne(ONTOLOGY, new QueryImpl<>().eq(OntologyMetaData.ONTOLOGY_IRI, ontologyIri))) .thenReturn(ontology); when(dataService.count(ONTOLOGY_TERM, new QueryImpl<>().eq(OntologyTermMetaData.ONTOLOGY, ontology))) .thenReturn((long) 100); QueryRule queryRule = new QueryRule( singletonList(new QueryRule(OntologyTermMetaData.ONTOLOGY_TERM_SYNONYM, Operator.FUZZY_MATCH, "hear"))); queryRule.setOperator(Operator.DIS_MAX); QueryRule finalQuery = new QueryRule( asList(new QueryRule(OntologyTermMetaData.ONTOLOGY, Operator.EQUALS, ontology), new QueryRule(Operator.AND), queryRule)); when(dataService.count(ONTOLOGY_TERM, new QueryImpl<>(finalQuery))).thenReturn((long) 30); QueryRule queryRule2 = new QueryRule(singletonList( new QueryRule(OntologyTermMetaData.ONTOLOGY_TERM_SYNONYM, Operator.FUZZY_MATCH, "impair"))); queryRule2.setOperator(Operator.DIS_MAX); QueryRule finalQuery2 = new QueryRule( asList(new QueryRule(OntologyTermMetaData.ONTOLOGY, Operator.EQUALS, ontology), new QueryRule(Operator.AND), queryRule2)); when(dataService.count(ONTOLOGY_TERM, new QueryImpl<>(finalQuery2))).thenReturn((long) 10); Map<String, Double> expectedWordIDF = informationContentService .createWordIDF("hearing impairment", ontologyIri); Assert.assertEquals(expectedWordIDF.get("hear").intValue(), 2); Assert.assertEquals(expectedWordIDF.get("impair").intValue(), 3); } @Test public void redistributedNGramScore() { String ontologyIri = "http://www.molgenis.org"; Entity ontologyEntity = ontologyFactory.create(); ontologyEntity.set(OntologyMetaData.ONTOLOGY_IRI, ontologyIri); when(dataService.findOne(ONTOLOGY, new QueryImpl<>().eq(OntologyMetaData.ONTOLOGY_IRI, ontologyIri))) .thenReturn(ontologyEntity); when(dataService.count(ONTOLOGY_TERM, new QueryImpl<>().eq(OntologyTermMetaData.ONTOLOGY, ontologyEntity))) .thenReturn((long) 100); QueryRule queryRule = new QueryRule( singletonList(new QueryRule(OntologyTermMetaData.ONTOLOGY_TERM_SYNONYM, Operator.FUZZY_MATCH, "hear"))); queryRule.setOperator(Operator.DIS_MAX); QueryRule finalQuery = new QueryRule( asList(new QueryRule(OntologyTermMetaData.ONTOLOGY, Operator.EQUALS, ontologyEntity), new QueryRule(Operator.AND), queryRule)); when(dataService.count(ONTOLOGY_TERM, new QueryImpl<>(finalQuery))).thenReturn((long) 30); QueryRule queryRule2 = new QueryRule(singletonList( new QueryRule(OntologyTermMetaData.ONTOLOGY_TERM_SYNONYM, Operator.FUZZY_MATCH, "impair"))); queryRule2.setOperator(Operator.DIS_MAX); QueryRule finalQuery2 = new QueryRule( asList(new QueryRule(OntologyTermMetaData.ONTOLOGY, Operator.EQUALS, ontologyEntity), new QueryRule(Operator.AND), queryRule2)); when(dataService.count(ONTOLOGY_TERM, new QueryImpl<>(finalQuery2))).thenReturn((long) 10); Map<String, Double> redistributedNGramScore = informationContentService .redistributedNGramScore("hearing impairment", ontologyIri); Assert.assertEquals(redistributedNGramScore.get("hear").intValue(), -7); Assert.assertEquals(redistributedNGramScore.get("impair").intValue(), 7); } @Configuration @ComponentScan({ "org.molgenis.ontology.core.meta", "org.molgenis.ontology.core.model" }) public static class Config { } }