package org.molgenis.data.semanticsearch.service.impl; import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import org.molgenis.data.DataService; import org.molgenis.data.Entity; import org.molgenis.data.QueryRule; import org.molgenis.data.meta.MetaDataService; import org.molgenis.data.meta.model.*; import org.molgenis.data.semantic.Relation; import org.molgenis.data.semanticsearch.explain.service.ElasticSearchExplainService; import org.molgenis.data.semanticsearch.service.SemanticSearchService; import org.molgenis.data.semanticsearch.string.NGramDistanceAlgorithm; import org.molgenis.data.support.QueryImpl; import org.molgenis.ontology.core.model.OntologyTerm; import org.molgenis.ontology.core.service.OntologyService; import org.molgenis.ontology.ic.TermFrequencyService; import org.molgenis.test.data.AbstractMolgenisSpringTest; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; import org.testng.Assert; import org.testng.annotations.Test; import java.util.List; import java.util.Set; import java.util.concurrent.ExecutionException; import static java.util.Arrays.asList; import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; import static org.mockito.Mockito.*; import static org.molgenis.data.meta.AttributeType.STRING; import static org.molgenis.data.meta.model.EntityTypeMetadata.ENTITY_TYPE_META_DATA; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; @ContextConfiguration(classes = SemanticSearchServiceHelperTest.Config.class) public class SemanticSearchServiceHelperTest extends AbstractMolgenisSpringTest { @Autowired private AttributeFactory attrMetaFactory; @Autowired private EntityTypeFactory entityTypeFactory; @Autowired private OntologyService ontologyService; @Autowired private SemanticSearchServiceHelper semanticSearchServiceHelper; @Autowired private DataService dataService; @Test public void testCreateDisMaxQueryRule() { List<String> createdTargetAttributeQueries = asList("Height", "Standing height in cm", "body_length", "Sitting height", "sitting_length", "Height", "sature"); QueryRule actualRule = semanticSearchServiceHelper.createDisMaxQueryRuleForTerms(createdTargetAttributeQueries); String expectedQueryRuleToString = "DIS_MAX ('label' FUZZY_MATCH 'Height', 'description' FUZZY_MATCH 'Height', 'label' FUZZY_MATCH 'Standing height in cm', 'description' FUZZY_MATCH 'Standing height in cm', 'label' FUZZY_MATCH 'body_length', 'description' FUZZY_MATCH 'body_length', 'label' FUZZY_MATCH 'Sitting height', 'description' FUZZY_MATCH 'Sitting height', 'label' FUZZY_MATCH 'sitting_length', 'description' FUZZY_MATCH 'sitting_length', 'label' FUZZY_MATCH 'Height', 'description' FUZZY_MATCH 'Height', 'label' FUZZY_MATCH 'sature', 'description' FUZZY_MATCH 'sature')"; assertEquals(actualRule.getOperator(), QueryRule.Operator.DIS_MAX); assertEquals(actualRule.toString(), expectedQueryRuleToString); List<String> createdTargetAttributeQueries2 = singletonList("(Height) [stand^~]"); QueryRule actualRule2 = semanticSearchServiceHelper .createDisMaxQueryRuleForTerms(createdTargetAttributeQueries2); String expectedQueryRuleToString2 = "DIS_MAX ('label' FUZZY_MATCH '\\(Height\\) \\[stand^\\~\\]', 'description' FUZZY_MATCH '\\(Height\\) \\[stand^\\~\\]')"; assertEquals(actualRule2.getOperator(), QueryRule.Operator.DIS_MAX); assertEquals(actualRule2.toString(), expectedQueryRuleToString2); } @Test public void testCreateShouldQueryRule() { String multiOntologyTermIri = "http://www.molgenis.org/1,http://www.molgenis.org/2"; OntologyTerm ontologyTerm_1 = OntologyTerm.create("http://www.molgenis.org/1", "molgenis label in the gcc"); OntologyTerm ontologyTerm_2 = OntologyTerm .create("http://www.molgenis.org/2", "molgenis label 2 in the genetics", singletonList("label 2")); when(ontologyService.getOntologyTerm(ontologyTerm_1.getIRI())).thenReturn(ontologyTerm_1); when(ontologyService.getOntologyTerm(ontologyTerm_2.getIRI())).thenReturn(ontologyTerm_2); QueryRule actualShouldQueryRule = semanticSearchServiceHelper.createShouldQueryRule(multiOntologyTermIri); String expectedShouldQueryRuleToString = "SHOULD (DIS_MAX ('label' FUZZY_MATCH 'gcc molgenis label', 'description' FUZZY_MATCH 'gcc molgenis label'), DIS_MAX ('label' FUZZY_MATCH '2 label', 'description' FUZZY_MATCH '2 label', 'label' FUZZY_MATCH '2 genetics molgenis label', 'description' FUZZY_MATCH '2 genetics molgenis label'))"; assertEquals(actualShouldQueryRule.toString(), expectedShouldQueryRuleToString); assertEquals(actualShouldQueryRule.getOperator(), QueryRule.Operator.SHOULD); } @Test public void testCreateTargetAttributeQueryTerms() { Attribute targetAttribute_1 = attrMetaFactory.create().setName("targetAttribute 1"); targetAttribute_1.setDescription("Height"); Attribute targetAttribute_2 = attrMetaFactory.create().setName("targetAttribute 2"); targetAttribute_2.setLabel("Height"); Multimap<Relation, OntologyTerm> tags = LinkedHashMultimap.create(); OntologyTerm ontologyTerm1 = OntologyTerm .create("http://onto/standingheight", "Standing height", "Description is not used", singletonList("body_length")); OntologyTerm ontologyTerm2 = OntologyTerm .create("http://onto/sittingheight", "Sitting height", "Description is not used", singletonList("sitting_length")); OntologyTerm ontologyTerm3 = OntologyTerm .create("http://onto/height", "Height", "Description is not used", singletonList("sature")); tags.put(Relation.isAssociatedWith, ontologyTerm1); tags.put(Relation.isRealizationOf, ontologyTerm2); tags.put(Relation.isDefinedBy, ontologyTerm3); // Case 1 QueryRule actualTargetAttributeQueryTerms_1 = semanticSearchServiceHelper .createDisMaxQueryRuleForAttribute(Sets.newLinkedHashSet(asList("targetAttribute 1", "Height")), tags.values()); String expecteddisMaxQueryRuleToString_1 = "DIS_MAX ('label' FUZZY_MATCH '1 targetattribute', 'description' FUZZY_MATCH '1 targetattribute', 'label' FUZZY_MATCH 'height', 'description' FUZZY_MATCH 'height', 'label' FUZZY_MATCH 'length body', 'description' FUZZY_MATCH 'length body', 'label' FUZZY_MATCH 'standing height', 'description' FUZZY_MATCH 'standing height', 'label' FUZZY_MATCH 'length sitting', 'description' FUZZY_MATCH 'length sitting', 'label' FUZZY_MATCH 'sitting height', 'description' FUZZY_MATCH 'sitting height', 'label' FUZZY_MATCH 'sature', 'description' FUZZY_MATCH 'sature', 'label' FUZZY_MATCH 'height', 'description' FUZZY_MATCH 'height')"; assertEquals(actualTargetAttributeQueryTerms_1.toString(), expecteddisMaxQueryRuleToString_1); // Case 2 QueryRule expecteddisMaxQueryRuleToString_2 = semanticSearchServiceHelper .createDisMaxQueryRuleForAttribute(Sets.newHashSet("Height"), tags.values()); String expectedTargetAttributeQueryTermsToString_2 = "DIS_MAX ('label' FUZZY_MATCH 'height', 'description' FUZZY_MATCH 'height', 'label' FUZZY_MATCH 'length body', 'description' FUZZY_MATCH 'length body', 'label' FUZZY_MATCH 'standing height', 'description' FUZZY_MATCH 'standing height', 'label' FUZZY_MATCH 'length sitting', 'description' FUZZY_MATCH 'length sitting', 'label' FUZZY_MATCH 'sitting height', 'description' FUZZY_MATCH 'sitting height', 'label' FUZZY_MATCH 'sature', 'description' FUZZY_MATCH 'sature', 'label' FUZZY_MATCH 'height', 'description' FUZZY_MATCH 'height')"; assertEquals(expecteddisMaxQueryRuleToString_2.toString(), expectedTargetAttributeQueryTermsToString_2); // Case 3 QueryRule expecteddisMaxQueryRuleToString_3 = semanticSearchServiceHelper .createDisMaxQueryRuleForAttribute(Sets.newHashSet("targetAttribute 3"), tags.values()); String expectedTargetAttributeQueryTermsToString_3 = "DIS_MAX ('label' FUZZY_MATCH '3 targetattribute', 'description' FUZZY_MATCH '3 targetattribute', 'label' FUZZY_MATCH 'length body', 'description' FUZZY_MATCH 'length body', 'label' FUZZY_MATCH 'standing height', 'description' FUZZY_MATCH 'standing height', 'label' FUZZY_MATCH 'length sitting', 'description' FUZZY_MATCH 'length sitting', 'label' FUZZY_MATCH 'sitting height', 'description' FUZZY_MATCH 'sitting height', 'label' FUZZY_MATCH 'sature', 'description' FUZZY_MATCH 'sature', 'label' FUZZY_MATCH 'height', 'description' FUZZY_MATCH 'height')"; assertEquals(expecteddisMaxQueryRuleToString_3.toString(), expectedTargetAttributeQueryTermsToString_3); } @Test public void testCollectQueryTermsFromOntologyTerm() { // Case 1 OntologyTerm ontologyTerm1 = OntologyTerm .create("http://onto/standingheight", "Standing height", "Description is not used", singletonList("body_length")); List<String> actual_1 = semanticSearchServiceHelper.parseOntologyTermQueries(ontologyTerm1); assertEquals(actual_1, asList("length body", "standing height")); // Case 2 OntologyTerm ontologyTerm2 = OntologyTerm .create("http://onto/standingheight", "height", "Description is not used", emptyList()); OntologyTerm ontologyTerm3 = OntologyTerm .create("http://onto/standingheight-children", "length", singletonList("body_length")); when(ontologyService.getChildren(ontologyTerm2)).thenReturn(singletonList(ontologyTerm3)); when(ontologyService.getOntologyTermDistance(ontologyTerm2, ontologyTerm3)).thenReturn(1); List<String> actual_2 = semanticSearchServiceHelper.parseOntologyTermQueries(ontologyTerm2); assertEquals(actual_2, asList("height", "length^0.5 body^0.5", "length^0.5")); } @Test public void testGetAttributeIdentifiers() { EntityType sourceEntityType = entityTypeFactory.create().setName("sourceEntityType"); Entity EntityTypeEntity = mock(Entity.class); when(dataService.findOne(ENTITY_TYPE_META_DATA, new QueryImpl<>().eq(EntityTypeMetadata.FULL_NAME, sourceEntityType.getName()))) .thenReturn(EntityTypeEntity); Attribute attributeEntity1 = attrMetaFactory.create(); attributeEntity1.setIdentifier("1"); attributeEntity1.setDataType(STRING); Attribute attributeEntity2 = attrMetaFactory.create(); attributeEntity2.setIdentifier("2"); attributeEntity2.setDataType(STRING); when(EntityTypeEntity.getEntities(EntityTypeMetadata.ATTRIBUTES)) .thenReturn(asList(attributeEntity1, attributeEntity2)); List<String> expactedAttributeIdentifiers = asList("1", "2"); assertEquals(semanticSearchServiceHelper.getAttributeIdentifiers(sourceEntityType), expactedAttributeIdentifiers); } @Test public void testParseBoostQueryString() { String description = "falling in the ocean!"; String actual = semanticSearchServiceHelper.parseBoostQueryString(description, 0.5); assertEquals(actual, "ocean^0.5 falling^0.5"); } @Test public void testRemoveStopWords() { String description = "falling in the ocean!"; Set<String> actual = semanticSearchServiceHelper.removeStopWords(description); Set<String> expected = Sets.newHashSet("falling", "ocean"); assertEquals(actual, expected); } @Test public void testFindTagsSync() { String description = "Fall " + NGramDistanceAlgorithm.STOPWORDSLIST + " sleep"; List<String> ontologyIds = singletonList("1"); Set<String> searchTerms = Sets.newHashSet("fall", "sleep"); semanticSearchServiceHelper.findTags(description, ontologyIds); verify(ontologyService).findOntologyTerms(ontologyIds, searchTerms, SemanticSearchServiceHelper.MAX_NUM_TAGS); } @Test public void testSearchCircumflex() throws InterruptedException, ExecutionException { String description = "body^0.5 length^0.5"; Set<String> expected = Sets.newHashSet("length", "body", "0.5"); Set<String> actual = semanticSearchServiceHelper.removeStopWords(description); assertEquals(actual.size(), 3); assertTrue(actual.containsAll(expected)); } @Test public void testSearchTilde() throws InterruptedException, ExecutionException { String description = "body~0.5 length~0.5"; Set<String> expected = Sets.newHashSet("length~0.5", "body~0.5"); Set<String> actual = semanticSearchServiceHelper.removeStopWords(description); assertEquals(actual, expected); } @Test public void testSearchUnderScore() throws InterruptedException, ExecutionException { String description = "body_length"; Set<String> expected = Sets.newHashSet("body", "length"); Set<String> actual = semanticSearchServiceHelper.removeStopWords(description); assertEquals(actual, expected); } @Test public void testSearchIsoLatin() throws InterruptedException, ExecutionException { String description = "Standing height (Ångstrøm)"; List<String> ontologyIds = singletonList("1"); Set<String> searchTerms = Sets.newHashSet("standing", "height", "ångstrøm"); semanticSearchServiceHelper.findTags(description, ontologyIds); verify(ontologyService).findOntologyTerms(ontologyIds, searchTerms, SemanticSearchServiceHelper.MAX_NUM_TAGS); } @Test public void testSearchUnicode() throws InterruptedException, ExecutionException { String description = "/əˈnædrəməs/"; List<String> ontologyIds = singletonList("1"); Set<String> searchTerms = Sets.newHashSet("əˈnædrəməs"); semanticSearchServiceHelper.findTags(description, ontologyIds); verify(ontologyService).findOntologyTerms(ontologyIds, searchTerms, SemanticSearchServiceHelper.MAX_NUM_TAGS); } @Test public void testEscapeCharsExcludingCaretChar() { Assert.assertEquals(semanticSearchServiceHelper.escapeCharsExcludingCaretChar("(hypertension^4)~[]"), "\\(hypertension^4\\)\\~\\[\\]"); Assert.assertEquals(semanticSearchServiceHelper.escapeCharsExcludingCaretChar("hypertension^4"), "hypertension^4"); } @Configuration public static class Config { @Bean OntologyService ontologyService() { return mock(OntologyService.class); } @Bean SemanticSearchService semanticSearchService() { return new SemanticSearchServiceImpl(dataService(), ontologyService(), metaDataService(), semanticSearchServiceHelper(), elasticSearchExplainService()); } @Bean DataService dataService() { return mock(DataService.class); } @Bean MetaDataService metaDataService() { return mock(MetaDataService.class); } @Bean ElasticSearchExplainService elasticSearchExplainService() { return mock(ElasticSearchExplainService.class); } @Bean TermFrequencyService termFrequencyService() { return mock(TermFrequencyService.class); } @Bean SemanticSearchServiceHelper semanticSearchServiceHelper() { return new SemanticSearchServiceHelper(dataService(), ontologyService(), termFrequencyService()); } } }