/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.stanbol.enhancer.engines.entitylinking.engine;

import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.PHRASE_ANNOTATION;
import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.POS_ANNOTATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_EXTRACTED_FROM;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateEntityAnnotation;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.stanbol.commons.indexedgraph.IndexedGraph;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
import org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer;
import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.RedirectProcessingMode;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.TestSearcherImpl;
import org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.SimpleLabelTokenizer;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.nlp.pos.Pos;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests the {@link EntityLinker} and the {@link EntityLinkingEngine} against
 * a small in-memory vocabulary ({@link TestSearcherImpl}) and two manually
 * annotated {@link AnalysedText} fixtures.
 * <p>
 * TODO: convert this to an integration test!
 * @author Rupert Westenthaler
 */
public class EntityLinkingEngineTest {

    private final static Logger log = LoggerFactory.getLogger(EntityLinkingEngineTest.class);

    /**
     * The context for the tests (same as in TestOpenNLPEnhancementEngine)
     */
    public static final String TEST_TEXT = "Dr. Patrick Marshall (1869 - November 1950) was a"
        + " geologist who lived in New Zealand and worked at the University of Otago.";
    /**
     * Same as {@link #TEST_TEXT} but with changed order of given and family
     * name. Both texts have the same length and differ only in those two
     * words; {@link #initAnalyzedText(AnalysedText)} relies on this.
     */
    public static final String TEST_TEXT_WO = "Dr. Marshall Patrick (1869 - November 1950) was a"
        + " geologist who lived in New Zealand and worked at the University of Otago.";

    /** Annotated version of {@link #TEST_TEXT}; created in {@link #setUpServices()} */
    private static AnalysedText TEST_ANALYSED_TEXT;
    /** Annotated version of {@link #TEST_TEXT_WO}; created in {@link #setUpServices()} */
    private static AnalysedText TEST_ANALYSED_TEXT_WO;

//    public static final String TEST_TEXT2 = "A CBS televised debate between Australia's " +
//            "candidates for Prime Minister in the upcoming US election has been rescheduled " +
//            "and shortend, to avoid a clash with popular cookery sow MasterChef.";

    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();

    /** Site name passed to the {@link TestSearcherImpl}; expected as entityhub:site value */
    private static final String TEST_REFERENCED_SITE_NAME = "dummRefSiteName";

    /** Phrase annotation value used for all chunks of the test texts */
    private static final Value<PhraseTag> NOUN_PHRASE = Value.value(new PhraseTag("NP",LexicalCategory.Noun),1d);

    static TestSearcherImpl searcher;

    public static final IRI NAME = new IRI(NamespaceEnum.rdfs+"label");
    public static final IRI TYPE = new IRI(NamespaceEnum.rdf+"type");
    public static final IRI REDIRECT = new IRI(NamespaceEnum.rdfs+"seeAlso");

    /**
     * Creates the {@link #searcher}, registers the test entities with it and
     * builds the two annotated {@link AnalysedText} fixtures.
     * @throws IOException declared for the creation of the test fixtures
     */
    @BeforeClass
    public static void setUpServices() throws IOException {
        searcher = new TestSearcherImpl(TEST_REFERENCED_SITE_NAME,NAME,new SimpleLabelTokenizer());
        //add some terms to the searcher
        //NOTE: a single graph holds the triples of all test entities
        Graph graph = new IndexedGraph();
        IRI uri = new IRI("urn:test:PatrickMarshall");
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Patrick Marshall")));
        graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PERSON));
        searcher.addEntity(new Entity(uri, graph));

        uri = new IRI("urn:test:Geologist");
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Geologist")));
        graph.add(new TripleImpl(uri, TYPE, new IRI(NamespaceEnum.skos+"Concept")));
        graph.add(new TripleImpl(uri, REDIRECT, new IRI("urn:test:redirect:Geologist")));
        searcher.addEntity(new Entity(uri, graph));

        //a redirect
        uri = new IRI("urn:test:redirect:Geologist");
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Geologe (redirect)")));
        graph.add(new TripleImpl(uri, TYPE, new IRI(NamespaceEnum.skos+"Concept")));
        searcher.addEntity(new Entity(uri, graph));

        uri = new IRI("urn:test:NewZealand");
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("New Zealand")));
        graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
        searcher.addEntity(new Entity(uri, graph));

        uri = new IRI("urn:test:UniversityOfOtago");
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University of Otago")));
        graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
        searcher.addEntity(new Entity(uri, graph));

        uri = new IRI("urn:test:University");
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University")));
        graph.add(new TripleImpl(uri, TYPE, new IRI(NamespaceEnum.skos+"Concept")));
        searcher.addEntity(new Entity(uri, graph));

        uri = new IRI("urn:test:Otago");
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago")));
        graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
        searcher.addEntity(new Entity(uri, graph));

        //add a 2nd Otago (Place and University)
        uri = new IRI("urn:test:Otago_Texas");
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago (Texas)")));
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago")));
        graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
        searcher.addEntity(new Entity(uri, graph));

        uri = new IRI("urn:test:UniversityOfOtago_Texas");
        graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University of Otago (Texas)")));
        graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
        searcher.addEntity(new Entity(uri, graph));

        TEST_ANALYSED_TEXT = AnalysedTextFactory.getDefaultInstance().createAnalysedText(
                ciFactory.createBlob(new StringSource(TEST_TEXT)));
        TEST_ANALYSED_TEXT_WO = AnalysedTextFactory.getDefaultInstance().createAnalysedText(
            ciFactory.createBlob(new StringSource(TEST_TEXT_WO)));

        //annotations shared by both texts, followed by the chunk and the two
        //name tokens that differ between them
        initAnalyzedText(TEST_ANALYSED_TEXT);
        TEST_ANALYSED_TEXT.addChunk(0, "Dr. Patrick Marshall".length()).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
        //"Patrick"
        TEST_ANALYSED_TEXT.addToken(4, 11).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        //"Marshall"
        TEST_ANALYSED_TEXT.addToken(12, 20).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));

        initAnalyzedText(TEST_ANALYSED_TEXT_WO);
        TEST_ANALYSED_TEXT_WO.addChunk(0, "Dr. Marshall Patrick".length()).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
        //"Marshall"
        TEST_ANALYSED_TEXT_WO.addToken(4, 12).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        //"Patrick"
        TEST_ANALYSED_TEXT_WO.addToken(13, 20).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
    }

    /**
     * Adds the sentence, the noun phrase chunks and the tokens that are the
     * same for {@link #TEST_TEXT} and {@link #TEST_TEXT_WO}. The chunk and
     * the tokens for the name at the start of the text are NOT added by this
     * method.
     * <p>
     * All offsets are calculated based on {@link #TEST_TEXT}. This works for
     * both texts only because they are identical except for the order of the
     * two name tokens.
     * @param at the analysed text to add the annotations to
     */
    private static void initAnalyzedText(AnalysedText at) {
        //use the end of the parsed text (and not of a static field) so that
        //this does not depend on the initialisation order of the fixtures
        at.addSentence(0, at.getEnd());
        at.addChunk(TEST_TEXT.indexOf("New Zealand"), TEST_TEXT.indexOf("New Zealand")+"New Zealand".length())
            .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
        at.addChunk(TEST_TEXT.indexOf("geologist"), TEST_TEXT.indexOf("geologist")+"geologist".length())
            .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
        //length()-1 excludes the '.' at the end of the text from the chunk
        at.addChunk(TEST_TEXT.indexOf("the University of Otago"),
            TEST_TEXT.length()-1).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
        //add some tokens
        //"Dr" and "."
        at.addToken(0, 2).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.Abbreviation),1d));
        at.addToken(2, 3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(".",Pos.Point),1d));
        //"(", "1869", "-", "November", "1950", ")" followed by "was" and "a"
        int start = TEST_TEXT.indexOf("(1869 - November 1950)");
        at.addToken(start,start+1).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("(",Pos.OpenBracket),1d));
        at.addToken(start+1,start+5).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NUM",Pos.Numeral),1d));
        at.addToken(start+6,start+7).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("-",Pos.Hyphen),1d));
        at.addToken(start+8,start+16).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        at.addToken(start+17,start+21).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NUM",Pos.Numeral),1d));
        at.addToken(start+21,start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(")",Pos.CloseBracket),1d));
        at.addToken(start+23, start+26).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("O",LexicalCategory.Adjective)));
        at.addToken(start+27, start+28).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("A", LexicalCategory.Adposition)));

        //"geologist" followed by the filler tokens "who lived in"
        start = TEST_TEXT.indexOf("geologist");
        at.addToken(start,start+9).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        at.addToken(start+10, start+13).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("O", LexicalCategory.Adjective)));
        at.addToken(start+14, start+19).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb)));
        at.addToken(start+20, start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("PP", LexicalCategory.PronounOrDeterminer)));

        //"New" and "Zealand"
        start = TEST_TEXT.indexOf("New Zealand");
        at.addToken(start,start+3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        at.addToken(start+4,start+11).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        //add filler Tokens for "and worked at"
        at.addToken(start+12, start+15).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("O", LexicalCategory.Adjective)));
        at.addToken(start+16, start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb)));
        at.addToken(start+23, start+25).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("PP", LexicalCategory.PronounOrDeterminer)));

        //"the", "University", "of", "Otago" and the final "."
        start = TEST_TEXT.indexOf("the University of Otago");
        at.addToken(start,start+3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("ART",Pos.Article),1d));
        at.addToken(start+4,start+14).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        at.addToken(start+15,start+17).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("OF",Pos.Preposition),1d));
        at.addToken(start+18,start+23).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        at.addToken(start+23,start+24).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(".",Pos.Point),1d));
    }

    private final LabelTokenizer labelTokenizer = new SimpleLabelTokenizer();

    /** Currently nothing to do before a test */
    @Before
    public void bindServices() throws IOException {
    }

    /** Currently nothing to do after a test */
    @After
    public void unbindServices() {
    }

    /** Currently nothing to do after all tests have run */
    @AfterClass
    public static void shutdownServices() {
    }

    /**
     * Creates a {@link ContentItem} with the parsed id and text by using the
     * {@link InMemoryContentItemFactory}.
     * @param id the id used as {@link IRI} of the content item
     * @param text the text content
     * @return the created content item
     * @throws IOException on any error while creating the content item
     */
    public static ContentItem getContentItem(final String id, final String text) throws IOException {
        return ciFactory.createContentItem(new IRI(id),new StringSource(text));
    }

    /**
     * Creates the linker configuration shared by all tests of this class:
     * at least two found tokens and redirects are followed.
     * @return the configuration
     */
    private static EntityLinkerConfig createLinkerConfig() {
        EntityLinkerConfig config = new EntityLinkerConfig();
        config.setMinFoundTokens(2);//this is assumed by the tests
        config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
        return config;
    }

    /**
     * Creates the (modifiable) list of entity URIs expected as suggestions
     * for a selected text, in the expected rank order.
     * @param uris the expected entity URIs
     * @return the list of expected URIs
     */
    private static List<String> expectedUris(String... uris) {
        return new ArrayList<String>(Arrays.asList(uris));
    }

    /**
     * This tests the EntityLinker functionality (if the expected Entities
     * are linked). In this case with the default configurations for
     * {@link LexicalCategory#Noun}.
     * @throws Exception
     */
    @Test
    public void testEntityLinkerWithNouns() throws Exception {
        LanguageProcessingConfig tpc = new LanguageProcessingConfig();
        tpc.setLinkedLexicalCategories(LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES);
        tpc.setLinkedPos(Collections.EMPTY_SET);
        EntityLinkerConfig config = createLinkerConfig();
        EntityLinker linker = new EntityLinker(TEST_ANALYSED_TEXT,"en",
            tpc, searcher, config, labelTokenizer);
        linker.process();
        Map<String,List<String>> expectedResults = new HashMap<String,List<String>>();
        expectedResults.put("Patrick Marshall", expectedUris("urn:test:PatrickMarshall"));
        //the redirected entity
        expectedResults.put("geologist", expectedUris("urn:test:redirect:Geologist"));
        expectedResults.put("New Zealand", expectedUris("urn:test:NewZealand"));
        expectedResults.put("University of Otago", expectedUris(
            "urn:test:UniversityOfOtago","urn:test:UniversityOfOtago_Texas"));
        validateEntityLinkerResults(linker, expectedResults);
    }

    /**
     * Same as {@link #testEntityLinkerWithNouns()} but based on
     * {@link #TEST_TEXT_WO} (given and family name in changed order) and
     * with the chunk state being ignored. The entity
     * <code>urn:test:PatrickMarshall</code> is expected to be suggested for
     * the selected text "Marshall Patrick".
     * @throws Exception
     */
    @Test
    public void testEntityLinkerWithWrongOrder() throws Exception {
        LanguageProcessingConfig tpc = new LanguageProcessingConfig();
        tpc.setLinkedLexicalCategories(LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES);
        tpc.setLinkedPos(Collections.EMPTY_SET);
        tpc.setIgnoreChunksState(true); //to emulate pre STANBOL-1211
        EntityLinkerConfig config = createLinkerConfig();
        EntityLinker linker = new EntityLinker(TEST_ANALYSED_TEXT_WO,"en",
            tpc, searcher, config, labelTokenizer);
        linker.process();
        Map<String,List<String>> expectedResults = new HashMap<String,List<String>>();
        expectedResults.put("Marshall Patrick", expectedUris("urn:test:PatrickMarshall"));
        //the redirected entity
        expectedResults.put("geologist", expectedUris("urn:test:redirect:Geologist"));
        expectedResults.put("New Zealand", expectedUris("urn:test:NewZealand"));
        expectedResults.put("University of Otago", expectedUris(
            "urn:test:UniversityOfOtago","urn:test:UniversityOfOtago_Texas"));
        validateEntityLinkerResults(linker, expectedResults);
    }

    /**
     * This tests the EntityLinker functionality (if the expected Entities
     * are linked). In this case with the default configurations for
     * {@link Pos#ProperNoun}.
     * @throws Exception
     */
    @Test
    public void testEntityLinkerWithProperNouns() throws Exception {
        LanguageProcessingConfig tpc = new LanguageProcessingConfig();
        tpc.setLinkedLexicalCategories(Collections.EMPTY_SET);
        tpc.setLinkedPos(LanguageProcessingConfig.DEFAULT_LINKED_POS);
        EntityLinkerConfig config = createLinkerConfig();
        EntityLinker linker = new EntityLinker(TEST_ANALYSED_TEXT,"en",
            tpc, searcher, config, labelTokenizer);
        linker.process();
        Map<String,List<String>> expectedResults = new HashMap<String,List<String>>();
        expectedResults.put("Patrick Marshall", expectedUris("urn:test:PatrickMarshall"));
        //Geologist is a common noun and MUST NOT be found
        //expectedResults.put("geologist", new ArrayList<String>(
        //        Arrays.asList("urn:test:redirect:Geologist"))); //the redirected entity
        expectedResults.put("New Zealand", expectedUris("urn:test:NewZealand"));
        expectedResults.put("University of Otago", expectedUris(
            "urn:test:UniversityOfOtago","urn:test:UniversityOfOtago_Texas"));
        validateEntityLinkerResults(linker, expectedResults);
    }

    /**
     * Asserts that the entities linked by the parsed linker are exactly the
     * expected ones: every linked entity needs to select an expected text,
     * its suggestions need to refer the expected entities in the expected
     * order with non-increasing scores, and no expected result may be
     * missing.
     * @param linker the linker (after processing) to validate
     * @param expectedResults the expected entity URIs by selected text.
     * NOTE that entries are removed from this map during validation
     */
    private void validateEntityLinkerResults(EntityLinker linker, Map<String,List<String>> expectedResults) {
        log.info("---------------------");
        log.info("- Validating Results-");
        log.info("---------------------");
        for(LinkedEntity linkedEntity : linker.getLinkedEntities().values()){
            log.info("> LinkedEntity {}",linkedEntity);
            //removing the entry also detects selected texts that are found twice
            List<String> expectedSuggestions = expectedResults.remove(linkedEntity.getSelectedText());
            assertNotNull("LinkedEntity '"+linkedEntity.getSelectedText()+
                "' is not an expected Result (or was found twice)", expectedSuggestions);
            //JUnit expects the expected value before the actual one
            assertEquals("Number of suggestions "+linkedEntity.getSuggestions().size()+
                " != number of expected suggestions "+expectedSuggestions.size()+
                "for selection "+linkedEntity.getSelectedText() +
                "(Expected: " + expectedSuggestions +")",
                expectedSuggestions.size(),
                linkedEntity.getSuggestions().size());
            //no suggestion may have a higher score as the linked entity and
            //the suggestions need to be sorted by descending score
            double score = linkedEntity.getScore();
            for(int i=0;i<expectedSuggestions.size();i++){
                Suggestion suggestion = linkedEntity.getSuggestions().get(i);
                assertEquals("Expecced Suggestion at Rank "+i+" expected: "+
                    expectedSuggestions.get(i)+" suggestion: "+
                    suggestion.getEntity().getId(),
                    expectedSuggestions.get(i),
                    suggestion.getEntity().getId());
                assertTrue("Score of suggestion "+i+"("+suggestion.getScore()+
                    " > as of the previous one ("+score+")",
                    score >= suggestion.getScore());
                score = suggestion.getScore();
            }
        }
        assertTrue("The expected Result(s) "+expectedResults+" wehre not found",
            expectedResults.isEmpty());
    }

    /**
     * This tests if the Enhancements created by the Engine confirm to the
     * rules defined for the Stanbol Enhancement Structure.
     * @throws IOException
     * @throws EngineException
     */
    @Test
    public void testEngine() throws IOException, EngineException {
        EntityLinkerConfig linkerConfig = createLinkerConfig();
        EntityLinkingEngine engine = new EntityLinkingEngine("dummy",
            searcher, new TextProcessingConfig(),
            linkerConfig, labelTokenizer);
        ContentItem ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
        //tells the engine that this is an English text
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("en")));
        //and add the AnalysedText instance used for this test
        ci.addPart(AnalysedText.ANALYSED_TEXT_URI, TEST_ANALYSED_TEXT);
        //compute the enhancements
        engine.computeEnhancements(ci);
        //validate the enhancement results
        Map<IRI,RDFTerm> expectedValues = new HashMap<IRI,RDFTerm>();
        expectedValues.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(DC_CREATOR,LiteralFactory.getInstance().createTypedLiteral(
            engine.getClass().getName()));
        //adding null as expected for confidence makes it a required property
        expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
        //validate create fise:TextAnnotations
        int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(), TEST_TEXT, expectedValues);
        assertEquals("Four fise:TextAnnotations are expected by this Test", 4, numTextAnnotations);
        //validate create fise:EntityAnnotations
        int numEntityAnnotations = validateAllEntityAnnotations(ci, expectedValues);
        assertEquals("Five fise:EntityAnnotations are expected by this Test", 5, numEntityAnnotations);
    }

    /**
     * Similar to {@link EnhancementStructureHelper#validateAllEntityAnnotations(org.apache.clerezza.commons.rdf.Graph, Map)}
     * but in addition checks that every fise:EntityAnnotation has exactly
     * one entityhub:site value and that this value is a literal with the
     * lexical form {@link #TEST_REFERENCED_SITE_NAME}.
     * @param ci the content item holding the enhancement metadata
     * @param expectedValues the expected values parsed to
     * {@link EnhancementStructureHelper#validateEntityAnnotation}
     * @return the number of validated fise:EntityAnnotations
     */
    private static int validateAllEntityAnnotations(ContentItem ci, Map<IRI,RDFTerm> expectedValues){
        //the property does not change between the annotations
        IRI entityhubSite = new IRI(NamespaceEnum.entityhub+"site");
        Iterator<Triple> entityAnnotationIterator = ci.getMetadata().filter(null,
                RDF_TYPE, ENHANCER_ENTITYANNOTATION);
        int entityAnnotationCount = 0;
        while (entityAnnotationIterator.hasNext()) {
            IRI entityAnnotation = (IRI) entityAnnotationIterator.next().getSubject();
            // test if selected Text is added
            validateEntityAnnotation(ci.getMetadata(), entityAnnotation, expectedValues);
            //NOTE: fise:confidence is now checked by the
            //      EnhancementStructureHelper (STANBOL-630)
            //Test the entityhub:site property (STANBOL-625)
            Iterator<Triple> entitySiteIterator = ci.getMetadata().filter(entityAnnotation,
                entityhubSite, null);
            assertTrue("Expected entityhub:site value is missing (entityAnnotation "
                    +entityAnnotation+")",entitySiteIterator.hasNext());
            RDFTerm siteResource = entitySiteIterator.next().getObject();
            assertTrue("entityhub:site values MUST BE Literals", siteResource instanceof Literal);
            assertEquals("'"+TEST_REFERENCED_SITE_NAME+"' is expected as "
                + "entityhub:site value", TEST_REFERENCED_SITE_NAME,
                ((Literal)siteResource).getLexicalForm());
            assertFalse("entityhub:site MUST HAVE only a single value", entitySiteIterator.hasNext());
            entityAnnotationCount++;
        }
        return entityAnnotationCount;
    }

}