/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.textannotationnewmodel.impl;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Map;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.rdf.core.serializedform.SupportedFormat;
import org.apache.clerezza.rdf.jena.parser.JenaParserProvider;
import org.apache.stanbol.commons.indexedgraph.IndexedGraph;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.apache.clerezza.commons.rdf.RDFTerm;
/**
 * JUnit test for {@link TextAnnotationsNewModelEngine}.
 * <p>
 * Pre-computed enhancements are parsed once from the classpath resource
 * {@value #TEST_ENHANCEMENTS}. Every test then works on a fresh
 * {@link ContentItem} whose metadata is a copy of those enhancements, lets
 * the engine process it and validates the resulting
 * <code>fise:TextAnnotation</code>s.
 */
public class TextAnnotationNewModelEngineTest {

    /** Text content of the {@link ContentItem} the test enhancements refer to. */
    public static final String SINGLE_SENTENCE = "Dr Patrick Marshall (1869 - November 1950) was a"
        + " geologist who lived in New Zealand and worked at the University of Otago.";

    /** Name of the classpath resource holding the serialized (RDF/XML) enhancements. */
    private static final String TEST_ENHANCEMENTS = "enhancement-results.rdf";

    /** Parser used to read {@link #TEST_ENHANCEMENTS}. */
    private static final JenaParserProvider rdfParser = new JenaParserProvider();

    /** Enhancements parsed from {@link #TEST_ENHANCEMENTS}; copied for every test. */
    private static Graph origEnhancements;

    /** URI of the content item, taken from the parsed enhancements. */
    private static IRI ciUri;

    /** Content item under test; re-created before and cleared after each test. */
    private ContentItem contentItem;

    /** Engine under test; activated once for the whole class. */
    private static TextAnnotationsNewModelEngine engine;

    private final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();

    /** Component context used to activate and deactivate {@link #engine}. */
    private static ComponentContext ctx;

    /**
     * Loads the test enhancements, determines the URI of the content item and
     * activates the engine under test.
     *
     * @throws IOException if closing the enhancement resource fails
     * @throws ConfigurationException if the engine rejects the test configuration
     */
    @BeforeClass
    public static void init() throws IOException, ConfigurationException {
        InputStream in = TextAnnotationNewModelEngineTest.class.getClassLoader().getResourceAsStream(TEST_ENHANCEMENTS);
        Assert.assertNotNull("Unable to load reaource '"+TEST_ENHANCEMENTS+"' via Classpath",in);
        origEnhancements = new IndexedGraph();
        try {
            rdfParser.parse(origEnhancements, in, SupportedFormat.RDF_XML, null);
        } finally {
            // always release the classpath resource, even if parsing fails
            in.close();
        }
        Assert.assertFalse(origEnhancements.isEmpty());
        //parse the ID of the ContentItem from the enhancements
        Iterator<Triple> it = origEnhancements.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
        Assert.assertTrue(it.hasNext());
        RDFTerm id = it.next().getObject();
        Assert.assertTrue(id instanceof IRI);
        ciUri = (IRI)id;
        //validate that the enhancements in the file are valid
        //NOTE: the input data are no longer fully valid to test some features of this engine
        //      because of that this initial test is deactivated
        //        EnhancementStructureHelper.validateAllTextAnnotations(
        //            origEnhancements, SINGLE_SENTENCE, null,
        //            false); //those do not yet contain fise:selection-prefix/suffix values
        //init the engine
        engine = new TextAnnotationsNewModelEngine();
        Dictionary<String,Object> config = new Hashtable<String,Object>();
        config.put(EnhancementEngine.PROPERTY_NAME, "test-engine");
        config.put(TextAnnotationsNewModelEngine.PROPERTY_PREFIX_SUFFIX_SIZE, Integer.valueOf(10));
        ctx = new MockComponentContext(config);
        engine.activate(ctx);
    }

    /**
     * Creates a fresh content item for {@link #SINGLE_SENTENCE}. The metadata
     * are a copy of the parsed enhancements, so changes made by one test are
     * not visible to another.
     *
     * @throws IOException if the content item cannot be created
     */
    @Before
    public void initTest() throws IOException {
        contentItem = ciFactory.createContentItem(ciUri,
            new StringSource(SINGLE_SENTENCE), new IndexedGraph(origEnhancements));
    }

    /**
     * Runs the engine on the content item and validates every
     * <code>fise:TextAnnotation</code> found in the metadata afterwards.
     *
     * @throws EngineException if the engine fails to process the content item
     */
    @Test
    public void testTextAnnotationNewModel() throws EngineException {
        Assert.assertEquals(EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(contentItem));
        engine.computeEnhancements(contentItem);
        //validate
        Graph g = contentItem.getMetadata();
        Iterator<Triple> it = g.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        //at least one TextAnnotation is required, otherwise the loop below validates nothing
        Assert.assertTrue(it.hasNext());
        while(it.hasNext()){
            BlankNodeOrIRI ta = it.next().getSubject();
            Assert.assertTrue(ta instanceof IRI);
            Map<IRI,RDFTerm> expected = new HashMap<IRI,RDFTerm>();
            expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());
            //NOTE(review): the trailing 'true' presumably enables validation of the
            //  fise:selection-prefix/suffix values - confirm against EnhancementStructureHelper
            EnhancementStructureHelper.validateTextAnnotation(g, (IRI)ta, SINGLE_SENTENCE, expected,true);
        }
    }

    /** Drops the reference to the content item used by the finished test. */
    @After
    public void afterTest(){
        contentItem = null;
    }

    /**
     * Deactivates the engine. The engine is {@code null} if {@link #init()}
     * failed before creating it; in that case nothing is done so that the
     * original failure is not accompanied by a {@link NullPointerException}.
     */
    @AfterClass
    public static void cleanup(){
        if (engine != null) {
            engine.deactivate(ctx);
        }
    }
}