/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.matching.impl;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import org.aksw.gerbil.matching.EvaluationCounts;
import org.aksw.gerbil.semantic.kb.SimpleWhiteListBasedUriKBClassifier;
import org.aksw.gerbil.semantic.subclass.SimpleSubClassInferencerFactory;
import org.aksw.gerbil.transfer.nif.data.TypedNamedEntity;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;
/**
 * Parameterized test for {@link HierarchicalMatchingsCounter}. Every parameter
 * set consists of a class hierarchy, the types of a single gold standard
 * entity, the types of a single annotator entity and the counts that the
 * counter is expected to produce for this pair.
 */
@RunWith(Parameterized.class)
public class HierarchicalMatchingsCounterTest {

    /**
     * Namespaces handed to the {@link SimpleWhiteListBasedUriKBClassifier} of
     * the tested counter. The first entry is also used as prefix of all class
     * and entity URIs generated by this test.
     */
    public static final String[] KNOWN_KB_URIS = new String[] { "http://example.org/" };

    /**
     * <p>
     * Some of the test cases have been taken from
     * "Evaluation Measures for Hierarchical Classification: a unified view and novel approaches"
     * by Kosmopoulos et al.
     * </p>
     *
     * <p>
     * Test 1: Overspecialization (figure 2 a) (A is the highest node, C the
     * lowest)
     *
     * <pre>
     * A - B - C
     * </pre>
     *
     * gold standard = B <br>
     * annotator = C
     * </p>
     * <p>
     * Test 2: Underspecialization (figure 2 b) (reusing the model from above)
     * <br>
     * gold standard = C <br>
     * annotator = B
     * </p>
     * <p>
     * Test 3: Overspecialization (reusing the model from above)<br>
     * gold standard = B,C <br>
     * annotator = C
     * </p>
     * <p>
     * Test 4: Underspecialization (reusing the model from above)<br>
     * gold standard = B,C <br>
     * annotator = A
     * </p>
     * <p>
     * Test 5: Exact Matching (reusing the model from above)<br>
     * gold standard = B <br>
     * annotator = B
     * </p>
     * <p>
     * Test 6: Exact Matching (reusing the model from above)<br>
     * gold standard = B,C <br>
     * annotator = B
     * </p>
     * <p>
     * Test 7: Exact Matching (reusing the model from above)<br>
     * gold standard = B <br>
     * annotator = B,C
     * </p>
     * <p>
     * Test 8: Alternative paths (figure 2 c)
     *
     * <pre>
     *     A
     *    / \
     *   B   C
     *   |   |
     *   |   D
     *    \ /
     *     E
     * </pre>
     *
     * gold standard = E <br>
     * annotator = A
     * </p>
     * <p>
     * Test 9: Pairing problem (figure 2 d)
     *
     * <pre>
     *       A
     *      / \
     *     B   C
     *    / \   \
     *   D   E   F
     * </pre>
     *
     * gold standard = B, F <br>
     * annotator = D, E
     * </p>
     * <p>
     * Test 10: Long distance problem (figure 2 d) (reusing the model from
     * above) <br>
     * gold standard = D <br>
     * annotator = F
     * </p>
     * <p>
     * Test 11: DAG example (figure 8 b). G is a sub class of C and D, L is a
     * sub class of D and I.
     *
     * <pre>
     *          A
     *        / |  \
     *      B   C    D
     *     / \  | / /|\ \
     *    E   F G  H I \ J
     *              / \|
     *             K   L
     * </pre>
     *
     * gold standard = G, J, K <br>
     * annotator = H, K, L
     * </p>
     * <p>
     * Test 12: DAG example (reusing the model from above)<br>
     * gold standard = D <br>
     * annotator = C
     * </p>
     *
     * @return the list of test configurations. Each configuration is an array
     *         containing the class hierarchy ({@link Model}), the gold
     *         standard types ({@code String[]}), the annotator types
     *         ({@code String[]}) and the three expected counts ({@code int[]})
     *         in the order in which they are handed to the
     *         {@link EvaluationCounts} constructor.
     */
    @Parameters
    public static Collection<Object[]> data() {
        List<Object[]> testConfigs = new ArrayList<Object[]>();
        Model classModel;
        Resource[] resources;

        /*
         * Linear hierarchy A - B - C (figures 2 a and 2 b)
         */
        classModel = ModelFactory.createDefaultModel();
        resources = createResources(3, classModel);
        classModel.add(resources[1], RDFS.subClassOf, resources[0]);
        classModel.add(resources[2], RDFS.subClassOf, resources[1]);
        // Test 1: Overspecialization (figure 2 a)
        testConfigs.add(testCase(classModel, uris(resources[1]), uris(resources[2]), 1, 0, 1));
        // Test 2: Underspecialization (figure 2 b)
        testConfigs.add(testCase(classModel, uris(resources[2]), uris(resources[1]), 1, 1, 0));
        // Test 3: Overspecialization
        testConfigs.add(testCase(classModel, uris(resources[1], resources[2]), uris(resources[2]), 1, 0, 1));
        // Test 4: Underspecialization
        testConfigs.add(testCase(classModel, uris(resources[1], resources[2]), uris(resources[0]), 2, 1, 0));
        // Test 5: Exact matching
        testConfigs.add(testCase(classModel, uris(resources[1]), uris(resources[1]), 2, 0, 0));
        // Test 6: Exact matching
        testConfigs.add(testCase(classModel, uris(resources[1], resources[2]), uris(resources[1]), 2, 0, 0));
        // Test 7: Exact matching
        testConfigs.add(testCase(classModel, uris(resources[1]), uris(resources[1], resources[2]), 2, 0, 0));

        /*
         * Alternative paths (figure 2 c)
         */
        classModel = ModelFactory.createDefaultModel();
        resources = createResources(5, classModel);
        classModel.add(resources[1], RDFS.subClassOf, resources[0]);
        classModel.add(resources[2], RDFS.subClassOf, resources[0]);
        classModel.add(resources[3], RDFS.subClassOf, resources[2]);
        classModel.add(resources[4], RDFS.subClassOf, resources[3]);
        classModel.add(resources[4], RDFS.subClassOf, resources[1]);
        // Test 8
        testConfigs.add(testCase(classModel, uris(resources[4]), uris(resources[0]), 1, 4, 0));

        /*
         * Pairing problem (figure 2 d)
         */
        classModel = ModelFactory.createDefaultModel();
        resources = createResources(6, classModel);
        classModel.add(resources[1], RDFS.subClassOf, resources[0]);
        classModel.add(resources[2], RDFS.subClassOf, resources[0]);
        classModel.add(resources[3], RDFS.subClassOf, resources[1]);
        classModel.add(resources[4], RDFS.subClassOf, resources[1]);
        classModel.add(resources[5], RDFS.subClassOf, resources[2]);
        // Test 9
        testConfigs.add(testCase(classModel, uris(resources[1], resources[5]), uris(resources[3], resources[4]),
                2, 0, 2));
        // Test 10: Long distance problem, reusing the model from above.
        // resources[3] is D and resources[5] is F.
        testConfigs.add(testCase(classModel, uris(resources[3]), uris(resources[5]), 0, 1, 1));

        /*
         * DAG example (figure 8 b)
         */
        classModel = ModelFactory.createDefaultModel();
        resources = createResources(12, classModel);
        classModel.add(resources[1], RDFS.subClassOf, resources[0]);
        classModel.add(resources[2], RDFS.subClassOf, resources[0]);
        classModel.add(resources[3], RDFS.subClassOf, resources[0]);
        classModel.add(resources[4], RDFS.subClassOf, resources[1]);
        classModel.add(resources[5], RDFS.subClassOf, resources[1]);
        classModel.add(resources[6], RDFS.subClassOf, resources[2]);
        classModel.add(resources[6], RDFS.subClassOf, resources[3]);
        classModel.add(resources[7], RDFS.subClassOf, resources[3]);
        classModel.add(resources[8], RDFS.subClassOf, resources[3]);
        classModel.add(resources[9], RDFS.subClassOf, resources[3]);
        classModel.add(resources[10], RDFS.subClassOf, resources[8]);
        classModel.add(resources[11], RDFS.subClassOf, resources[3]);
        classModel.add(resources[11], RDFS.subClassOf, resources[8]);
        // Test 11
        testConfigs.add(testCase(classModel, uris(resources[6], resources[9], resources[10]),
                uris(resources[7], resources[10], resources[11]), 1, 2, 2));
        // Test 12: reusing the model from above
        testConfigs.add(testCase(classModel, uris(resources[3]), uris(resources[2]), 1, 1, 6));

        return testConfigs;
    }

    /**
     * Bundles the parts of a single test configuration into the array layout
     * expected by the constructor of this class.
     */
    private static Object[] testCase(Model typeHierarchy, String[] goldStandardTypes, String[] annotatorResults,
            int firstCount, int secondCount, int thirdCount) {
        return new Object[] { typeHierarchy, goldStandardTypes, annotatorResults,
                new int[] { firstCount, secondCount, thirdCount } };
    }

    /**
     * Collects the URIs of the given classes in the given order.
     */
    private static String[] uris(Resource... classes) {
        String[] uris = new String[classes.length];
        for (int i = 0; i < classes.length; ++i) {
            uris[i] = classes[i].getURI();
        }
        return uris;
    }

    /** Class hierarchy from which the sub class inferencer is created. */
    private final Model typeHierarchy;
    /** Type URIs of the single gold standard entity. */
    private final String[] goldStandardTypes;
    /** Type URIs of the single entity returned by the (simulated) annotator. */
    private final String[] annotatorResults;
    /** Counts the tested counter is expected to return for the entity pair. */
    private final EvaluationCounts expectedCounts;

    /**
     * Constructor called by the {@link Parameterized} runner with one of the
     * configurations created by {@link #data()}.
     *
     * @param typeHierarchy
     *            the class hierarchy used for the sub class inference
     * @param goldStandardTypes
     *            the type URIs of the gold standard entity
     * @param annotatorResults
     *            the type URIs of the annotator entity
     * @param expectedCounts
     *            three counts which are handed in the given order to the
     *            {@link EvaluationCounts} constructor
     */
    public HierarchicalMatchingsCounterTest(Model typeHierarchy, String[] goldStandardTypes, String[] annotatorResults,
            int[] expectedCounts) {
        this.typeHierarchy = typeHierarchy;
        this.goldStandardTypes = goldStandardTypes;
        this.annotatorResults = annotatorResults;
        this.expectedCounts = new EvaluationCounts(expectedCounts[0], expectedCounts[1], expectedCounts[2]);
    }

    /**
     * Creates one annotator entity and one gold standard entity (both with id
     * 0, i.e., with identical positions and URIs but with different types),
     * lets the {@link HierarchicalMatchingsCounter} count the matchings and
     * compares the first returned counts object with the expected one.
     */
    @Test
    public void test() {
        HierarchicalMatchingsCounter<TypedNamedEntity> counter = new HierarchicalMatchingsCounter<TypedNamedEntity>(
                new WeakSpanMatchingsSearcher<TypedNamedEntity>(),
                new SimpleWhiteListBasedUriKBClassifier(KNOWN_KB_URIS),
                SimpleSubClassInferencerFactory.createInferencer(typeHierarchy));

        // Mutable lists are used on purpose since it is not visible from here
        // whether the counter modifies the lists it is given.
        List<TypedNamedEntity> annotatorResult = new ArrayList<TypedNamedEntity>();
        annotatorResult.add(createTypedNamedEntities(annotatorResults, 0));
        List<TypedNamedEntity> goldStandard = new ArrayList<TypedNamedEntity>();
        goldStandard.add(createTypedNamedEntities(goldStandardTypes, 0));

        List<EvaluationCounts> evalCounts = counter.countMatchings(annotatorResult, goldStandard);
        Assert.assertNotNull(evalCounts);
        Assert.assertFalse("The counter did not return any evaluation counts.", evalCounts.isEmpty());
        EvaluationCounts calculatedCounts = evalCounts.get(0);
        Assert.assertEquals(
                "Evaluation counts do not match exp=" + expectedCounts + " calculated=" + calculatedCounts,
                expectedCounts, calculatedCounts);
    }

    /**
     * Creates a single {@link TypedNamedEntity} with the given types. The URI
     * of the entity is the first known KB namespace followed by
     * {@code "entity_" + id}. The first two constructor arguments are
     * {@code id * 2} and {@code id * 2 + 1} (presumably start position and
     * length of the entity - confirm against {@link TypedNamedEntity}).
     *
     * @param types
     *            the type URIs of the entity
     * @param id
     *            the id from which the position and the URI of the entity are
     *            derived
     * @return the created entity
     */
    public static TypedNamedEntity createTypedNamedEntities(String[] types, int id) {
        return new TypedNamedEntity(id * 2, (id * 2) + 1, KNOWN_KB_URIS[0] + "entity_" + id,
                new HashSet<String>(Arrays.asList(types)));
    }

    /**
     * Creates the given number of resources inside the given model and types
     * each of them as {@code rdfs:Class}. The URI of the i-th resource is the
     * first known KB namespace followed by the i-th letter counted from 'A',
     * i.e., index 0 is A, index 1 is B and so on.
     *
     * @param numberOfResources
     *            the number of resources that should be created
     * @param classModel
     *            the model to which the resources and their type statements
     *            are added
     * @return the created resources ordered by their letter
     */
    public static Resource[] createResources(int numberOfResources, Model classModel) {
        Resource[] resources = new Resource[numberOfResources];
        for (int i = 0; i < resources.length; ++i) {
            resources[i] = classModel.createResource(KNOWN_KB_URIS[0] + ((char) ('A' + i)));
            classModel.add(resources[i], RDF.type, RDFS.Class);
        }
        return resources;
    }
}