/*******************************************************************************
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.csniper.webapp.statistics;
import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.annotation.Resource;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import de.tudarmstadt.ukp.csniper.webapp.evaluation.EvaluationRepository;
import de.tudarmstadt.ukp.csniper.webapp.evaluation.model.EvaluationItem;
import de.tudarmstadt.ukp.csniper.webapp.evaluation.model.EvaluationResult;
import de.tudarmstadt.ukp.csniper.webapp.project.model.AnnotationType;
import de.tudarmstadt.ukp.csniper.webapp.statistics.SortableAggregatedEvaluationResultDataProvider;
/**
 * Compares the inter-annotator agreement reported by
 * {@link SortableAggregatedEvaluationResultDataProvider#computeInterAnnotatorAgreement()}
 * with reference values for a small, hand-built set of ratings.
 * <p>
 * The expected results were obtained using<br>
 * http://en.wikipedia.org/wiki/Fleiss%27_kappa#Equations<br>
 * http://justusrandolph.net/kappa/
 * <p>
 * In the test comments a "vector" lists, for each evaluation item, the number of
 * Correct and Wrong ratings given by the selected users. Every test method in
 * this class is currently annotated with {@code @Ignore}.
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration("file:src/main/webapp/WEB-INF/databaseContext.xml")
public class FleissKappaTest
{
    private static final String CORRECT = "Correct";
    private static final String WRONG = "Wrong";
    private static final String EMPTY = "";

    // raters of the fixture, in the order in which their ratings are stored per item
    private static final String[] USERS = { "user1", "user2", "user3", "user4", "user5" };

    // thresholds handed to the repository when listing aggregated results
    private static final double USER_THRESHOLD = 0.6;
    private static final double CONFIDENCE_THRESHOLD = 0;

    // maximum absolute difference tolerated between expected and computed value
    private static final double DELTA = 0.001;

    // guards the one-time fixture creation, see setupResults()
    private static boolean setup = false;

    @Resource(name = "evaluationRepository")
    private EvaluationRepository service;

    private SortableAggregatedEvaluationResultDataProvider dataProvider;

    /**
     * Stores the fixture (four items, each rated by user1..user5) through the repository.
     * <p>
     * Although this is a {@code @Before} method, the static {@link #setup} flag lets the
     * body run only for the first test. {@code @BeforeClass} is not an option, because
     * Spring cannot inject into static fields.
     */
    @Before
    public void setupResults()
    {
        if (setup) {
            // fixture has already been written by an earlier test
            return;
        }
        setup = true;

        List<EvaluationItem> items = new ArrayList<EvaluationItem>();
        List<EvaluationResult> results = new ArrayList<EvaluationResult>();

        // ratings are given in the order user1, user2, user3, user4, user5
        addRatedItem(items, results, "c1", "Doc1", "type1", "textype1",
                CORRECT, CORRECT, WRONG, WRONG, EMPTY);
        addRatedItem(items, results, "c1", "Doc2", "type2", "text2",
                CORRECT, CORRECT, CORRECT, WRONG, CORRECT);
        addRatedItem(items, results, "c2", "Doc3", "type2", "textype2",
                CORRECT, WRONG, CORRECT, CORRECT, WRONG);
        addRatedItem(items, results, "c3", "Doc4", "type2", "text4",
                CORRECT, WRONG, CORRECT, WRONG, CORRECT);

        items = service.writeEvaluationItems(items);
        service.writeEvaluationResults(results);
    }

    /**
     * A single rater leaves nothing to compare, so the expected agreement is NaN.
     */
    @Ignore
    @Test
    public void testOneUser()
    {
        assertKappa("c1", "type2", "user1", Double.NaN);
    }

    /**
     * Agreement above chance.<br>
     * Vector: (0-2, 1-1, 2-0) [Correct-Wrong]
     */
    @Ignore
    @Test
    public void testTwoUsers()
    {
        assertKappa("c1,c2", "type1,type2", "user3,user4", 0.333);
    }

    /**
     * Disagreement below chance.<br>
     * Vector: (2-1, 1-2) [Correct-Wrong]
     */
    @Ignore
    @Test
    public void testThreeUsers()
    {
        assertKappa("c2,c3", "type2", "user1,user2,user4", -0.333);
    }

    /**
     * Disagreement below chance.<br>
     * Vector: (2-2, 3-1, 3-1, 2-2) [Correct-Wrong]
     */
    @Ignore
    @Test
    public void testFourUsers()
    {
        assertKappa("c1,c2,c3", "type1,type2", "user1,user2,user3,user4", -0.244);
    }

    /**
     * Agreement above chance. The first item carries an empty rating by user5 and is
     * expected to be left out of the computation.<br>
     * Vector: (1-0, 2-0, 0-2, 1-1) [Correct-Wrong]
     */
    @Ignore
    @Test
    public void testDismissItem()
    {
        assertKappa("c1,c2,c3", "type1,type2", "user2,user5", 0.333);
    }

    /**
     * The two raters never agree: in the fixture each of the three type2 items
     * (Doc2, Doc3, Doc4) receives one Correct and one Wrong rating from them.<br>
     * Vector: (1-1, 1-1, 1-1) [Correct-Wrong]
     */
    @Ignore
    @Test
    public void testCompleteDisagreement()
    {
        assertKappa("c1,c2,c3", "type2", "user4,user5", -1);
    }

    /**
     * All ratings are Correct. The expected value is NaN because of a division by zero
     * in Fleiss' kappa formula; for these ratings P^_e = 1.<br>
     * Vector: (2-0, 2-0) [Correct-Wrong]
     */
    @Ignore
    @Test
    public void testCompleteAgreementOneSided()
    {
        assertKappa("c1", "type1,type2", "user1,user2", Double.NaN);
    }

    /**
     * The raters agree on every item, once on Correct and once on Wrong.<br>
     * Vector: (2-0, 0-2) [Correct-Wrong]
     */
    @Ignore
    @Test
    public void testCompleteAgreementTwoSided()
    {
        assertKappa("c1,c2", "type2", "user2,user5", 1);
    }

    /**
     * Lists the aggregated results for the given selection, wraps them in a data provider
     * and asserts that its inter-annotator agreement equals the expected value within
     * {@link #DELTA}.
     *
     * @param aCollectionIds
     *            comma-separated collection ids
     * @param aTypes
     *            comma-separated type names; one {@link AnnotationType} is created per name
     * @param aUsers
     *            comma-separated user names
     * @param aExpected
     *            expected agreement value
     */
    private void assertKappa(String aCollectionIds, String aTypes, String aUsers,
            double aExpected)
    {
        List<String> collections = asList(aCollectionIds.split(","));

        List<AnnotationType> annotationTypes = new ArrayList<AnnotationType>();
        for (String typeName : aTypes.split(",")) {
            annotationTypes.add(new AnnotationType(typeName));
        }

        List<String> raters = asList(aUsers.split(","));

        dataProvider = new SortableAggregatedEvaluationResultDataProvider(
                service.listAggregatedResults(collections, annotationTypes, raters,
                        USER_THRESHOLD, CONFIDENCE_THRESHOLD),
                raters);

        assertEquals(aExpected, dataProvider.computeInterAnnotatorAgreement(), DELTA);
    }

    /**
     * Creates one evaluation item, appends it to {@code aItems} and appends one result per
     * given rating to {@code aResults}; the i-th rating is attributed to the i-th entry of
     * {@link #USERS}.
     */
    private static void addRatedItem(List<EvaluationItem> aItems,
            List<EvaluationResult> aResults, String aCollectionId, String aDocument,
            String aType, String aText, String... aRatings)
    {
        EvaluationItem item = new EvaluationItem(aCollectionId, aDocument, aType, 0, 1, aText);
        aItems.add(item);
        for (int i = 0; i < aRatings.length; i++) {
            aResults.add(new EvaluationResult(item, USERS[i], aRatings[i]));
        }
    }

    /**
     * Copies the given strings into a new, modifiable list.
     */
    private List<String> asList(String... aStrings)
    {
        List<String> copy = new ArrayList<String>(aStrings.length);
        for (String s : aStrings) {
            copy.add(s);
        }
        return copy;
    }
}