/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.dataset.impl.bat;
import it.unipi.di.acube.batframework.data.Tag;
import it.unipi.di.acube.batframework.problems.A2WDataset;
import it.unipi.di.acube.batframework.problems.C2WDataset;
import it.unipi.di.acube.batframework.problems.TopicDataset;
import it.unipi.di.acube.batframework.utils.WikipediaApiInterface;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.aksw.gerbil.dataset.Dataset;
import org.aksw.gerbil.dataset.impl.AbstractDataset;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
import org.aksw.gerbil.utils.bat.BAT2NIF_TranslationHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class BatFrameworkDatasetWrapper {
/** Logger of this class. */
private static final Logger LOGGER = LoggerFactory.getLogger(BatFrameworkDatasetWrapper.class);
/** Suffix appended to the name of a BAT framework dataset to form the name of its wrapper. */
public static final String DATASET_NAME_SUFFIX = " (BAT)";
/** Prefix of the URIs that the wrappers generate for the documents of a wrapped dataset. */
public static final String DATASET_URI_PREFIX = "http://gerbil.aksw.org/BAT-Framework/";
/**
 * Creates a GERBIL {@link Dataset} that wraps the given BAT framework dataset.
 * <p>
 * A warning is logged on every call because using these wrappers is not
 * recommended. {@link A2WDataset} instances are wrapped in an
 * {@link A2KBDatasetWrapper}, all other {@link C2WDataset} instances in a
 * {@link C2KBDatasetWrapper}.
 *
 * @param dataset
 *            the BAT framework dataset that should be wrapped
 * @param wikiApi
 *            the Wikipedia API that is handed to the created wrapper
 * @return the wrapper, or {@code null} if the dataset is {@code null} or
 *         none of the known wrappers matches its type (an error is logged in
 *         both cases)
 */
public static Dataset create(TopicDataset dataset, WikipediaApiInterface wikiApi) {
    LOGGER.warn("Using wrappers for BAT framework datasets is not recommended!");
    if (dataset == null) {
        // Without this guard a null dataset would pass both instanceof checks
        // and then fail with a NullPointerException while the error message
        // below is built.
        LOGGER.error("Couldn't create a wrapper for a null dataset. Returning null.");
        return null;
    }
    // A2WDataset has to be tested first: it is a subtype of C2WDataset
    // (A2KBDatasetWrapper hands its A2WDataset type parameter to
    // C2KBDatasetWrapper), so the second check would match it as well.
    if (dataset instanceof A2WDataset) {
        return new A2KBDatasetWrapper<A2WDataset>((A2WDataset) dataset, wikiApi);
    }
    if (dataset instanceof C2WDataset) {
        return new C2KBDatasetWrapper<C2WDataset>((C2WDataset) dataset, wikiApi);
    }
    LOGGER.error("Couldn't find a matching wrapper for \"{}\". Returning null.", dataset.getClass().getName());
    return null;
}
/**
 * Base class of the BAT framework dataset wrappers. It names the wrapper
 * after the wrapped dataset and creates one {@link Document} for every text
 * instance of that dataset. Subclasses add the gold standard markings.
 *
 * @param <T>
 *            the type of the wrapped BAT framework dataset
 */
protected abstract static class AbstractTopicSystemWrapper<T extends TopicDataset> extends AbstractDataset {

    /** Translation helper created from the Wikipedia API given to the constructor. */
    protected BAT2NIF_TranslationHelper translater;
    /** The documents of the dataset, in the order of the dataset's text instance list. */
    protected List<Document> documents;

    /**
     * Creates the wrapper and its document list.
     *
     * @param dataset
     *            the BAT framework dataset whose texts are turned into
     *            documents
     * @param wikiApi
     *            the Wikipedia API used to create the translation helper
     */
    public AbstractTopicSystemWrapper(T dataset, WikipediaApiInterface wikiApi) {
        super(dataset.getName() + DATASET_NAME_SUFFIX);
        translater = new BAT2NIF_TranslationHelper(wikiApi);

        // Document URIs have the form <prefix><dataset name with '+' for ' '>/<id>
        String datasetUri = DATASET_URI_PREFIX + dataset.getName().replace(' ', '+');
        String documentUriPrefix = datasetUri.endsWith("/") ? datasetUri : datasetUri + '/';

        documents = new ArrayList<Document>(dataset.getSize());
        for (String text : dataset.getTextInstanceList()) {
            // The number of documents created so far is the zero-based id
            // of the document that is added next.
            documents.add(new DocumentImpl(text, documentUriPrefix + documents.size()));
        }
    }

    /**
     * @return the name of this dataset, read from the {@code name} field
     *         (not declared in this class; presumably inherited from
     *         {@link AbstractDataset} and set by its constructor)
     */
    @Override
    public String getName() {
        return name;
    }

    /**
     * @return the list of documents held by this wrapper (the list itself,
     *         not a copy)
     */
    @Override
    public List<Document> getInstances() {
        return documents;
    }

    /**
     * @return the number of documents of this dataset
     */
    @Override
    public int size() {
        return documents.size();
    }
}
/**
 * Wrapper for C2W datasets. In addition to the documents created by the
 * base class, the translated gold standard tags of the dataset are added to
 * the markings of the documents.
 *
 * @param <T>
 *            the type of the wrapped C2W dataset
 */
protected static class C2KBDatasetWrapper<T extends C2WDataset> extends AbstractTopicSystemWrapper<T> {

    /**
     * Creates the documents and adds the gold standard tags to them.
     *
     * @param dataset
     *            the C2W dataset that is wrapped
     * @param wikiApi
     *            the Wikipedia API handed to the base class
     */
    public C2KBDatasetWrapper(T dataset, WikipediaApiInterface wikiApi) {
        super(dataset, wikiApi);
        // The i-th tag set of the gold standard is added to the i-th document.
        int position = 0;
        for (HashSet<Tag> goldTags : dataset.getC2WGoldStandardList()) {
            Document document = documents.get(position);
            document.getMarkings().addAll(translater.translateTags(goldTags));
            position++;
        }
    }
}
/**
 * Wrapper for A2W datasets. After the superclass constructor has run, the
 * translated gold standard annotations of the dataset are added to the
 * markings of the documents.
 *
 * @param <T>
 *            the type of the wrapped A2W dataset
 */
protected static class A2KBDatasetWrapper<T extends A2WDataset> extends C2KBDatasetWrapper<T> {

    /**
     * Creates the documents and adds the gold standard annotations to them.
     *
     * @param dataset
     *            the A2W dataset that is wrapped
     * @param wikiApi
     *            the Wikipedia API handed to the superclass
     */
    public A2KBDatasetWrapper(T dataset, WikipediaApiInterface wikiApi) {
        // NOTE(review): the superclass constructor has already added the
        // translated C2W tags, so every document ends up with the tag
        // markings as well as the annotation markings - confirm this is
        // intended.
        super(dataset, wikiApi);
        // The i-th annotation set of the gold standard is added to the i-th
        // document.
        int position = 0;
        for (HashSet<it.unipi.di.acube.batframework.data.Annotation> goldAnnotations : dataset
                .getA2WGoldStandardList()) {
            Document document = documents.get(position);
            document.getMarkings().addAll(translater.translateAnnotations(goldAnnotations));
            position++;
        }
    }
}
}