/** * This file is part of General Entity Annotator Benchmark. * * General Entity Annotator Benchmark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * General Entity Annotator Benchmark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>. */ package org.aksw.gerbil.tools; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unipi.di.acube.batframework.data.Annotation; import it.unipi.di.acube.batframework.data.Tag; import it.unipi.di.acube.batframework.problems.C2WDataset; import it.unipi.di.acube.batframework.problems.D2WDataset; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.util.HashSet; import java.util.List; import org.aksw.gerbil.dataset.DatasetConfiguration; import org.aksw.gerbil.datatypes.ExperimentType; import org.aksw.gerbil.exceptions.GerbilException; import org.aksw.gerbil.utils.SingletonWikipediaApi; import org.aksw.gerbil.web.config.DatasetsConfig; import org.aksw.gerbil.web.config.RootConfig; import org.apache.commons.io.IOUtils; @Deprecated public class DatasetWikiIdExporter { private static final String EXPORT_FOLDER_NAME = "export"; public static void main(String[] args) { List<DatasetConfiguration> datasetConfigs = DatasetsConfig.datasets(RootConfig.getEntityCheckerManager(), RootConfig.createSameAsRetriever()) .getConfigurations(); File exportFolder = new File(EXPORT_FOLDER_NAME); if (!exportFolder.exists()) { exportFolder.mkdirs(); } PrintStream output = null; DatasetWikiIdExporter analyzer = new DatasetWikiIdExporter(); for (DatasetConfiguration config : datasetConfigs) { try { output = new PrintStream(exportFolder.getAbsolutePath() + File.separator + config.getName().replaceAll("[/:]", "_") + "_wikiIds.txt"); analyzer.analyzeDataset(config, output); SingletonWikipediaApi.getInstance().flush(); } catch (GerbilException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { IOUtils.closeQuietly(output); } } } public DatasetWikiIdExporter() { } public void analyzeDataset(DatasetConfiguration config, PrintStream output) throws GerbilException { IntOpenHashSet ids = analyzeAsD2W(config); if (ids == null) { ids = analyzeAsC2W(config); } printIds(ids, output); } private IntOpenHashSet analyzeAsC2W(DatasetConfiguration config) throws GerbilException { D2WDataset dataset = (D2WDataset) config.getDataset(ExperimentType.D2KB); if (dataset == null) { return null; } List<HashSet<Annotation>> goldStandard = dataset.getD2WGoldStandardList(); IntOpenHashSet ids = new IntOpenHashSet(); for (HashSet<Annotation> annotations : goldStandard) { for (Annotation annotation : annotations) { ids.add(annotation.getConcept()); } } return ids; } private IntOpenHashSet analyzeAsD2W(DatasetConfiguration config) throws GerbilException { C2WDataset dataset = (C2WDataset) config.getDataset(ExperimentType.C2KB); if (dataset == null) { return null; } List<HashSet<Tag>> goldStandard = dataset.getC2WGoldStandardList(); IntOpenHashSet ids = new IntOpenHashSet(); for (HashSet<Tag> tags : goldStandard) { for (Tag tag : tags) { ids.add(tag.getConcept()); } } return ids; } private void printIds(IntOpenHashSet ids, PrintStream output) { int idArray[] = ids.toArray(new int[ids.size()]); for (int i = 0; i < idArray.length; ++i) { output.println(idArray[i]); } } }