/*
 * Copyright (C) 2015 Artificial Intelligence
 * Laboratory @ University of Udine.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
package it.uniud.ailab.dcore.eval;

import java.util.Comparator;
import java.util.Map;

/**
 * Base class for dataset loaders used during evaluation.
 *
 * <p>A concrete subclass is responsible for reading the documents and the
 * gold standard answers into the maps exposed by this class, and for
 * supplying the {@link Comparator} logic that decides whether a candidate
 * result matches a gold standard one.</p>
 *
 * <p>For example, keyphrase evaluation may be performed on "simple"
 * keyphrases, on lemmatized keyphrases, or on stemmed keyphrases; each
 * variant would provide its own {@link #compare(String, String)}.</p>
 *
 * <p>The document and answer maps are populated only by {@link #load()};
 * until that method has been called the corresponding getters return
 * {@code null}.</p>
 *
 * @author Marco Basaldella
 */
public abstract class GenericDataset implements Comparator<String> {

    /**
     * The path where the evaluator will find the input documents and the
     * gold standard results.
     */
    protected final String datasetPath;

    /**
     * An output-friendly string that identifies the dataset.
     */
    private final String identifier;

    /**
     * The training documents, keyed by document identifier; the value is
     * the document content.
     */
    private Map<String, String> trainingDocuments;

    /**
     * The expected answers for the training documents, keyed by the
     * identifier of the document they belong to.
     */
    private Map<String, String[]> trainingAnswers;

    /**
     * The test documents, keyed by document identifier; the value is the
     * document content.
     */
    private Map<String, String> testDocuments;

    /**
     * The expected answers for the test documents, keyed by the identifier
     * of the document they belong to.
     */
    private Map<String, String[]> testAnswers;

    /**
     * Whether {@link #load()} has already completed. Starts out as
     * {@code false} (the default value of a boolean field).
     */
    private boolean loaded;

    /**
     * Create a concrete dataset that will contain the data found in the
     * specified path. Nothing is loaded until {@link #load()} is called.
     *
     * @param datasetPath the folder where the dataset will look for the
     * documents.
     * @param identifier an output-friendly string that identifies the
     * dataset.
     */
    public GenericDataset(String datasetPath, String identifier) {
        this.identifier = identifier;
        this.datasetPath = datasetPath;
    }

    // ------------------------------------------------------------------
    // Hooks implemented by concrete datasets
    // ------------------------------------------------------------------

    /**
     * Compares a <b>candidate</b> item with a <b>dataset provided</b> item.
     * Please note that the object to test <b>must</b> be passed as first
     * parameter, while the object to test against <b>must</b> be passed as
     * second parameter.
     *
     * @param o1 the object to test, generated by the Distiller.
     * @param o2 the reference object, provided by the dataset.
     * @return 0 if o1 and o2 are equal, another (unspecified) number
     * otherwise.
     */
    @Override
    public abstract int compare(String o1, String o2);

    /**
     * Loads the training documents and returns them.
     *
     * @return the training documents mapped with their identifier.
     */
    protected abstract Map<String, String> loadTrainingSet();

    /**
     * Loads the input (test) documents and returns them.
     *
     * @return the input documents mapped with their identifier.
     */
    protected abstract Map<String, String> loadTestSet();

    /**
     * Loads the training set expected answers and returns them.
     *
     * @return the training set answers mapped with the identifier of the
     * document they belong to.
     */
    protected abstract Map<String, String[]> loadTrainingAnswers();

    /**
     * Loads the test set answers and returns them.
     *
     * @return the test set answers mapped with the identifier of the
     * document they belong to.
     */
    protected abstract Map<String, String[]> loadTestAnswers();

    /**
     * Get the folder that contains the training set.
     *
     * @return the folder that contains the training set.
     */
    public abstract String getTrainingFolder();

    /**
     * Get the folder that contains the test set.
     *
     * @return the folder that contains the test set.
     */
    public abstract String getTestFolder();

    // ------------------------------------------------------------------
    // Loading
    // ------------------------------------------------------------------

    /**
     * Load the dataset.
     *
     * <p>The loaders are invoked in a fixed order: training documents,
     * test documents, training answers, test answers. The result of each
     * loader is stored before the next loader runs, so a later loader can
     * already read the earlier results through the getters. The dataset is
     * flagged as loaded only after all four loaders have returned.</p>
     */
    public void load() {
        this.trainingDocuments = loadTrainingSet();
        this.testDocuments = loadTestSet();
        this.trainingAnswers = loadTrainingAnswers();
        this.testAnswers = loadTestAnswers();
        this.loaded = true;
    }

    /**
     * Gets the dataset status: true if the data has already been loaded,
     * false otherwise.
     *
     * @return the dataset status.
     */
    public boolean isLoaded() {
        return this.loaded;
    }

    // ------------------------------------------------------------------
    // Accessors
    // ------------------------------------------------------------------

    /**
     * Get an output-friendly string that identifies the dataset.
     *
     * @return an output-friendly string that identifies the dataset.
     */
    public String getIdentifier() {
        return this.identifier;
    }

    /**
     * Get the training set documents for the dataset.
     *
     * @return the training set for the dataset, or {@code null} if
     * {@link #load()} has not been called yet.
     */
    public Map<String, String> getTrainingSet() {
        return this.trainingDocuments;
    }

    /**
     * Get the training set results for the dataset.
     *
     * @return the expected results on the training set of the dataset, or
     * {@code null} if {@link #load()} has not been called yet.
     */
    public Map<String, String[]> getTrainingAnswers() {
        return this.trainingAnswers;
    }

    /**
     * Get the test set documents for the dataset.
     *
     * @return the test set for the dataset, or {@code null} if
     * {@link #load()} has not been called yet.
     */
    public Map<String, String> getTestSet() {
        return this.testDocuments;
    }

    /**
     * Get the test set results for the dataset.
     *
     * @return the expected results on the test set of the dataset, or
     * {@code null} if {@link #load()} has not been called yet.
     */
    public Map<String, String[]> getTestAnswers() {
        return this.testAnswers;
    }
}