package org.wikibrain.core.dao;
import gnu.trove.map.TIntDoubleMap;
import gnu.trove.set.TIntSet;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.model.CategoryGraph;
import org.wikibrain.core.model.LocalCategoryMember;
import org.wikibrain.core.model.LocalPage;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
/**
*
* An interface that describes a Dao to determine local category membership.
*
* @author Shilad Sen
* @author Ari Weiland
*
*/
public interface LocalCategoryMemberDao extends Dao<LocalCategoryMember> {
/**
* Returns the best guess at the top level categories associated with a language.
*
* <p>
* This first looks for a language-specific "override" specified in the reference.conf.
* If that fails, it looks for a language-specific mapping for the
* <a href="https://www.wikidata.org/wiki/Q4587687">Category:Main topic classifications
* (Q4587687)</a>Wikidata concept and uses child categories in the language.
* </p>
*
* <p>Be warned: This may fail for some languages.</p>
*
* @param language
* @return
* @throws DaoException
*/
public Set<LocalPage> guessTopLevelCategories(Language language) throws DaoException;
/**
* Supplemental method that saves a membership relationship based on
* a LocalCategory and LocalArticle
* @param category a LocalCategory
* @param article a LocalArticle that is a member of the LocalCategory
* @throws DaoException if there was an error saving the item
* @throws org.wikibrain.core.WikiBrainException if the category and article are in different languages
*/
public void save(LocalPage category, LocalPage article) throws DaoException, WikiBrainException;
public LocalPage getClosestCategory(LocalPage page, Set<LocalPage> candidates, boolean weightedDistance) throws DaoException;
/**
* For each article, identifies the closest category among the specified candidate set.
* Distance is measured using shortest path in the category graph.
*
* @param candidateCategories The categories to consider as candidates (e.g. those considered "top-level").
* @param pageIds If not null, only considers articles in the provided pageIds.
* @param weighted If true, use page-rank weighted edges so paths that traverse more
* general categories are penalized more highly.
* @return Map with candidates as keys and the articles that have them as closest category
* as values. The values are a map of article ids to distances.
* @throws DaoException
*/
Map<LocalPage, TIntDoubleMap> getClosestCategories(Set<LocalPage> candidateCategories, TIntSet pageIds, boolean weighted) throws DaoException;
/**
* See #getClosestCategories with pageIds = null and weighted = true.
* @param topLevelCats
* @return
*/
Map<LocalPage, TIntDoubleMap> getClosestCategories(Set<LocalPage> topLevelCats) throws DaoException;
/**
* Returns distances to specified categories for requested page.
* Distance is measured using shortest path in the category graph.
*
* @param candidateCategories The categories to consider as candidates (e.g. those considered "top-level").
* @param pageId The article id we want to find.
* @param weighted If true, use page-rank weighted edges so paths that traverse more
* general categories are penalized more highly.
* @return Map with article ids as keys and distances to each category id as values.
* @throws DaoException
*
*/
TIntDoubleMap getCategoryDistances(Set<LocalPage> candidateCategories, int pageId, boolean weighted) throws DaoException;
/**
* Gets a collection of page IDs of articles that are members of the category
* specified by the language and category ID
* @param language the language of the category
* @param categoryId the category's ID
* @return a collection of page IDs of articles
* @throws DaoException if there was an error retrieving the pages
*/
public Collection<Integer> getCategoryMemberIds(Language language, int categoryId) throws DaoException;
/**
* Gets a collection of page IDs of articles that are members of the category
* @param localCategory the category
* @return a collection of page IDs of articles
* @throws DaoException if there was an error retrieving the pages
*/
public Collection<Integer> getCategoryMemberIds(LocalPage localCategory) throws DaoException;
/**
* Gets a map of local articles mapped from their page IDs, based on a category
* specified by a language and category ID
* @param language the language of the category
* @param categoryId the category's ID
* @return a map of page IDs to articles
* @throws DaoException if there was an error retrieving the pages
*/
public Map<Integer, LocalPage> getCategoryMembers(Language language, int categoryId) throws DaoException;
/**
* Gets a map of local articles mapped from their page IDs, based on a specified category
* @param localCategory the category to find
* @return a map of page IDs to articles
* @throws DaoException if there was an error retrieving the pages
*/
public Map<Integer, LocalPage> getCategoryMembers(LocalPage localCategory) throws DaoException;
/**
* Gets a collection of page IDs of categories that the article specified by
* the language and category ID is a member of
* @param language the language of the article
* @param articleId the articles's ID
* @return a collection of page IDs of categories
* @throws DaoException if there was an error retrieving the pages
*/
public Collection<Integer> getCategoryIds(Language language, int articleId) throws DaoException;
/**
* Gets a collection of page IDs of categories that the article is a member of
* @param localArticle the article
* @return a collection of page IDs of categories
* @throws DaoException if there was an error retrieving the pages
*/
public Collection<Integer> getCategoryIds(LocalPage localArticle) throws DaoException;
/**
* Gets a map of local categories mapped from their page IDs, based on an article
* specified by a language and article ID
* @param language the language of the article
* @param articleId the article's ID
* @return a map of page IDs to categories
* @throws DaoException if there was an error retrieving the pages
*/
public Map<Integer, LocalPage> getCategories(Language language, int articleId) throws DaoException;
/**
* Gets a map of local categories mapped from their page IDs, based on a specified article
* @param localArticle the article to find
* @return a map of page IDs to categories
* @throws DaoException if there was an error retrieving the pages
*/
public Map<Integer, LocalPage> getCategories(LocalPage localArticle) throws DaoException;
/**
* Returns a compact representation of the category graph.
* The return value of this object is shared and cached, so caller must not change it.
* TODO: make CategoryGraph immutable.
* @param language
* @return
*/
public CategoryGraph getGraph(Language language) throws DaoException;
}