/*
* Article.java - The class holds information about the article node and links-in and links-out
*
* Copyright (c) 2005-2007 Andrew Krizhanovsky /aka at mail.iias.spb.su/
* Distributed under GNU Public License.
*/
package wikipedia.kleinberg;
import wikipedia.data.ArticleIdAndTitle;
import wikipedia.util.*;
import java.util.*;
/**
 * A graph node describing one article: its type, its authority (x) and
 * hub (y) weights used by the iterative weight calculation, its categories
 * and the redirect pages pointing to it.
 *
 * The identifier, the title and the link arrays (page_id, page_title,
 * links_in, links_out) are inherited from {@link Node}.
 */
public class Article extends Node {

    /** Type of the node; only its numeric value ({@code type.toInt()}) and
     * the constants DEFAULT and HUB are used in this class. */
    public NodeType type;

    public final static Article[] NULL_ARTICLE_ARRAY = new Article[0];
    private final static int [] NULL_INTEGER_ARRAY = new int [0];
    //public final static String[] NULL_STRING_ARRAY = new String[0];

    /** x[p] := sum of y[q], for all q pointing to p, i.e. authority weight of p */
    public float x;             //public ArrayList<Integer> links_in;
    /** Newly calculated authority weight, copied to x by UpdateXY() */
    public float x_new;

    /** y[p] := sum of x[q], for all q pointed to by p, i.e. hub weight of p */
    public float y;
    /** Newly calculated hub weight, copied to y by UpdateXY() */
    public float y_new;         //public ArrayList<Integer> links_out;

    //public int count_l_from;  // SELECT COUNT(*) FROM links WHERE l_from=page_id
    //public int count_l_to;    // count_l_from > links_in.length

    /** The categories data will be printed to GraphViz .dot file if it is true */
    public static boolean bdraw_categories = false;

    /** id of categories for this article */
    //public ArrayList<Integer> id_categories;
    public int[] id_categories;

    /** true for redirect page, it corresponds to page.page_is_redirect in MySQL.
     * This is not NodeType, since redirect page can be (or not) rated as synonym
     */
    //public boolean is_redirect;

    /** Titles (and id) of redirect pages, analog of "What links here" in WP */
    public List<ArticleIdAndTitle> redirect;

    // is redirect:
    // todo
    // see:
    // 1. createArticleWithCategories: id < 0
    // 2. CategoryBlackList.DeleteUsingBlackList: id = PageTable.getIDByTitleNamespace
    // 3. Links.getAllLinksFromNodes: id_from < 0
    // 4. CategoryBlackList.inBlackList:0 < cl_from

    /** Creates an article of the DEFAULT type with an empty redirect list and
     * with both weights (x and y) equal to one.
     */
    public Article() {
        super.init();
        redirect = ArticleIdAndTitle.NULL_ARTICLEIDANDTITLE_LIST;
        type = NodeType.DEFAULT;    // unknown and initial type's value
        x = 1.f;
        y = 1.f;
    }

    /** Creates map &lt;page_id, reference-to-class-Article&gt; for articles
     * with positive id, i.e. non-redirects. If several nodes share the same
     * id then the last one wins.
     *
     * @param nodes articles to be indexed, should not be null
     */
    public static Map<Integer, Article> createMapIdToArticleWithoutRedirects(Article[] nodes) {
        Map<Integer, Article> m = new HashMap<Integer, Article>();  // nodes.length);
        for (Article node : nodes) {
            if (node.page_id > 0) {
                m.put(node.page_id, node);
            }
        }
        return m;
    }

    /** Creates map &lt;page_title, reference-to-class-Article&gt; for articles
     * with positive id, i.e. non-redirects. If several nodes share the same
     * title then the last one wins.
     *
     * @param nodes articles to be indexed, should not be null
     */
    public static Map<String, Article> createMapTitleToArticleWithoutRedirects(Article[] nodes) {
        // -XX:+HeapDumpOnOutOfMemoryError -mn256m -mx512m
        Map<String, Article> m = new HashMap<String, Article>();    // nodes.length
        for (Article node : nodes) {
            if (node.page_id > 0) {
                m.put(node.page_title, node);
            }
        }
        return m;
    }

    // GraphViz functions -------------------------------------------
    //

    /** Creates the GraphViz description of this node, e.g. the line
     * <pre>W1 [label="1287\nRobot\nx=12.0\ny=3.0"];</pre>
     * where the label contains the id, the title and the x and y weights.
     * Shape attributes are appended for the numeric type values
     * -2, -1, 0 and 1; other values get the default shape.
     *
     * NOTE(review): page_title is written as is, so a title containing
     * a double quote would presumably break the label - confirm and escape.
     */
    public String GraphVizNode() {
        String s_form = "";
        switch (type.toInt()) {
            case -2: s_form = ",shape=box,style=filled,fillcolor=yellow";           break;  // hubs
            case -1: s_form = ",shape=polygon,sides=4,peripheries=3";               break;  // source
            case  0: s_form = ",shape=invtriangle,style=filled,fillcolor=grey";     break;  // selected synonyms
            case  1: s_form = ",shape=box";                                         break;
        }
        return "W" + page_id + " [label=\"" +
                page_id + "\\n" + page_title +          // id and the name
                "\\nx=" + x + "\\ny=" + y + "\"" +      // x and y values
                s_form +                                // node's shape form
                "];\n";
    }

    /**
     * Creates edges 1) for links_out like "W1 -&gt; W2;\n" (bold for hubs),
     * 2) for id_categories like "W1 -&gt; C2 [style=dotted];\n" (only if
     * bdraw_categories is true), W - main articles (nodes), C - category articles.
     *
     * @return concatenated edge lines, empty string if there are no edges
     */
    public String GraphVizLinksOut() {
        // StringBuilder avoids re-copying the whole result for every edge
        StringBuilder result = new StringBuilder();

        if (null != links_out) {
            String bold_edge = (NodeType.HUB == type) ? " [style=bold]" : "";
            for (int i = 0; i < links_out.length; i++) {
                result.append("W").append(page_id).append(" -> ")
                      .append("W").append(links_out[i]).append(bold_edge).append(";\n");
            }
        }

        if (bdraw_categories && null != id_categories) {
            for (int i = 0; i < id_categories.length; i++) {
                result.append("W").append(page_id).append(" -> ")
                      .append("C").append(id_categories[i]).append(" [style=dotted]").append(";\n");
            }
        }
        return result.toString();
    }
    // eo GraphViz functions -----------------------------------------

    /** Joins articles from nodes with those articles from addend, whose
     * page_id is absent in nodes. The order is preserved: all nodes first,
     * then the accepted elements of addend.
     *
     * Remark: elements of addend are compared only with nodes, duplicates
     * inside addend itself are not removed.
     *
     * @param nodes  base array, if it is null then addend is returned
     * @param addend articles to be added, if it is null or empty then
     *               nodes is returned (the same reference)
     * @return new array in other cases
     */
    public static Article[] joinUnique(Article[] nodes, Article[] addend) {
        if (null == addend || 0 == addend.length)
            return nodes;
        if (null == nodes)
            return addend;

        // ids already presented in nodes, it gives O(n+m) instead of O(n*m)
        Set<Integer> known_ids = new HashSet<Integer>();
        for (Article node : nodes) {
            known_ids.add(node.page_id);
        }

        List<Article> fresh = new ArrayList<Article>(addend.length);
        for (Article candidate : addend) {
            if (!known_ids.contains(candidate.page_id)) {
                fresh.add(candidate);
            }
        }

        // fill new array
        Article[] result = new Article[nodes.length + fresh.size()];
        System.arraycopy(nodes, 0, result, 0, nodes.length);
        for (int i = 0; i < fresh.size(); i++) {
            result[nodes.length + i] = fresh.get(i);
        }
        return result;
    }

    /**
     * Assigns the type to those nodes, whose current numeric type value
     * (toInt()) is greater than the value of the new type.
     * Rule: the type value can decrease, but it does not increase.
     *
     * NOTE(review): the meaning of the numeric values is defined by NodeType;
     * an older remark here listed "0 - source article, 1 - root, 2 - base set,
     * 3 - unknown and initial value", which does not agree with the values
     * used in GraphVizNode() - confirm against NodeType.
     *
     * @param nodes articles to be updated, nothing is done if it is null
     * @param type  new (lower) type
     */
    public static void SetType (Article[] nodes, NodeType type) {
        if (null == nodes)
            return;
        int new_value = type.toInt();
        for (Article node : nodes) {
            if (node.type.toInt() > new_value)
                node.type = type;
        }
    }

    /** Calculates x_new[p] := sum of y[q], for all q pointing to p (links_in).
     * Links to articles which are absent in nodes are skipped.
     *
     * @param nodes   map from page_id to the article
     * @param n_nodes is not used
     */
    public void CalculateNewX(Map<Integer, Article> nodes, int n_nodes) {
        x_new = 0.f;
        if (null != links_in) {
            for (int i = 0; i < links_in.length; i++) {
                Article q = nodes.get( links_in[i] );
                if (null != q) {        // linked article can be outside of the map
                    x_new += q.y;
                }
            }
        }
    }

    /** Calculates y_new[p] := sum of x[q], for all q pointed to by p (links_out).
     * Links to articles which are absent in nodes are skipped.
     *
     * @param nodes   map from page_id to the article
     * @param n_nodes is not used
     */
    public void CalculateNewY(Map<Integer, Article> nodes, int n_nodes) {
        y_new = 0.f;
        if (null != links_out) {
            for (int i = 0; i < links_out.length; i++) {
                Article q = nodes.get( links_out[i] );
                if (null != q) {        // linked article can be outside of the map
                    y_new += q.x;
                }
            }
        }
    }

    /*
    public static void NormalizeXY (HashMap<Integer, Article> nodes) {
    float links_in_number, sum_x, sum_y;
    Iterator<Article> it;
    // get sum x and sum y for each node in nodes
    sum_x = 0.f;
    sum_y = 0.f;
    it = nodes.values().iterator();
    while (it.hasNext()) {
    Article node = it.next();
    sum_x += node.x;
    sum_y += node.y;
    }
    // Normalize x and y values via sum_x and sum_y
    it = nodes.values().iterator();
    while (it.hasNext()) {
    Article node = it.next();
    node.x = node.x / sum_x;
    node.y = node.y / sum_y;
    }
    }
    */

    /** Normalizes x_new and y_new of all articles, so that the sum of x_new
     * (and the sum of y_new) over the map becomes equal to one.
     * If a sum is equal to zero then the corresponding values are left
     * unchanged (a division would turn them into NaN).
     *
     * @param nodes map from page_id to the article
     */
    public static void NormalizeNewXNewY (Map<Integer, Article> nodes) {
        // get sum x and sum y for each node in nodes
        float sum_x = 0.f;
        float sum_y = 0.f;
        for (Article node : nodes.values()) {
            sum_x += node.x_new;
            sum_y += node.y_new;
        }

        // Normalize x_new and y_new values via sum_x and sum_y
        boolean normalize_x = 0.f != sum_x;
        boolean normalize_y = 0.f != sum_y;
        for (Article node : nodes.values()) {
            if (normalize_x)
                node.x_new = node.x_new / sum_x;
            if (normalize_y)
                node.y_new = node.y_new / sum_y;
        }
    }

    /*
    public void UpdateXY() {
    x = x_new;
    y = y_new;
    }*/

    /** Copies x_new to x and y_new to y for every article in nodes and gets
     * the total change (error). The state of this object is not used.
     *
     * @param nodes map from page_id to the article
     * @return array of two elements: [0] is the sum of |x - x_new|,
     *         [1] is the sum of |y - y_new|
     */
    public float[] UpdateXY(Map<Integer, Article> nodes) {
        float[] total_error = new float[2];
        total_error[0] = 0f;    // x total incrementation
        total_error[1] = 0f;    // y

        for (Article node : nodes.values()) {
            total_error[0] += Math.abs(node.x - node.x_new);
            total_error[1] += Math.abs(node.y - node.y_new);
            node.x = node.x_new;
            node.y = node.y_new;
        }
        return total_error;
    }

    /** Descending order by the authority weight x. Equal weights compare
     * as 0, as it is required by the Comparator contract. */
    static final Comparator<Article> X_ORDER = new Comparator<Article>() {
        public int compare(Article n1, Article n2) {
            return Float.compare(n2.x, n1.x);
        }
    };

    /** Descending order by the hub weight y. Equal weights compare
     * as 0, as it is required by the Comparator contract. */
    static final Comparator<Article> Y_ORDER = new Comparator<Article>() {
        public int compare(Article n1, Article n2) {
            return Float.compare(n2.y, n1.y);
        }
    };

    /** Gets ID array of articles from the 'map_title_article' by 'titles' of
     * articles. The order of ID corresponds to the iteration order of titles.
     *
     * @param titles titles of the articles
     * @param map_title_article map from title of the article to the article
     *
     * @return empty array if titles are absent in the 'map_title_article'
     */
    public static int[] getIdExistedInMap (Set<String> titles, Map<String, Article> map_title_article) {
        if (titles.isEmpty())
            return NULL_INTEGER_ARRAY;

        // one pass and one lookup per title; the buffer is trimmed below
        int[] found = new int [titles.size()];
        int size = 0;
        for (String t : titles) {
            Article a = map_title_article.get(t);
            if (null != a)
                found[size ++] = a.page_id;
        }

        if (0 == size)
            return NULL_INTEGER_ARRAY;
        if (size == found.length)
            return found;

        int[] result = new int [size];
        System.arraycopy(found, 0, result, 0, size);
        return result;
    }

    /** Creates an article with -&gt;id_categories[]. Returns null (and writes
     * the reason to session.dump, if it is not null) in the cases:
     * 1) session.skipTitle(title) is true or id is negative;
     * 2) session.category_black_list.inBlackList() found a black-list category.
     *
     * Remark: the dump message of the first case always mentions skipped
     * characters, even when the reason is the negative id.
     *
     * @param session session with the black-list of categories and the dump
     * @param title title of the article to be created
     * @param id    ID of the article to be created
     */
    public static Article createArticleWithCategories (SessionHolder session, String title, int id) {

        // skips titles with spaces
        if (session.skipTitle(title) || id < 0) {
            if (null != session.dump) {
                session.dump.file.PrintNL(
                        String.format("Removed:%-20s It contains skipped characters (e.g. spaces/underscores).",
                                title));
                session.dump.file.Flush();
            }
            return null;
        }

        List<String> titles_level_1_cats = new ArrayList<String>();
        String black_category = session.category_black_list.inBlackList(id, titles_level_1_cats, session.source_article_id);

        if (null != black_category) {
            if (null != session.dump) {
                // + " id:" + id +
                session.dump.file.PrintNL(
                        String.format("Removed:%-20s steps:%3d black-list category:%s",
                                title, session.category_black_list.getPassedSteps(), black_category));
                //session.dump.file.PrintNL( "Removed:" + title + " steps:" + session.category_black_list.steps + " black-list category:" + black_category);
                session.dump.file.Flush();
            }
            return null;
        }

        Article a = new Article();
        a.page_id = id;
        a.page_title = title;
        a.id_categories = CategoryBlackList.getFirstLevelCategoriesID (session, id);
        return a;
    }

    /** Select randomly n_limit elements from source
     */
    /*public static Article[] getRandNodeArray(Article[] source, int n_limit) {
    int i, counter;
    if (null == source || 0 == n_limit)
    return null;
    if (n_limit < 0 || n_limit >= source.length)
    return source;
    boolean[] b_rand = RandShuffle.getRandArray (n_limit, source.length);
    Article[] result_nodes = new Article[n_limit];
    counter = 0;
    for (i=0; i<b_rand.length; i++) {
    if (b_rand[i]) {
    result_nodes[counter ++] = source[i];
    }
    }
    return result_nodes;
    }*/
}