/* * CreateCategoryArticleGraph.java * * Copyright (c) 2005 Andrew Krizhanovsky /aka at mail.iias.spb.su/ * Distributed under GNU Public License. */ package wikipedia.clustering; import wikipedia.kleinberg.*; import java.util.*; /** * Class Description * */ public class CreateCategoryArticleGraph { public Map<Integer, Article> articles; public Map<Integer, Category> categories; private int [] category_id; private String [] category_title; private int [] article_id; private String [] article_title; private int [][] edge_category2category; private int [][] edge_category2article; /** Creates a new instance of CreateCategoryArticleGraph */ public CreateCategoryArticleGraph() { init(); } /** Test tree with categories and nodes 4 category started from C_ 5 articles started from A_ /--- C_All id=1------------------ / | | \ C_Religious id=2 | C_Science id=3 C_Art id=4-. | \ | / | \ | \ | A_Linux id=11 | A_Smartphone id=13 | | | | A_God id=10 A_Palm id=12 A_Tolstoj id=14 */ public void init () { int [] category_id_local = {1, 2, 3, 4}; String [] category_title_local = {"All", "Religious", "Science", "Art"}; category_id = category_id_local; category_title = category_title_local; int [] article_id_local = {10, 11, 12, 13, 14}; String [] article_title_local = {"God", "Linux", "Palm", "Smartphone", "Tolstoj"}; article_id = article_id_local; article_title = article_title_local; int [][] edge_category2category_local = { {1, 2}, {1,3}, {1,4} }; int [][] edge_category2article_local = { {1, 11}, {2,10}, {2,11}, {3,11}, {3,12}, {3, 13}, {4,13}, {4,14} }; edge_category2category = edge_category2category_local; edge_category2article = edge_category2article_local; setupArticlesAndCategories(); } /** Test tree with categories and nodes 4 articles started from A_ 6 category started from C_(W), where W - is the cluster(category) weight c.weight = 1 + n_article -W- - the weight of the clusters to be merged Merge 0: (initial) 6 clusters: (1)C8-2-C9(1) | 3 3 |\ 3 X 3 3 |/ \ | \ (2)C0-4-(2)C1-4-C2(2) C3(2) | | | | A10 A11 A12 A13 Merge 1: (merges clusters with minimal result weight): 5 clusters: C8C9(2) | |\ 4 4 4 | | \ (2)C0-4-(2)C1-4-C2(2) C3(2) | | | | A10 A11 A12 A13 Merge 2: 3 clusters: C8C9C3A13(4) or C8C9C1A11(4) /| /|\ 8 6 6 6 .__6__. / | / | \ C0A10C1A11(4)-6-C2A12(2) C0A10(2)-6-C2A12(2) C3A13(2) Merge 3: \|/ */ public void init2 () { int [] category_id_local = { 8, 9, 0, 1, 2, 3}; String [] category_title_local = {"C8", "C9", "C0", "C1", "C2", "C3"}; category_id = category_id_local; category_title = category_title_local; int [] article_id_local = { 10, 11, 12, 13}; String [] article_title_local = {"A10", "A11", "A12", "A13"}; article_id = article_id_local; article_title = article_title_local; int [][] edge_category2category_local = { {8, 9}, {8, 1}, {8,2}, {9, 1}, {9,2}, {9,3}, {1, 0}, {1,2} }; int [][] edge_category2article_local = { {0, 10}, {1,11}, {2,12}, {3,13} }; edge_category2category = edge_category2category_local; edge_category2article = edge_category2article_local; setupArticlesAndCategories(); } public void setupArticlesAndCategories() { int i, s; Iterator it; categories = new HashMap<Integer, Category>(); for (i=0; i<category_id.length; i++) { Category c = new Category(); c.page_id = category_id [i]; c.page_title = category_title [i]; categories.put(c.page_id, c); } articles = new HashMap<Integer, Article>(); for (i=0; i<article_id.length; i++) { Article a = new Article(); a.page_id = article_id [i]; a.page_title = article_title [i]; articles.put(a.page_id, a); } // fill id_categories of articles { Map<Integer,List<Integer>> m = new HashMap<Integer,List<Integer>>(); // 1. fill m for (i=0; i<edge_category2article.length; i++) { s = edge_category2article[i][1]; if(!m.containsKey(s)) m.put(s, new ArrayList<Integer>()); m.get(s).add(edge_category2article[i][0]); } // 2. copy m to a.id_categories for(it = m.keySet().iterator(); it.hasNext();) { Integer id = (Integer)it.next(); List<Integer> l = m.get(id); Article a = articles.get(id); a.id_categories = new int[l.size()]; for(i=0; i<l.size(); i++) { a.id_categories[i] = l.get(i); } } } // fill links_in and links_out of categories Map<Integer,List<Integer>> m_out = new HashMap<Integer,List<Integer>>(); Map<Integer,List<Integer>> m_in = new HashMap<Integer,List<Integer>>(); for (i=0; i<edge_category2category.length; i++) { //Category c; //c = categories.get( edge_category2category[i][0] ); // source category //c . links_out.add( edge_category2category[i][1] ); s = edge_category2category[i][0]; if(!m_out.containsKey(s)) m_out.put(s, new ArrayList<Integer>()); m_out.get(s).add(edge_category2category[i][1]); //c = categories.get( edge_category2category[i][1] ); // destination category //c . links_in.add( edge_category2category[i][0] ); s = edge_category2category[i][1]; if(!m_in.containsKey(s)) m_in.put(s, new ArrayList<Integer>()); m_in.get(s).add(edge_category2category[i][0]); } for(it = m_out.keySet().iterator(); it.hasNext();) { Integer id = (Integer)it.next(); List<Integer> l = m_out.get(id); Category c = categories.get(id); c.links_out = new int[l.size()]; for(i=0; i<l.size(); i++) { c.links_out[i] = l.get(i); } } for(it = m_in.keySet().iterator(); it.hasNext();) { Integer id = (Integer)it.next(); List<Integer> l = m_in.get(id); Category c = categories.get(id); c.links_in = new int[l.size()]; for(i=0; i<l.size(); i++) { c.links_in[i] = l.get(i); } } } }