/* * CategorylinksTest.java * JUnit based test */ package wikokit.base.wikipedia.sql; //import wikipedia.kleinberg.SessionHolder; import wikokit.base.wikipedia.sql.PageNamespace; import wikokit.base.wikipedia.sql.Categorylinks; import wikokit.base.wikipedia.sql.PageTableBase; import wikokit.base.wikipedia.sql.Connect; import wikokit.base.wikipedia.language.Encodings; import wikokit.base.wikipedia.language.LanguageType; import junit.framework.*; import java.sql.*; import java.util.*; public class CategorylinksTest extends TestCase { public Connect connect, connect_ru, connect_simple; static String cat1, art1, subcategory1; static int cat1_id, art1_id, subcategory1_id; public CategorylinksTest(String testName) { super(testName); } protected void setUp() throws Exception { connect_simple = new Connect(); connect_simple.Open(Connect.WP_HOST,Connect.WP_SIMPLE_DB, Connect.WP_USER, Connect.WP_PASS, LanguageType.simple); connect = new Connect(); connect.Open(Connect.WP_HOST, Connect.WP_DB, Connect.WP_USER, Connect.WP_PASS, LanguageType.en); connect_ru = new Connect(); connect_ru.Open(Connect.WP_RU_HOST,Connect.WP_RU_DB,Connect.WP_RU_USER,Connect.WP_RU_PASS, LanguageType.ru); // simple WP cat1 = connect_simple.enc.EncodeFromJava("Folklore"); art1 = connect_simple.enc.EncodeFromJava("Ghost_light"); subcategory1 = connect_simple.enc.EncodeFromJava("Superstitions"); cat1_id = PageTableBase.getIDByTitleNamespace(connect_simple, cat1, PageNamespace.CATEGORY); // 41712 art1_id = PageTableBase.getIDByTitleNamespace(connect_simple, art1, PageNamespace.MAIN); // 50387 subcategory1_id = PageTableBase.getIDByTitleNamespace(connect_simple, subcategory1, PageNamespace.CATEGORY); } protected void tearDown() throws Exception { connect_simple.Close(); connect.Close(); connect_ru.Close(); } public static Test suite() { TestSuite suite = new TestSuite(CategorylinksTest.class); return suite; } /** Assert (The table categorylinks should not be empty.) */ public void testCountCategoryLinksEn () { int links = Categorylinks.countCategoryLinks (connect); assertTrue (links >= 542368); // 542368 } public void testCountCategoryLinksRu () { int links_ru = Categorylinks.countCategoryLinks (connect_ru); assertTrue (links_ru >= 218694); // 218694 // 37000 } /** Assert: category "Folklore" has the article "Ghost_light" in Simple WP. */ public void testGetArticlesIDSubcategoryIDByCategoryTitle_simple() { System.out.println("testGetArticlesIDSubcategoryIDByCategoryTitle_simple"); String category_title = "Folklore"; List<Integer> res = Categorylinks.getArticlesIDSubcategoryIDByCategoryTitle(connect_simple, category_title); assertTrue(null != res); assertTrue( 0 < res.size()); boolean b_contains_article = false; boolean b_contains_subcategory = false; for(int id:res) { String s = PageTableBase.getTitleByID(connect_simple, id); if(s.equalsIgnoreCase(art1)) { b_contains_article = true; } if(s.equalsIgnoreCase(subcategory1)) { b_contains_subcategory = true; } } assertTrue(b_contains_article); assertTrue(b_contains_subcategory); } /** Assert: category "Астрономия" has the article "Спутник" in Russian WP. */ public void testGetArticlesIDSubcategoryIDByCategoryTitle_ru() { System.out.println("testGetArticlesIDSubcategoryIDByCategoryTitle_ru"); String article_title = connect_ru.enc.EncodeFromJava("Спутник"); String category_title = connect_ru.enc.EncodeFromJava("Астрономия"); List<Integer> res = Categorylinks.getArticlesIDSubcategoryIDByCategoryTitle(connect_ru, category_title); assertTrue(null != res); assertTrue( 0 < res.size()); boolean b = false; for(int id:res) { String s = PageTableBase.getTitleByID(connect_ru, id); if(s.equalsIgnoreCase(article_title)) { b = true; break; } } assertTrue(b); } /** Test of GetCategoryTitleByArticleID method, of class wikipedia.Categorylinks. * SQL: SELECT cl_to FROM categorylinks WHERE cl_from = 14946; // cur_id of "Контрабас" (page_id=12097) * * Table categorylinks * cl_from cl_to SQL * 12097 Питание SELECT cl_to FROM categorylinks WHERE cl_from = 12097; * 14946 Смычковые_инструменты SELECT cl_to FROM categorylinks WHERE cl_from = 14946; * 22614 Поэты_Азербайджана | Cleanup SELECT cl_to FROM categorylinks WHERE cl_from = 22614; * * Table page * page_id page_title SQL * 12097 Контрабас SELECT page_id FROM page WHERE page_title = "Контрабас"; * 22614 Смычковые_инструменты SELECT page_id FROM page WHERE page_title = "Смычковые_инструменты"; */ public void testGetCategory_ru() { System.out.println("testGetCategory_ru"); int page_id, i, j; String[] page_title = {"Контрабас", "1917_год", "Литература"}; String[][] should_be_category = {{"Контрабас", "Незавершённые_статьи_о_музыке"}, {"1917_год"}, {"Литература", "Незавершённые_статьи_о_литературе"}}; int categories_max_steps = 99; Encodings e = connect_ru.enc; for(i=0; i<page_title.length; i++) { page_id = connect_ru.page_table.getIDByTitle(connect_ru, e.EncodeFromJava(page_title[i])); //String latin1 = Encodings.UTF8ToLatin1(page_title[i]); //page_id = connect_ru.page_table.GetIDByTitle(connect_ru, latin1); String[] categories = Categorylinks.GetCategoryTitleByArticleID(connect_ru, page_id); assertFalse (null == categories); for (j=0; j<categories.length; j++) { assert(0==categories[j].compareTo (should_be_category[i][j])); } } // english //Directed_acyclic_graph // session.connect = connect; } /* public void testgetFirstLevel_ru() { System.out.println("testgetFirstLevel_ru"); int n_limit, page_id; String page_title, categories[], result, latin1, category, category2; List<Integer> first_level_categories; Encodings e = connect.enc; // Russian // "Домра" has categories: // "Щипковые музыкальные инструменты" // "Музыкальные инструменты народов России" List<String> domra_categories = new ArrayList<String>(); domra_categories.add( e.EncodeFromJava("Музыкальные_инструменты_народов_России")); domra_categories.add( e.EncodeFromJava("Щипковые_музыкальные_инструменты") ); page_title = e.EncodeFromJava("Домра"); page_id = PageTableBase.getIDByTitle(connect_ru, page_title); first_level_categories = Categorylinks.getFirstLevel(connect_ru, page_id); assertTrue(2 == first_level_categories.size()); category = PageTableBase.getTitleByID(connect_ru, first_level_categories.get(0)); assertTrue(domra_categories.contains(category)); category = PageTableBase.getTitleByID(connect_ru, first_level_categories.get(1)); assertTrue(domra_categories.contains(category)); }*/ }