package wikipedia.sql;

import wikipedia.language.Encodings;
import wikipedia.kleinberg.*;
import wikipedia.data.ArticleIdAndTitle;
import wikipedia.util.*;
import junit.framework.*;
import java.util.*;

/** Integration tests for {@link Links}. These tests open live MySQL
 * connections to an English and a Russian wiki database in setUp(), so they
 * require those databases to be reachable and populated.
 */
public class LinksTest extends TestCase {
    public Connect connect, connect_ru;
    // NOTE(review): 'links' is never assigned in setUp(); calls such as
    // links.getLFromByLTo(...) only work because the same methods are also
    // invoked statically (Links.getLFromByLTo) elsewhere in this file,
    // which suggests they are static — confirm in Links.java.
    Links links;
    // Fixture node arrays built in setUp(); 'redirect_nodes2' appears unused
    // in the visible portion of this file.
    Article[] source_nodes, t1_tredirect_nodes, redirect_nodes2, z2_array, z_redirect_z2, y_redirect_y2, z_redirect_z2_y_redirect_y2;
    SessionHolder session;
    int categories_max_steps;
    // Redirect chains used as fixtures (title and resolved page id pairs):
    // t1 -> t_redirect -> t2, s1 -> s_redirect -> s2, etc.
    static String t1, t_redirect, t2, tr1, tr2;
    static int t1_id, t_redirect_id, t2_id;
    static String s1, s_redirect, s2;
    static int s1_id, s_redirect_id, s2_id;
    static String z1, z_redirect, z2;
    static int z1_id, z_redirect_id, z2_id;
    static String y1, y_redirect, y2;
    static int y1_id, y_redirect_id, y2_id;
    static String title_from, title_to;
    static int id_from, id_to;
    static Article[] a1_to;
    // Timing fields used only by the commented-out speed test below.
    private long t_start, t_end;
    private float t_work;
    public List<String> ru_local_blacklist;
    private final String[] black_array_ru = {"Персоналии_по_алфавиту"};
    // m_out - local map<title of article, list of titles links_out>
    // m_in  - local map<title of article, list of titles links_in>
    private Map<String,Set<String>> m_out = new HashMap<String,Set<String>>();
    private Map<String,Set<String>> m_in = new HashMap<String,Set<String>>();

    public LinksTest(String testName) {
        super(testName);
    }

    /** Opens both wiki connections and builds the shared fixture titles,
     * page ids and Article arrays used by the tests below. Titles are passed
     * through connect.enc.EncodeFromJava() before being used as DB keys —
     * presumably to match the database's character encoding (see the
     * commented-out latin1 connection string); confirm in Encodings.
     */
    protected void setUp() throws java.lang.Exception {
        connect = new Connect();
        //connect.Open("localhost", "enwiki?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&useUnbufferedInput=false", "javawiki", "");
        connect.Open(Connect.WP_HOST, Connect.WP_DB, Connect.WP_USER, Connect.WP_PASS);
        connect_ru = new Connect();
        //connect_ru.Open("localhost", "ruwiki?useUnicode=false&characterEncoding=ISO8859_1&autoReconnect=true&useUnbufferedInput=false", "javawiki", ""); //Java:MySQL ISO8859_1:latin1
        connect_ru.Open(Connect.WP_RU_HOST, Connect.WP_RU_DB, Connect.WP_RU_USER, Connect.WP_RU_PASS);
        tr1 = connect.enc.EncodeFromJava("Трансформеры");
        tr2 = connect.enc.EncodeFromJava("Робот_(значения)");
        source_nodes = new Article[2];
        source_nodes[0] = new Article();
        source_nodes[1] = new Article();
        source_nodes[0].page_title = tr1;
        source_nodes[1].page_title = tr2;
        source_nodes[0].page_id = PageTable.getIDByTitleNamespace(connect_ru, tr1, PageNamespace.MAIN); //18991;
        source_nodes[1].page_id = PageTable.getIDByTitleNamespace(connect_ru, tr2, PageNamespace.MAIN);//22233;
        t1 = connect.enc.EncodeFromJava("Джемини");
        t_redirect = connect.enc.EncodeFromJava("MIT");
        t2 = connect.enc.EncodeFromJava("Массачусетсский_технологический_институт");
        t1_id = PageTable.getIDByTitleNamespace(connect_ru, t1, PageNamespace.MAIN);
        t_redirect_id = PageTable.getIDByTitleNamespace(connect_ru, t_redirect, PageNamespace.MAIN);
        t2_id = PageTable.getIDByTitleNamespace(connect_ru, t2, PageNamespace.MAIN);
        // Redirect chain: Джемини -> MIT -> Массачусетсский технологический институт
        t1_tredirect_nodes = new Article[2];
        t1_tredirect_nodes[0] = new Article();
        t1_tredirect_nodes[0].page_title = t1;
        t1_tredirect_nodes[0].page_id = t1_id;
        t1_tredirect_nodes[1] = new Article();
        t1_tredirect_nodes[1].page_title = t_redirect;
        t1_tredirect_nodes[1].page_id = t_redirect_id;
        s1 = connect.enc.EncodeFromJava("Польская_Википедия");
        s_redirect = connect.enc.EncodeFromJava("Бот_(программа)");
        s2 = connect.enc.EncodeFromJava("Робот_(программа)");
        s1_id = PageTable.getIDByTitleNamespace(connect_ru, s1, PageNamespace.MAIN);
        s_redirect_id = PageTable.getIDByTitleNamespace(connect_ru, s_redirect, PageNamespace.MAIN);
        s2_id = PageTable.getIDByTitleNamespace(connect_ru, s2, PageNamespace.MAIN);
        // rare words:
        // Линеал z1 -> ЯПФ z_redirect -> Ярусно-параллельная_форма_графа z2
        // Мультивселенная y1 -> Кот_Шредингера y_redirect -> Кот_Шрёдингера y2
        z1 = connect.enc.EncodeFromJava("Линеал");
        z_redirect = connect.enc.EncodeFromJava("ЯПФ");
        z2 = connect.enc.EncodeFromJava("Ярусно-параллельная_форма_графа");
        z1_id = PageTable.getIDByTitleNamespace(connect_ru, z1, PageNamespace.MAIN);
        z_redirect_id = PageTable.getIDByTitleNamespace(connect_ru, z_redirect, PageNamespace.MAIN);
        z2_id = PageTable.getIDByTitleNamespace(connect_ru, z2, PageNamespace.MAIN);
        y1 = connect.enc.EncodeFromJava("Мультивселенная");
        y_redirect = connect.enc.EncodeFromJava("Кот_Шредингера");
        y2 = connect.enc.EncodeFromJava("Кот_Шрёдингера");
        y1_id = PageTable.getIDByTitleNamespace(connect_ru, y1, PageNamespace.MAIN);
        y_redirect_id = PageTable.getIDByTitleNamespace(connect_ru, y_redirect, PageNamespace.MAIN);
        y2_id = PageTable.getIDByTitleNamespace(connect_ru, y2, PageNamespace.MAIN);
        // z2_array: the redirect target alone
        z2_array = new Article[1];
        z2_array[0] = new Article();
        z2_array[0].page_id = z2_id;
        z2_array[0].page_title = z2;
        // z_redirect_z2, y_redirect_y2, z2_y2: redirect + target pairs
        z_redirect_z2 = new Article[2];
        z_redirect_z2[0] = new Article();
        z_redirect_z2[0].page_id = z_redirect_id;
        z_redirect_z2[0].page_title = z_redirect;
        z_redirect_z2[1] = new Article();
        z_redirect_z2[1].page_id = z2_id;
        z_redirect_z2[1].page_title = z2;
        y_redirect_y2 = new Article[2];
        y_redirect_y2[0] = new Article();
        y_redirect_y2[0].page_id = y_redirect_id;
        y_redirect_y2[0].page_title = y_redirect;
        y_redirect_y2[1] = new Article();
        y_redirect_y2[1].page_id = y2_id;
        y_redirect_y2[1].page_title = y2;
        // Both pairs concatenated: [z_redirect, z2, y_redirect, y2]
        z_redirect_z2_y_redirect_y2 = new Article[4];
        z_redirect_z2_y_redirect_y2[0] = new Article();
        z_redirect_z2_y_redirect_y2[0].page_id = z_redirect_id;
        z_redirect_z2_y_redirect_y2[0].page_title = z_redirect;
        z_redirect_z2_y_redirect_y2[1] = new Article();
        z_redirect_z2_y_redirect_y2[1].page_id = z2_id;
        z_redirect_z2_y_redirect_y2[1].page_title = z2;
        z_redirect_z2_y_redirect_y2[2] = new Article();
        z_redirect_z2_y_redirect_y2[2].page_id = y_redirect_id;
        z_redirect_z2_y_redirect_y2[2].page_title = y_redirect;
        z_redirect_z2_y_redirect_y2[3] = new Article();
        z_redirect_z2_y_redirect_y2[3].page_id = y2_id;
        z_redirect_z2_y_redirect_y2[3].page_title = y2;
        session = new SessionHolder();
        session.initObjects();
        categories_max_steps = 99;
        // a1_to = Робот
        // Трансформеры -> Робот
        session.connect = connect_ru;
        title_from = session.connect.enc.EncodeFromJava("Трансформер"); // Transformers (toyline)
        title_to = session.connect.enc.EncodeFromJava("Робот"); // Robot
        id_from = PageTable.getIDByTitleNamespace(connect_ru, title_from, PageNamespace.MAIN);
        id_to = PageTable.getIDByTitleNamespace(connect_ru, title_to, PageNamespace.MAIN);
        a1_to = new Article[1];
        a1_to[0] = new Article();
        a1_to[0].page_id = id_to;
        a1_to[0].page_title = title_to;
        // Russian category blacklist, encoded for the ru connection.
        ru_local_blacklist = new ArrayList<String>();
        for(int i=0; i<black_array_ru.length; i++) {
            ru_local_blacklist.add(session.connect.enc.EncodeFromJava(black_array_ru[i]));
        }
    }

    /** Closes both database connections opened in setUp(). */
    protected void tearDown() throws java.lang.Exception {
        connect.Close();
        connect_ru.Close();
    }

    public static junit.framework.Test suite() {
        junit.framework.TestSuite suite = new junit.framework.TestSuite(LinksTest.class);
        return suite;
    }

    /** Checks that getLinksSQL honors the n_limit cap on the number of
     * returned articles for a raw pagelinks query.
     */
    public void testGetLinksSQL_ru() {
        System.out.println("testGetLinksSQL_ru");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(false);
        session.randomPages(false);
        String title_from = "Трансформеры"; // Transformers (toyline)
        String title_to = "Робот"; // Robot
        title_from = session.connect.enc.EncodeFromJava(title_from);
        title_to = session.connect.enc.EncodeFromJava(title_to);
        int id_to = PageTable.getIDByTitleNamespace(connect_ru, title_to, PageNamespace.MAIN);
        String str_sql_count_size, str_sql;
        str_sql_count_size = "SELECT COUNT(pl_from) AS size FROM pagelinks WHERE pl_title='Gettext' AND pl_namespace = 0";
        str_sql = "SELECT pl_from FROM pagelinks WHERE pl_title='Gettext' AND pl_namespace = 0";
        int n_limit = 2;
        Article[] result = Links.getLinksSQL(session, str_sql_count_size, str_sql, n_limit);
        assertTrue(result.length <= 2);
    }

    /** test that redirects of articles in 'map_id_article_exist' is updated
     * in createArticlesByIdAndTitleTo() */
    public void
testCreateArticlesByIdAndTitle_ru() {
        System.out.println("testCreateArticlesByIdAndTitle_ru");
        // Synthetic (non-DB) fixture: cat -> redirect_to_dog -> dog.
        // Negative id marks the redirect page (mirrors the t_redirect_id
        // convention used elsewhere in this class — confirm in Links).
        String x1, x_redirect, x2;
        x1 = "cat"; x_redirect = "redirect_to_dog"; x2 = "dog";
        int x1_id, x_redirect_id, x2_id;
        x1_id = 101; x_redirect_id = -102; x2_id = 103;
        Article[] article_exist = new Article[1];
        article_exist[0] = new Article();
        article_exist[0].page_title = x2;
        article_exist[0].page_id = x2_id;
        Map<Integer, Article> map_id_article_exist = Article.createMapIdToArticleWithoutRedirects (article_exist);
        // x1 -> x_redirect -> x2
        ArticleIdAndTitle[] aid_array = new ArticleIdAndTitle[2];
        aid_array[0] = new ArticleIdAndTitle(x_redirect_id, x_redirect);
        aid_array[1] = new ArticleIdAndTitle(x2_id, x2);
        Links.addTitlesToMaps(x1, x_redirect, m_out, m_in);
        Links.addTitlesToMaps(x_redirect, x2, m_out, m_in);
        assertEquals(0, map_id_article_exist.get(x2_id).redirect.size());
        List<Article> result = Links.createArticlesByIdAndTitle(
            session, aid_array, map_id_article_exist, -1, m_out, m_in);
        // result = aid_array[x_redirect, x2] - map_id_article_exist{x2} = x_redirect
        assertEquals(1, result.size());
        assertEquals(x_redirect, result.get(0).page_title);
        // test that redirects of articles in 'map_id_article_exist' is updated
        // i.e. test copy:
        //assertEquals(1, map_id_article_exist.get(x2_id).redirect.size());
        //assertEquals(x_redirect, map_id_article_exist.get(x2_id).redirect.get(0).title);
    }

    /** Checks getFromByTitleTo: the n_limit cap is honored, an unlimited
     * query returns more rows, and the known linking article is present.
     */
    public void testGetFromByTitleTo_ArticleIdAndTitle_ru() {
        System.out.println("getFromByTitleTo_ArticleIdAndTitle_ru");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(true);
        session.randomPages(false);
        // a1_to = Робот
        // Трансформеры -> Робот
        ArticleIdAndTitle[] to = new ArticleIdAndTitle[1];
        to[0] = new ArticleIdAndTitle();
        to[0].title = title_to;
        to[0].id = id_to;
        //int root_set_size = -1;
        int n_limit = 4;
        ArticleIdAndTitle[] aid = Links.getFromByTitleTo(session, title_to, PageNamespace.MAIN, n_limit);
        assertTrue( null != aid );
        assertEquals(4, aid.length);
        // -1 means "no limit": expect strictly more results than the capped call.
        aid = Links.getFromByTitleTo(session, title_to, PageNamespace.MAIN, -1);
        assertTrue(aid.length > 4);
        assertTrue(ArticleIdAndTitle.getTitles(aid).contains(title_from));
    }

    // test Qur'an — a title containing an apostrophe, which must survive
    // SQL quoting inside getFromByTitleTo.
    public void testGetFromByTitleTo_apostrophe_en() {
        System.out.println("getFromByTitleTo_apostrophe_en");
        session.Init(connect, null, categories_max_steps);
        session.skipTitlesWithSpaces(true);
        session.randomPages(false);
        String title = connect.enc.EncodeFromJava("Qur'an");
        int id = PageTable.getIDByTitleNamespace(connect, title, PageNamespace.MAIN);
        ArticleIdAndTitle[] to = new ArticleIdAndTitle[1];
        to[0] = new ArticleIdAndTitle();
        to[0].title = title;
        to[0].id = id;
        int n_limit = 4;
        ArticleIdAndTitle[] aid = Links.getFromByTitleTo(session, title, PageNamespace.MAIN, n_limit);
        assertTrue( null != aid );
        assertEquals(4, aid.length);
    }

    /** Placeholder: randomness of getFromByTitleTo is not tested yet. */
    public void testGetFromByTitleTo_ArticleIdAndTitle_check_randomness_todo() {
        System.out.println("getFromByTitleTo_ArticleIdAndTitle_check_randomness_todo");
        // todo
        // ...
        fail("todo");
    }

    /**
     * Test of GetLFromByLTo method, of class wikipedia.Links.
     */
    public void testGetLFromByLTo_ru() {
        System.out.println("getLFromByLTo_ru");
        // page_id  page_title
        // 10484    Робот
        //
        // new Mediawiki 1.5
        // page.page_title === pagelinks.pl_title
        // SELECT page_title FROM page WHERE page_id=10484 AND page_namespace = 0;
        // SELECT pl_from FROM pagelinks WHERE pl_title IN (SELECT page_title FROM page WHERE page_id=10484 AND page_namespace = 0) AND pl_namespace = 0;
        // old 1.4
        // SELECT l_from FROM links WHERE l_to=10484;
        // SELECT page_id FROM cur WHERE page_namespace = 0 AND page_id IN (SELECT l_from FROM links WHERE l_to=10484);
        //
        // OUT: 22233, 18991    6.50 sec
        // id Трансформеры     -> id Робот
        // id Робот_(значения) -> id Робот
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(true);
        session.randomPages(false);
        // a1_to = Робот
        // Трансформеры -> Робот
        //int root_set_size = -1;
        int inc = -1; // 4;
        int base_set_size = -1; // 1;
        m_out.clear(); m_in.clear();
        Article[] nodes = Links.getLFromByLTo(session, a1_to, inc, base_set_size, m_out, m_in);
        assertTrue(0 <= nodes.length); // 13 35
        assertTrue(Article.ContainTitle(nodes, title_from));
        // Every returned node must be reachable by title in the map
        // (i.e. no duplicates hidden behind redirects).
        Map<String, Article> m = Article.createMapTitleToArticleWithoutRedirects(nodes);
        for(Article n:nodes) {
            assertTrue(m.containsKey(n.page_title));
        }
    }

    /** Checks getLFromByLTo with two target nodes: the union of incoming
     * links must contain the three known linking articles.
     */
    public void testGetLFromByLToNodes_ru() {
        System.out.println("GetLFromByLToNodes_ru");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(false);
        session.randomPages(false);
        // SELECT pl_from FROM pagelinks WHERE pl_title IN (SELECT page_title FROM page WHERE page_id=10484 AND page_namespace = 0) AND pl_namespace = 0;
        // new 1.5
        // SELECT page_title FROM page WHERE (page_id=18991 OR page_id=22233) AND page_namespace = 0;
        //   out: Трансформеры, Робот_(значения)
        //
        // SELECT pl_from FROM pagelinks WHERE pl_title IN (SELECT page_title FROM page WHERE (page_id=18991 OR page_id=22233) AND page_namespace = 0) AND pl_namespace = 0;
        // speed up by splitting:
        // SELECT DISTINCT pl_from FROM pagelinks WHERE (pl_title='Трансформеры' OR pl_title='Робот_(значения)') AND pl_namespace = 0;
        //   out: 25 rows in set (0.06 sec)
        //
        // old 1.4
        // SELECT page_id FROM cur WHERE page_namespace = 0 AND page_id IN (SELECT l_from FROM links WHERE l_to IN (18991, 22233));
        //
        // OUT: 16482, 10484
        //
        // title_from:                        title_to:
        // NNNNN Разряд_(персонаж_мультфильма) -> 18991 Трансформеры
        // 41606 Сайкил                        -> 18991 Трансформеры
        // NNNNN Робот                         -> 18991 Трансформеры
        //
        // 10484 Робот                         -> NNNNN Робот_(значения)
        //
        String title_from1 = session.connect.enc.EncodeFromJava("Разряд_(персонаж_мультфильма)");
        String title_from2 = session.connect.enc.EncodeFromJava("Робот");
        String title_from3 = session.connect.enc.EncodeFromJava("Сайкил");
        String title_to1 = session.connect.enc.EncodeFromJava("Трансформеры"); // Transformers (toyline)
        String title_to2 = session.connect.enc.EncodeFromJava("Робот_(значения)"); // Robot (disambiguation)
        //String title_to2 = session.connect.enc.EncodeFromJava("Самолёт");
        int id_from1 = PageTable.getIDByTitleNamespace(connect_ru, title_from1, PageNamespace.MAIN);
        int id_from2 = PageTable.getIDByTitleNamespace(connect_ru, title_from2, PageNamespace.MAIN);
        int id_from3 = PageTable.getIDByTitleNamespace(connect_ru, title_from3, PageNamespace.MAIN);
        int id_to1 = PageTable.getIDByTitleNamespace(connect_ru, title_to1, PageNamespace.MAIN);
        int id_to2 = PageTable.getIDByTitleNamespace(connect_ru, title_to2, PageNamespace.MAIN);
        Article[] n_to = new Article[2];
        n_to[0] = new Article();
        n_to[0].page_id = id_to1;
        n_to[0].page_title = title_to1;
        n_to[1] = new Article();
        n_to[1].page_id = id_to2;
        n_to[1].page_title = title_to2;
        m_out.clear(); m_in.clear();
        Article[] nodes = links.getLFromByLTo(session, n_to, -1, -1, m_out, m_in);
        assertTrue(27 <= nodes.length); // 27 31
        // check that links to Robot contains three articles with id_from1, 2, and 3
        assertTrue(Article.ContainID(nodes, id_from1));
        assertTrue(Article.ContainID(nodes, id_from2));
        assertTrue(Article.ContainID(nodes, id_from3));
    }

    /** Checks correct treating of redirect pages for getLFromByLTo by
     * testing with rare words.
     */
    public void testGetLFromByLToNodes_z_redirect_z2_ru() {
        System.out.println("testGetLFromByLTo_redirects_ru");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(false);
        session.randomPages(false);
        // rare words
        // Линеал z1 -> ЯПФ z_redirect -> Ярусно-параллельная_форма_графа z2
        // Мультивселенная y1 -> Кот_Шредингера y_redirect -> Кот_Шрёдингера y2
        // Source: ЯПФ z_redirect -> Ярусно-параллельная_форма_графа z2
        // Result: Линеал z1 -> Ярусно-параллельная_форма_графа z2
        // z_redirect_z2, y_redirect_y2, z_redirect_z2_y_redirect_y2
        m_out.clear(); m_in.clear();
        assertEquals(0, z_redirect_z2[1].redirect.size());
        // z_redirect_z2[1].page_title = z2;
        Article[] nodes = Links.getLFromByLTo(session, z_redirect_z2, -1, -1, m_out, m_in);
        // The redirect must be folded into the target article's redirect list.
        assertEquals(1, z_redirect_z2[1].redirect.size());
        assertEquals(z_redirect, z_redirect_z2[1].redirect.get(0).title);
        // checks nodes
        Article a1;
        Map<String, Article> from_nodes = Article.createMapTitleToArticleWithoutRedirects(nodes);
        assertTrue(from_nodes.containsKey(z1)); // +
        assertFalse(from_nodes.containsKey(z_redirect)); // -
        a1 = from_nodes.get(z1);
        assertTrue(a1 != null);
        assertEquals(0, a1.redirect.size());
        // checks m_out and m_in: the redirect title must appear nowhere.
        assertTrue(m_in.containsKey(z2));
        Set<String> s = m_in.get(z2);
        assertTrue(s.contains(z1));
        assertFalse(s.contains(z_redirect));
        assertFalse(m_out.containsKey(z_redirect));
        assertFalse(m_in.containsKey(z_redirect));
        assertTrue(m_out.containsKey(z1));
        s = m_out.get(z1);
        assertTrue(s.contains(z2));
        assertFalse(s.contains(z_redirect));
    }

    /** Checks correct treating of redirect pages for getLFromByLTo by
     * testing with rare words.
     */
    public void testGetLFromByLToNodes_z2_ru() {
        System.out.println("testGetLFromByLToNodes_z2_ru");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(false);
        session.randomPages(false);
        // Source: ЯПФ z_redirect -> Ярусно-параллельная_форма_графа z2
        // Result: Линеал z1 -> Ярусно-параллельная_форма_графа z2
        // z2_array
        m_out.clear(); m_in.clear();
        assertEquals(0, z2_array[0].redirect.size());
        // z2_array[0].page_title = z2;
        Article[] nodes = Links.getLFromByLTo(session, z2_array, -1, -1, m_out, m_in);
        // Even when only the target is passed, its redirect must be discovered
        // and attached to the target article.
        assertEquals(1, z2_array[0].redirect.size());
        assertEquals(z_redirect, z2_array[0].redirect.get(0).title);
        // checks nodes
        Article a1;
        Map<String, Article> from_nodes = Article.createMapTitleToArticleWithoutRedirects(nodes);
        assertTrue(from_nodes.containsKey(z1)); // +
        assertFalse(from_nodes.containsKey(z_redirect)); // -
        a1 = from_nodes.get(z1);
        assertEquals(0, a1.redirect.size());
        // checks m_out and m_in
        assertTrue(m_in.containsKey(z2));
        Set<String> s = m_in.get(z2);
        assertTrue(s.contains(z1));
        assertFalse(s.contains(z_redirect));
        assertFalse(m_out.containsKey(z_redirect));
        assertFalse(m_in.containsKey(z_redirect));
        assertTrue(m_out.containsKey(z1));
        s = m_out.get(z1);
        assertTrue(s.contains(z2));
        assertFalse(s.contains(z_redirect));
    }

    /** Same redirect-resolution check as above, for the y chain
     * (Кот_Шредингера -> Кот_Шрёдингера).
     */
    public void testGetLFromByLToNodes_y_redirect_y2_ru() {
        System.out.println("testGetLFromByLTo_redirects_ru");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(false);
        session.randomPages(false);
        // Source: Кот_Шредингера y_redirect -> Кот_Шрёдингера y2
        // Result: Мультивселенная y1 -> Кот_Шрёдингера y2
        // z_redirect_z2, y_redirect_y2, z_redirect_z2_y_redirect_y2
        m_out.clear(); m_in.clear();
        Article[] nodes = Links.getLFromByLTo(session, y_redirect_y2, -1, -1, m_out, m_in);
        // checks nodes
        Map<String, Article> from_nodes = Article.createMapTitleToArticleWithoutRedirects(nodes);
        assertTrue(from_nodes.containsKey(y1)); // +
        assertFalse(from_nodes.containsKey(y_redirect)); // -
        Article a1 = from_nodes.get(y1);
        assertTrue(a1 != null);
        // y_redirect_y2 has redirect y_redirect
        assertTrue(y_redirect_y2[1].redirect.size() > 0);
        // checks m_out and m_in
        assertTrue(m_in.containsKey(y2));
        Set<String> s = m_in.get(y2);
        assertTrue(s.contains(y1));
        assertFalse(s.contains(y_redirect));
        assertFalse(m_out.containsKey(y_redirect));
        assertFalse(m_in.containsKey(y_redirect));
        assertTrue(m_out.containsKey(y1));
        s = m_out.get(y1);
        assertTrue(s.contains(y2));
        assertFalse(s.contains(y_redirect));
    }

    /** Combined check: both redirect chains (z and y) passed together must
     * each be resolved independently.
     */
    public void testGetLFromByLToNodes_z_redirect_z2_y_redirect_y2_ru() {
        System.out.println("testGetLFromByLTo_redirects_ru");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(false);
        session.randomPages(false);
        // rare words
        // Линеал z1 -> ЯПФ z_redirect -> Ярусно-параллельная_форма_графа z2
        // Мультивселенная y1 -> Кот_Шредингера y_redirect -> Кот_Шрёдингера y2
        // Source: ЯПФ z_redirect -> Ярусно-параллельная_форма_графа z2
        //         Кот_Шредингера y_redirect -> Кот_Шрёдингера y2
        // Result: Линеал z1 -> Ярусно-параллельная_форма_графа z2
        //         Мультивселенная y1 -> Кот_Шрёдингера y2
        // z_redirect_z2, y_redirect_y2, z_redirect_z2_y_redirect_y2
        m_out.clear(); m_in.clear();
        Article[] nodes = Links.getLFromByLTo(session, z_redirect_z2_y_redirect_y2, -1, -1, m_out, m_in);
        // ///////////////////////////
        // test z_redirect_z2
        // checks nodes
        Map<String, Article> from_nodes = Article.createMapTitleToArticleWithoutRedirects(nodes);
        assertTrue(from_nodes.containsKey(z1)); // +
        assertFalse(from_nodes.containsKey(z_redirect)); // -
        assertEquals(null, from_nodes.get(z2));
        Article a_z2 = z_redirect_z2_y_redirect_y2[1];
        assertTrue(a_z2.redirect.size() > 0);
        // checks m_out and m_in
        assertTrue(m_in.containsKey(z2));
        Set<String> s = m_in.get(z2);
        assertTrue(s.contains(z1));
        assertFalse(s.contains(z_redirect));
        assertFalse(m_out.containsKey(z_redirect));
        assertFalse(m_in.containsKey(z_redirect));
        assertTrue(m_out.containsKey(z1));
        s = m_out.get(z1);
        assertTrue(s.contains(z2));
        assertFalse(s.contains(z_redirect));
        // ///////////////////////////
        // test y_redirect_y2
        // checks nodes
        from_nodes = Article.createMapTitleToArticleWithoutRedirects(nodes);
        assertTrue(from_nodes.containsKey(y1)); // +
        assertFalse(from_nodes.containsKey(y_redirect)); // -
        assertEquals(null, from_nodes.get(y2));
        Article a_y2 = z_redirect_z2_y_redirect_y2[3];
        assertTrue(a_y2.redirect.size() > 0);
        // checks m_out and m_in
        assertTrue(m_in.containsKey(y2));
        s = m_in.get(y2);
        assertTrue(s.contains(y1));
        assertFalse(s.contains(y_redirect));
        assertFalse(m_out.containsKey(y_redirect));
        assertFalse(m_in.containsKey(y_redirect));
        assertTrue(m_out.containsKey(y1));
        s = m_out.get(y1);
        assertTrue(s.contains(y2));
        assertFalse(s.contains(y_redirect));
    }

    // check filtering by category blacklist:
    // Given:
    //  Article "Жуковский,_Николай_Егорович" has category "Персоналии_по_алфавиту"
    //  Article "Жуковский,_Николай Егорович" -> Article "Самолёт"
    // To check:
    //  filter "Жуковский,_Николай_Егорович" by category "Персоналии_по_алфавиту"
    //
    public void testGetLFromByLToNodes_blacklist_ru() {
        System.out.println("testGetLFromByLToNodes_blacklist_ru check filtering by category blacklist");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(false);
        session.randomPages(false);
        String title_from = session.connect.enc.EncodeFromJava("Жуковский,_Николай_Егорович");
        String title_to = session.connect.enc.EncodeFromJava("Самолёт");
        int id_from = PageTable.getIDByTitleNamespace(connect_ru, title_from, PageNamespace.MAIN);
        int id_to = PageTable.getIDByTitleNamespace(connect_ru, title_to, PageNamespace.MAIN);
        Article[] n = new Article[1];
        n[0] = new Article();
        n[0].page_id = id_to;
        n[0].page_title = title_to;
        m_out.clear(); m_in.clear();
        // Pass 1: no blacklist — the blacklisted-category article IS returned.
        Article[] nodes1 = links.getLFromByLTo(session, n, -1, -1, m_out, m_in);
        assertTrue(Article.ContainID(nodes1, id_from));
m_out.clear(); m_in.clear(); //int n_limit = 1; //session.category_black_list.setMaxSteps(n_limit); session.Init(connect, ru_local_blacklist, categories_max_steps); Article[] nodes2 = links.getLFromByLTo(session, n, -1, -1, m_out, m_in); assertFalse(Article.ContainID(nodes2, id_from)); } /** String stupid concatenation: t_work=30.659 * String append by StringBuffer: t_work=0.112 *//* public void testGetTitleToByIDFrom_stringConcatenation_speedup() { System.out.println("testGetTitleToByIDFrom_stringConcatenation_speedup"); session.Init(connect_ru, null, categories_max_steps); session.skipTitlesWithSpaces(false); session.randomPages(false); int len = 100; int[] id_from = new int[len]; for(int i=0;i<len;i++) { id_from[i] = i; } t_start = System.currentTimeMillis(); String[] r = Links.GetTitleToByIDFrom(session, id_from, PageNamespace.MAIN); t_end = System.currentTimeMillis(); t_work = (t_end - t_start)/1000f; // in sec System.out.println("t_work="+t_work); }*/ public void testGetTitleToOneByIDFrom_ru() { System.out.println("testGetTitleToOneByIDFrom_ru"); String result; // Redirects: // t1 -> t_redirect -> t2 // Джемини -> MIT -> Массачусетсский_технологический_институт // 62186 -> -52141 -> 52137 session.Init(connect_ru, null, categories_max_steps); session.skipTitlesWithSpaces(false); session.randomPages(false); result = Links.getTitleToOneByIDFrom(session, t_redirect_id); assertEquals(result, t2); //return null, if ... 
(2) title should be skipped session.skipTitlesWithSpaces(true); result = Links.getTitleToOneByIDFrom(session, t_redirect_id); assertEquals(result, null); } public void testGetTitleToByIDFrom_ru() { System.out.println("testGetTitleToByIDFrom_ru"); // Трансформеры -> // Робот_(значения) -> // -> (page_id) 8110 8647 263 16482 10484 9856 10578 // (page_title) 2005 Анимация Internet Разряд_(персонаж_мультфильма) Робот Робот_(программа) Танец session.Init(connect_ru, null, categories_max_steps); session.skipTitlesWithSpaces(false); session.randomPages(false); String[] title_from={"Трансформеры", "Робот_(значения)"}; title_from[0] = session.connect.enc.EncodeFromJava(title_from[0]); title_from[1] = session.connect.enc.EncodeFromJava(title_from[1]); int[] id_from = new int[2]; id_from[0] = PageTable.getIDByTitleNamespace(connect_ru, title_from[0], PageNamespace.MAIN); id_from[1] = PageTable.getIDByTitleNamespace(connect_ru, title_from[1], PageNamespace.MAIN); String[] r = Links.getTitleToByIDFrom(session, id_from, PageNamespace.MAIN); assertTrue(27 <= r.length); // 17 int[] id_null=null; String[] r_empty = Links.getTitleToByIDFrom(session, id_null, PageNamespace.MAIN); assertTrue(0 == r_empty.length); int[] id_from2={0}; String[] r2 = Links.getTitleToByIDFrom(session, id_from2, PageNamespace.MAIN); assertTrue(0 == r2.length); } /** Checks omitting of articles' titles with spaces (underscores) */ public void testGetTitleToByIDFrom_ru_skipTitlesWithSpaces() { System.out.println("testGetTitleToByIDFrom_ru_skipTitlesWithSpaces"); // Трансформеры -> // Робот_(значения) -> // -> (page_id) 8110 8647 263 16482 10484 9856 10578 // (page_title) 2005 Анимация Internet Разряд_(персонаж_мультфильма) Робот Робот_(программа) Танец session.Init(connect_ru, null, categories_max_steps); session.randomPages(false); String[] title_from={"Трансформер", "Робот_(значения)"}; title_from[0] = session.connect.enc.EncodeFromJava(title_from[0]); title_from[1] = 
session.connect.enc.EncodeFromJava(title_from[1]);
        int[] id_from = new int[2];
        id_from[0] = PageTable.getIDByTitleNamespace(connect_ru, title_from[0], PageNamespace.MAIN);
        id_from[1] = PageTable.getIDByTitleNamespace(connect_ru, title_from[1], PageNamespace.MAIN);
        // With skipping disabled the underscore title must be present ...
        session.skipTitlesWithSpaces(false);
        String[] r1_with_spaces = Links.getTitleToByIDFrom(session, id_from, PageNamespace.MAIN);
        List<String> l1_with_spaces = new ArrayList<String>(); // list l1
        for(String s:r1_with_spaces) { l1_with_spaces.add(s);}
        String sr1 = session.connect.enc.EncodeFromJava("Робот_(программа)");
        assertTrue(l1_with_spaces.contains(sr1));
        //assertTrue(32 <= r.length); // 17
        // ... and with skipping enabled it must be filtered out.
        session.skipTitlesWithSpaces(true);
        String[] r2_without_spaces = Links.getTitleToByIDFrom(session, id_from, PageNamespace.MAIN);
        assertTrue(r1_with_spaces.length > r2_without_spaces.length);
        List<String> l2_without_spaces = new ArrayList<String>();
        for(String s:r2_without_spaces) { l2_without_spaces.add(s);}
        assertFalse(l2_without_spaces.contains(sr1));
    }

    //Article[] base_nodes2 = links.GetLToByLFrom(connect, root_nodes);
    /** Checks getLToByLFrom: outgoing links of the source nodes, redirect
     * targets kept, redirects themselves dropped, and the limit parameter
     * honored.
     */
    public void testGetLToByLFrom_ru() {
        System.out.println("testGetLToByLFrom_ru");
        // new 1.5
        // SELECT pl_title FROM pagelinks WHERE pl_from IN (18991, 22233) AND pl_namespace = 0;
        //   out: 17 rows in set (0.24 sec), e.g.: Бархан_(Soundwave), Комикс
        // foreach pl_title:
        //   PageTable p.GetIDByTitle(pl_title);
        //
        // old 1.4
        // SELECT page_id FROM cur WHERE page_namespace = 0 AND page_id IN (SELECT l_to FROM links WHERE l_from IN (18991, 22233));
        // OUT: 16482, 10484
        //
        // 18991 Трансформеры ->
        // 22233 Робот_(значения) ->
        // -> (page_id)    8110 8647 263 16482 10484 9856 10578
        //    (page_title) 2005 Анимация Internet Разряд_(персонаж_мультфильма) Робот Робот_(программа) Танец
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(false);
        session.randomPages(false);
        m_out.clear(); m_in.clear();
        Article[] nodes = links.getLToByLFrom(session, source_nodes, -1, m_out, m_in);
        assertTrue(18 <= nodes.length); // 11 18 21
        List<Integer> id = new ArrayList<Integer>(nodes.length);
        List<String> title = new ArrayList<String> (nodes.length);
        for(Article a:nodes) { id. add(a.page_id); title.add(a.page_title); }
        assertFalse(id.contains(0));
        // s1 -> s_redirect -> s2
        // Польская_Википедия -> Бот_(программа) -> Робот_(программа)
        assertTrue(id.contains(s2_id)); // + target of the redirect
        assertTrue(title.contains(s2));
        assertFalse(id.contains(s_redirect_id)); // - redirect itself
        assertFalse(title.contains(s_redirect));
        // Limit of 3 caps the result; limit of 0 still returns one node.
        Article[] nodes2 = links.getLToByLFrom(session, source_nodes, 3, m_out, m_in);
        assertTrue(3 >= nodes2.length);
        Article[] nodes3 = links.getLToByLFrom(session, source_nodes, 0, m_out, m_in);
        assertTrue(1 == nodes3.length);
    }

    /** Checks correct treating of redirect pages, e.g.:
     *  Redirect Джемини -> MIT -> Массачусетсский_технологический_институт
     *  Redirect Польская_Википедия -> Бот_(программа) -> Робот_(программа)
     */
    public void testGetLToByLFrom_ru_redirects() {
        System.out.println("testGetLToByLFrom_ru_redirects");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(false);
        session.randomPages(false);
        m_out.clear(); m_in.clear();
        Article[] nodes = links.getLToByLFrom(session, t1_tredirect_nodes, -1, m_out, m_in);
        assertTrue(0 <= nodes.length); // 11 25
        // Redirects: t1 -> t_redirect -> t2
        // Джемини -> MIT -> Массачусетсский_технологический_институт
        // checks nodes
        Article a2;
        Map<String, Article> t1_to_nodes = Article.createMapTitleToArticleWithoutRedirects(nodes);
        assertTrue(t1_to_nodes.containsKey(t2)); // +
        assertFalse(t1_to_nodes.containsKey(t_redirect)); // -
        a2 = t1_to_nodes.get(t2);
        assertTrue(a2.redirect.size() > 0);
        // checks m_out and m_in
        assertTrue(m_out.containsKey(t1));
        Set<String> out1 = m_out.get(t1);
        assertTrue(out1.contains(t2));
        assertFalse(out1.contains(t_redirect));
        assertFalse(m_out.containsKey(t_redirect));
        assertFalse(m_in.containsKey(t_redirect));
        assertTrue(m_in.containsKey(t2));
        Set<String> in2 = m_in.get(t2);
        assertTrue(in2.contains(t1));
        assertFalse(in2.contains(t_redirect));
        // Redirect "Бот_(программа)" -> "Робот_(программа)"
        // todo
    }

    /** Checks correct resolving of redirect pages, with skipSpaces enabled. */
    public void testGetLToByLFrom_ru_redirects__with_skipSpaces() {
        System.out.println("testGetLToByLFrom_ru_redirects__with_skipSpaces");
        session.Init(connect_ru, null, categories_max_steps);
        session.skipTitlesWithSpaces(true);
        session.randomPages(false);
        m_out.clear(); m_in.clear();
        Article[] nodes = links.getLToByLFrom(session, t1_tredirect_nodes, -1, m_out, m_in);
        assertTrue(0 <= nodes.length); // 11 25
        // Redirects: t1 -> t_redirect -> t2
        // Джемини -> MIT -> Массачусетсский_технологический_институт
        // checks nodes: t2 contains an underscore, so with skipping enabled
        // both the redirect and its target must be removed and recorded
        // in session.removed_articles.
        Map<String, Article> t1_to_nodes = Article.createMapTitleToArticleWithoutRedirects(nodes);
        assertFalse(t1_to_nodes.containsKey(t2)); // -
        assertFalse(t1_to_nodes.containsKey(t_redirect)); // -
        assertTrue(session.removed_articles.hasTitle(t_redirect));
        assertTrue(session.removed_articles.hasTitle(t2));
        assertTrue(session.removed_articles.hasId(t_redirect_id));
        assertTrue(session.removed_articles.hasId(t2_id));
        // checks m_out and m_in
        assertTrue(m_out.containsKey(t1));
        Set<String> out1 = m_out.get(t1);
        assertFalse(out1.contains(t2));
        assertFalse(out1.contains(t_redirect));
        assertFalse(m_out.containsKey(t_redirect));
        assertFalse(m_in.
containsKey(t_redirect));
        assertFalse(m_in.containsKey(t2));
    }

    /** Checks correct treating of redirect pages, when source node is redirect
     * itself:
     *  Redirect MIT -> Массачусетсский технологический институт
     *  Redirect "Бот_(программа)" -> "Робот_(программа)"
     */
    public void testGetLToByLFrom_ru_redirects__source_node_todo() {
        System.out.println("testGetLToByLFrom_ru_redirects__source_node_todo");
        fail("todo");
    }

    /** Checks that redirect pages (linked to pages with categories in blacklist)
     * are in blacklist too, e.g.:
     *  1965 -> 1965_год, when blacklist has category: Века
     */
    public void testGetLToByLFrom_redirects_with_BlackList_ru_todo() {
        System.out.println("testGetLToByLFrom_redirects_with_BlackList_ru_todo");
        fail("todo");
    }

    /** Check the Remark of GetLToByLFrom(): "Returns only articles which id are
     * absent in the blacklist."
     */
    public void testGetLToByLFrom_with_BlackList_ru() {
        System.out.println("testGetLToByLFrom_with_BlackList_ru");
        List<String> category_black_list_ru = new ArrayList<String>();
        // Плектр (article) -> Домра (article) -> "Щипковые инструменты" (category)
        // "Домра" has category "Щипковые_музыкальные_инструменты"
        // 1. "Щипковые_музыкальные_инструменты" is not in the blacklist
        category_black_list_ru.add("Foo");
        session.Init(connect_ru, category_black_list_ru, categories_max_steps);
        session.randomPages(false);
        Encodings e = session.connect.enc;
        String title = e.EncodeFromJava("Плектр"); // Plectrum, category:Щипковые_музыкальные_инструменты
        String title2 = e.EncodeFromJava("Домра"); // , category:Щипковые_музыкальные_инструменты
        String category_title = e.EncodeFromJava("Щипковые_музыкальные_инструменты");
        Article[] source_nodes = new Article[1];
        source_nodes[0] = new Article();
        source_nodes[0].page_id = PageTable.getIDByTitle(connect_ru, title);
        source_nodes[0].page_title = title;
        m_out.clear(); m_in.clear();
        Article[] nodes = links.getLToByLFrom(session, source_nodes, -1, m_out, m_in);
        if(0 <= nodes.length) {
            List<String> titles = new ArrayList<String>(nodes.length);
            for(Article a:nodes) { titles.add(a.page_title); }
            assertTrue(titles.contains(title2));
        }
        // 2. "Щипковые_музыкальные_инструменты" is in the blacklist:
        // the article carrying that category must no longer be returned.
        category_black_list_ru.add(category_title);
        //category_black_list_ru.add(latin1_music_instruments);
        session.Init(connect_ru, category_black_list_ru, categories_max_steps);
        session.randomPages(false);
        m_out.clear(); m_in.clear();
        Article[] nodes2 = links.getLToByLFrom(session, source_nodes, -1, m_out, m_in);
        List<String> titles = new ArrayList<String>(nodes2.length);
        for(Article a:nodes2) { titles.add(a.page_title); }
        assertTrue(0 == titles.size() || !titles.contains(title2));
    }

    /** Checks that "The article is omitted if ... (3) article id is in pl_from[].
     */
    public void testGetLToByLFrom_omit_pl_from_ru() {
        System.out.println("testGetLToByLFrom_omit_pl_from_ru");
        List<String> category_black_list_ru = new ArrayList<String>();
        session.Init(connect_ru, null, categories_max_steps);
        session.randomPages(false);
        Encodings e = session.connect.enc;
        String title = e.EncodeFromJava("Робот");
        String title2 = e.EncodeFromJava("Андроид");
        int root_set_size = 200;
        int increment = 1;
        Article[] nodes;
        {
            Article[] a1 = new Article[2];
            a1[0] = new Article();
            a1[0].page_id = PageTable.getIDByTitle(session.connect, title);
            a1[0].page_title = title;
            a1[1] = new Article();
            a1[1].page_id = PageTable.getIDByTitle(session.connect, title2);
            a1[1].page_title = title2;
            Article[] root_nodes = a1;
            // 2.2
            m_out.clear(); m_in.clear();
            Article[] base_nodes2 = Links.getLToByLFrom(session, root_nodes, -1, m_out, m_in);
            nodes = base_nodes2;
        }
        assertTrue(0 <= nodes.length);
        // The source articles themselves must not appear among their own
        // outgoing-link results.
        List<String> titles = new ArrayList<String>(nodes.length);
        for(Article a:nodes) { titles.add(a.page_title); }
        assertFalse(titles.contains(title));
        assertFalse(titles.contains(title2));
    }

    /** Tests the case, when m_in or m_out contains articles which are absent
     * in 'map_title_article'.
*/ public void testGetAllLinksFromNodes_ru() { System.out.println("testGetAllLinksFromNodes_ru"); int categories_max_steps = 99; session.Init(connect_ru, null, categories_max_steps); session.skipTitlesWithSpaces(false); session.randomPages(false); Article t2_node = new Article(); t2_node.page_title = t2; t2_node.page_id = t2_id; // Redirects: t1 -> t_redirect -> t2 // Джемини -> MIT -> Массачусетсский_технологический_институт Map<String, Article> map_t1_t2_to_nodes = Article.createMapTitleToArticleWithoutRedirects(t1_tredirect_nodes); map_t1_t2_to_nodes.put(t2, t2_node); Links.addTitlesToMaps(t1, t_redirect, m_out, m_in); // oooops, it is absent in map_t1_t2_to_nodes Links.addTitlesToMaps(t1, t2, m_out, m_in); // yes, it is presented in map_t1_t2_to_nodes Links.getAllLinksFromNodes(session, map_t1_t2_to_nodes, new Article[0], m_out, m_in); // only t1 -> t2 should be treated // only t1 -> t_redirect should be skipped Article t1_node = t1_tredirect_nodes[0]; assertTrue (null != t1_node.links_out); assertEquals(1, t1_node.links_out.length); assertEquals(null, t1_node.links_in); assertEquals(null, t2_node.links_out); assertEquals(1, t2_node.links_in.length); // t_redirect has no links_out and links_in assertEquals(null, t1_tredirect_nodes[1].links_out); assertEquals(null, t1_tredirect_nodes[1].links_in); } /* // Let's count the number of links in the graph: Трансформеры -> Робот. // =1. 
// NOTE(review): everything below, except the single closing brace of the
// test class, is commented-out dead code (disabled tests and superseded
// SQL helper implementations). Consider deleting it in a cleanup commit;
// it is kept verbatim here for reference.
    public void testGetAllLinks_ru() {
        System.out.println("testGetAllLinks_ru");
        int categories_max_steps = 99;
        session.Init(connect_ru, null, categories_max_steps);
        session.randomPages(false);
        //int increment = 1;
        //Article[] nodes = links.GetLFromByLTo(session, source_nodes, increment);
        //Article[] nodes = links.GetLToByLFrom(session, source_nodes);
        String s;
        s = session.connect.enc.EncodeFromJava("Трансформеры");
        source_nodes[0].page_title = s;
        source_nodes[0].page_id = PageTable.getIDByTitleNamespace(connect_ru, s, PageNamespace.MAIN);
        s = session.connect.enc.EncodeFromJava("Робот");
        source_nodes[1].page_title = s;
        source_nodes[1].page_id = PageTable.getIDByTitleNamespace(connect_ru, s, PageNamespace.MAIN);
        DCEL dcel = new DCEL();
        Article node = new Article();
        Map<Integer, Article> map_id_article = node.createMapIdToArticleWithoutRedirects (source_nodes);
        Map<String, Article> map_title_article = node.createMapTitleToArticleWithoutRedirects(source_nodes);
        int links_begin = dcel.CountLinksIn(map_id_article);
        Links.getAllLinks(session, map_title_article);
        // Трансформеры -> Робот
        assertTrue(1 <= source_nodes[0].links_out.length); // Трансформеры ->
        assertTrue(1 <= source_nodes[1].links_in. length); // -> Робот
        int links_end = DCEL.CountLinksIn(map_id_article);
        assertTrue(links_end >= 1 + links_begin);
    }*/

    /** Example Russian 'domra' (id=749):
     * SELECT COUNT(*) FROM links WHERE l_from=749;
     * Out 10.
    public void testCountLinksFrom() {
        System.out.println("testCountLinksFrom");
        int article_id = 749;
        int i = links.CountLinksFrom(connect_ru, article_id);
        assertTrue(10 <= i);
    }*/

    /** Example Russian 'domra' (id=749):
     * SELECT COUNT(*) FROM links WHERE l_from=749; 6
     * SELECT COUNT(*) FROM links WHERE l_to=749; 3
    public void testCountLinks() {
        int article_id = 749;
        int count_l_from = links.CountLinks(connect_ru, "l_from", article_id);
        int count_l_to = links.CountLinks(connect_ru, "l_to", article_id);
        assertTrue(6 <= count_l_from);
        assertTrue(3 <= count_l_to);
        assertTrue(1 == 0);
    }*/
}

/** Gets article's frequency in the table pagelinks: count(pl_from).
 * Example Russian 'domra' (id=749):
 * SELECT COUNT(*) as size FROM pagelinks WHERE pl_from=749;
 */
/*public static int CountLinksFrom(Connect connect, int article_id) {
    Statement s = null;
    ResultSet rs= null;
    int size = 0;
    String str_sql = null;
    try {
        s = connect.conn.createStatement();
        str_sql = "SELECT COUNT(pl_from) AS size FROM pagelinks WHERE pl_from=" + article_id;
        s.executeQuery(str_sql);
        rs = s.getResultSet();
        if (rs.next()) {
            size = rs.getInt("size");
        }
    } catch(SQLException ex) {
        System.err.println("SQLException (Links.java CountLinks()): sql='" + str_sql + "' " + ex.getMessage());
    } finally {
        if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; }
        if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; }
    }
    return size;
}*/

/* Calculates and set up number of links for each node in hashmap.
public static void CountLinks(Connect connect, HashMap<Integer, Article> nodes) {
    Iterator<Integer> it = nodes.keySet().iterator();
    int size, i;
    while (it.hasNext()) {
        int id = it.next(); // article's id (page_id)
        Article n = nodes.get(id);
        n.count_l_from = n.links_out.length;
        n.count_l_to = n.links_in .length;
        //n.count_l_from = CountLinksFrom(connect, id);
        //n.count_l_to = CountLinksTo (connect, id);
    }
}*/

// Return links list from the table links
/*public static int[] GetIntFromLinks(Connect connect, String field, String str_where_sql) {
    Statement s = null;
    ResultSet rs= null;
    int[] result = null;
    int size, i = 0;
    String str_sql = null;
    try {
        s = connect.conn.createStatement();
        str_sql = "SELECT COUNT(" + field + ") AS size FROM links " + str_where_sql;
        s.executeQuery(str_sql);
        rs = s.getResultSet();
        if (rs.next()) {
            size = rs.getInt("size");
            if (0 < size) {
                result = new int[size];
                s.executeQuery("SELECT " + field + " FROM links " + str_where_sql);
                rs = s.getResultSet();
                while (rs.next()) {
                    result[i++] = rs.getInt(field);
                }
            }
        }
    } catch(SQLException ex) {
        System.err.println("SQLException (Links.java GetIntFromLinks): sql='" + str_sql + "' " + ex.getMessage());
    } finally {
        if (rs != null) { try { rs.close(); } catch (SQLException sqlEx) { } rs = null; }
        if (s != null) { try { s.close(); } catch (SQLException sqlEx) { } s = null; }
    }
    return result;
}*/

/**
 * Get links which refers to the article l_to.
 * (Get only articles links, i.e. page_namespace = 0)
 * SQL: SELECT page_id, page_title FROM page WHERE page_namespace=0 AND page_id IN (SELECT l_from FROM links WHERE l_to=N)
 * Number of return links limited to n_limit
 * ? Are unique value are returned?
 */
/*public static Article[] GetLFromByLToIN(SessionHolder session, int l_to, int n_limit) {
    String str_in, str_from;
    String str_sql_count_size, str_sql;
    // 1. Calculate number of links
    // too complex & slow request:
    // "SELECT COUNT(page_id) AS size FROM page " +
    // "WHERE page_namespace=0 AND " +
    // "page_id IN (SELECT l_from FROM links WHERE l_to="+l_to+")");
    // 1.a
    // Prepare SQL IN(...)
    int[] i_links_all = GetIntFromLinks(session.connect, "l_from", "WHERE l_to="+l_to);
    if (null == i_links_all)
        return null;
    int[] i_links = session.category_black_list.DeleteUsingBlackList (i_links_all, -1);
    String str_links = StringUtil.join(",", i_links);
    str_in = " IN(" + str_links + ") ";
    // 1.b
    str_from = "FROM page WHERE page_namespace=0 AND page_id" + str_in;
    str_sql_count_size = "SELECT COUNT(page_id) AS size " + str_from;
    str_sql = "SELECT page_id, page_title " + str_from;
    return GetLinksSQL(session, str_sql_count_size, str_sql, n_limit);
}*/

/**
 * Get articles (page_id and page_title) which refer to some of articles.
 * Remark1: Get only articles links, i.e. page_namespace = 0
 * TODO Remark2: select and return nodes which are not the source nodes l_to[]
 *
 * SELECT l_from FROM links WHERE l_to IN (page_id of Rp) UNIQUE LIMIT t
 * Ex.: Article[] base_nodes = links.GetLFromByLTo(connect, l_to);
 * ? Are unique value are returned?
 */
/*public static Article[] GetLFromByLTo(SessionHolder session, Article[] l_to) {
    Article[] l_from = null;
    String str_in, str_sub_in;
    String str_from, str_sql_count_size, str_sql;
    // Prepare SQL IN(...) via l_to[].page_id
    str_sub_in = "l_to IN (";
    for (int i=0; i<l_to.length-1; i++) {
        str_sub_in += l_to[i].page_id + ",";
    }
    str_sub_in += l_to[ l_to.length-1 ].page_id; // skip last comma
    str_sub_in += ")";
    // 1. Calculate number of links
    //s.executeQuery ("SELECT COUNT(page_id) AS size FROM page " +
    //    "WHERE page_namespace=0 AND " +
    //    "page_id IN (SELECT l_from FROM links WHERE "+str_sub_in);
    int[] i_links_all = GetIntFromLinks(session.connect, "l_from", "WHERE "+str_sub_in);
    if (null == i_links_all)
        return null;
    int[] i_links = session.category_black_list.DeleteUsingBlackList (i_links_all, -1);
    String str_links = StringUtil.join(",", i_links);
    str_in = " IN(" + str_links + ") ";
    str_from = "FROM page WHERE page_namespace=0 AND page_id" + str_in;
    str_sql_count_size = "SELECT COUNT(page_id) AS size " + str_from;
    str_sql = "SELECT page_id, page_title " + str_from;
    return GetLinksSQL(session, str_sql_count_size, str_sql, -1);
}*/

// Article[] base_nodes = links.GetLToByLFrom(connect, root_nodes);
/**
 * Gets articles (page_id and page_title) which refer to some of articles.
 * Remark1: Gets only articles (not categories), i.e. page_namespace = 0
 * TODO Remark2: select and return nodes which are not the source nodes l_from[]
 *
 * @param n_limit max number of returned articles, negative value means no limit
 * Todo: select first n links in article (not first n links in table)
 *
 * ? Are unique value are returned?
 */
/* public static Article[] GetLToByLFrom(SessionHolder session, int l_from, int n_limit) {
    String str_from;
    String str_sql_count_size, str_sql;
    Article[] result = null;
    Article node = new Article();
    // 1. Calculate number of links
    // too complex & slow request:
    // "SELECT COUNT(page_id) AS size FROM page " +
    // "WHERE page_namespace=0 AND " +
    // "page_id IN (SELECT l_to FROM links WHERE l_from="+l_from+")");
    // Execute subrequest SQL IN(...)
    int[] i_links_all = GetIntFromLinks(session.connect, "l_to", "WHERE l_from="+l_from);
    if (null == i_links_all)
        return null;
    int[] i_links = session.category_black_list.DeleteUsingBlackList(i_links_all, -1);
    for(int i=0; i<i_links.length; i++) {
        str_from = "FROM page WHERE page_namespace=0 AND page_id=" + i_links[i];
        str_sql_count_size = "SELECT COUNT(page_id) AS size " + str_from;
        str_sql = "SELECT page_id, page_title " + str_from;
        Article[] add = GetLinksSQL(session, str_sql_count_size, str_sql, -1);
        result = node.JoinUnique(result, add);
        if(n_limit>=0 && result.length>= n_limit) {
            break;
        }
    }
    return result;
} */

/* public static Article[] GetLToByLFrom(SessionHolder session, Article[] l_from) {
    int i;
    Article node = new Article();
    Article[] result_nodes = null;
    for (i=0; i<l_from.length; i++) {
        Article[] add = GetLToByLFrom(session, l_from[i].page_id, -1);
        result_nodes = node.JoinUnique(result_nodes, add);
    }
    return result_nodes;
}*/

/*public static void GetTitleToByIDFromQuery(ResultSet rs, Statement s,StringBuffer sb) {
    try {
        s.executeQuery(sb.toString());
    } catch(SQLException ex) {
        System.err.println("SQLException (PageTable.java GetTitleByID()): " + ex.getMessage());
    }
}*/