/*
* StringMapTest.java
* JUnit based test
*/
package wikipedia.data;
import wikipedia.sql.Connect;
import wikipedia.sql.PageTable;
import wikipedia.sql.PageNamespace;
import wikipedia.sql.Links;
import wikipedia.kleinberg.SessionHolder;
import junit.framework.*;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.ArrayList;
import java.util.List;
public class StringMapTest extends TestCase {
public Connect connect, connect_ru;
static String t1, t_redirect, t2;
static int t1_id, t_redirect_id, t2_id;
static String s1, s_redirect, s2, s1b, s_redirect2, s3;
static int s1_id, s_redirect_id, s2_id, s1b_id, s_redirect2_id, s3_id;
static long t_start, t_end;
static float t_work;
static Map<String,Set<String>> m_out = new HashMap<String,Set<String>>();
static Map<String,Set<String>> m_in = new HashMap<String,Set<String>>();
public StringMapTest(String testName) {
super(testName);
}
protected void setUp() throws Exception {
connect = new Connect();
connect.Open(Connect.WP_HOST, Connect.WP_DB, Connect.WP_USER, Connect.WP_PASS);
connect_ru = new Connect();
connect_ru.Open(Connect.WP_RU_HOST, Connect.WP_RU_DB, Connect.WP_RU_USER, Connect.WP_RU_PASS);
// Redirects: t1 -> t_redirect -> t2
// Джемини -> MIT -> Массачусетсский_технологический_институт
// Польская_Википедия -> Бот_(программа) -> Робот_(программа)
// CAPTCHA -> Bot -> Робот_(программа)
t1 = connect.enc.EncodeFromJava("Джемини");
t_redirect = connect.enc.EncodeFromJava("MIT");
t2 = connect.enc.EncodeFromJava("Массачусетсский_технологический_институт");
s1 = connect.enc.EncodeFromJava("Польская_Википедия");
s_redirect = connect.enc.EncodeFromJava("Бот_(программа)");
s1b = connect.enc.EncodeFromJava("CAPTCHA");
s_redirect2 = connect.enc.EncodeFromJava("Bot");
s2 = connect.enc.EncodeFromJava("Робот_(программа)");
s3 = connect.enc.EncodeFromJava("Википедия");
m_out.clear(); m_in.clear();
Links.addTitlesToMaps(t1, t_redirect, m_out, m_in);
Links.addTitlesToMaps(s1, s_redirect, m_out, m_in);
Links.addTitlesToMaps(s1b,s_redirect2, m_out, m_in);
t1_id = PageTable.getIDByTitleNamespace(connect_ru, t1, PageNamespace.MAIN);
t_redirect_id = PageTable.getIDByTitleNamespace(connect_ru, t_redirect, PageNamespace.MAIN);
t2_id = PageTable.getIDByTitleNamespace(connect_ru, t2, PageNamespace.MAIN);
s1_id = PageTable.getIDByTitleNamespace(connect_ru, s1, PageNamespace.MAIN);
s_redirect_id = PageTable.getIDByTitleNamespace(connect_ru, s_redirect, PageNamespace.MAIN);
s1b_id = PageTable.getIDByTitleNamespace(connect_ru, s1b,PageNamespace.MAIN);
s_redirect2_id= PageTable.getIDByTitleNamespace(connect_ru, s_redirect2,PageNamespace.MAIN);
s2_id = PageTable.getIDByTitleNamespace(connect_ru, s2, PageNamespace.MAIN);
s3_id = PageTable.getIDByTitleNamespace(connect_ru, s3, PageNamespace.MAIN);
}
protected void tearDown() throws Exception {
connect.Close();
connect_ru.Close();
}
/** Fills maps m_in and m_out by data from 1. ArticleIdAndTitle,
* 2. map from identifier (to) pointed to identifiers (from). */
/*
public static void fill_m_in_m_out (Map<String,Set<String>> m_out, Map<String,Set<String>> m_in,
ArticleIdAndTitle[] aid,
Map<Integer, List<Integer>> m_id_to__id_from)*/
public void testFill_m_in_m_out() {
System.out.println("fill_m_in_m_out");
String w1, w2, w3;
int id1, id2, id3;
w1 = "word1"; id1 = 1;
w2 = "word2"; id2 = 2;
w3 = "words3"; id3 = 3;
ArticleIdAndTitle[] aid = new ArticleIdAndTitle[2];
aid[0] = new ArticleIdAndTitle (id1, w1);
aid[1] = new ArticleIdAndTitle (id2, w2);
ArticleIdAndTitle[] addon = new ArticleIdAndTitle[1];
addon[0] = new ArticleIdAndTitle (id3, w3);
// w1 -> w2
// w1 -> w3 -> w2
Map<Integer, List<Integer>> m_id_to__id_from = new HashMap<Integer, List<Integer>> ();
// empty test
m_out.clear(); m_in.clear();
StringMap.fill_m_in_m_out(m_out, m_in,
aid, addon,
m_id_to__id_from);
assertEquals(0, m_out.size());
assertEquals(0, m_in.size());
// test: w3 <- w1
List<Integer> id3__id_from = new ArrayList<Integer>();
id3__id_from.add(id1);
m_id_to__id_from.put(id3, id3__id_from);
m_out.clear(); m_in.clear();
StringMap.fill_m_in_m_out(m_out, m_in, aid, addon, m_id_to__id_from);
assertEquals(1, m_out.size());
assertEquals(1, m_in.size());
// test: w3 <- w1,
// w2 <- [w1, w3]
List<Integer> id2__id_from = new ArrayList<Integer>();
id2__id_from.add(id1);
id2__id_from.add(id3);
m_id_to__id_from.put(id3, id3__id_from);
m_id_to__id_from.put(id2, id2__id_from);
m_out.clear(); m_in.clear();
StringMap.fill_m_in_m_out(m_out, m_in, aid, addon, m_id_to__id_from);
// w1 -> w2
// w1 -> w3 -> w2
assertEquals(2, m_out.size());
assertEquals(2, m_in.size());
}
public void testSkipTitles() {
System.out.println("skipTitles");
SessionHolder session;
session = new SessionHolder();
session.initObjects();
//int categories_max_steps = 99;
//session.Init(connect_ru, null, categories_max_steps);
String w1, w2, w3;
w1 = "word1";
w2 = "word2";
w3 = "many_words_with_spaces";
m_out.clear(); m_in.clear();
Links.addTitlesToMaps(w1, w2, m_out, m_in);
Links.addTitlesToMaps(w1, w3, m_out, m_in);
Links.addTitlesToMaps(w3, w2, m_out, m_in);
// empty test
session.skipTitlesWithSpaces(false);
StringMap.skipTitles(session, m_out, m_in);
assertEquals(2, m_out.size());
assertEquals(2, m_in.size());
// test: skip w3
session.skipTitlesWithSpaces(true);
StringMap.skipTitles(session, m_out, m_in);
assertEquals(1, m_out.size());
assertEquals(1, m_in.size());
assertTrue(session.removed_articles.hasTitle(w3));
assertFalse(session.removed_articles.hasTitle(w1));
assertFalse(session.removed_articles.hasTitle(w2));
}
/** Removes the string from maps. */
//public static void removeString (String s, Map<String,Set<String>> m_out, Map<String,Set<String>> m_in)
public void testRemoveString() {
System.out.println("removeString");
String w1, w2, w3;
w1 = "word1";
w2 = "word2";
w3 = "word3333";
m_out.clear(); m_in.clear();
Links.addTitlesToMaps(w1, w2, m_out, m_in);
Links.addTitlesToMaps(w1, w3, m_out, m_in);
Links.addTitlesToMaps(w3, w2, m_out, m_in);
// empty test
StringMap.removeString("string which is absent in maps", m_out, m_in);
assertEquals(2, m_out.size());
assertEquals(2, m_in.size());
// test: remove w3
StringMap.removeString(w3, m_out, m_in);
assertEquals(1, m_out.size());
assertEquals(1, m_in.size());
/*
// test speed 2
t_start = System.currentTimeMillis();
String text = "text";
for(int i=0; i<1000000; i++) {
StringMap.removeString(w3, m_out, m_in);
}
t_end = System.currentTimeMillis();
t_work = (t_end - t_start)/1000f; // in sec
System.out.println("removeString() total time: " + t_work + "sec.");
*/
}
public void testReplaceTitleInMaps_one_link_ru() {
System.out.println("replaceTitleInMaps_one_link_ru");
m_out.clear(); m_in.clear();
Links.addTitlesToMaps(t1, t_redirect, m_out, m_in);
// replace t_redirect by t2
StringMap.replaceTitleInMaps (t_redirect, t2, m_out, m_in);
assertEquals(1, m_out.size());
assertEquals(1, m_in.size());
assertEquals(1, m_out.get(t1).size());
assertTrue(m_in.containsKey(t2));
assertEquals(1, m_in. get(t2).size());
assertTrue(m_in.containsKey(t2));
assertTrue(m_out.get(t1).contains( t2 ));
assertTrue( m_in.get( t2 ).contains(t1));
}
public void testReplaceTitleInMaps_two_links_ru() {
System.out.println("replaceTitleInMaps_two_links_ru");
m_out.clear(); m_in.clear();
Links.addTitlesToMaps(s1, s_redirect, m_out, m_in);
Links.addTitlesToMaps(s1b,s_redirect2, m_out, m_in);
// Facts :
// s1 s_redirect s2
// Польская_Википедия -> Бот_(программа) -> Робот_(программа)
// s1b s_redirect2 s2
// CAPTCHA -> Bot -> Робот_(программа)
// Source: Польская_Википедия -> Бот_(программа)
// CAPTCHA -> Bot
// Result: Польская_Википедия -> Робот_(программа)
// CAPTCHA -> Робот_(программа)
// replace t_redirect by t2
StringMap.replaceTitleInMaps (s_redirect, s2, m_out, m_in);
StringMap.replaceTitleInMaps (s_redirect2, s2, m_out, m_in);
assertEquals(2, m_out.size());
assertEquals(1, m_in.size());
assertEquals(1, m_out.get(s1).size());
assertEquals(1, m_out.get(s1b).size());
assertTrue(m_in.containsKey(s2));
assertEquals(2, m_in. get(s2).size());
assertTrue(m_out.get(s1).contains( s2 ));
assertTrue( m_in.get( s2 ).contains(s1));
assertTrue(m_out.get(s1b).contains( s2 ));
assertTrue( m_in.get( s2 ).contains(s1b));
}
public void testReplaceTitleInMaps_remove_redirect_links_ru() {
System.out.println("replaceTitleInMaps_remove_redirect_links_ru");
// test with 1 link
m_out.clear(); m_in.clear();
Links.addTitlesToMaps(s_redirect, s2, m_out, m_in);
// Facts (source) :
// s_redirect s2
// Бот_(программа) -> Робот_(программа)
//
// Replace Бот_(программа) by Робот_(программа)
// Result: nothing
// replace s_redirect by s2
StringMap.replaceTitleInMaps (s_redirect, s2, m_out, m_in);
assertEquals(0, m_out.size());
assertEquals(0, m_in.size());
// test with 2 links
m_out.clear(); m_in.clear();
Links.addTitlesToMaps(s_redirect, s2, m_out, m_in);
Links.addTitlesToMaps(s_redirect2, s2, m_out, m_in);
// Facts (source) :
// s_redirect s2
// Бот_(программа) -> Робот_(программа)
// s_redirect2 s2
// Bot -> Робот_(программа)
//
// Replace Бот_(программа) by Робот_(программа)
// Bot by Робот_(программа)
// Result: nothing
// replace s_redirect by s2
StringMap.replaceTitleInMaps (s_redirect, s2, m_out, m_in);
StringMap.replaceTitleInMaps (s_redirect2, s2, m_out, m_in);
assertEquals(0, m_out.size());
assertEquals(0, m_in.size());
}
public void testReplaceTitleInMaps_resolve_redirect_links_ru() {
System.out.println("replaceTitleInMaps_resolve_redirect_links_ru");
m_out.clear(); m_in.clear();
Links.addTitlesToMaps(s1, s_redirect, m_out, m_in);
Links.addTitlesToMaps(s_redirect, s2, m_out, m_in);
Links.addTitlesToMaps(s1, s3, m_out, m_in);
// Facts (source) :
// s1 s_redirect s2
// Польская_Википедия -> Бот_(программа) -> Робот_(программа)
// Польская_Википедия -> Википедия (s3)
//
// Replace s_redirect by s2
// Result: s1 -> s2, s1 -> s3
// replace s_redirect by s2
StringMap.replaceTitleInMaps (s_redirect, s2, m_out, m_in);
assertEquals(1, m_out.size());
assertEquals(2, m_in.size());
assertTrue(m_out.containsKey(s1));
assertTrue(m_in. containsKey(s2));
assertTrue(m_in. containsKey(s3));
assertEquals(2, m_out.get(s1).size());
assertEquals(1, m_in.get(s2).size());
assertEquals(1, m_in.get(s3).size());
assertTrue(m_out.get(s1).contains( s2 ));
assertTrue( m_in.get( s2 ).contains(s1));
assertTrue(m_out.get(s1).contains( s3 ));
}
}