/*
* LinksBaseSetTest.java
* JUnit based test
*/
package wikipedia.kleinberg;
import wikipedia.language.Encodings;
import wikipedia.sql.*;
import wikipedia.util.*;
import junit.framework.*;
import java.util.*;
/**
 * JUnit 3 test case for {@code wikipedia.kleinberg.LinksBaseSet}.
 *
 * <p>These are integration tests: they open live MySQL connections to an
 * English and a Russian Wikipedia database (configured via the
 * {@code Connect.WP_*} constants) and write GraphViz dump files into a
 * "graphviz" sub-directory of the user's home directory.
 */
public class LinksBaseSetTest extends TestCase {

    /** Connections to the English ({@code connect}) and Russian
     *  ({@code connect_ru}) Wikipedia databases; opened in setUp(). */
    public Connect connect, connect_ru;

    /** Never initialized here; only referenced by the commented-out
     *  experiments below. Kept so they can be re-enabled unchanged. */
    Links links;
    LinksBaseSet links_baseset;

    /** Produces .dot/.bat/.sh files so base-set graphs can be rendered with GraphViz. */
    DumpToGraphViz dump;

    /** Per-test session state (source article id, dump target, etc.). */
    SessionHolder session;

    public LinksBaseSetTest(String testName) {
        super(testName);
    }

    /**
     * Opens both database connections and prepares the GraphViz output files.
     *
     * @throws java.lang.Exception if a connection cannot be opened or a
     *         dump file cannot be created
     */
    protected void setUp() throws java.lang.Exception {
        connect = new Connect();
        connect.Open(Connect.WP_HOST, Connect.WP_DB, Connect.WP_USER, Connect.WP_PASS);
        connect_ru = new Connect();
        connect_ru.Open(Connect.WP_RU_HOST, Connect.WP_RU_DB, Connect.WP_RU_USER, Connect.WP_RU_PASS);

        links_baseset = new LinksBaseSet();

        dump = new DumpToGraphViz();
        // NOTE(review): Cp866 for the .bat file matches the Windows console
        // code page; Cp1251 for the .sh file looks like a copy-paste of the
        // .dot settings — confirm before changing, as it only affects dumps.
        dump.file_dot.setFileInHomeDir("graphviz", "empty.txt", "Cp1251", true);
        dump.file_bat.setFileInHomeDir("graphviz", "bat_ruwiki.bat", "Cp866", true);
        dump.file_sh.setFileInHomeDir("graphviz", "bat_ruwiki.sh", "Cp1251", true);

        session = new SessionHolder();
        session.initObjects();
        session.dump = dump;
    }

    /**
     * Closes both database connections.
     *
     * <p>Fix: the original unconditionally called {@code Close()} on both
     * connections, which (a) threw a NullPointerException when {@code setUp()}
     * failed before a connection object was created, masking the real failure,
     * and (b) leaked the Russian connection whenever closing the English one
     * threw. Null guards plus try/finally ensure both are closed if present.
     */
    protected void tearDown() throws java.lang.Exception {
        try {
            if (null != connect) {
                connect.Close();
            }
        } finally {
            if (null != connect_ru) {
                connect_ru.Close();
            }
        }
    }

    /** @return the suite of all tests in this class (JUnit 3 convention). */
    public static junit.framework.Test suite() {
        return new junit.framework.TestSuite(LinksBaseSetTest.class);
    }

    /**
     * Test of CreateBaseSet method, of class wikipedia.LinksBaseSet.
     *
     * <p>Currently disabled: the bodies below exercise an older, non-static
     * {@code CreateBaseSet} overload and are kept for reference.
     */
    public void testCreateBaseSet() {
        /*String article;
        article = "Робот"; // vertices:9 edges:11
        //article = "Самолёт"; // too big: vertices:1383 edges:34192
        //article = "Фуникулёр"; // medium size
        dump.file.SetFilename(article + "_all.dot");
        dump.file.Open(false, "UTF8");
        dump.connect = connect_ru;
        Map<Integer, Article> base_nodes = links_baseset.CreateBaseSet(connect_ru, article, dump);
        if (article.equals("Робот")) {
            assertEquals(base_nodes.size(), 9);
            assertTrue("Robot should refer to Dance", base_nodes.containsKey(10578));
            assertTrue("Robot should be referred to Transformers", base_nodes.containsKey(18991));
        }*/
        /*
        String[] all_words = {"Робот", "Рычаг", "Фуникулёр",
            "Глаз", "Окуляр", "Орбитальная станция", "Плазма", "Фут",
            "Свёртка", "Ноты", "Предложение", "Преферанс", "Взрыв",
            "Самосознание", "Свобода", "Оптимизм"};
        // no refer: "Окуляр", "Свёртка",
        String[] big_words = {"Фуникулёр", "Фут", "Глаз", "Ноты", "Предложение",
            "Взрыв", "Самосознание", "Свобода"};
        String[] small_words = {"Робот", "Рычаг", "Плазма",
            "Преферанс", "Оптимизм"};
        String[] new_words2 = {"Кварк", "Похмелье", "Кегль",
            "Киноварь", "Компилятор"};
        String[] new_words = {"Контрабас", "Конунг", "Константа",
            "Спин", "Спорт", "Дзюдо"};
        for(int i=0; i<new_words.length; i++) {
            links_baseset.CreateBaseSet(connect_ru, new_words[i]);
        }*/
    }

    /*
    public void testCreateBaseSetEnglish() {
        //links_baseset.CreateBaseSet(connect, "Innuendo");
        //links_baseset.CreateBaseSet(connect, "Parallelogram");
        String[] words_Blondel = {"disappear", "parallelogram", "sugar", "science"};
        String[] rare_words = {"Parallelogram", "Sycamore", "Innuendo"};
        for(int i=0; i<rare_words.length; i++) {
            links_baseset.CreateBaseSet(connect, rare_words[i]);
        }
    }
    */

    /**
     * Tests the parametrized
     * {@code CreateBaseSet(article, synonyms, session, root_set_size, increment)}
     * against the Russian database using the article "Робот":
     * <ol>
     *   <li>without synonyms, "Андроид" must be in the base set while
     *       "Поисковая_система" must not be;</li>
     *   <li>after both titles are submitted as synonyms, both must appear and
     *       be typed {@code NodeType.RATED_SYNONYMS}.</li>
     * </ol>
     */
    public void testCreateBaseSetParametrized_ru() {
        System.out.println("testCreateBaseSetParametrized_ru");
        int root_set_size, increment;
        String article, article_fn;
        // Kleinberg default values
        root_set_size = 19; //200; 18, 19 - failed
        increment = 50; //50;
        // Simple test values
        //root_set_size = 1;
        //increment = 1;
        int categories_max_steps = 5;
        session.Init(connect_ru, null, categories_max_steps);
        session.randomPages(false);
        session.skipTitlesWithSpaces(true);

        Encodings e = connect_ru.enc;
        article = e.EncodeFromJava("Робот");
        // Latin transliteration is used only to build a safe .dot filename.
        article_fn = StringUtilRegular.encodeRussianToLatinitsa(article, Encodings.enc_java_default, Encodings.enc_int_default);
        /*article = "Parallelogram"; // 77,953 sec Hot MySQL: 48,344 sec
        //article = "Science"; // 76,094 sec Hot MySQL: 50,094 sec
        dump.connect = connect_ru;*/
        dump.file_dot.setFileInHomeDir("graphviz", article_fn + ".dot", "UTF8", true);

        List<String> synonyms = new ArrayList<String>();
        Map<Integer, Article> base_nodes = LinksBaseSet.CreateBaseSet(article, synonyms, session, root_set_size, increment);

        // The source article itself must always be part of the base set.
        assertTrue(base_nodes.containsKey(session.source_article_id));
        Article s = base_nodes.get(session.source_article_id);
        assertTrue(null != s);

        String android = e.EncodeFromJava("Андроид");
        int android_id = PageTable.getIDByTitleNamespace(connect_ru, android, PageNamespace.MAIN);
        String search_engine = e.EncodeFromJava("Поисковая_система");
        int search_engine_id = PageTable.getIDByTitleNamespace(connect_ru, search_engine, PageNamespace.MAIN);

        // 1) "Андроид" is presented and "Поисковая_система" is absent in base_nodes;
        assertTrue (base_nodes.containsKey( android_id));
        assertFalse(base_nodes.containsKey(search_engine_id));

        // 2) assert that "Андроид" and "Киборг" (submitted as synonyms) are in base_nodes;
        synonyms.add(android);
        synonyms.add(search_engine);
        base_nodes = LinksBaseSet.CreateBaseSet(article, synonyms, session, root_set_size, increment);
        assertTrue(base_nodes.containsKey( android_id));
        assertTrue(base_nodes.containsKey(search_engine_id));

        Article a_android = base_nodes.get( android_id);
        Article a_search_engine = base_nodes.get(search_engine_id);
        assertEquals(NodeType.RATED_SYNONYMS, a_android. type);
        assertEquals(NodeType.RATED_SYNONYMS, a_search_engine.type);
        //dump.connect = connect_ru;
        //links_baseset.CreateBaseSet(dump.connect, "Parallelogram", dump, 2, 2);
        /*
        String[] all_words = {"Робот", "Рычаг", "Фуникулёр",
            "Глаз", "Окуляр", "Орбитальная станция", "Плазма", "Фут",
            "Свёртка", "Ноты", "Предложение", "Преферанс", "Взрыв",
            "Самосознание", "Свобода", "Оптимизм"};
        // no refer: "Окуляр", "Свёртка",
        String[] big_words = {"Фуникулёр", "Фут", "Глаз", "Ноты", "Предложение",
            "Взрыв", "Самосознание", "Свобода"};
        String[] small_words = {"Робот", "Рычаг", "Плазма",
            "Преферанс", "Оптимизм"};
        String[] new_words2 = {"Кварк", "Похмелье", "Кегль",
            "Киноварь", "Компилятор"};
        String[] new_words = {"Контрабас", "Конунг", "Константа",
            "Спин", "Спорт", "Дзюдо"};
        for(int i=0; i<new_words.length; i++) {
            links_baseset.CreateBaseSet(dump.connect, new_words[i]);
        }*/
    }

    /**
     * CreateBaseSet must return {@code null} for a title that does not exist
     * in the database ("AbsentWord").
     */
    public void testCreateBaseSet_ForAbsentWord() {
        System.out.println("testCreateBaseSet_ForAbsentWord");
        int root_set_size, increment;
        String article;
        // Kleinberg default values
        /*root_set_size = 200;
        increment = 50;
        */
        // Simple test values
        root_set_size = 1;
        increment = 1;
        List<String> synonyms = new ArrayList<String>();
        //synonyms.add("syn1");
        //synonyms.add("syn2");
        article = "AbsentWord";
        int categories_max_steps = 99;
        session.Init(connect_ru, null, categories_max_steps);
        //session.skipTitlesWithSpaces(true);
        session.randomPages(false);
        /*article = "Parallelogram"; // 77,953 sec Hot MySQL: 48,344 sec
        //article = "Science"; // 76,094 sec Hot MySQL: 50,094 sec
        dump.connect = connect;*/
        dump.file_dot.SetFilename(article + ".dot");
        // NOTE(review): this file is opened but never explicitly closed here;
        // presumably the dump object owns it — confirm there is no handle leak.
        dump.file_dot.Open(false, "UTF8");
        Map<Integer, Article> base_nodes = LinksBaseSet.CreateBaseSet(article, synonyms, session, root_set_size, increment);
        assertTrue(null == base_nodes);
    }
}
/*
public HashMap<Integer, Article> CreateBaseSet(Connect connect, String page_title, SessionHolder session) {
Article node = new Article();
Links link = new Links();
DCEL dcel = new DCEL();
int increment, root_set_size;
root_set_size = 200;
//increment = 50;
// 1.
String latin1_article = Encodings.UTF8ToLatin1(page_title);
int p = connect.page_table.GetIDByTitle(connect, latin1_article);
session.source_article_id = p;
Article[] root_nodes = links.GetLFromByLTo(session, p, root_set_size);
if(null == root_nodes) {
if (null != session.dump) {
String bat_text = "\n:: " + session.dump.file_dot.GetFilename() +".dot \t Warning: no page refers to this page.\n";
session.dump.file_bat.Print(bat_text);
session.dump.file_bat.Flush();
}
return null; // nobody refers to the p page
}
node.SetType(root_nodes, 1);
//file_dot.WriteNew(path + "1_0_ruwiki.dot", session.dump.Dump(root_nodes), "UTF8");
// 2.1
Article[] base_nodes1 = links.GetLFromByLTo(session, root_nodes, -1);
//file_dot.WriteNew(path + "2_1_ruwiki.dot", session.dump.Dump(base_nodes1), "UTF8");
// 2.2
Article[] base_nodes2 = links.GetLToByLFrom(session, root_nodes);
//file_dot.WriteNew(path + "2_2_ruwiki.dot", session.dump.Dump(base_nodes2), "UTF8");
Article[] base_nodes = node.JoinUnique(base_nodes1, base_nodes2);
Article[] base_and_root_nodes = node.JoinUnique(base_nodes, root_nodes);
//file_dot.WriteNew(path + "2_2_unique.dot", session.dump.Dump(base_and_root_nodes), "UTF8");
node.SetType(base_and_root_nodes, 2);
// 2.3
HashMap<Integer, Article> hash_node = node.CreateHashMap(base_and_root_nodes);
hash_node.get(p).type = -1; // set type for the very source article
link.GetAllLinks(connect, hash_node);
if (null != session.dump) {
session.dump.Dump(hash_node);
// append 1) statistics and 2) dot command to bat file
String bat_text = "\n:: "+session.dump.file_dot.GetFilename()+"\t vertices:" + hash_node.values().size() +
"\t edges:" + dcel.CountLinksIn(hash_node) +
"\ndot.exe -Tjpeg " + session.dump.file_dot.GetFilename() + " -v -o " + session.dump.file_dot.GetFilenameWoExt() + ".jpeg\n";
session.dump.file_bat.Print(bat_text);
session.dump.file_bat.Flush();
}
return hash_node;
}*/