package wikipedia.kleinberg;

import wikipedia.language.Encodings;
import junit.framework.*;
import wikipedia.clustering.*;
import wikipedia.util.*;
import wikipedia.sql.*;
import java.util.*;

/**
 * JUnit 3 test of {@link DumpToGraphViz#PrintSynonyms} run against the
 * Russian Wikipedia database connection.
 *
 * <p>The fixture opens two database connections (English and Russian),
 * configures the GraphViz dump output files in the user's home directory and
 * prepares a single source article titled "Джемини".
 */
public class DumpToGraphVizTest extends TestCase {

    /** Connections opened in {@link #setUp()} and closed in {@link #tearDown()}. */
    public Connect connect, connect_ru;

    /** Session object handed to the dump; re-initialised by the test itself. */
    SessionHolder session;

    /** Dump under test, with its .dot/.bat/.sh output files configured in setUp. */
    DumpToGraphViz dump;

    /** Set to 99 in setUp; the test below passes its own value to session.Init. */
    int categories_max_steps;

    /** One-element array holding the article described by t1 / t1_id. */
    Article[] source_nodes;

    /** Encoded title of the source article and its page id in the Russian database. */
    String t1;
    int t1_id;

    public DumpToGraphVizTest(String testName) {
        super(testName);
    }

    /**
     * Opens both database connections, configures the dump output files and
     * builds the single source article used by the test.
     *
     * @throws Exception if any fixture step fails
     */
    @Override
    protected void setUp() throws Exception {
        connect = new Connect();
        //connect.Open("localhost", "enwiki?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&useUnbufferedInput=false", "javawiki", "");
        connect.Open(Connect.WP_HOST, Connect.WP_DB, Connect.WP_USER, Connect.WP_PASS);

        connect_ru = new Connect();
        //connect_ru.Open("localhost", "ruwiki?useUnicode=false&characterEncoding=ISO8859_1&autoReconnect=true&useUnbufferedInput=false", "javawiki", "");
        //Java:MySQL ISO8859_1:latin1
        connect_ru.Open(Connect.WP_RU_HOST, Connect.WP_RU_DB, Connect.WP_RU_USER, Connect.WP_RU_PASS);

        dump = new DumpToGraphViz();
        dump.file_dot.setFileInHomeDir("graphviz", "empty.txt", "Cp1251", true);
        dump.file_bat.setFileInHomeDir("graphviz", "bat_ruwiki.bat", "Cp866", true);
        dump.file_sh.setFileInHomeDir("graphviz", "bat_ruwiki.sh", "Cp1251", true);

        session = new SessionHolder();
        session.initObjects();
        categories_max_steps = 99;
        session.dump = dump;

        // NOTE(review): the title is encoded with the English connection's
        // encoder but looked up through connect_ru - confirm this is intended
        // (connect_ru.enc may be the right encoder here).
        t1 = connect.enc.EncodeFromJava("Джемини");
        t1_id = PageTable.getIDByTitleNamespace(connect_ru, t1, PageNamespace.MAIN);

        source_nodes = new Article[1];
        source_nodes[0] = new Article();
        source_nodes[0].page_title = t1;
        source_nodes[0].page_id = t1_id;
    }

    /**
     * Closes both database connections. The Russian connection is closed in a
     * {@code finally} clause so that it is released even when closing the
     * first connection throws.
     *
     * @throws Exception if closing a connection fails
     */
    @Override
    protected void tearDown() throws Exception {
        try {
            connect.Close();
        } finally {
            connect_ru.Close();
        }
    }

    /**
     * Runs PrintSynonyms for the single source article "Джемини" on the
     * Russian connection with the dump enabled, then checks the number of
     * categories reported by the session's category black list.
     *
     * <p>The session is initialised with {@code null} as the second argument
     * of {@code Init} (presumably "no blacklist" - confirm against
     * SessionHolder) and a maximum of 10 category steps. The expected total
     * of passed categories is 11.
     */
    public void testPrintSynonyms_empty_blacklist_categories_ru() {
        System.out.println("PrintSynonyms");

        // Local limit for this test; the categories_max_steps field
        // (99, assigned in setUp) is not used here.
        final int max_steps = 10;

        session.Init(connect_ru, null, max_steps);
        session.randomPages(false);
        session.skipTitlesWithSpaces(true);
        // Re-attach the dump after Init, as the original test did.
        session.dump = dump;

        //article_fn = StringUtilRegular.encodeRussianToLatinitsa(article, Encodings.enc_java_default, Encodings.enc_int_default);
        //dump.connect = connect_ru;
        //dump.file_dot.setFileInHomeDir("graphviz", article_fn + ".dot", "UTF8",true);

        List<Article> nodes_list = new ArrayList<Article>();
        nodes_list.add(source_nodes[0]);

        session.dump.PrintSynonyms(session, nodes_list);

        assertEquals(11, session.category_black_list.getTotalCategoriesPassed());
    }
}