package wikipedia.sql_idf; import wikipedia.sql.Connect; import junit.framework.*; import java.util.*; public class TermTest extends TestCase { public Connect idfruwiki_conn; public Connect idfsimplewiki_conn; public List<TermPage> tp_list; Term t1, t2; String lemma1, lemma2, page_title_t12; int doc_freq_t = 7; public TermTest(String testName) { super(testName); } protected void setUp() throws Exception { idfruwiki_conn = new Connect(); idfruwiki_conn.Open(Connect.IDF_RU_HOST, Connect.IDF_RU_DB, Connect.IDF_RU_USER, Connect.IDF_RU_PASS); idfsimplewiki_conn = new Connect(); idfsimplewiki_conn.Open(Connect.IDF_SIMPLE_HOST, Connect.IDF_SIMPLE_DB, Connect.IDF_SIMPLE_USER, Connect.IDF_SIMPLE_PASS); java.sql.Connection conn = idfsimplewiki_conn.conn; tp_list = new ArrayList<TermPage>(2); lemma1 = "SHOE"; lemma2 = "BRUSH"; page_title_t12 = "page_title_SHOE_BRUSH"; Page.insert(conn, page_title_t12, 2); // int word_count = 2; Page p = Page.get(conn, page_title_t12); Term.incDocFreq(conn, null, lemma1, doc_freq_t, 0); Term.incDocFreq(conn, null, lemma2, doc_freq_t, 0); t1 = Term.get(conn, lemma1); t2 = Term.get(conn, lemma2); tp_list.add(new TermPage()); tp_list.add(new TermPage()); tp_list.get(0).setTerm(t2); tp_list.get(1).setTerm(t1); } protected void tearDown() throws Exception { Term.delete(idfsimplewiki_conn.conn, lemma1); Term.delete(idfsimplewiki_conn.conn, lemma2); Page.delete(idfsimplewiki_conn.conn, page_title_t12); idfruwiki_conn.Close(); idfsimplewiki_conn.Close(); } /** * Test of incLemmaDocFreq method, of class sql_idf.Term. */ public void testIncLemmaDocFreq_one_element_simple() { System.out.println("incLemmaDocFreq_one_element_simple"); java.sql.Connection conn = idfsimplewiki_conn.conn; String page_title = "testIncLemmaDocFreq_one_element_simple"; String lemma = "test_term.get"; int doc_freq_max = 10; int term_freq = -999; int word_count = 0; Term t = null; Term.delete(conn, lemma); t = Term.get (conn, lemma); assertEquals(null, t); Page.delete(conn, page_title); Page p = Page.get(conn, page_title); assertEquals(null, p); p = Page.getOrInsert(conn, page_title, word_count); int page_id = p.getPageID(); assertTrue(page_id != 0); int add_term_freq = 7; wikipedia.sql_idf.Term.incLemmaDocFreq(conn, lemma, add_term_freq, p, doc_freq_max); t = Term.get (conn, lemma); int term_id = t.getTermID(); p = Page.getOrInsert(conn, page_title, word_count); page_id = p.getPageID(); term_freq = TermPage.getTermFreqInDocument(conn, term_id, page_id); assertEquals(add_term_freq, term_freq); assertEquals(word_count, p.getWordCount()); word_count = 33; p = Page.getOrInsert(conn, page_title, word_count); p.storeWordCount(conn, word_count); wikipedia.sql_idf.Term.incLemmaDocFreq(conn, lemma, add_term_freq, p, doc_freq_max); term_freq = TermPage.getTermFreqInDocument(conn, term_id, page_id); assertEquals(add_term_freq, term_freq); assertEquals(word_count, p.getWordCount()); // delete //TermPage.delete(conn, term_id, page_id); Term.delete(conn, lemma); Page.delete(conn, page_title); TermPage.delete(conn, term_id, page_id); } public void testGet_ru() { System.out.println("get_ru"); Term t = null; String lemma = "test_term.get"; int inc_corpus_freq = 55; Term.delete (idfruwiki_conn.conn, lemma); t = Term.get (idfruwiki_conn.conn, lemma); assertEquals(null, t); Term.incDocFreq (idfruwiki_conn.conn, t, lemma, 1, inc_corpus_freq); // INSERT, t==null t = Term.get (idfruwiki_conn.conn, lemma); assertFalse(null == t); assertEquals(1, t.getDocFreq()); assertEquals(inc_corpus_freq, t.getCorpusFreq()); inc_corpus_freq = 5; Term.incDocFreq (idfruwiki_conn.conn, t, lemma, 3, inc_corpus_freq); t = Term.get (idfruwiki_conn.conn, lemma); Term.incDocFreq (idfruwiki_conn.conn, t, lemma, 6, inc_corpus_freq); t = Term.get (idfruwiki_conn.conn, lemma); assertFalse(null == t); assertEquals(10, t.getDocFreq()); assertEquals(65, t.getCorpusFreq()); Term.delete (idfruwiki_conn.conn, lemma); t = Term.get (idfruwiki_conn.conn, lemma); assertEquals(null, t); } public void testFillTerms_simple() { System.out.println("fillTerms_simple"); java.sql.Connection conn = idfsimplewiki_conn.conn; int df1, df2; t1.setDocFreq(0); t2.setDocFreq(0); df1 = t1.getDocFreq(); df2 = t2.getDocFreq(); assertEquals(0, df1); assertEquals(0, df2); Term.fillTerms (conn, tp_list); // tp_list lemmas: "SHOE" "BRUSH" df1 = t1.getDocFreq(); df2 = t2.getDocFreq(); assertEquals(doc_freq_t, df1); assertEquals(doc_freq_t, df2); } /** * Test of delete method, of class sql_idf.Term. */ public void testDelete_simple() { System.out.println("delete_simple"); Term t = null; java.sql.Connection conn = idfsimplewiki_conn.conn; // todo put something, get()!=null String lemma = "test_term.delete_simple"; int inc_corpus_freq = 55; t = Term.get (conn, lemma); assertEquals(null, t); int doc_freq = Term.incDocFreq (conn, t, lemma, 1, inc_corpus_freq); assertEquals(1, doc_freq); t = Term.get (conn, lemma); assertFalse(null == t); assertEquals(1, t.getDocFreq()); wikipedia.sql_idf.Term.delete(conn, lemma); t = Term.get (conn, lemma); assertEquals(null, t); } /** Checks constraints "doc_freq_max" in incLemmaDocFreq */ public void testIncLemmaDocFreq__doc_freq_max__simple() { System.out.println("incLemmaDocFreq__doc_freq_max__simple"); String page_title1, page_title2, page_title3; java.sql.Connection conn = idfsimplewiki_conn.conn; int doc_freq_max = 2; int n_rows_with_term_id, words_in_doc; words_in_doc = 3; t1.setDocFreq(0); t1.storeToDatabase(conn); t2.setDocFreq(0); t2.storeToDatabase(conn); // 1. success: 1 <= doc_freq_max page_title1 = "test_term.page_title1"; Page p1 = Page.getOrInsert(conn, page_title1, words_in_doc); Term.incLemmaDocFreq(conn, t1.getLemma(), words_in_doc, p1, doc_freq_max); n_rows_with_term_id = TermPage.countPagesWithTerm(conn, t1.getTermID()); assertEquals(1, n_rows_with_term_id); t1 = Term.get(conn, t1.getLemma()); assertEquals(1, t1.getDocFreq()); assertEquals(words_in_doc, t1.getCorpusFreq()); // 2. success: 2 <= doc_freq_max page_title2 = "test_term.page_title2"; Page p2 = Page.getOrInsert(conn, page_title2, words_in_doc+1); Term.incLemmaDocFreq(conn, t1.getLemma(), 2*words_in_doc, p2, doc_freq_max); n_rows_with_term_id = TermPage.countPagesWithTerm(conn, t1.getTermID()); assertEquals(2, n_rows_with_term_id); t1 = Term.get(conn, t1.getLemma()); assertEquals(2, t1.getDocFreq()); assertEquals((1+2)*words_in_doc, t1.getCorpusFreq()); // 3. fail: 3 > doc_freq_max => 2==n_rows_with_term_id String lemma = "t3 term_test"; page_title3 = "test_term.page_title3"; Page p3 = Page.getOrInsert(conn, page_title3, words_in_doc*3); Term.incLemmaDocFreq(conn, t1.getLemma(), 3*words_in_doc, p3, doc_freq_max); n_rows_with_term_id = TermPage.countPagesWithTerm(conn, t1.getTermID()); assertEquals(2, n_rows_with_term_id); // 2, not 3 t1 = Term.get(conn, t1.getLemma()); assertEquals(3, t1.getDocFreq()); // term.doc_freq = 3, real number of docs with term assertEquals((1+2+3)*words_in_doc, t1.getCorpusFreq()); // delete wikipedia.sql_idf.Page.delete(conn, page_title1); wikipedia.sql_idf.Page.delete(conn, page_title2); wikipedia.sql_idf.Page.delete(conn, page_title3); wikipedia.sql_idf.TermPage.deleteByTermID(conn, t1.getTermID()); } }