/* ParsedDB.java - Statistics of the database of the parsed Wiktionary. * * Copyright (c) 2010-2011 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com> * Distributed under EPL/LGPL/GPL/AL/BSD multi-license. */ package wikt.stat; import wikokit.base.wikt.sql.TLang; import wikokit.base.wikt.sql.TPOS; import wikokit.base.wikt.sql.TRelationType; import wikokit.base.wikipedia.sql.Statistics; import wikokit.base.wikipedia.sql.Connect; import wikt.stat.printer.CommonPrinter; import wikokit.base.wikipedia.language.LanguageType; import wikokit.base.wikt.constant.Label; import wikokit.base.wikt.constant.LabelCategoryLocal; import wikokit.base.wikt.multi.en.name.LabelEn; import wikokit.base.wikt.multi.ru.name.LabelCategoryRu; import wikokit.base.wikt.multi.ru.name.LabelRu; import wikokit.base.wikt.sql.label.TLabel; import wikokit.base.wikt.sql.label.TLabelCategory; /** Base parameters (number of records) of the database of the parsed Wiktionary. * * An example of the result table: * @see http://code.google.com/p/wikokit/wiki/Database_statistics * * The result could be inserted into the Wiktionary page. * @see todo */ public class ParsedDB { //private static final boolean DEBUG = true; /** Prints a row in a wiki table with data: * (1) the name of the table, 'table_name'; * (2) number of records in the table; * (3) the description. */ public static void printRowWithTableSize(Connect conn, String table_name,String description) { int size = Statistics.Count(conn, table_name); System.out.print("\n|-\n|| " + table_name + " || " + size + " || " + description); } public static void main(String[] args) { LanguageType native_lang; // Connect to wikt_parsed database Connect wikt_parsed_conn = new Connect(); // Russian //native_lang = LanguageType.ru; //wikt_parsed_conn.Open(Connect.RUWIKT_HOST, Connect.RUWIKT_PARSED_DB, Connect.RUWIKT_USER, Connect.RUWIKT_PASS, native_lang); // English native_lang = LanguageType.en; wikt_parsed_conn.Open(Connect.ENWIKT_HOST, Connect.ENWIKT_PARSED_DB, Connect.ENWIKT_USER, Connect.ENWIKT_PASS, native_lang); TLang.createFastMaps(wikt_parsed_conn); TPOS.createFastMaps(wikt_parsed_conn); TRelationType.createFastMaps(wikt_parsed_conn); LabelCategoryLocal temp0 = LabelCategoryRu.computing; // let's initialize maps in LabelCategoryRu class TLabelCategory.createFastMaps(wikt_parsed_conn); Label temp1 = LabelEn.Acadia; // let's initialize maps in LabelEn class Label temp2 = LabelRu.Yoruba; // ... in LabelRu class TLabel.createFastMaps(wikt_parsed_conn, native_lang); // System.out.println("LabelEn size = " + LabelEn.getAllLabels().size()); // System.out.println("LabelRu size = " + LabelRu.getAllLabels().size()); String db_name = wikt_parsed_conn.getDBName(); System.out.println("\n== Parameters of the created (parsed) Wiktionary database =="); CommonPrinter.printHeader (db_name); String empty_line = "\n|-\n|| || ||"; System.out.println("\n'''Table''' is a name of the table in the database."); System.out.println("\n'''Size''' is a number of records in the table."); System.out.println("\nThe table filled automatically by [http://code.google.com/p/wikokit/source/browse/trunk/wikt_parser/src/wikt/stat/ParsedDB.java wikt.stat.ParsedDB] of the ''wiwordik'' project.\n"); System.out.println("{| class=\"sortable prettytable\" style=\"text-align: left;\""); System.out.print("! Table || Size || Table description "); //int page_size = Statistics.Count(wikt_parsed_conn, "page"); //System.out.print("\n|-\n|| page || " + page_size + " || Number of words / entries"); printRowWithTableSize(wikt_parsed_conn, "page", "Number of words / entries"); printRowWithTableSize(wikt_parsed_conn, "relation", "Number of semantic relations, e.g. synonyms, antonyms, etc."); printRowWithTableSize(wikt_parsed_conn, "lang_pos", "Number of pairs: language & part of speech, one Wiktionary page can contain several such pairs."); printRowWithTableSize(wikt_parsed_conn, "wiki_text", "Number of meanings / definitions + number of semantic relations phrases (divided by comma, semicolon) + number of wikified translations."); printRowWithTableSize(wikt_parsed_conn, "wiki_text_words", "Number of wikified words (in meanings / definitions + in semantic relations + in translations)."); printRowWithTableSize(wikt_parsed_conn, "meaning", "Number of meanings, one word can have several meanings / definitions."); printRowWithTableSize(wikt_parsed_conn, "inflection", "It is extracted from wikified word definitions, e.g. <nowiki>[[normal form|</nowiki>'''inflection'''<nowiki>]]</nowiki>"); System.out.print(empty_line); printRowWithTableSize(wikt_parsed_conn, "label", "Number of unique labels."); printRowWithTableSize(wikt_parsed_conn, "label_category", "Number of categories of context labels."); printRowWithTableSize(wikt_parsed_conn, "label_meaning", "Number of labels used in meanings / definitions."); printRowWithTableSize(wikt_parsed_conn, "label_relation", "Number of labels used in semantic relations (only in ruwikt)."); System.out.print(empty_line); printRowWithTableSize(wikt_parsed_conn, "quote", "Number of quotations and examples, one meaning can have several quotes."); printRowWithTableSize(wikt_parsed_conn, "quot_translation", "Number of translations of quotes (quote in foreign languages can have translation)."); printRowWithTableSize(wikt_parsed_conn, "quot_transcription", "Number of transcriptions of quotes."); printRowWithTableSize(wikt_parsed_conn, "quot_ref", "Number of unique quote references (author, title, year,...)."); printRowWithTableSize(wikt_parsed_conn, "quot_author", "Number of authors of quotes."); printRowWithTableSize(wikt_parsed_conn, "quot_year", "Number of unique years (and range of years) of quotes."); printRowWithTableSize(wikt_parsed_conn, "quot_publisher", "Number of publishers of quotes."); printRowWithTableSize(wikt_parsed_conn, "quot_source", "Number of sources of quotes."); System.out.print(empty_line); printRowWithTableSize(wikt_parsed_conn, "translation", "Number of translation section boxes (at best: one translation box corresponds to one meaning)."); printRowWithTableSize(wikt_parsed_conn, "translation_entry", "Number of different translations (pairs of translations)."); // lang_pos with meaning || Number of words (pairs: language & part of speech) with non-empty meanings, definitions. It includes word forms. // todo may be System.out.println("\n|}"); CommonPrinter.printFooter(); wikt_parsed_conn.Close(); } }