/* LabelTableAll.java - context labels statistics in the database of the parsed Wiktionary.
*
* Copyright (c) 2013-2014 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com>
* Distributed under EPL/LGPL/GPL/AL/BSD multi-license.
*/
package wikt.stat;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import wikokit.base.wikipedia.language.LanguageType;
import wikokit.base.wikipedia.sql.Connect;
import wikokit.base.wikipedia.sql.Statistics;
import wikokit.base.wikt.api.WTMeaning;
import wikokit.base.wikt.constant.Label;
import wikokit.base.wikt.constant.LabelCategory;
import wikokit.base.wikt.constant.LabelCategoryLocal;
import wikokit.base.wikt.constant.POS;
import wikokit.base.wikt.multi.en.name.LabelEn;
import wikokit.base.wikt.multi.ru.name.LabelCategoryRu;
import wikokit.base.wikt.multi.ru.name.LabelRu;
import wikokit.base.wikt.sql.TLang;
import wikokit.base.wikt.sql.TLangPOS;
import wikokit.base.wikt.sql.TMeaning;
import wikokit.base.wikt.sql.TPOS;
import wikokit.base.wikt.sql.TPage;
import wikokit.base.wikt.sql.label.TLabel;
import wikokit.base.wikt.sql.label.TLabelCategory;
import wikokit.base.wikt.sql.label.TLabelMeaning;
import wikt.stat.printer.CommonPrinter;
/** Context labels statistics in the database of the parsed Wiktionary. */
public class LabelTableAll {
/** Debug switch: when true, countLabels() reads only a limited number of rows and prints progress,
 * and countNumberOfMeaningsWithLabels() prints extra details and stops early. */
private static final boolean DEBUG = false;
/** Number of label usages per language (incremented once per processed 'label_meaning' record in countLabels()). */
private static Map<LanguageType, Integer> m_lang_n = new HashMap<LanguageType, Integer>();
/** Number of meanings for each label: &lt;label, (example_words and counter)&gt;. */
private static Map<Label, ObjectWithWords> m_label_n = new HashMap<Label, ObjectWithWords>();
/** Limit checked by ObjectWithWords.add() before one more example word is appended to an existing entry. */
private static int MAX_EXAMPLE_WORDS = 3;
/** Usage record of one context label: how many times the label was met
 * and a short list of example page titles where it was met.
 */
private static class ObjectWithWords {

    /** The label this record describes. */
    public Label label;

    /** Titles of several entries which use this label. */
    public List<String> example_words;

    /** How many times this label was met in Wiktionary entries. */
    public int counter;

    /** Creates an empty record (zero counter, no example words) for the label. */
    ObjectWithWords(Label _label) {
        this.label = _label;
        this.example_words = new ArrayList<String>();
        this.counter = 0;
    }

    /** Registers one more usage of the label in the map m.
     * A label seen for the first time gets a new record with the counter 1 and
     * page_title as its first example word. For a known label the counter is
     * incremented, and page_title is stored as an example only while the list
     * holds fewer than MAX_EXAMPLE_WORDS words and does not contain it yet.
     *
     * @param page_title title of the page where the label was met
     * @param _label     the label, a warning is printed and nothing is changed if it is null
     * @param m          map from label to its usage record, it is updated in place
     */
    private static void add(String page_title,
                            Label _label,
                            Map<Label, ObjectWithWords> m)
    {
        if (_label == null) {
            System.out.println("Warning (LabelTableAll.ObjectWithWords.add()): page=" +page_title+ " with empty _object_name!");
            return;
        }

        ObjectWithWords record = m.get(_label);
        if (record != null) {
            // known label: count it, keep the example list short and free of duplicates
            record.counter++;
            boolean has_room = record.example_words.size() < MAX_EXAMPLE_WORDS;
            if (has_room && !record.example_words.contains(page_title)) {
                record.example_words.add(page_title);
            }
            return;
        }

        // first usage of this label
        record = new ObjectWithWords(_label);
        record.counter = 1;
        record.example_words.add(page_title);
        m.put(_label, record);
    }
} // eo class ObjectWithWords
/** Counts usage of context labels by iterating over all records of the table
 * 'label_meaning' of the database of the parsed Wiktionary.<br><br>
 * SELECT label_id, meaning_id FROM label_meaning;
 * <br><br>
 * Side effects: increments the per-language counters in m_lang_n, collects the
 * counter and example words of every label in m_label_n and prints a summary
 * to System.out.
 *
 * @param wikt_parsed_conn connection to the database of the parsed Wiktionary
 * @return map from the language to the number of processed 'label_meaning' records
 *         which belong to entries in this language (the shared static map m_lang_n)
 */
public static Map<LanguageType, Integer> countLabels (Connect wikt_parsed_conn) {
    // label_meaning -> meaning -> lang_pos -> lang -> count

    int n_unknown_lang_pos = 0; // records with labels, but language and POS are unknown
    int n_total = Statistics.Count(wikt_parsed_conn, "label");
    long t_start = System.currentTimeMillis();

    Map<Integer, Label> id2label = TLabel.getAllID2Labels();

    // in the debug mode only a part of the table is processed
    String str_sql = DEBUG
            ? "SELECT label_id, meaning_id FROM label_meaning LIMIT 5000"
            : "SELECT label_id, meaning_id FROM label_meaning";

    // try-with-resources closes the result set and the statement in any case
    try (Statement s = wikt_parsed_conn.conn.createStatement();
         ResultSet rs = s.executeQuery(str_sql))
    {
        int n_cur = 0;
        while (rs.next ())
        {
            n_cur ++;
            int label_id = rs.getInt("label_id");
            Label label = id2label.get( label_id );

            // every object is checked before it is dereferenced
            TMeaning m = TMeaning.getByID(wikt_parsed_conn, rs.getInt("meaning_id"));
            if(null == m) {
                System.out.println("Warning (LabelTableAll.countLabels()): there is label with label_id=" +label_id+ " with NULL meaning_id!");
                continue;
            }

            TLangPOS lang_pos = m.getLangPOS(wikt_parsed_conn);
            TLang tlang = (null == lang_pos) ? null : lang_pos.getLang();
            if(null == tlang) {
                n_unknown_lang_pos ++;
                continue;
            }

            LanguageType lang = tlang.getLanguage();
            Integer n = m_lang_n.get(lang);
            m_lang_n.put(lang, null == n ? 1 : n + 1);

            TPage tpage = lang_pos.getPage();
            if(null != label && null != tpage)
                ObjectWithWords.add(tpage.getPageTitle(), label, m_label_n);

            if(DEBUG && 0 == n_cur % 1000) {
                // NOTE: n_total is the size of the table 'label', while the loop goes
                // through 'label_meaning', so the remaining time is only a rough estimate.
                long t_cur, t_remain;

                t_cur = System.currentTimeMillis() - t_start;
                // time for 1 record = t_cur / n_cur; in min, since /(60*1000)
                t_remain = (long)((n_total - n_cur) * t_cur/(60f*1000f*(float)(n_cur)));
                t_cur = (long)(t_cur/(60f*1000f));

                if(null != tpage) {
                    System.out.println(n_cur + ": " + tpage.getPageTitle() +
                        ", duration: " + t_cur +
                        " min, remain: " + t_remain +
                        " min");
                }
            }
        }
    } catch(SQLException ex) {
        // errors go to stderr (as in countNumberOfMeaningsWithLabels), stdout carries the wiki page
        System.err.println("SQLException (LabelTableAll.countLabels()): " + ex.getMessage());
    }

    int n_labels_by_hand = TLabel.countLabelsAddedByHand (wikt_parsed_conn);
    int n_labels_found_by_parser = TLabel.countLabelsFoundByParser(wikt_parsed_conn);

    System.out.println(
        "\nTotal unique labels: " + n_total +
        "\n\nUnique labels added by hand: " + n_labels_by_hand +
        "\n\nUnique labels found by parser: " + n_labels_found_by_parser +
        "\n\nThere are labels for words in " + m_lang_n.size() + " languages." +
        "\n\nUnknown<ref>'''Unknown''' - number of words with labels (but language code and POS are unknown)</ref>: "
        + n_unknown_lang_pos);

    return m_lang_n;
}
/** Prints a wiki table with the labels which were added by hand
 * (i.e. labels which are not reported as found by the parser): English short name,
 * template, short name, name, category, usage counter and example words.
 *
 * @param m_source_n map from label to its usage counter and example words
 */
private static void printLabelsAddedByHand (
                    Map<Label, ObjectWithWords> m_source_n)
{
    System.out.println("\n=== Labels added by hand ===");

    System.out.println("{| class=\"sortable prettytable\" style=\"text-align: center;\"");
    System.out.println("! English !! Template !! Short name !! Name !! Category !! Counter !! words");

    // one table row per label
    for (Map.Entry<Label, ObjectWithWords> entry : m_source_n.entrySet()) {
        Label cur_label = entry.getKey();
        ObjectWithWords usage = entry.getValue();

        LabelEn linked_label_en = cur_label.getLinkedLabelEn();
        if (LabelEn.isLabelFoundByParser( linked_label_en )) {
            continue;   // this table lists only labels added by hand
        }
        LabelCategory label_category = linked_label_en.getCategory();

        System.out.println("|-");
        System.out.print(
            "|" + cur_label.getShortNameEnglish() +
            "||{{" + cur_label.getShortName() +
            "}}||" + cur_label.getShortName() +
            "||" + cur_label.getName() +
            "||" + label_category.getName() +
            "||" + usage.counter + "||" );

        // example words as wiki links separated by comma
        StringBuilder links = new StringBuilder();
        String separator = "";
        for (String word : usage.example_words) {
            links.append(separator).append("[[").append(word).append("]]");
            separator = ", ";
        }
        System.out.println( links.toString() );
    }
    System.out.println("|}");
}
/** Prints a wiki table with the labels which were found by the parser:
 * short name, length of the short name, usage counter and example words.
 *
 * @param m_source_n map from label to its usage counter and example words
 */
private static void printLabelsFoundByParser (
                    Map<Label, ObjectWithWords> m_source_n)
{
    System.out.println("\n=== Labels found by parser ===");

    System.out.println("{| class=\"sortable prettytable\" style=\"text-align: center;\"");
    System.out.println("! Short name !! Length !! Counter !! words");

    // one table row per label
    for (Map.Entry<Label, ObjectWithWords> entry : m_source_n.entrySet()) {
        Label cur_label = entry.getKey();
        ObjectWithWords usage = entry.getValue();

        boolean found_by_parser = LabelEn.isLabelFoundByParser( cur_label.getLinkedLabelEn() );
        if (!found_by_parser) {
            continue;   // labels added by hand are printed in another table
        }

        // the plus sign is wrapped by nowiki, since it causes problems in wiki tables
        String raw_name = cur_label.getShortName();
        String wiki_name = raw_name.replace("+", "<nowiki>+</nowiki>");

        System.out.println("|-");
        System.out.print(
            "|" + wiki_name +
            "||" + cur_label.getShortName().length() +
            "||" + usage.counter + "||" );

        // example words as wiki links separated by comma
        StringBuilder links = new StringBuilder();
        String separator = "";
        for (String word : usage.example_words) {
            links.append(separator).append("[[").append(word).append("]]");
            separator = ", ";
        }
        System.out.println( links.toString() );
    }
    System.out.println("|}");
}
/** Prints statistics about only
 * (1) regional context labels added by hand (LabelCategory = regional) and
 * (2) regional context labels found by parser (LabelCategory = "regional automatic").
 * <br><br>
 * After the table two totals are printed: the number of unique regional labels
 * and the sum of their usage counters.
 *
 * @param m_source_n map from label to its usage counter and example words
 */
private static void printRegionalLabels (
                    Map<Label, ObjectWithWords> m_source_n)
{
    System.out.println("\n=== Regional labels ===");
    System.out.println("\nRegional labels added by hand, category = \"regional\".");
    System.out.println("\nRegional labels found by parser, category = \"regional automatic\".");

    System.out.println("{| class=\"sortable prettytable\" style=\"text-align: center;\"");
    System.out.println("! Short name !! Category !! Counter !! words");

    int counter = 0;    // number of printed (regional) labels
    int total = 0;      // sum of usage counters of the printed labels

    for (Map.Entry<Label, ObjectWithWords> entry : m_source_n.entrySet()) {
        Label _label = entry.getKey();
        ObjectWithWords s_w = entry.getValue();

        // print only regional labels
        LabelCategory lc = _label.getCategory();
        boolean is_regional = lc == LabelCategory.regional ||
                              lc == LabelCategory.regional_automatic;
        if (!is_regional)
            continue;

        counter ++;
        total += s_w.counter;

        // replace since there are problems in wiki tables
        String short_name = _label.getShortName().replace("+", "<nowiki>+</nowiki>");

        System.out.println("|-");
        System.out.print(
            "|" + short_name +
            "||" + lc +
            "||" + s_w.counter + "||" );

        // example words as wiki links separated by comma
        StringBuilder sb = new StringBuilder();
        String separator = "";
        for (String w : s_w.example_words) {
            sb.append(separator).append("[[").append(w).append("]]");
            separator = ", ";
        }
        System.out.println( sb.toString() );
    }
    System.out.println("|}");

    // Unique regional labels without the empty 'regional' label (i.e. the label without regions).
    // NOTE(review): the subtraction assumes that the empty 'regional' label is among the
    // printed ones - confirm; the result is clamped so that a map without any regional
    // labels is reported as 0 instead of -1.
    int unique = Math.max(0, counter - 1);

    System.out.println("\nUnique regional labels used in definitions: " + unique );
    System.out.println("\nTotal regional labels used in definitions: " + total );
}
/** Sums usage counters of labels for each label category (labels without
 * category are skipped) and prints the result as the wiki table "Label categories".
 * For the Russian Wiktionary the table has an additional first column
 * with the Russian name of the category.
 *
 * @param m_source_n map from label to its usage counter and example words
 * @param wikt_lang  language of the Wiktionary, LanguageType.ru switches on the additional column
 */
private static void calcAndPrintAddedByHandLabelCategories (
                    Map<Label, ObjectWithWords> m_source_n,
                    LanguageType wikt_lang)
{
    // 1. sum labels for each category: <label category, total number>
    Map<LabelCategory, Integer> category_to_total = new HashMap<LabelCategory, Integer>();

    for (Map.Entry<Label, ObjectWithWords> entry : m_source_n.entrySet()) {
        LabelCategory category = entry.getKey().getCategory();
        if (category == null) {
            continue;
        }
        int usages = entry.getValue().counter;
        Integer previous = category_to_total.get(category);
        category_to_total.put(category, previous == null ? usages : previous + usages);
    }

    // 2. print table
    System.out.println("\n=== Label categories ===");
    System.out.println("\nNumber of labels for each label's category.");
    System.out.println("\nThese labels were added by hand only, since labels added automatically don't have categories (except \"regional automatic\" labels in Russian Wiktionary).\n");

    // + translation of label category into Russian
    boolean with_russian = (wikt_lang == LanguageType.ru);
    String header_prefix = with_russian ? "! in Russian !" : "";

    System.out.println("{| class=\"sortable prettytable\" style=\"text-align: center;\"");
    System.out.println(header_prefix+"! Category !! Parent category !! Number");

    int grand_total = 0;
    for (Map.Entry<LabelCategory, Integer> row : category_to_total.entrySet()) {
        LabelCategory category = row.getKey();
        int number = row.getValue();

        String translation_cell = "";
        if (with_russian) {
            translation_cell = "|"+LabelCategoryRu.getName(category)+"|";
        }

        System.out.println("|-");
        System.out.println(
            translation_cell +
            "|" + category.getName() +
            "||" + category.getParent().getName() +
            "||" + number );
        grand_total += number;
    }
    System.out.println("|}");
    System.out.println("\nTotal labels added by hand (with categories): " + grand_total );
}
/** Maximum number of meanings in one article (language - POS level); size of both dimensions of the arrays below. */
static final int MAX_MEANINGS = 100;
// All arrays below are indexed as [number of meanings with labels][total number of meanings]
// and are filled by countNumberOfMeaningsWithLabels().
// The String arrays keep one example page title per cell (the last one written to the cell).
// ar_labels_meanings_words - all words, including {noun, verb, adverb, adjective} + pronoun, conjunction, etc.
private static String[][] ar_labels_meanings_words = new String[MAX_MEANINGS][MAX_MEANINGS];
private static String[][] ar_labels_meanings_words_noun = new String[MAX_MEANINGS][MAX_MEANINGS];
private static String[][] ar_labels_meanings_words_verb = new String[MAX_MEANINGS][MAX_MEANINGS];
private static String[][] ar_labels_meanings_words_adverb = new String[MAX_MEANINGS][MAX_MEANINGS];
private static String[][] ar_labels_meanings_words_adjective = new String[MAX_MEANINGS][MAX_MEANINGS];
// The int arrays keep the number of entries (language - POS level) per cell;
// ar_labels_meanings - all languages and POS, the other four - only one language and one POS.
private static int[][] ar_labels_meanings = new int[MAX_MEANINGS][MAX_MEANINGS];
private static int[][] ar_labels_meanings_noun = new int[MAX_MEANINGS][MAX_MEANINGS];
private static int[][] ar_labels_meanings_verb = new int[MAX_MEANINGS][MAX_MEANINGS];
private static int[][] ar_labels_meanings_adverb = new int[MAX_MEANINGS][MAX_MEANINGS];
private static int[][] ar_labels_meanings_adjective = new int[MAX_MEANINGS][MAX_MEANINGS];
/** Counts number of meanings with labels for every entry (language - POS level),
 * writes result to the static two-dimensional arrays and stores example words.
 * (1) Counts all words, writes to ar_labels_meanings and ar_labels_meanings_words.
 * (2) Counts ar_labels_meanings_noun, _verb, _adverb, _adjective only for one language: only_lang
 * <br><br>
 * The arrays are indexed as [X][Y] = Z, where
 *          X - number of meanings with labels;
 *          Y - total number of meanings;
 *          Z - number of words with Y meanings, where X meanings have one or more labels (X <= Y).
 * <br><br>
 * Entries which do not fit into the arrays (MAX_MEANINGS or more meanings)
 * are skipped with a warning printed to System.err.
 * The method returns nothing, results are available only via the static arrays.
 *
 * @param wikt_parsed_conn connection to the database of the parsed Wiktionary
 * @param only_lang only words of this language are counted in the per-POS arrays
 *                  (noun, verb, adverb, adjective); words of all languages are
 *                  always counted in ar_labels_meanings
 */
public static void countNumberOfMeaningsWithLabels ( Connect wikt_parsed_conn,
                                                    LanguageType only_lang) {
    // lang_pos -> meaning -> label_meaning

    int n_total = Statistics.Count(wikt_parsed_conn, "lang_pos");
    long t_start = System.currentTimeMillis();

    // try-with-resources closes the result set and the statement in any case
    try (Statement s = wikt_parsed_conn.conn.createStatement ();
         ResultSet rs = s.executeQuery ("SELECT id FROM lang_pos"))
    {
        int n_cur = 0;
        while (rs.next ())
        {
            n_cur ++;
            int id = rs.getInt("id");
            TLangPOS lang_pos_not_recursive = TLangPOS.getByID (wikt_parsed_conn, id);// fields are not filled recursively
            if(null == lang_pos_not_recursive)
                continue;
            LanguageType lang = lang_pos_not_recursive.getLang().getLanguage();

            TPage tpage = lang_pos_not_recursive.getPage();
            String page_title = tpage.getPageTitle();

            int n_meaning = WTMeaning.countMeanings(wikt_parsed_conn, lang_pos_not_recursive);
            if(0 == n_meaning)
                continue;       // only meanings with nonempty definitions

            POS p = lang_pos_not_recursive.getPOS().getPOS();

            if(DEBUG)
                System.out.print("\n" + page_title + ", meanings:" + n_meaning);

            int meanings_with_labels = 0;
            TMeaning[] mm = TMeaning.get(wikt_parsed_conn, lang_pos_not_recursive);
            for(TMeaning m : mm) {
                String meaning_text = m.getWikiTextString();
                if(0 == meaning_text.length())
                    continue;

                if(DEBUG)
                    System.out.print("\n    def: " + meaning_text);

                Label[] labels = TLabelMeaning.get(wikt_parsed_conn, m);
                if(null != labels && labels.length > 0)
                    meanings_with_labels ++;
            }

            // the arrays have the fixed size, one huge entry should not abort the whole run
            if(n_meaning >= MAX_MEANINGS || meanings_with_labels >= MAX_MEANINGS) {
                System.err.println("Warning (LabelTableAll.countNumberOfMeaningsWithLabels()): page=" + page_title +
                        " is skipped, since number of meanings (" + n_meaning +
                        ") or meanings with labels (" + meanings_with_labels +
                        ") is not less than MAX_MEANINGS=" + MAX_MEANINGS);
                continue;
            }

            ar_labels_meanings       [meanings_with_labels] [n_meaning] ++;
            ar_labels_meanings_words [meanings_with_labels] [n_meaning] = page_title;

            if(only_lang == lang) { // calculates labels for 4 POS only for one language
                switch(p.toString()) {
                    case "noun":
                        ar_labels_meanings_noun       [meanings_with_labels] [n_meaning] ++;
                        ar_labels_meanings_words_noun [meanings_with_labels] [n_meaning] = page_title;
                        break;
                    case "verb":
                        ar_labels_meanings_verb       [meanings_with_labels] [n_meaning] ++;
                        ar_labels_meanings_words_verb [meanings_with_labels] [n_meaning] = page_title;
                        break;
                    case "adverb":
                        ar_labels_meanings_adverb       [meanings_with_labels] [n_meaning] ++;
                        ar_labels_meanings_words_adverb [meanings_with_labels] [n_meaning] = page_title;
                        break;
                    case "adjective":
                        ar_labels_meanings_adjective       [meanings_with_labels] [n_meaning] ++;
                        ar_labels_meanings_words_adjective [meanings_with_labels] [n_meaning] = page_title;
                        break;
                }
            }

            if(0 == n_cur % 1000) {
                if(DEBUG && n_cur > 2999)
                    break;

                long t_cur, t_remain;

                t_cur = System.currentTimeMillis() - t_start;
                // time for 1 entry = t_cur / n_cur; in min, since /(60*1000)
                t_remain = (long)((n_total - n_cur) * t_cur/(60f*1000f*(float)(n_cur)));
                t_cur = (long)(t_cur/(60f*1000f));

                // the page title was missing in the message ("1000: , duration: ...")
                System.out.println(n_cur + ": " + page_title +
                    ", duration: " + t_cur +
                    " min, remain: " + t_remain +
                    " min");
            }
        } // eo while
    } catch(SQLException ex) {
        System.err.println("SQLException (LabelTableAll.countNumberOfMeaningsWithLabels()): " + ex.getMessage());
    }
}
/** Prints two wiki tables of the same shape: the first one with numbers from ar,
 * the second one with example words (wiki links) from ar_words.
 * Rows are numbers of meanings with labels (from 0), columns are total numbers
 * of meanings (from 1); both run up to the largest column index with a non-zero cell in ar.
 * Totals are printed between the tables.
 *
 * @param ar       counters, indexed as [meanings with labels][total meanings]
 * @param ar_words example page titles with the same indexing, empty cells are null
 */
private static void printMeaningsLabelsTableNumbersAndTableWords (
                        int[][] ar,
                        String[][] ar_words) {

    // 1. Find the largest column (total meanings) and the largest row
    //    (meanings with labels) which hold a non-zero value
    int last_column = 0;
    int last_row = 0;
    for (int row = 0; row < MAX_MEANINGS; row++) {
        for (int col = 0; col < MAX_MEANINGS; col++) {
            if (ar[row][col] <= 0)
                continue;
            if (col > last_column)
                last_column = col;
            if (row > last_row)
                last_row = row;
        }
    }

    // both tables share the header line: top-left cell, then numbers of meanings of words
    StringBuilder header = new StringBuilder("! Y \\ X ");
    for (int col = 1; col <= last_column; col++)
        header.append("!!").append(col);
    String header_line = header.toString();

    // 2. Table with numbers
    System.out.println("{| class=\"sortable prettytable\" style=\"text-align: center;\"");
    System.out.println(header_line);

    int entries_with_labels = 0;
    for (int row = 0; row <= last_column; row++) {  // number of meanings with labels
        System.out.println("|-");

        StringBuilder line = new StringBuilder("|").append(row); // vertical header
        for (int col = 1; col <= last_column; col++) {           // number of meanings
            int value = ar[row][col];
            line.append("||").append(value);
            if (row > 0)
                entries_with_labels += value;   // counts only entries with labels
        }
        System.out.println(line.toString());
    }
    System.out.println("|}");
    System.out.println("\nTotal number of entries (POS-level) with labels: " + entries_with_labels );
    System.out.println("\nMaximum number of meanings (with labels): "+last_column);
    System.out.println("\nMaximum number of meanings marked by labels: "+last_row);

    // 3. Table with example words
    System.out.println("\n\nThe same table with example words: ");
    System.out.println("{| class=\"sortable prettytable\" style=\"text-align: center;\"");
    System.out.println(header_line);

    for (int row = 0; row <= last_column; row++) {
        System.out.println("|-");
        System.out.print( "|" + row);   // vertical header
        for (int col = 1; col <= last_column; col++) {
            String word = ar_words[row][col];
            String cell;
            if (word != null)
                cell = "[[" + word + "]]";
            else
                cell = "";
            System.out.print("||" + cell);
        }
        System.out.println("");
    }
    System.out.println("|}");
}
/** Prints number of meanings with labels in form of tables: one pair of tables
 * (numbers and example words) for all languages and POS, then one pair for each
 * of four POS (noun, verb, adverb, adjective) of the language native_lang.<br><br>
 *
 * All arrays are indexed as [number of meanings with labels][total number of meanings],
 * in the printed tables the first index is vertical (Y) and the second is horizontal (X).
 *
 * @param ar_labels_meanings                 counters for all languages and POS
 * @param ar_labels_meanings_words           example words for all languages and POS
 * @param ar_labels_meanings_noun            counters for nouns
 * @param ar_labels_meanings_words_noun      example words for nouns
 * @param ar_labels_meanings_verb            counters for verbs
 * @param ar_labels_meanings_words_verb      example words for verbs
 * @param ar_labels_meanings_adverb          counters for adverbs
 * @param ar_labels_meanings_words_adverb    example words for adverbs
 * @param ar_labels_meanings_adjective       counters for adjectives
 * @param ar_labels_meanings_words_adjective example words for adjectives
 * @param native_lang language whose name is printed in headers of the per-POS sections
 */
private static void printNumberOfMeaningsWithLabels (
                    int[][] ar_labels_meanings, String[][] ar_labels_meanings_words,
                    int[][] ar_labels_meanings_noun, String[][] ar_labels_meanings_words_noun,
                    int[][] ar_labels_meanings_verb, String[][] ar_labels_meanings_words_verb,
                    int[][] ar_labels_meanings_adverb, String[][] ar_labels_meanings_words_adverb,
                    int[][] ar_labels_meanings_adjective, String[][] ar_labels_meanings_words_adjective,
                    LanguageType native_lang)
{
    System.out.println("\n=== Number of meanings with labels ===");

    // The legend of Z was printed with X and Y swapped: X is the total number of meanings,
    // Y is the number of labeled meanings, so a word has X meanings and Y <= X.
    System.out.println("\nTable contains two-dimensional integer array, where [X][Y] = Z means that \n"
             + ":: X (horizontal) - total number of meanings; \n" +
               ":: Y (vertical) - number of meanings with labels; \n" +
               ":: Z (value in cell) - number of words with X meanings, where Y meanings have one or more labels (Y <= X)");

    System.out.println("E.g. \"[[:ru:abdomen#Английский]]\" has 3 meanings, where 2 meanings are marked by labels, then [3][2] ++ (increments value in this cell of the table).");

    System.out.println("\n==== All ====");
    System.out.println("\n\nThese two tables (numbers and words) contains information about all languages and POS.");
    printMeaningsLabelsTableNumbersAndTableWords(ar_labels_meanings, ar_labels_meanings_words);

    // the four per-POS sections differ only by the POS name and the arrays
    String[]     pos_names  = { "noun", "verb", "adverb", "adjective" };
    int[][][]    pos_counts = { ar_labels_meanings_noun, ar_labels_meanings_verb,
                                ar_labels_meanings_adverb, ar_labels_meanings_adjective };
    String[][][] pos_words  = { ar_labels_meanings_words_noun, ar_labels_meanings_words_verb,
                                ar_labels_meanings_words_adverb, ar_labels_meanings_words_adjective };

    for (int i = 0; i < pos_names.length; i++) {
        String lang_name = native_lang.getName();
        System.out.println("\n==== " + lang_name + ", " + pos_names[i] + " ====");
        System.out.println("\n\nThis table takes into account only " + lang_name + " words, POS = " + pos_names[i] + ".");
        printMeaningsLabelsTableNumbersAndTableWords(pos_counts[i], pos_words[i]);
    }
}
/** Entry point: connects to the database of the parsed (Russian) Wiktionary,
 * collects statistics of context labels and prints it to System.out as a wiki page.
 * The database is read first (parts 1 and 2), then the connection is closed
 * and the collected data are printed.
 *
 * @param args command line arguments, they are not used
 */
public static void main(String[] args) {
// Connect to wikt_parsed database
Connect wikt_parsed_conn = new Connect();
// Russian
LanguageType native_lang = LanguageType.ru;
wikt_parsed_conn.Open(Connect.RUWIKT_HOST, Connect.RUWIKT_PARSED_DB, Connect.RUWIKT_USER, Connect.RUWIKT_PASS, LanguageType.ru);
// English
//LanguageType native_lang = LanguageType.en;
//wikt_parsed_conn.Open(Connect.ENWIKT_HOST, Connect.ENWIKT_PARSED_DB, Connect.ENWIKT_USER, Connect.ENWIKT_PASS, LanguageType.en);
// NOTE(review): the order of the initialization calls below presumably matters
// (maps of languages, POS, label categories, then labels) - confirm before reordering.
TLang.createFastMaps(wikt_parsed_conn);
TPOS.createFastMaps(wikt_parsed_conn);
// ? TRelationType.createFastMaps(wikt_parsed_conn);
// temp0, temp1, temp2 are not used afterwards, the fields are read only to initialize the classes
LabelCategoryLocal temp0 = LabelCategoryRu.computing; // let's initialize maps in LabelCategoryRu class
TLabelCategory.createFastMaps(wikt_parsed_conn);
Label temp1 = LabelEn.Acadia; // let's initialize maps in LabelEn class
Label temp2 = LabelRu.Yoruba; // ... in LabelRu class
TLabel.createFastMaps(wikt_parsed_conn, native_lang);
String db_name = wikt_parsed_conn.getDBName();
System.out.println("\n== Statistics of context labels in the Wiktionary parsed database ==");
// the date below is a fixed string, it is not calculated
System.out.println("\n''Last updated: summer 2014.''");
CommonPrinter.printHeader (db_name);
// part 1: totals from the table 'label_meaning', then per-language and per-label counters
int n_label_meaning = Statistics.Count(wikt_parsed_conn, "label_meaning");
System.out.println("\nTotal labels used in definitions (meanings): " + n_label_meaning );
int n_meaning = Statistics.countDistinct(wikt_parsed_conn, "label_meaning", "meaning_id");
System.out.println("\nTotal definitions with labels: " + n_meaning );
Map<LanguageType, Integer> m = LabelTableAll.countLabels(wikt_parsed_conn);
// part 2: fills the static arrays ar_labels_meanings*
LabelTableAll.countNumberOfMeaningsWithLabels(wikt_parsed_conn, native_lang);
// all data are collected, the printing below does not use the connection
wikt_parsed_conn.Close();
System.out.println();
// part 1 (print)
CommonPrinter.printSomethingPerLanguage(native_lang, m);
/** Number of using labels in meanings (definitions) */
LabelTableAll.printLabelsAddedByHand(m_label_n);
LabelTableAll.printLabelsFoundByParser(m_label_n);
LabelTableAll.printRegionalLabels(m_label_n);
LabelTableAll.calcAndPrintAddedByHandLabelCategories(m_label_n, native_lang);
// part 2 (print)
LabelTableAll.printNumberOfMeaningsWithLabels(
ar_labels_meanings, ar_labels_meanings_words,
ar_labels_meanings_noun, ar_labels_meanings_words_noun,
ar_labels_meanings_verb, ar_labels_meanings_words_verb,
ar_labels_meanings_adverb, ar_labels_meanings_words_adverb,
ar_labels_meanings_adjective, ar_labels_meanings_words_adjective,
native_lang);
CommonPrinter.printFooter();
}
}