/* WTranslationEntryRu.java - corresponds to a line in Translations of a word * in Russian Wiktionary. * * Copyright (c) 2009 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com> * Distributed under GNU General Public License. */ package wikokit.base.wikt.multi.ru; import wikokit.base.wikt.word.WTranslationEntry; import wikokit.base.wikipedia.language.LanguageType; import wikokit.base.wikt.util.WikiText; /** One line in the Translation section, i.e. a translation to one language, * e.g. "|en=[[airplane]], [[plane]], [[aircraft]]". */ public class WTranslationEntryRu { /** Parses one entry (one line) of a translation box, * extracts a language and a list of translations (wikified words) for this language, * creates and fills WTranslationEntry. * * @param wikt_lang language of Wiktionary * @param page_title word which are described in this article 'text' * @param text translaton box text * @return WTranslationEntry or null if the translation language or translation text are absent. */ public static WTranslationEntry parse( String page_title, String text) { // split "en=[[little]] [[bell]], [[handbell]], [[doorbell]]" into "en" and remain int pos_equal_sign = text.indexOf('='); if(-1 == pos_equal_sign) return null; // does exist any translation after "=" if(pos_equal_sign + 1 > text.length()) // Warnings and error messages are interesting return null; // only when there are any translations // 1. language code String lang_code = text.substring(0, pos_equal_sign).trim(); if(!LanguageType.has(lang_code)) { // concise logging: only one message for one uknown language code if(!LanguageType.hasUnknownLangCode(lang_code)) { LanguageType.addUnknownLangCode(lang_code); System.out.println("Warning in WTranslationEntryRu.parse(): The article '"+ page_title + "' has translation into unknown language with code: " + lang_code + "."); } if(lang_code.length() > 10) System.out.println("Error in WTranslationEntryRu.parse(): The article '"+ page_title + "' has too long unknown language code: " + lang_code + "."); return null; } LanguageType lang = LanguageType.get(lang_code); // 2. translation wikified text String trans_text = text.substring(pos_equal_sign+1); if(0 == trans_text.length() || trans_text.equalsIgnoreCase("[[]]")) return null; WikiText[] wt = WikiText.createSplitByComma(page_title, trans_text); if(0 == wt.length) return null; return new WTranslationEntry(lang, wt); } }