package wikokit.base.wikt.multi.ru.name;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
import wikokit.base.wikt.constant.Label;
import wikokit.base.wikt.constant.LabelCategory;
import wikokit.base.wikt.multi.en.name.LabelEn;
import wikokit.base.wikt.util.LabelsText;
/**
 * Unit tests for {@code LabelRu.extractLabelsTrimText}, which is fed one line of
 * a Russian Wiktionary definition and returns a {@code LabelsText}: the context
 * labels found in the line plus the remaining definition text.
 *
 * <p>Every test prints its own name to standard output before running.
 */
public class LabelRuTest {

    /** Empty label array, expected when the definition line yields no context labels. */
    private static final Label[] NULL_LABEL_ARRAY = new Label[0];

    public LabelRuTest() {
    }

    /** No class-level fixture is prepared. */
    @BeforeClass
    public static void setUpClass() {
    }

    /** No class-level fixture is released. */
    @AfterClass
    public static void tearDownClass() {
    }

    /** No per-test fixture is prepared. */
    @Before
    public void setUp() {
    }

    /** No per-test fixture is released. */
    @After
    public void tearDown() {
    }

    // ///////////////////////////////////////////////////////////
    // shared assertions

    /**
     * Fails unless {@code LabelsText.equals} reports the two values as equal.
     *
     * @param expected the labels and text the parser should have produced
     * @param actual   the labels and text the parser did produce
     */
    private static void assertLabelsTextEquals(LabelsText expected, LabelsText actual) {
        assertTrue("extracted labels and text differ from the expected ones",
                LabelsText.equals(expected, actual));
    }

    /**
     * Fails unless {@code LabelEn.getCategoryByLabel} maps the label to the
     * category {@code LabelCategory.regional}, whose name is "regional".
     *
     * @param label the label whose category is checked
     */
    private static void assertRegionalCategory(Label label) {
        LabelCategory category = LabelEn.getCategoryByLabel(label);
        assertNotNull(category);
        assertEquals("regional", category.getName());
        assertEquals(LabelCategory.regional, category);
    }

    // ///////////////////////////////////////////////////////////
    // extractLabelsTrimText

    /** Plain text without templates: no labels are expected and the text is unchanged. */
    @Test
    public void testExtractLabelsTrimText_without_template_labels() {
        System.out.println("extractLabelsTrimText_without_template_labels");

        String line = "text without any labels and templates";
        LabelsText expResult = new LabelsText(NULL_LABEL_ARRAY, line);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    /** A template that is not a label: no labels are expected and the text is unchanged. */
    @Test
    public void testExtractLabelsTrimText_with_template_but_not_a_valid_label() {
        System.out.println("extractLabelsTrimText_with_template_but_not_a_valid_label");

        String line = "text {{with unknown template, but it is not a valid labеl}} sure";
        LabelsText expResult = new LabelsText(NULL_LABEL_ARRAY, line);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    /**
     * One context label at the start of the line: {{амер.}} is expected to be
     * extracted as {@code LabelEn.US} and removed from the text.
     * The test also checks that {@code LabelRu.US} equals {@code LabelEn.US}.
     */
    @Test
    public void testExtractLabelsTrimText_with_one_context_label() {
        System.out.println("extractLabelsTrimText_with_one_context_label");

        String line = "{{амер.}} [[самолёт]], [[аэроплан]]"; // http://ru.wiktionary.org/wiki/airplane
        String expectedText = "[[самолёт]], [[аэроплан]]";

        assertTrue(Label.equals(LabelEn.US, LabelRu.US));

        Label[] expectedLabels = { LabelEn.US };
        LabelsText expResult = new LabelsText(expectedLabels, expectedText);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    /**
     * {{амер.|en}}: one label with one unusable parameter. The trailing
     * non-label template is expected to stay in the text.
     */
    @Test
    public void testExtractLabelsTrimText_with_one_context_label_and_one_unusable_parameter() {
        System.out.println("extractLabelsTrimText_with_one_context_label_and_one_unusable_parameter");

        String line = "{{амер.|en}} [[самолёт]], [[аэроплан]] {{this template should remain in text}}"; // http://ru.wiktionary.org/wiki/airplane
        String expectedText = "[[самолёт]], [[аэроплан]] {{this template should remain in text}}";

        Label[] expectedLabels = { LabelEn.US };
        LabelsText expResult = new LabelsText(expectedLabels, expectedText);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    /**
     * {{устар.}}, {{рег.}} род лёгкой сохи, плужка {{Даль|толкование}}:
     * two leading labels are expected to be extracted, the template at the
     * end of the text is expected to remain.
     */
    @Test
    public void testExtractLabelsTrimText_with_two_context_labels_and_one_template_at_the_end_of_text() {
        System.out.println("extractLabelsTrimText_with_two_context_labels_and_one_template_at_the_end_of_text");

        String line = "{{устар.}}, {{рег.}} род лёгкой сохи, плужка {{Даль|толкование}}"; // http://ru.wiktionary.org/wiki/самолёт
        String expectedText = "род лёгкой сохи, плужка {{Даль|толкование}}";

        Label[] expectedLabels = { LabelEn.obsolete, LabelEn.regional };
        LabelsText expResult = new LabelsText(expectedLabels, expectedText);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    /**
     * Complex case: a list of labels with short words between them,
     * # {{устар.}} ''или'' {{поэт.}}; {{старин.}} {{=|город}}
     */
    @Test
    public void testExtractLabelsTrimText_labels_and_short_words_between() {
        System.out.println("extractLabelsTrimText_labels_and_short_words_between");

        String line = "{{устар.}} ''или'' {{поэт.}}; {{старин.}} {{=|город}}"; // http://ru.wiktionary.org/wiki/град
        String expectedText = "то же, что [[город]]";

        Label[] expectedLabels = { LabelEn.obsolete, LabelEn.poetic, LabelEn.archaic, LabelEn.ru_equal };
        LabelsText expResult = new LabelsText(expectedLabels, expectedText);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    // ///////////////////////////////////////////////////////////
    // LabelParamsRu

    /**
     * Regional label carrying only a language code, e.g.
     * {{рег.|lang=hr}} [[утюг]] (https://ru.wiktionary.org/wiki/pegla):
     * exactly one label named "regional" of the regional category is expected.
     */
    @Test
    public void testExtractLabelsTrimText_with_regional_and_only_lang_code() {
        System.out.println("extractLabelsTrimText_with_regional_and_only_lang_code");

        String line = "{{рег.|lang=hr}} [[утюг]]";

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        Label[] labels = result.getLabels();
        assertEquals(1, labels.length); // one label: regional

        Label regional = labels[0];
        assertEquals("regional", regional.getShortName());
        assertEquals("regional", regional.getName());
        assertRegionalCategory(regional);
    }

    /**
     * In the Russian Wiktionary the names of regions are passed as a free-text
     * parameter of the regional template, {{рег.|regions}} or {{обл.|regions}},
     * e.g. харьк., луг., донецкое, белгородск.
     *
     * <p>For {{рег.|сиб., сев.-вост.}} [[ловушка]] exactly one label is expected:
     * its short name is the parameter text, its name is empty and its
     * category is regional.
     */
    @Test
    public void testExtractLabelsTrimText_with_regional_with_parameter() {
        System.out.println("extractLabelsTrimText_with_regional_with_parameter");

        String line = "{{рег.|сиб., сев.-вост.}} [[ловушка]]"; // http://ru.wiktionary.org/wiki/кулёма

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        Label[] labels = result.getLabels();
        assertEquals(1, labels.length); // one label "сиб., сев.-вост."

        Label regional = labels[0];
        // disabled check kept from the original test:
        //assertFalse(regional.getAddedByHand()); // this label text was gathered automatically
        assertEquals("сиб., сев.-вост.", regional.getShortName());
        assertEquals("", regional.getName());
        assertRegionalCategory(regional);
    }

    /**
     * Two regional labels, each with a region and a language code (entry "capacheca"):
     * # {{рег.|Перу|lang=es}}, {{рег.|Чили|lang=es}} торговый [[лоток]]
     */
    @Test
    public void testExtractLabelsTrimText_with_regional_with_lang_code() {
        System.out.println("extractLabelsTrimText_with_regional_with_lang_code");

        String line = "{{рег.|Перу|lang=es}}, {{рег.|Чили|lang=es}} торговый [[лоток]]";

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        Label[] labels = result.getLabels();
        assertEquals(2, labels.length); // two labels: "Перу", "Чили"

        Label peru = labels[0];
        assertEquals("Перу", peru.getShortName());
        assertEquals("", peru.getName());
        assertRegionalCategory(peru);

        Label chile = labels[1];
        assertEquals("Чили", chile.getShortName());
        assertEquals("", chile.getName());
        assertRegionalCategory(chile);
    }

    // eo LabelParamsRu
    // ///////////////////////////////////////////////////////////

    // ///////////////////////////////////////////////////////////
    // getPometaLabel

    /** {{помета|разг.}} [[что]]: "разг." is expected to be recognized as {@code LabelEn.colloquial}. */
    @Test
    public void testExtractLabelsTrimText_with_pometa_and_known_label() {
        System.out.println("extractLabelsTrimText_with_pometa_and_known_label");

        String line = "{{помета|разг.}} [[что]]";
        String expectedText = "[[что]]";

        Label[] expectedLabels = { LabelEn.colloquial };
        LabelsText expResult = new LabelsText(expectedLabels, expectedText);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    /** {{помета|nocolor=1|разг.}} [[что]]: the nocolor parameter must not hide the known label. */
    @Test
    public void testExtractLabelsTrimText_with_nocolor_and_pometa_and_known_label() {
        System.out.println("extractLabelsTrimText_with_nocolor_and_pometa_and_known_label");

        String line = "{{помета|nocolor=1|разг.}} [[что]]";
        String expectedText = "[[что]]";

        Label[] expectedLabels = { LabelEn.colloquial };
        LabelsText expResult = new LabelsText(expectedLabels, expectedText);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    /**
     * {{помета|unknown context label}} [[что]]: one label is expected whose
     * short name is the template parameter and whose name is empty.
     * Parsing the same line a second time must again yield exactly one label.
     */
    @Test
    public void testExtractLabelsTrimText_with_pometa_and_unknown_label() {
        System.out.println("extractLabelsTrimText_with_pometa_and_unknown_label");

        String line = "{{помета|unknown context label}} [[что]]";
        String expectedText = "[[что]]";

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertEquals(expectedText, result.getText());
        assertEquals(1, result.getLabels().length);

        Label unknown = result.getLabels()[0];
        assertEquals("unknown context label", unknown.getShortName());
        assertEquals(0, unknown.getName().length());
        // disabled check kept from the original test:
        // assertEquals( unknown.getAddedByHand(), false); // added automatically

        // parsing the same unknown label again:
        result = LabelRu.extractLabelsTrimText(line);
        assertEquals(1, result.getLabels().length); // this is the same new added label
    }

    /**
     * {{помета|nocolor=1|unknown2 another context label}} [[что]]: same
     * expectations as for the unknown label without the nocolor parameter.
     */
    @Test
    public void testExtractLabelsTrimText_with_pometa_nocolor_and_unknown_label() {
        System.out.println("extractLabelsTrimText_with_pometa_nocolor_and_unknown_label");

        String line = "{{помета|nocolor=1|unknown2 another context label}} [[что]]";
        String expectedText = "[[что]]";

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertEquals(expectedText, result.getText());
        assertEquals(1, result.getLabels().length);

        Label unknown = result.getLabels()[0];
        assertEquals("unknown2 another context label", unknown.getShortName());
        assertEquals(0, unknown.getName().length());
        // disabled check kept from the original test:
        //assertEquals( unknown.getAddedByHand(), false); // added automatically

        // parsing the same unknown label again:
        result = LabelRu.extractLabelsTrimText(line);
        assertEquals(1, result.getLabels().length); // this is the same new added label
    }

    // eo getPometaLabel
    // ///////////////////////////////////////////////////////////

    // ///////////////////////////////////////////////////////////
    // extractFirstContextLabel

    /**
     * Extraction of a label that does not stand at the start of the definition,
     * e.g. "# some definition and {{the label at the end of definition}}".
     * The label template is expected to be cut out and the remaining text trimmed.
     */
    @Test
    public void testExtractFirstContextLabel_from_the_end_of_definition() {
        System.out.println("testExtractFirstContextLabel_from_the_end_of_definition");

        String line = "some definition {{помета|the label}} end ";
        String expectedText = "some definition end";

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertEquals(expectedText, result.getText());
        assertEquals(1, result.getLabels().length);

        Label label = result.getLabels()[0];
        assertEquals("the label", label.getShortName());
        assertEquals(0, label.getName().length());
    }

    // eo extractFirstContextLabel
    // ///////////////////////////////////////////////////////////

    // ///////////////////////////////////////////////////////////
    // special templates transforming definition text, e.g. сокр., аббр.

    /**
     * 1) # {{амер.}}, {{разг.|en}}, {{аббр.|en|w:Franklin Delano Roosevelt|Франклин Делано Рузвельт, 32-й президент США}}
     * 2) checks that the synonymous templates аббр. and сокр. give the same result.
     */
    @Test
    public void testExtractLabelsTrimText_with_abbrev() {
        System.out.println("extractLabelsTrimText_with_abbrev");

        // two equal (from the parser's point of view) lines:
        String line = "# {{амер.}}, {{разг.|en}}, {{аббр.|en|w:Franklin Delano Roosevelt|Франклин Делано Рузвельт, 32-й президент США}}";
        String lineSynonym = "# {{амер.}}, {{разг.|en}}, {{сокр.||w:Franklin Delano Roosevelt|Франклин Делано Рузвельт, 32-й президент США}}";
        String expectedText = "от [[w:Franklin Delano Roosevelt]]; Франклин Делано Рузвельт, 32-й президент США";

        Label[] expectedLabels = { LabelEn.US, LabelEn.colloquial, LabelEn.abbreviation };
        LabelsText expResult = new LabelsText(expectedLabels, expectedText);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        LabelsText resultSynonym = LabelRu.extractLabelsTrimText(lineSynonym);

        // both spellings of the template are compared with the same expectation
        assertLabelsTextEquals(expResult, result);
        assertLabelsTextEquals(expResult, resultSynonym);
    }

    /**
     * Abbreviation template with and without a parameter.
     * Old formatting: # {{военн.}}, {{сокр.}} [[командир]] [[батальон]]а
     * New formatting: # {{военн.}}, {{сокр.||командир батальона}}
     */
    @Test
    public void testExtractLabelsTrimText_one_parameter() {
        System.out.println("extractLabelsTrimText_one_parameter");

        String lineOld = "# {{сокр.}} [[командир]] [[батальон]]а";
        String lineNew = "# {{сокр.||командир батальона}}";

        String expectedTextOld = "[[командир]] [[батальон]]а";
        String expectedTextNew = "от [[командир батальона]]";

        Label[] expectedLabels = { LabelEn.abbreviation };
        LabelsText expResultOld = new LabelsText(expectedLabels, expectedTextOld);
        LabelsText expResultNew = new LabelsText(expectedLabels, expectedTextNew);

        LabelsText resultOld = LabelRu.extractLabelsTrimText(lineOld);
        LabelsText resultNew = LabelRu.extractLabelsTrimText(lineNew);

        assertLabelsTextEquals(expResultOld, resultOld);
        assertLabelsTextEquals(expResultNew, resultNew);
    }

    /**
     * Special template (form-of); strictly speaking it is not a context label.
     * "# {{хим.}} {{=|спирт}}, ..." is expected to turn into "то же, что [[спирт]], ...".
     */
    @Test
    public void testExtractLabelsTrimText_with_equal_template() {
        System.out.println("extractLabelsTrimText_with_equal_template");

        String line = "# {{хим.}} {{=|спирт}}, бесцветная летучая жидкость, получаемая при ферментации сахара";
        String expectedText = "то же, что [[спирт]], бесцветная летучая жидкость, получаемая при ферментации сахара";

        Label[] expectedLabels = { LabelEn.chemistry, LabelRu.equal };
        LabelsText expResult = new LabelsText(expectedLabels, expectedText);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    /**
     * Chemical element template:
     * "{{хим-элем|17|Cl|[[неметалл]] из группы [[галоген]]ов}}" is expected to
     * yield the chemistry label and the expanded text "[[химический элемент]] с
     * [[атомный номер|атомным номером]] 17, обозначается [[химический
     * символ|химическим символом]] Cl, [[неметалл]] из группы [[галоген]]ов".
     */
    @Test
    public void testExtractLabelsTrimText_element_symbol() {
        System.out.println("extractLabelsTrimText_element_symbol");

        String line = "# {{хим-элем|17|Cl|[[неметалл]] из группы [[галоген]]ов}}";
        String expectedText = "[[химический элемент]] с [[атомный номер|атомным номером]] 17, обозначается [[химический символ|химическим символом]] Cl, [[неметалл]] из группы [[галоген]]ов";

        Label[] expectedLabels = { LabelEn.chemistry };
        LabelsText expResult = new LabelsText(expectedLabels, expectedText);

        LabelsText result = LabelRu.extractLabelsTrimText(line);
        assertLabelsTextEquals(expResult, result);
    }

    // eo special templates transforming definition text, e.g. сокр., аббр.
    // ///////////////////////////////////////////////////////////

    // eo extractLabelsTrimText
    // ///////////////////////////////////////////////////////////
}