/* * Copyright 2014 Deutsche Nationalbibliothek * * Licensed under the Apache License, Version 2.0 the "License"; * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.culturegraph.mf.biblio.pica; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.times; import org.culturegraph.mf.framework.StreamReceiver; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.mockito.InOrder; import org.mockito.Mock; import org.mockito.MockitoAnnotations; /** * Tests for class {@link PicaMultiscriptRemodeler}. * * @author Christoph Böhme * */ public class PicaMultiscriptRemodelerTest { private static final String RECORD_ID = "1234"; private static final String FIELD_003AT = "003@"; private static final String FIELD_033A = "033A"; private static final String FIELD_021A = "021A"; private static final String FIELD_021C = "021C"; private static final String SCRIPT_LATIN = "Latn"; private static final String SCRIPT_GREEK = "Grek"; private static final String SCRIPT_ARABIC = "Arab"; private static final String SCRIPT_HEBREW = "Hebr"; private static final String VALUE_1 = "Subfield 1"; private static final String VALUE_2 = "Subfield 2"; private static final String VALUE_3 = "Subfield 3"; private static final String VALUE_1_GREEK = "ĸµ 1"; private static final String VALUE_2_GREEK = "ĸµ 2"; private static final String VALUE_1_ARABIC = "Subfield/Arabic 1"; private static final String VALUE_1_HEBREW = "Subfield/Hebrew 1"; private PicaMultiscriptRemodeler remodeler; @Mock private StreamReceiver receiver; @Before public void setup() { MockitoAnnotations.initMocks(this); remodeler = new PicaMultiscriptRemodeler(); remodeler.setReceiver(receiver); } @After public void cleanup() { remodeler.closeStream(); } @Test public void shouldSimplyPassThroughNonMultiscriptFields() { remodeler.startRecord(RECORD_ID); remodeler.startEntity(FIELD_003AT); remodeler.literal("0", RECORD_ID); remodeler.endEntity(); remodeler.startEntity(FIELD_033A); remodeler.literal("p", VALUE_1); remodeler.literal("p", VALUE_2); remodeler.literal("n", VALUE_3); remodeler.endEntity(); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); ordered.verify(receiver).startEntity(FIELD_003AT); ordered.verify(receiver).literal("0", RECORD_ID); ordered.verify(receiver).endEntity(); ordered.verify(receiver).startEntity(FIELD_033A); ordered.verify(receiver).literal("p", VALUE_1); ordered.verify(receiver).literal("p", VALUE_2); ordered.verify(receiver).literal("n", VALUE_3); ordered.verify(receiver).endEntity(); ordered.verify(receiver).endRecord(); } @Test public void shouldRemodelMultscriptField() { remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); emitMultscriptField(FIELD_021A, "01", SCRIPT_GREEK, VALUE_1_GREEK); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); verifyMultiscriptField(ordered, FIELD_021A, "01", SCRIPT_LATIN, VALUE_1, SCRIPT_GREEK, VALUE_1_GREEK); ordered.verify(receiver).endRecord(); } @Test public void shouldRemodelInterleafedMultscriptFields() { remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021C, "01", SCRIPT_LATIN, VALUE_1); emitMultscriptField(FIELD_021C, "02", SCRIPT_LATIN, VALUE_2); emitMultscriptField(FIELD_021C, "01", SCRIPT_GREEK, VALUE_1_GREEK); emitMultscriptField(FIELD_021C, "02", SCRIPT_GREEK, VALUE_2_GREEK); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); verifyMultiscriptField(ordered, FIELD_021C, "01", SCRIPT_LATIN, VALUE_1, SCRIPT_GREEK, VALUE_1_GREEK); verifyMultiscriptField(ordered, FIELD_021C, "02", SCRIPT_LATIN, VALUE_2, SCRIPT_GREEK, VALUE_2_GREEK); ordered.verify(receiver).endRecord(); } @Test public void shouldPassThroughSingleMultiscriptField() { remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); verifySingleMultiscriptField(ordered, FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); ordered.verify(receiver).endRecord(); } @Test public void shouldPassThroughSingleMultiscriptFieldFollowedByCompleteMultiscriptFieldWithTheSameName() { remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021C, "01", SCRIPT_LATIN, VALUE_1); emitMultscriptField(FIELD_021C, "02", SCRIPT_LATIN, VALUE_2); emitMultscriptField(FIELD_021C, "02", SCRIPT_GREEK, VALUE_2_GREEK); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); verifySingleMultiscriptField(ordered, FIELD_021C, "01", SCRIPT_LATIN, VALUE_1); verifyMultiscriptField(ordered, FIELD_021C, "02", SCRIPT_LATIN, VALUE_2, SCRIPT_GREEK, VALUE_2_GREEK); ordered.verify(receiver).endRecord(); } @Test public void shouldPassThroughSingleMultiscriptFieldFollowedByCompleteMultiscriptFieldWithDifferentName() { remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); emitMultscriptField(FIELD_021C, "01", SCRIPT_LATIN, VALUE_2); emitMultscriptField(FIELD_021C, "01", SCRIPT_GREEK, VALUE_2_GREEK); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); verifySingleMultiscriptField(ordered, FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); verifyMultiscriptField(ordered, FIELD_021C, "01", SCRIPT_LATIN, VALUE_2, SCRIPT_GREEK, VALUE_2_GREEK); ordered.verify(receiver).endRecord(); } @Test public void shouldPassThroughSingleMultiscriptFieldFollowedByNonMultiscriptField() { remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); remodeler.startEntity(FIELD_033A); remodeler.literal("n", VALUE_2); remodeler.endEntity(); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); verifySingleMultiscriptField(ordered, FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); ordered.verify(receiver).startEntity(FIELD_033A); ordered.verify(receiver).literal("n", VALUE_2); ordered.verify(receiver).endEntity(); ordered.verify(receiver).endRecord(); } @Test public void shouldPassThroughIncompleteMultiscriptFields() { remodeler.startRecord(RECORD_ID); remodeler.startEntity(FIELD_021C); remodeler.literal("T", "01"); remodeler.literal("a", VALUE_1); remodeler.endEntity(); remodeler.startEntity(FIELD_021C); remodeler.literal("U", SCRIPT_GREEK); remodeler.literal("a", VALUE_2_GREEK); remodeler.endEntity(); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); ordered.verify(receiver).startEntity(FIELD_021C); ordered.verify(receiver).literal("T", "01"); ordered.verify(receiver).literal("a", VALUE_1); ordered.verify(receiver).endEntity(); ordered.verify(receiver).startEntity(FIELD_021C); ordered.verify(receiver).literal("U", SCRIPT_GREEK); ordered.verify(receiver).literal("a", VALUE_2_GREEK); ordered.verify(receiver).endEntity(); ordered.verify(receiver).endRecord(); } @Test public void shouldLabelArabicAsNonLatinRightToLeftScript() { remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); emitMultscriptField(FIELD_021A, "01", SCRIPT_ARABIC, VALUE_1_ARABIC); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); verifyMultiscriptField(ordered, FIELD_021A, "01", SCRIPT_LATIN, VALUE_1, SCRIPT_ARABIC, VALUE_1_ARABIC); ordered.verify(receiver).endRecord(); } @Test public void shouldLabelHebrewAsNonLatinRightToLeftScript() { remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); emitMultscriptField(FIELD_021A, "01", SCRIPT_HEBREW, VALUE_1_HEBREW); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); verifyMultiscriptField(ordered, FIELD_021A, "01", SCRIPT_LATIN, VALUE_1, SCRIPT_HEBREW, VALUE_1_HEBREW); ordered.verify(receiver).endRecord(); } @Test public void shouldClearStateOnResetStream() { remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021A, "01", SCRIPT_LATIN, VALUE_1); remodeler.resetStream(); remodeler.startRecord(RECORD_ID); emitMultscriptField(FIELD_021A, "01", SCRIPT_GREEK, VALUE_1_GREEK); remodeler.endRecord(); final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startRecord(RECORD_ID); ordered.verify(receiver).resetStream(); ordered.verify(receiver).startRecord(RECORD_ID); verifySingleMultiscriptField(ordered, FIELD_021A, "01", SCRIPT_GREEK, VALUE_1_GREEK); ordered.verify(receiver).endRecord(); } private void emitMultscriptField(final String field, final String groupNumber, final String script, final String value) { remodeler.startEntity(field); remodeler.literal("T", groupNumber); remodeler.literal("U", script); remodeler.literal("a", value); remodeler.endEntity(); } private void verifyMultiscriptField(final InOrder ordered, final String field, final String groupNumber, final String script1, final String value1, final String script2, final String value2) { ordered.verify(receiver).startEntity(field); ordered.verify(receiver).startEntity(mapScriptToEntityName(script1)); ordered.verify(receiver).literal("T", groupNumber); ordered.verify(receiver).literal("U", script1); ordered.verify(receiver).literal("a", value1); ordered.verify(receiver).endEntity(); ordered.verify(receiver).startEntity(mapScriptToEntityName(script2)); ordered.verify(receiver).literal("T", groupNumber); ordered.verify(receiver).literal("U", script2); ordered.verify(receiver).literal("a", value2); ordered.verify(receiver, times(2)).endEntity(); } private void verifySingleMultiscriptField(final InOrder ordered, final String field, final String groupNumber, final String script, final String value) { ordered.verify(receiver).startEntity(field); ordered.verify(receiver).literal("T", groupNumber); ordered.verify(receiver).literal("U", script); ordered.verify(receiver).literal("a", value); ordered.verify(receiver).endEntity(); } private String mapScriptToEntityName(final String script) { if (SCRIPT_LATIN.equals(script)) { return PicaMultiscriptRemodeler.ENTITY_NAME_FOR_LATIN; } else if (SCRIPT_ARABIC.equals(script) || SCRIPT_HEBREW.equals(script)) { return PicaMultiscriptRemodeler.ENTITY_NAME_FOR_NON_LATIN_RL; } return PicaMultiscriptRemodeler.ENTITY_NAME_FOR_NON_LATIN_LR; } }