/*
* Copyright 2016 Christoph Böhme
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.culturegraph.mf.strings;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.verify;
import java.text.Normalizer;
import org.culturegraph.mf.framework.StreamReceiver;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.InOrder;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
/**
 * Tests for class {@link StreamUnicodeNormalizer}.
 *
 * <p>The fixtures come in pairs: a "diacritics" variant, in which umlauts are
 * written as a base letter followed by the combining diaeresis (U+0308), and
 * a "precomposed" variant, in which the same umlauts are single code points
 * (U+00DC, U+00E4). Both variants render identically, so every non-ASCII
 * character is written as a Unicode escape. This keeps the difference visible
 * in the source and protects it from editors or tools that silently normalise
 * or re-encode the file.
 *
 * @author Christoph Böhme
 *
 */
public final class StreamUnicodeNormalizerTest {

    private static final String RECORD_ID = "id";
    private static final String ENTITY_NAME = "entity-name";
    private static final String LITERAL_NAME = "literal-name";
    private static final String LITERAL_VALUE = "literal-value";

    /** Value with decomposed umlauts: base letter + U+0308. */
    private static final String VALUE_WITH_DIACRITICS =
            "Bauer, Sigmund: U\u0308ber den Einflu\u00DF der Ackergera\u0308the auf den Reinertrag.";

    /** Same value with precomposed umlauts: U+00DC and U+00E4. */
    private static final String VALUE_WITH_PRECOMPOSED_CHARS =
            "Bauer, Sigmund: \u00DCber den Einflu\u00DF der Ackerger\u00E4the auf den Reinertrag.";

    /** Record id with decomposed umlauts. */
    private static final String ID_WITH_DIACRITICS = "id-U\u0308a\u0308";

    /** Record id with precomposed umlauts. */
    private static final String ID_WITH_PRECOMPOSED_CHARS = "id-\u00DC\u00E4";

    /** Literal/entity name with decomposed umlauts. */
    private static final String KEY_WITH_DIACRITICS = "key-U\u0308a\u0308";

    /** Literal/entity name with precomposed umlauts. */
    private static final String KEY_WITH_PRECOMPOSED_CHARS = "key-\u00DC\u00E4";

    private StreamUnicodeNormalizer streamUnicodeNormalizer;

    @Mock
    private StreamReceiver receiver;

    /**
     * Initialises the mock receiver and connects a fresh, unconfigured
     * normalizer to it before each test.
     */
    @Before
    public void setup() {
        MockitoAnnotations.initMocks(this);
        streamUnicodeNormalizer = new StreamUnicodeNormalizer();
        streamUnicodeNormalizer.setReceiver(receiver);
    }

    /**
     * Calls {@code closeStream()} on the module under test after each test.
     */
    @After
    public void cleanup() {
        streamUnicodeNormalizer.closeStream();
    }

    /**
     * Record, entity and literal events with plain ASCII content must reach
     * the receiver unchanged and in the order in which they were emitted.
     */
    @Test
    public void shouldForwardAllEvents() {
        streamUnicodeNormalizer.startRecord(RECORD_ID);
        streamUnicodeNormalizer.startEntity(ENTITY_NAME);
        streamUnicodeNormalizer.literal(LITERAL_NAME, LITERAL_VALUE);
        streamUnicodeNormalizer.endEntity();
        streamUnicodeNormalizer.endRecord();

        final InOrder ordered = inOrder(receiver);
        ordered.verify(receiver).startRecord(RECORD_ID);
        ordered.verify(receiver).startEntity(ENTITY_NAME);
        ordered.verify(receiver).literal(LITERAL_NAME, LITERAL_VALUE);
        ordered.verify(receiver).endEntity();
        ordered.verify(receiver).endRecord();
    }

    /**
     * Without any configuration a decomposed literal value is expected to
     * arrive at the receiver in its precomposed (NFC) form.
     */
    @Test
    public void shouldNormalizeValuesToNFCByDefault() {
        streamUnicodeNormalizer.startRecord(RECORD_ID);
        streamUnicodeNormalizer.literal(LITERAL_NAME, VALUE_WITH_DIACRITICS);
        streamUnicodeNormalizer.endRecord();

        verify(receiver).literal(LITERAL_NAME, VALUE_WITH_PRECOMPOSED_CHARS);
    }

    /**
     * With value normalisation switched off a decomposed literal value must
     * be passed on untouched.
     */
    @Test
    public void shouldNotNormalizeValuesIfConfigured() {
        streamUnicodeNormalizer.setNormalizeValues(false);

        streamUnicodeNormalizer.startRecord(RECORD_ID);
        streamUnicodeNormalizer.literal(LITERAL_NAME, VALUE_WITH_DIACRITICS);
        streamUnicodeNormalizer.endRecord();

        verify(receiver).literal(LITERAL_NAME, VALUE_WITH_DIACRITICS);
    }

    /**
     * A {@code null} literal value must be forwarded as {@code null}.
     */
    @Test
    public void shouldIgnoreNullValues() {
        streamUnicodeNormalizer.startRecord(RECORD_ID);
        streamUnicodeNormalizer.literal(LITERAL_NAME, null);
        streamUnicodeNormalizer.endRecord();

        verify(receiver).literal(LITERAL_NAME, null);
    }

    /**
     * Without any configuration a decomposed record id must be passed on
     * untouched.
     */
    @Test
    public void shouldNotNormalizeIdByDefault() {
        streamUnicodeNormalizer.startRecord(ID_WITH_DIACRITICS);
        streamUnicodeNormalizer.endRecord();

        verify(receiver).startRecord(ID_WITH_DIACRITICS);
    }

    /**
     * With id normalisation switched on a decomposed record id is expected
     * to arrive in its precomposed (NFC) form.
     */
    @Test
    public void shouldNormalizeIdToNFCIfConfigured() {
        streamUnicodeNormalizer.setNormalizeIds(true);

        streamUnicodeNormalizer.startRecord(ID_WITH_DIACRITICS);
        streamUnicodeNormalizer.endRecord();

        verify(receiver).startRecord(ID_WITH_PRECOMPOSED_CHARS);
    }

    /**
     * Without any configuration decomposed literal and entity names must be
     * passed on untouched.
     */
    @Test
    public void shouldNotNormalizeKeyByDefault() {
        streamUnicodeNormalizer.startRecord(RECORD_ID);
        streamUnicodeNormalizer.literal(KEY_WITH_DIACRITICS, LITERAL_VALUE);
        streamUnicodeNormalizer.startEntity(KEY_WITH_DIACRITICS);
        streamUnicodeNormalizer.endEntity();
        streamUnicodeNormalizer.endRecord();

        verify(receiver).literal(KEY_WITH_DIACRITICS, LITERAL_VALUE);
        verify(receiver).startEntity(KEY_WITH_DIACRITICS);
    }

    /**
     * With key normalisation switched on decomposed literal and entity names
     * are expected to arrive in their precomposed form.
     */
    @Test
    public void shouldNormalizeKeysIfConfigured() {
        streamUnicodeNormalizer.setNormalizeKeys(true);

        streamUnicodeNormalizer.startRecord(RECORD_ID);
        streamUnicodeNormalizer.literal(KEY_WITH_DIACRITICS, LITERAL_VALUE);
        streamUnicodeNormalizer.startEntity(KEY_WITH_DIACRITICS);
        streamUnicodeNormalizer.endEntity();
        streamUnicodeNormalizer.endRecord();

        verify(receiver).literal(KEY_WITH_PRECOMPOSED_CHARS, LITERAL_VALUE);
        verify(receiver).startEntity(KEY_WITH_PRECOMPOSED_CHARS);
    }

    /**
     * With the normalisation form set to NFD a precomposed literal value is
     * expected to arrive in its decomposed form.
     */
    @Test
    public void shouldNormalizeToNFDIfConfigured() {
        streamUnicodeNormalizer.setNormalizationForm(Normalizer.Form.NFD);

        streamUnicodeNormalizer.startRecord(RECORD_ID);
        // Uses the value fixtures because this test is about literal values.
        // The sharp s (U+00DF) has no canonical decomposition, so it is the
        // same in both variants.
        streamUnicodeNormalizer.literal(LITERAL_NAME,
                VALUE_WITH_PRECOMPOSED_CHARS);
        streamUnicodeNormalizer.endRecord();

        verify(receiver).literal(LITERAL_NAME, VALUE_WITH_DIACRITICS);
    }

}