/* * Copyright (c) 2005-2011 Grameen Foundation USA * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. * * See also http://www.apache.org/licenses/LICENSE-2.0.html for an * explanation of the license and how it is applied. */ package org.mifos.framework.util; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.not; import static org.junit.Assert.assertThat; import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; import java.net.URISyntaxException; import org.junit.Test; import org.mifos.framework.util.UnicodeUtil.UnicodeInputStream; /** * Tests detecting and decoding unicode files. May require changes if default * system file encoding is not UTF-8. */ public class UnicodeUtilTest { static final String EXPECTED_CONTENT = "Hello world"; static final String SAMPLE_UTF_16LE_TXT = "/sample_UTF-16LE.txt"; static final int ONE_KIBIBYTE = 1024; static final String DEFAULT_ENCODING = System.getProperty("file.encoding"); @Test public void fileIsExpectedLength() throws IOException, URISyntaxException { String filename = this.getClass().getResource(SAMPLE_UTF_16LE_TXT).toURI().getPath(); FileInputStream in = new FileInputStream(filename); byte data[] = new byte[ONE_KIBIBYTE]; int expectedFilesize = 26; assertThat("in.read() correct size", in.read(data), is(expectedFilesize)); in.close(); assertThat("not yet decodeable", new String(data).trim(), not(EXPECTED_CONTENT)); } @Test public void canDetectUtf16le() throws IOException, URISyntaxException { String filename = this.getClass().getResource(SAMPLE_UTF_16LE_TXT).toURI().getPath(); FileInputStream in = new FileInputStream(filename); UnicodeInputStream uis = new UnicodeInputStream(in, DEFAULT_ENCODING); String detectedEncoding = uis.getEncoding(); in.close(); uis.close(); assertThat(detectedEncoding, is("UTF-16LE")); } @Test public void canDecodeUtf16le() throws IOException, URISyntaxException { String filename = this.getClass().getResource(SAMPLE_UTF_16LE_TXT).toURI().getPath(); FileInputStream in = new FileInputStream(filename); byte data[] = new byte[ONE_KIBIBYTE]; in.read(data); in.close(); byte converted[] = UnicodeUtil.convert(data, "US-ASCII"); assertThat(new String(converted).trim(), is(EXPECTED_CONTENT)); } @Test public void canReadAndDecodeLineByLine() throws IOException, URISyntaxException { String filename = this.getClass().getResource(SAMPLE_UTF_16LE_TXT).toURI().getPath(); BufferedReader reader = UnicodeUtil.getUnicodeAwareBufferedReader(filename); String read = reader.readLine(); reader.close(); assertThat(read.trim(), is(EXPECTED_CONTENT)); } @Test public void canDetectUtf8() throws IOException, URISyntaxException { String filename = this.getClass().getResource("/sample_UTF-8.txt").toURI().getPath(); FileInputStream in = new FileInputStream(filename); byte data[] = new byte[ONE_KIBIBYTE]; int expectedFilesize = 12; assertThat("correct size", in.read(data), is(expectedFilesize)); in.close(); UnicodeInputStream uic = new UnicodeInputStream(new FileInputStream(filename), DEFAULT_ENCODING); assertThat("guessed encoding", uic.getEncoding(), is(DEFAULT_ENCODING)); assertThat("read unicode", uic.read(data), is(expectedFilesize)); uic.close(); assertThat("correct contents", new String(data).trim(), is(EXPECTED_CONTENT)); } }