package org.marketcetera.util.test;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import org.apache.commons.lang.CharEncoding;
import org.junit.Test;
import static org.junit.Assert.*;
import static org.marketcetera.util.test.UnicodeData.*;
/**
 * Checks the sample strings, char arrays, code point arrays, and
 * encoded byte arrays statically imported from {@link UnicodeData}
 * against the JDK's own string and charset facilities.
 *
 * @author tlerios@marketcetera.com
 * @since 0.6.0
 * @version $Id: UnicodeDataTest.java 16154 2012-07-14 16:34:05Z colin $
 */

/* $License$ */

public class UnicodeDataTest
    extends TestCaseBase
{
    // Charsets under test; NAT is whatever the running JVM uses by default.
    private static final Charset NAT=
        Charset.defaultCharset();
    private static final Charset UTF8=
        Charset.forName(CharEncoding.UTF_8);
    private static final Charset UTF16BE=
        Charset.forName(CharEncoding.UTF_16BE);
    private static final Charset UTF16LE=
        Charset.forName(CharEncoding.UTF_16LE);
    private static final Charset UTF32BE=
        Charset.forName("UTF-32BE");
    private static final Charset UTF32LE=
        Charset.forName("UTF-32LE");

    // Common path prefix of the files produced by writeFiles().
    private static final String TEST_FILE_PREFIX=
        DIR_TARGET+File.separator+"unicode_";


    /**
     * Asserts that the given string matches each of its supplied
     * alternate representations.
     *
     * @param str The string.
     * @param chars The expected UTF-16 code units of the string.
     * @param ucps The expected Unicode code points of the string.
     * @param nat The expected encoding in the default JVM charset.
     * @param utf8 The expected UTF-8 encoding.
     * @param utf16be The expected UTF-16BE encoding.
     * @param utf16le The expected UTF-16LE encoding.
     * @param utf32be The expected UTF-32BE encoding.
     * @param utf32le The expected UTF-32LE encoding.
     */

    private static void singleValid
        (String str,
         char[] chars,
         int[] ucps,
         byte[] nat,
         byte[] utf8,
         byte[] utf16be,
         byte[] utf16le,
         byte[] utf32be,
         byte[] utf32le)
    {
        assertArrayEquals(str.toCharArray(),chars);

        // Walk the string one code point at a time; i indexes chars
        // (advancing by 1 or 2 per code point), j indexes code points.
        int i=0;
        int j=0;
        while (i<str.length()) {
            int ucp=str.codePointAt(i);
            assertEquals("At code point position "+j,ucp,ucps[j++]);
            i+=Character.charCount(ucp);
        }
        // Without this check, surplus trailing entries in ucps would
        // go unnoticed because the loop is bounded by the string.
        assertEquals("Number of code points",j,ucps.length);

        // The no-argument getBytes() encodes with the platform's
        // default charset.
        assertArrayEquals(str.getBytes(),nat);
        assertArrayEquals(str.getBytes(UTF8),utf8);
        assertArrayEquals(str.getBytes(UTF16BE),utf16be);
        assertArrayEquals(str.getBytes(UTF16LE),utf16le);
        assertArrayEquals(str.getBytes(UTF32BE),utf32be);
        assertArrayEquals(str.getBytes(UTF32LE),utf32le);
    }

    /**
     * Asserts that decoding the given bytes using the given charset
     * fails with a {@link CharacterCodingException}.
     *
     * @param cs The charset.
     * @param encoded The bytes expected to be rejected.
     */

    private static void singleInvalid
        (Charset cs,
         byte[] encoded)
    {
        CharsetDecoder dec=cs.newDecoder();
        // Ask for an exception instead of silent replacement.
        dec.onMalformedInput(CodingErrorAction.REPORT);
        try {
            dec.decode(ByteBuffer.wrap(encoded));
            fail("Decoder for "+cs.name()+
                 " accepted input that should be malformed");
        } catch (CharacterCodingException ex) {
            // Desired.
        }
    }

    /**
     * Writes an XML file whose declaration names the given charset
     * and whose root element contains the given bytes verbatim. The
     * file path is {@link #TEST_FILE_PREFIX}, followed by the given
     * name, followed by ".xml".
     *
     * @param charset The charset used to encode the XML markup and
     * named in the XML declaration.
     * @param fileName The distinguishing part of the file name.
     * @param data The already-encoded bytes placed inside the root
     * element.
     *
     * @throws Exception Thrown if an I/O error occurs.
     */

    private static void writeFile
        (Charset charset,
         String fileName,
         byte[] data)
        throws Exception
    {
        FileOutputStream out=new FileOutputStream
            (TEST_FILE_PREFIX+fileName+".xml");
        // Close the stream even when a write fails, so that the file
        // handle is not leaked.
        try {
            out.write(("<?xml version=\"1.0\" encoding=\""+
                       charset.name()+"\"?><root>").getBytes(charset));
            out.write(data);
            out.write("</root>".getBytes(charset));
        } finally {
            out.close();
        }
    }


    /**
     * Checks every valid sample string against all of its alternate
     * representations.
     */

    @Test
    public void valid()
    {
        singleValid(SPACE,SPACE_CHARS,SPACE_UCPS,
                    SPACE_NAT,SPACE_UTF8,
                    SPACE_UTF16BE,SPACE_UTF16LE,
                    SPACE_UTF32BE,SPACE_UTF32LE);
        singleValid(HELLO_EN,HELLO_EN_CHARS,HELLO_EN_UCPS,
                    HELLO_EN_NAT,HELLO_EN_UTF8,
                    HELLO_EN_UTF16BE,HELLO_EN_UTF16LE,
                    HELLO_EN_UTF32BE,HELLO_EN_UTF32LE);
        singleValid(LANGUAGE_NO,LANGUAGE_NO_CHARS,LANGUAGE_NO_UCPS,
                    LANGUAGE_NO_NAT,LANGUAGE_NO_UTF8,
                    LANGUAGE_NO_UTF16BE,LANGUAGE_NO_UTF16LE,
                    LANGUAGE_NO_UTF32BE,LANGUAGE_NO_UTF32LE);
        singleValid(HELLO_GR,HELLO_GR_CHARS,HELLO_GR_UCPS,
                    HELLO_GR_NAT,HELLO_GR_UTF8,
                    HELLO_GR_UTF16BE,HELLO_GR_UTF16LE,
                    HELLO_GR_UTF32BE,HELLO_GR_UTF32LE);
        singleValid(HOUSE_AR,HOUSE_AR_CHARS,HOUSE_AR_UCPS,
                    HOUSE_AR_NAT,HOUSE_AR_UTF8,
                    HOUSE_AR_UTF16BE,HOUSE_AR_UTF16LE,
                    HOUSE_AR_UTF32BE,HOUSE_AR_UTF32LE);
        singleValid(GOODBYE_JA,GOODBYE_JA_CHARS,GOODBYE_JA_UCPS,
                    GOODBYE_JA_NAT,GOODBYE_JA_UTF8,
                    GOODBYE_JA_UTF16BE,GOODBYE_JA_UTF16LE,
                    GOODBYE_JA_UTF32BE,GOODBYE_JA_UTF32LE);
        singleValid(GOATS_LNB,GOATS_LNB_CHARS,GOATS_LNB_UCPS,
                    GOATS_LNB_NAT,GOATS_LNB_UTF8,
                    GOATS_LNB_UTF16BE,GOATS_LNB_UTF16LE,
                    GOATS_LNB_UTF32BE,GOATS_LNB_UTF32LE);
        singleValid(G_CLEF_MSC,G_CLEF_MSC_CHARS,G_CLEF_MSC_UCPS,
                    G_CLEF_MSC_NAT,G_CLEF_MSC_UTF8,
                    G_CLEF_MSC_UTF16BE,G_CLEF_MSC_UTF16LE,
                    G_CLEF_MSC_UTF32BE,G_CLEF_MSC_UTF32LE);
        singleValid(COMBO,COMBO_CHARS,COMBO_UCPS,
                    COMBO_NAT,COMBO_UTF8,
                    COMBO_UTF16BE,COMBO_UTF16LE,
                    COMBO_UTF32BE,COMBO_UTF32LE);
    }

    /**
     * Checks that the invalid sample cannot be encoded to UTF-8 in
     * any of its string forms, and that each invalid byte sequence is
     * rejected by the decoder of its charset.
     */

    @Test
    public void invalid()
    {
        CharsetEncoder enc=UTF8.newEncoder();
        assertFalse(enc.canEncode(INVALID));
        assertFalse(enc.canEncode(new String(INVALID_CHARS)));
        assertFalse(enc.canEncode
                    (new String(INVALID_UCPS,0,INVALID_UCPS.length)));

        singleInvalid(UTF8,INVALID_UTF8);
        singleInvalid(UTF16BE,INVALID_UTF16BE);
        singleInvalid(UTF16LE,INVALID_UTF16LE);
        singleInvalid(UTF32BE,INVALID_UTF32BE);
        singleInvalid(UTF32LE,INVALID_UTF32LE);
    }

    /**
     * Writes a set of files that can be opened using external
     * unicode-aware tools (e.g. Firefox) to check visually the test
     * data.
     *
     * @throws Exception Thrown if an I/O error occurs.
     */

    @Test
    public void writeFiles()
        throws Exception
    {
        writeFile(NAT,"native",COMBO_NAT);
        writeFile(UTF8,"utf8",COMBO_UTF8);
        writeFile(UTF16BE,"utf16be",COMBO_UTF16BE);
        writeFile(UTF16LE,"utf16le",COMBO_UTF16LE);
        writeFile(UTF32BE,"utf32be",COMBO_UTF32BE);
        writeFile(UTF32LE,"utf32le",COMBO_UTF32LE);
    }
}