/** * Copyright (c) 2006 Jesper Steen M�ller * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * IBM - Initial API and implementation */ package org.eclipse.emf.test.xml.encoding; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import org.eclipse.emf.common.util.URI; import org.eclipse.emf.ecore.EAnnotation; import org.eclipse.emf.ecore.EcoreFactory; import org.eclipse.emf.ecore.xmi.XMIResource; import org.eclipse.emf.ecore.xmi.impl.XMIResourceImpl; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; /** * @author jsm */ @RunWith(Parameterized.class) public class UnicodeEncodingTest { // From the Unicode spec public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; public static final int MIN_CODE_POINT = 0x000000; public static final int MAX_CODE_POINT = 0x10ffff; public static final char MIN_HIGH_SURROGATE = '\uD800'; public static final char MAX_HIGH_SURROGATE = '\uDBFF'; public static final char MIN_LOW_SURROGATE = '\uDC00'; public static final char MAX_LOW_SURROGATE = '\uDFFF'; public static char[] toSurrogatePair(int codePoint) { if (codePoint < 0 || codePoint > MAX_CODE_POINT) { throw new IllegalArgumentException(); } if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) { return new char []{ (char)codePoint }; } char[] result = new char [2]; int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT; result[0] = (char)((offset >>> 10) + MIN_HIGH_SURROGATE); result[1] = (char)((offset & 0x3ff) + MIN_LOW_SURROGATE); return result; } protected File tempFile; protected final String encodingName; protected final String xmlVersion; public UnicodeEncodingTest(String encoding, String xmlVersion) { encodingName = encoding; this.xmlVersion = xmlVersion; } @Parameterized.Parameters(name="Encoding {0} for XML {1}") public static Collection<Object[]> parameters() { return Arrays.asList (new Object[][] { { "UTF-8", "1.0" }, { "UTF-16BE", "1.0" }, { "UTF-16LE", "1.0" }, { "ASCII", "1.0" }, { "ISO-8859-1", "1.0" }, // { "ISO-8859-5", "1.0" }, { "ASCII", "1.1" }, }); } @Before public void setUp() throws Exception { tempFile = File.createTempFile("EMF-encoding-test-" + encodingName, ".tmp.xml"); } @After public void tearDown() throws Exception { tempFile.delete(); } public void doEMFSaveAndLoad(String testString) throws IOException { URI fileURI = URI.createFileURI(tempFile.toString()); String sourceValue = testString + " represented as XML in " + encodingName; EAnnotation eObject = EcoreFactory.eINSTANCE.createEAnnotation(); eObject.setSource(sourceValue); // Including international characters XMIResource resource = new XMIResourceImpl(); resource.getContents().add(eObject); resource.setEncoding(encodingName); resource.setXMLVersion(xmlVersion); resource.setURI(fileURI); resource.save(new HashMap<String, Object>()); XMIResource loadedResource = new XMIResourceImpl(); loadedResource.setURI(fileURI); loadedResource.load(new HashMap<String, Object>()); assertTrue("No errors should occur while loading", loadedResource.getErrors().isEmpty()); EAnnotation loadedAnnotation = (EAnnotation)loadedResource.getContents().get(0); assertEquals("String read back by EMF was different from the one being saved", sourceValue, loadedAnnotation.getSource()); } @Test public void testStraightASCII() throws Exception { // Test in the ASCII range doEMFSaveAndLoad("Just straight ASCII small < and (&) big >"); } @Test public void testCharactersIn8bit() throws Exception { // Test in the 8 bit range of Unicode doEMFSaveAndLoad("A name in Western Europe: Jesper Steen M\u00F8ller"); } // Test outside the 8 bit range of Unicode @Test public void testArabicLetter() throws Exception { // Test in the 16 bit range of Unicode doEMFSaveAndLoad("This is an arabic glyph: \uFECE. "); } // Test beyond the 16 bit range of Unicode @Test public void testSupplementaryContent() throws Exception { /** * Test in the supplementary code point area (this will get tricky) * See <http://www.unicode.org/charts/PDF/U10330.pdf> */ char kusma[] = toSurrogatePair(0x1033A); // GOTHIC LETTER KUSMA doEMFSaveAndLoad("This is a gothic letter: " + new String(kusma) + ". "); // XML saving and loading has errors, too! } @Test public void testControlCharacters() throws Exception { if ("1.1".equals(xmlVersion)) { StringBuffer text = new StringBuffer(); for (char i = 1; i <= 0x1F; ++i) { text.append(i); } doEMFSaveAndLoad("These are control characters: " + text + ". "); } } }