/** * This file is part of Erjang - A JVM-based Erlang VM * * Copyright (c) 2010 by Trifork * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. **/ package erjang.m.unicode; import erjang.EBigString; import erjang.ENative; import erjang.BIF; import erjang.EObject; import erjang.EAtom; import erjang.ESeq; import erjang.ETuple; import erjang.EBinary; import erjang.ERT; import erjang.NotImplemented; import erjang.CharCollector; import erjang.m.erlang.ErlConvert; import java.io.CharArrayWriter; import java.io.IOException; import java.nio.charset.Charset; public class Native extends ENative { public static EAtom LATIN1_ATOM = EAtom.intern("latin1"); public static EAtom UNICODE_ATOM = EAtom.intern("unicode"); public static EAtom UTF8_ATOM = EAtom.intern("utf8"); public static EAtom UTF16_ATOM = EAtom.intern("utf16"); public static EAtom UTF32_ATOM = EAtom.intern("utf32"); public static EAtom LITTLE_ATOM = EAtom.intern("little"); public static EAtom BIG_ATOM = EAtom.intern("big"); public static EAtom ERROR_ATOM = EAtom.intern("error"); public static EAtom INCOMPLETE_ATOM = EAtom.intern("incomplete"); @BIF public static EObject characters_to_binary(EObject charlist, EObject encodingSpec) { return characters_to(charlist, encodingSpec, CHAR_ARRAY_TO_BINARY_CONVERTER); } @BIF public static EObject characters_to_list(EObject charlist, EObject encodingSpec) { return characters_to(charlist, encodingSpec, CHAR_ARRAY_TO_LIST_CONVERTER); } @BIF public static EObject characters_to_list(EObject charlist) { return characters_to(charlist, UNICODE_ATOM, CHAR_ARRAY_TO_LIST_CONVERTER); } public static EObject characters_to(EObject charlist, EObject encodingSpec, CharArrayConverter output_converter) { Charset encoding = encodingSpecToCharset(encodingSpec); if (encoding == null) throw ERT.badarg(charlist, encodingSpec); CharArrayWriter out = new CharArrayWriter(); CharCollector collector = new CharCollector(encoding, out); ESeq rest = ERT.NIL; try { rest = charlist.collectCharList(collector, rest); } catch (CharCollector.InvalidElementException e) { throw ERT.badarg(charlist, encodingSpec); } catch (CharCollector.CollectingException e) { EObject data = output_converter.convert(out); return ETuple.make(ERROR_ATOM, data, e.restOfInput); } catch (IOException e) { throw new Error(e); // Not supposed to happen. } try { collector.end(); if (rest != ERT.NIL) { return ETuple.make(INCOMPLETE_ATOM, output_converter.convert(out), ErlConvert.iolist_to_binary(rest.reverse())); } } catch (CharCollector.PartialDecodingException e) { EObject data = output_converter.convert(out); return ETuple.make(INCOMPLETE_ATOM, data); } catch (IOException e) { throw new Error(e); // Not supposed to happen. } return output_converter.convert(out); } public static Charset encodingSpecToCharset(EObject encoding) { EAtom ea; ETuple et; if ((ea = encoding.testAtom()) != null) { if (ea.equals(LATIN1_ATOM)) return Charset.forName("ISO-8859-1"); else if (ea.equals(UNICODE_ATOM) || ea.equals(UTF8_ATOM)) return Charset.forName("UTF-8"); else if (ea.equals(UTF16_ATOM)) return Charset.forName("UTF-16BE"); } else if ((et = encoding.testTuple()) != null) { EAtom ea2; if ((ea = et.elm(1).testAtom()) != null && (ea2 = et.elm(21).testAtom()) != null) { if (ea.equals(UTF16_ATOM)){ if (ea2.equals(LITTLE_ATOM)) return Charset.forName("UTF-16LE"); if (ea2.equals(BIG_ATOM)) return Charset.forName("UTF-16BE"); } else if (ea.equals(UTF32_ATOM)) { throw new NotImplemented(); } } } return null; } static abstract class CharArrayConverter { EObject convert(CharArrayWriter caw) { return convert(caw.toCharArray()); } abstract EObject convert(char[] chars); } final static CharArrayConverter CHAR_ARRAY_TO_BINARY_CONVERTER = new CharArrayConverter() { EObject convert(char[] chars) { String s = new String(chars); try { return new EBinary(s.getBytes("UTF-8")); } catch (java.io.UnsupportedEncodingException e) { throw new Error(e); // Not supposed to happen. } } }; final static CharArrayConverter CHAR_ARRAY_TO_LIST_CONVERTER = new CharArrayConverter() { // TODO: We could use something like EBinList here, just for chars: EObject convert(char[] chars) { return EBigString.make(chars, 0, chars.length); } }; @BIF public static EObject bin_is_7bit(EObject o1) { EBinary bin; if ((bin = o1.testBinary()) != null) { return ERT.box( bin.is_7bit() ); } return ERT.FALSE; } }