/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pdfbox.encoding.conversion; import org.apache.fontbox.cmap.CMap; import java.io.UnsupportedEncodingException; /** * CJKConverter converts encodings defined in CJKEncodings. * * @author Pin Xue (http://www.pinxue.net), Holly Lee (holly.lee (at) gmail.com) * @version $Revision: 1.0 $ */ public class CJKConverter implements EncodingConverter { // The encoding private String encodingName = null; // The java charset name private String charsetName = null; /** * Constructs a CJKConverter from a PDF encoding name. * * @param encoding the encoding to be used */ public CJKConverter(String encoding) { encodingName = encoding; charsetName = CJKEncodings.getCharset(encoding); } /** * Convert a string. It occurs when a cmap lookup returned * converted bytes successfully, but we still need to convert its * encoding. The parameter s is constructs as one byte or a UTF-16BE * encoded string. * * Note: pdfbox set string to UTF-16BE charset before calling into * this. * * {@inheritDoc} */ public String convertString(String s) { if ( s.length() == 1 ) { return s; } if ( charsetName.equalsIgnoreCase("UTF-16BE") ) { return s; } try { return new String(s.getBytes("UTF-16BE"), charsetName); } catch ( UnsupportedEncodingException uee ) { return s; } } /** * Convert bytes to a string. We just convert bytes within * coderange defined in CMap. * * {@inheritDoc} */ public String convertBytes(byte [] c, int offset, int length, CMap cmap) { if ( cmap != null ) { try { if ( cmap.isInCodeSpaceRanges(c, offset, length) ) { return new String(c, offset, length, charsetName); } else { return null; } } catch ( UnsupportedEncodingException uee ) { return new String(c, offset, length); } } // No cmap? return null; } }