CodedIO.java example

Explorer
webtools.sourceediting-master
/*******************************************************************************
 * Copyright (c) 2001, 2008 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Jens Lukowski/Innoopract - initial renaming/restructuring
 *     
 *******************************************************************************/
package org.eclipse.wst.sse.core.internal.encoding;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Properties;

import org.eclipse.core.runtime.IPath;
import org.eclipse.core.runtime.Path;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.wst.sse.core.internal.encoding.util.Assert;
import org.eclipse.wst.sse.core.internal.encoding.util.Logger;
import org.osgi.framework.Bundle;


public abstract class CodedIO {

	private final boolean DEBUG = false;
	
	public static final int MAX_BUF_SIZE = 1024 * 8;

	public static final int MAX_MARK_SIZE = MAX_BUF_SIZE;

	public static final String NO_SPEC_DEFAULT = "NoSpecDefault"; //$NON-NLS-1$

	private static Properties overridenCharsets = null;

	/**
	 * <p>
	 * There are two well known understood cases where the standard/default
	 * Java Mappings are not sufficient. (Thanks to Hirotaka Matsumoto for
	 * providing these two). I believe there are others that individual
	 * customers have requested to override on a case by case basis, but I've
	 * lost the details. TODO-future: document some of those use-cases.
	 * </p>
	 * <ul>
	 * <li>ISO-8859-8-I</li>
	 * <p>
	 * In the code conversion point of view, ISO-9959-8 and ISO-8859-8-I are
	 * the same. However. the representation on the browser is different. (
	 * It's very very hard to explain this into the words, but once you will
	 * see, you will understand it :) Many BiDi HTML/JSPs use ISO-8859-8-I in
	 * META/page directive. So WSAD needs to support this encoding.
	 * </p>
	 * <li>X-SJIS</li>
	 * <p>
	 * Because Mosaic/Navigator 2.0 supported only X-SJIS/X-EUC-JP, lots of
	 * old HTML files used X-SJIS/X-EUC-JP so that the customers still want us
	 * to support this code conversion for HTML files.
	 * </p>
	 * </ul>
	 * 
	 * @param detectedCharsetName
	 * @return the detectedCharsetName, if no overrides, otherwise the charset
	 *         name that should be used instead of detectedCharsetName
	 */
	/**
	 * This method is deliberatly 'default access' since clients should not
	 * need to access this information directly.
	 */
	static public String checkMappingOverrides(String detectedCharsetName) {
		// This method MUST return what was passed in, if
		// there are no
		// overrides.
		String result = detectedCharsetName;
		String newResult = getOverridenCharsets().getProperty(detectedCharsetName);
		if (newResult != null) {
			result = newResult;
		}
		return result;
	}

	/**
	 * Note: once this instance is created, trace info still needs to be
	 * appended by caller, depending on the context its created.
	 */
	public static EncodingMemento createEncodingMemento(byte[] detectedBom, String javaCharsetName, String detectedCharsetName, String unSupportedName, String specDefaultEncoding, String reason) {
		EncodingMemento result = new EncodingMemento();
		result.setJavaCharsetName(javaCharsetName);
		result.setDetectedCharsetName(detectedCharsetName);
		// TODO: if detectedCharset and spec default is
		// null, need to use "work
		// bench based" defaults.
		if (specDefaultEncoding == null)
			result.setAppropriateDefault(NO_SPEC_DEFAULT);
		else
			result.setAppropriateDefault(specDefaultEncoding);
		if (unSupportedName != null) {
			result.setInvalidEncoding(unSupportedName);
		}
		// check if valid
		try {
			Charset.isSupported(javaCharsetName);
		} catch (IllegalCharsetNameException e) {
			result.setInvalidEncoding(javaCharsetName);
		}

		// check UTF83ByteBOMUsed and UnicodeStream
		if (detectedBom != null) {
			if (detectedBom.length == 2)
				result.setUnicodeStream(true);
			else if (detectedBom.length == 3)
				result.setUTF83ByteBOMUsed(true);
			result.setUnicodeBOM(detectedBom);
		}
		return result;
	}

	/**
	 * Note: once this instance is created, trace info still needs to be
	 * appended by caller, depending on the context its created.
	 */
	public static EncodingMemento createEncodingMemento(String detectedCharsetName) {
		return createEncodingMemento(detectedCharsetName, null);
	}

	/**
	 * Note: once this instance is created, trace info still needs to be
	 * appended by caller, depending on the context its created.
	 */
	public static EncodingMemento createEncodingMemento(String detectedCharsetName, String reason) {
		return createEncodingMemento(detectedCharsetName, reason, null);
	}

	/**
	 * Note: once this instance is created, trace info still needs to be
	 * appended by caller, depending on the context its created.
	 */
	public static EncodingMemento createEncodingMemento(String detectedCharsetName, String reason, String specDefaultEncoding) {
		EncodingMemento result = new EncodingMemento();
		result = new EncodingMemento();
		String javaCharset = getAppropriateJavaCharset(detectedCharsetName);
		result.setJavaCharsetName(javaCharset);
		result.setDetectedCharsetName(detectedCharsetName);
		// TODO: if detectedCharset and spec default is
		// null, need to use "work
		// bench based" defaults.
		if (specDefaultEncoding == null)
			result.setAppropriateDefault(NO_SPEC_DEFAULT);
		else
			result.setAppropriateDefault(specDefaultEncoding);
		// check if valid
		try {
			Charset.isSupported(javaCharset);
		} catch (IllegalCharsetNameException e) {
			result.setInvalidEncoding(javaCharset);
		}

		return result;
	}

	/**
	 * This method can return null, if invalid charset name (in which case
	 * "appropriateDefault" should be used, if a name is really need for some
	 * "save anyway" cases).
	 * 
	 * @param detectedCharsetName
	 * @return
	 */
	public static String getAppropriateJavaCharset(String detectedCharsetName) {
		// we don't allow null argument (or risk NPE or
		// IllegalArgumentException later at several
		// points.
		Assert.isNotNull(detectedCharsetName, "illegal charset argument. it can not be null"); //$NON-NLS-1$
		String result = detectedCharsetName;
		// 1. Check explicit mapping overrides from
		// property file
		result = CodedIO.checkMappingOverrides(detectedCharsetName);
		// 2. Use the "canonical" name from JRE mappings
		// Note: see Charset JavaDoc, the name you get one
		// with can be alias,
		// the name you get back is "standard" name.
		Charset javaCharset = null;
		// Note: this will immediatly throw
		// "UnsuppotedCharsetException" if it
		// invalid. Issue: Is it more client friendly to
		// eat that exception and return null?
		javaCharset = Charset.forName(result);
		if (javaCharset != null) {
			result = javaCharset.name();
		}
		return result;
	}

	/**
	 * @return Returns the overridenCharsets.
	 */
	private static Properties getOverridenCharsets() {
		if (overridenCharsets == null) {
			overridenCharsets = new Properties();
			Bundle keyBundle = Platform.getBundle(ICodedResourcePlugin.ID);
			IPath keyPath = new Path("config/override.properties"); //$NON-NLS-1$
			URL location = Platform.find(keyBundle, keyPath);
			InputStream propertiesInputStream = null;
			try {
				propertiesInputStream = location.openStream();
				overridenCharsets.load(propertiesInputStream);
			} catch (IOException e) {
				// if can't read, just assume there's no
				// overrides
				// and repeated attempts will not occur,
				// since they
				// will be represented by an empty
				// Properties object
			}
		}
		return overridenCharsets;
	}

	/**
	 * This class need not be instantiated (though its subclasses can be).
	 */
	protected CodedIO() {
		super();
	}

	protected EncodingMemento createMemento(IContentDescription contentDescription) {
		EncodingMemento result;
		String appropriateDefault = contentDescription.getContentType().getDefaultCharset();
		String detectedCharset = (String) contentDescription.getProperty(IContentDescriptionExtended.DETECTED_CHARSET);
		String unSupportedCharset = (String) contentDescription.getProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET);
		String javaCharset = contentDescription.getCharset();
		// integrity checks for debugging
		if (javaCharset == null) {
			Logger.log(Logger.INFO_DEBUG, "charset equaled null!"); //$NON-NLS-1$
		} else if (javaCharset.length() == 0) {
			Logger.log(Logger.INFO_DEBUG, "charset equaled emptyString!"); //$NON-NLS-1$
		}
		byte[] BOM = (byte[]) contentDescription.getProperty(IContentDescription.BYTE_ORDER_MARK);
		//result = (EncodingMemento)
		// contentDescription.getProperty(IContentDescriptionExtended.ENCODING_MEMENTO);
		result = createEncodingMemento(BOM, javaCharset, detectedCharset, unSupportedCharset, appropriateDefault, null);
		if (!result.isValid()) {
			result.setAppropriateDefault(appropriateDefault);
			// integrity check for debugging "invalid" cases.
			// the apprriate default we have, should equal what's in the
			// detected field. (not sure this is always required)
			if (DEBUG && appropriateDefault != null && !appropriateDefault.equals(detectedCharset)) {
				Logger.log(Logger.INFO_DEBUG, "appropriate did not equal detected, as expected for invalid charset case"); //$NON-NLS-1$
			}
		}
		return result;
	}
}