EncodingMemento.java example

Explorer
webtools.sourceediting-master
/*******************************************************************************
 * Copyright (c) 2001, 2008 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Jens Lukowski/Innoopract - initial renaming/restructuring
 *     
 *******************************************************************************/
package org.eclipse.wst.sse.core.internal.encoding;

import org.eclipse.core.runtime.content.IContentDescription;


/**
 * This class simply holds information and data about the type of
 * encoding found for a resource. It not only includes names, etc., but also
 * gives hints about the algorithm, or rule, by which the encoding was
 * determined. Having all this info in a central object, associated with the
 * Document (technically, IStructuredDocument), allows for better user error
 * messages, and better handling of knowing how to dump a file, given we know
 * how it was loaded.
 * 
 * Note: the data in this class is only valid if it has actually gone through
 * the loading or dumping sequence. It is not accurate, for example, if a
 * structuredDocument is simply created and then setText called. In this type
 * of case, accuracy for loading and dumping is not required, since it's all
 * re-discovered. One limitation is that structuredDocuments created "from
 * scratch" this way don't have any encoding information to count on, and
 * would have to arrange the processing to be done. (And it is done
 * automatically if going through loader or dumper, but perhaps not in future
 * new uses. TODO: this can be improved in future versions.)
 * 
 * isInitialized is set when the loader or dumper processes have been used,
 * but even this can't be counted on 100% if the document has been modified
 * since.
 * 
 */
public class EncodingMemento implements Cloneable {

	public final static String CLONED = "cloned"; //$NON-NLS-1$
	public final static String DEFAULTS_ASSUMED_FOR_EMPTY_INPUT = "DefaultsAssumedForEmptyInput"; //$NON-NLS-1$
	public final static String DEFAULTS_USED_DUE_TO_SMALL_STREAM = "defaultsUsedDueToSmallStream"; //$NON-NLS-1$


	/*
	 * Strings to be used for tracing. TODO: need to clean this up, we no
	 * longer use all of them
	 */
	public final static String DETECTED_STANDARD_UNICODE_BYTES = "detectedStandardUnicodeBytes"; //$NON-NLS-1$
	public final static String FOUND_ENCODING_IN_CONTENT = "foundEncodingInContent"; //$NON-NLS-1$
	public final static String FOUND_ENCODING_IN_STREAM = "foundEncodingInStream"; //$NON-NLS-1$
	public final static String FOUND_ENCODING_IN_STRUCTURED_DOCUMENT = "foundEncodingInStructuredDocument"; //$NON-NLS-1$
	public final static String GUESSED_ENCODING_FROM_STREAM = "GuessEncodingFromStream"; //$NON-NLS-1$
	public final static String JAVA_NAME_FOUND_AS_IANA_NAME = "noMappingFoundButJavaNameFoundToBeIANAName"; //$NON-NLS-1$
	public final static String JAVA_NAME_FOUND_IN_ALIAS_NAME = "noMappingFoundButJavaNameFoundInAliasTable"; //$NON-NLS-1$
	public final static String NO_IANA_NAME_FOUND = "noMappingFoundFromJavaNameToIANAName"; //$NON-NLS-1$
	public final static String USED_CONTENT_TYPE_DEFAULT = "UsedContentTypeDefault"; //$NON-NLS-1$
	public final static String USED_JAVA_DEFAULT = "UsedJavaDefault"; //$NON-NLS-1$
	public final static String USED_MEMENTO_FROM_LOAD = "usedMementoFromLoad"; //$NON-NLS-1$
	public final static String USED_PROPERTY_SETTINGS = "USED_PROPERTY_SETTINGS"; //$NON-NLS-1$
	public final static String USED_USER_SPECIFIED_PREFERENCE = "UsedUserSpecifiedPreference"; //$NON-NLS-1$
	public final static String USED_WORKSPACE_DEFAULT = "UsedWorkspaceDefault"; //$NON-NLS-1$
	public final static String USER_IS_USING_JAVA_ENCODING = "UserIsUsingJavaEncoding"; //$NON-NLS-1$

	/** Fallback charset name; computed lazily by getAppropriateDefault() when unset. */
	private String fAppropriateDefault;
	/** Charset name as set through setDetectedCharsetName. */
	private String fDetectedCharsetName;
	/** Non-null when an invalid encoding has been recorded; drives isValid(). */
	private String fInvalidEncoding;


	/** Charset name as set through setJavaCharsetName. */
	private String fJavaCharsetName;
	private boolean fUnicodeStream;
	private boolean fUTF83ByteBOMUsed;

	/** BOM bytes as set through setUnicodeBOM; may be null. */
	private byte[] fBOM;

	public EncodingMemento() {
		super();
	}

	/**
	 * Returns a copy of this memento. The String and boolean state is copied
	 * by {@link Object#clone()}; the BOM byte array, if one has been set, is
	 * duplicated so that the copy and the original do not share the same
	 * mutable array.
	 * 
	 * @return a new EncodingMemento holding the same values as this one
	 */
	public Object clone() {
		EncodingMemento copy;
		try {
			copy = (EncodingMemento) super.clone();
		}
		catch (CloneNotSupportedException e) {
			// Cannot happen, since this class implements Cloneable. Fail
			// loudly (keeping the cause) rather than hand back null.
			throw new AssertionError(e);
		}

		if (fBOM != null) {
			// Object.clone() copies only the array reference; give the copy
			// its own array.
			copy.fBOM = (byte[]) fBOM.clone();
		}
		return copy;
	}

	/**
	 * Returns the appropriateDefault. This is only set if an invalid encoding
	 * was found, and contains a charset appropriate to use as a default
	 * value, if, for example, the user decides to load the document anyway,
	 * even though the charset was found to be invalid.
	 * 
	 * If no value has been set, one is obtained from
	 * NonContentBasedEncodingRules.useDefaultNameRules(null) and remembered
	 * for subsequent calls.
	 * 
	 * @return String
	 */
	public String getAppropriateDefault() {
		if (fAppropriateDefault == null) {
			fAppropriateDefault = NonContentBasedEncodingRules.useDefaultNameRules(null);
		}
		return fAppropriateDefault;
	}

	/**
	 * Returns the charset name, if it is different from the charset name
	 * found in getJavaCharsetName. This can happen, for example, if there are
	 * differences in case. This method might return SHIFT_JIS, and
	 * getJavaCharsetName might return Shift_JIS -- if SHIFT_JIS was detected
	 * in file/document. If the original file contained the correct case, then
	 * this method would return null. The getJavaCharsetName is typically the
	 * one that should always be used, and this one only used for certain
	 * error conditions, or if, when creating a "duplicate" resource, it was
	 * desired to use exactly the charset name as in the original document. As
	 * an example of this latter case, the original document might contain
	 * ISO-8859-9, but the detected charset name might contain ISO-8859-9-I.
	 * 
	 * @return String
	 */
	public String getDetectedCharsetName() {
		return fDetectedCharsetName;
	}

	/**
	 * Returns a charset name that was detected, but not found to be a charset
	 * supported by the VM.
	 * 
	 * @return String, or null if no invalid encoding has been recorded
	 */
	public String getInvalidEncoding() {
		return fInvalidEncoding;
	}

	/**
	 * Returns the java canonical charset name.
	 * 
	 * @return String
	 */
	public String getJavaCharsetName() {
		return fJavaCharsetName;
	}

	/**
	 * Returns the byte order mark associated with this memento:
	 * IContentDescription.BOM_UTF_8 if isUTF83ByteBOMUsed() is true,
	 * otherwise the array given to setUnicodeBOM if isUnicodeStream() is
	 * true, otherwise null. The array is returned as-is (not copied).
	 * 
	 * Note: we may be able to remove this method, if it turns out this work
	 * is done by "text" type.
	 * 
	 * @return the BOM bytes, or null
	 * @deprecated -
	 */
	public byte[] getUnicodeBOM() {
		byte[] bom = null;
		if (isUTF83ByteBOMUsed())
			bom = IContentDescription.BOM_UTF_8;
		else if (isUnicodeStream())
			bom = fBOM;
		return bom;
	}

	/**
	 * Note: in our implementation, the stream is a unicode stream if the
	 * charset is UTF-16, UTF-16LE, or UTF-16BE. A stream with 3 byte BOM is
	 * not considered unicode stream here.
	 * 
	 * @return returns true if is a unicode (UTF-16) stream
	 */
	public boolean isUnicodeStream() {
		return fUnicodeStream;
	}

	/**
	 * Returns whether the 3 byte UTF-8 BOM flag has been set on this memento.
	 * 
	 * Set during load, can be used by dumper to write 3 byte BOM, which Java
	 * does not normally do. This helps maintain compatibility with other
	 * programs (those that wrote the 3 byte BOM there to begin with).
	 * 
	 * @return boolean
	 */
	public boolean isUTF83ByteBOMUsed() {
		return fUTF83ByteBOMUsed;
	}

	/**
	 * Returns whether this memento is free of a recorded invalid encoding.
	 * 
	 * @return true if getInvalidEncoding() is null
	 */
	public boolean isValid() {
		return getInvalidEncoding() == null;
	}

	/**
	 * Sets the appropriateDefault.
	 * 
	 * @param appropriateDefault
	 *            The appropriateDefault to set
	 */
	public void setAppropriateDefault(String appropriateDefault) {
		fAppropriateDefault = appropriateDefault;
	}


	/**
	 * Sets the detectedCharsetName.
	 * 
	 * @param detectedCharsetName
	 *            The detectedCharsetName to set
	 */
	public void setDetectedCharsetName(String detectedCharsetName) {
		fDetectedCharsetName = detectedCharsetName;
	}

	/**
	 * Sets the invalidEncoding. A non-null value makes isValid() return
	 * false.
	 * 
	 * @param invalidEncoding
	 *            The invalidEncoding to set
	 */
	public void setInvalidEncoding(String invalidEncoding) {
		fInvalidEncoding = invalidEncoding;
	}

	/**
	 * Sets the javaCharsetName.
	 * 
	 * @param javaCharsetName
	 *            The javaCharsetName to set
	 */
	public void setJavaCharsetName(String javaCharsetName) {
		fJavaCharsetName = javaCharsetName;
	}

	/**
	 * Sets the unicodeStream flag.
	 * 
	 * @param unicodeStream
	 *            The value isUnicodeStream() should return
	 */
	public void setUnicodeStream(boolean unicodeStream) {
		fUnicodeStream = unicodeStream;

	}

	/**
	 * Sets the uTF83ByteBOMUsed flag.
	 * 
	 * @param uTF83ByteBOMUsed
	 *            The value isUTF83ByteBOMUsed() should return
	 */
	public void setUTF83ByteBOMUsed(boolean uTF83ByteBOMUsed) {
		fUTF83ByteBOMUsed = uTF83ByteBOMUsed;
	}

	/**
	 * Sets the BOM bytes that getUnicodeBOM() returns when isUnicodeStream()
	 * is true. The array reference is stored as-is (not copied).
	 * 
	 * @param bom
	 *            The BOM bytes to remember; may be null
	 */
	public void setUnicodeBOM(byte[] bom) {
		fBOM = bom;
	}
}