/******************************************************************************* * Copyright (c) 2001, 2008 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * IBM Corporation - initial API and implementation * Jens Lukowski/Innoopract - initial renaming/restructuring * *******************************************************************************/ package org.eclipse.wst.sse.core.internal.encoding; import org.eclipse.core.runtime.content.IContentDescription; /** * This class is to simply hold information and data about the type of * encoding found for a resource. It not only includes names, etc., but also * gives hints about the algorithm, or rule, that the encodng was determined. * Having all this info in a central object, associated with the Document * (technically, IStructuredDocument), allows for better user error messages, * and better handling of knowing how to dump a file, given we know how it was * loaded. * * Note: the data in this class is only valid if its has actually gone through * the loading or dumping sequence. It is not accurate, for example, if a * structuredDocument is simply created and then setText called. In this type * of case, accuracy for loading and dumping is not required, since its all * re-discovered. One limitation is that structuredDocument's created "from * scratch" this way, don't have any encoding information to count on, and * would have to arrange the processing to be done. (And it is done, * automatically if going through loader or dumper, but perhaps not in future * new uses. TODO: this can be inproved in future versions.) * * isInitialized is set when the loader or dumper processes have been used, * but even this can't be counted on 100% if the document has been modified * since. * */ public class EncodingMemento implements Cloneable { public final static String CLONED = "cloned"; //$NON-NLS-1$ public final static String DEFAULTS_ASSUMED_FOR_EMPTY_INPUT = "DefaultsAssumedForEmptyInput"; //$NON-NLS-1$ public final static String DEFAULTS_USED_DUE_TO_SMALL_STREAM = "defaultsUsedDueToSmallStream"; //$NON-NLS-1$ /* * Strings to be used for tracing. TODO: need to clean this up, we no * longer use all of them */ public final static String DETECTED_STANDARD_UNICODE_BYTES = "detectedStandardUnicodeBytes"; //$NON-NLS-1$ public final static String FOUND_ENCODING_IN_CONTENT = "foundEncodingInContent"; //$NON-NLS-1$ public final static String FOUND_ENCODING_IN_STREAM = "foundEncodingInStream"; //$NON-NLS-1$ public final static String FOUND_ENCODING_IN_STRUCTURED_DOCUMENT = "foundEncodingInStructuredDocument"; //$NON-NLS-1$ public final static String GUESSED_ENCODING_FROM_STREAM = "GuessEncodingFromStream"; //$NON-NLS-1$ public final static String JAVA_NAME_FOUND_AS_IANA_NAME = "noMappingFoundButJavaNameFoundToBeIANAName"; //$NON-NLS-1$ public final static String JAVA_NAME_FOUND_IN_ALIAS_NAME = "noMappingFoundButJavaNameFoundInAliasTable"; //$NON-NLS-1$ public final static String NO_IANA_NAME_FOUND = "noMappingFoundFromJavaNameToIANAName"; //$NON-NLS-1$ public final static String USED_CONTENT_TYPE_DEFAULT = "UsedContentTypeDefault"; //$NON-NLS-1$ public final static String USED_JAVA_DEFAULT = "UsedJavaDefault"; //$NON-NLS-1$ public final static String USED_MEMENTO_FROM_LOAD = "usedMementoFromLoad"; //$NON-NLS-1$ public final static String USED_PROPERTY_SETTINGS = "USED_PROPERTY_SETTINGS"; //$NON-NLS-1$ public final static String USED_USER_SPECIFIED_PREFERENCE = "UsedUserSpecifiedPreference"; //$NON-NLS-1$ public final static String USED_WORKSPACE_DEFAULT = "UsedWorkspaceDefault"; //$NON-NLS-1$ public final static String USER_IS_USING_JAVA_ENCODING = "UserIsUsingJavaEncoding"; //$NON-NLS-1$ private String fAppropriateDefault; private String fDetectedCharsetName; private String fInvalidEncoding; private String fJavaCharsetName; private boolean fUnicodeStream; private boolean fUTF83ByteBOMUsed; private byte[] fBOM; public EncodingMemento() { super(); } /** * Returns a clone of this object. */ public Object clone() { EncodingMemento object = null; try { object = (EncodingMemento) super.clone(); } catch (CloneNotSupportedException e) { // impossible, since we're implementing here } return object; } /** * Returns the appropriateDefault. This is only set if an invalid encoding * was found, and contains an charset appropriate to use as a default * value, if, for example, the user decides to load the document anyway, * even though the charset was found to be invalid. * * @return String */ public String getAppropriateDefault() { if (fAppropriateDefault == null) { fAppropriateDefault = NonContentBasedEncodingRules.useDefaultNameRules(null); } return fAppropriateDefault; } /** * Returns the charset name, if it is different from the charset name * found in getJavaCharsetName. This can happen, for example, if there are * differences in case. This method might return SHIFT_JIS, and the the * getJavaCharsetName might return Shift_JIS -- if SHIFT_JIS was detected * in file/document. If the original file contained the correct case, then * this method would return null. The getJavaCharsetName is typically the * one that should always be used, and this one only used for certain * error conditions, or or if when creating a "duplicate" resource, it was * desired to use exactly the charset name as in the original document. As * an example of this later case, the original document might contain * ISO-8859-9, but the detected charset name might contain ISO-8859-9-I. * * @return String */ public String getDetectedCharsetName() { return fDetectedCharsetName; } /** * Returns a charset name that was detected, but not found to be a charset * suppoorted by the VM. * * @return String */ public String getInvalidEncoding() { return fInvalidEncoding; } /** * Returns the java cononical charset name. * * @return String */ public String getJavaCharsetName() { return fJavaCharsetName; } /** * Note: we may be able to remove this method, if it turns out this work * is done by "text" type. * * @deprecated - */ public byte[] getUnicodeBOM() { byte[] bom = null; if (isUTF83ByteBOMUsed()) bom = IContentDescription.BOM_UTF_8; else if (isUnicodeStream()) bom = fBOM; return bom; } /** * Note: in our implementation, the stream is a unicode stream if the * charset is UTF-16, UTF-16LE, or UTF-16BE. A stream with 3 byte BOM is * not considered unicode stream here. * * @return returns true if is a unicode (UTF-16) stream */ public boolean isUnicodeStream() { return fUnicodeStream; } /** * Note: in our implementation, the stream is a unicode stream if the * charset is UTF-16, UTF-16LE, or UTF-16BE. A stream with 3 byte BOM is * not considered unicode stream here. * * Set during load, can be used by dumper to write 3 byte BOM, which Java * does not normally do. This helps maintain compatibility with other * programs (those that wrote the 3 byte BOM there to begin with. * * @return boolean */ public boolean isUTF83ByteBOMUsed() { return fUTF83ByteBOMUsed; } public boolean isValid() { return getInvalidEncoding() == null; } /** * Sets the appropriateDefault. * * @param appropriateDefault * The appropriateDefault to set */ public void setAppropriateDefault(String appropriateDefault) { fAppropriateDefault = appropriateDefault; } public void setDetectedCharsetName(String detectedCharsetName) { fDetectedCharsetName = detectedCharsetName; } public void setInvalidEncoding(String invalidEncoding) { fInvalidEncoding = invalidEncoding; } /** * Sets the javaEncodingName. * * @param javaEncodingName * The javaEncodingName to set */ public void setJavaCharsetName(String javaCharsetName) { fJavaCharsetName = javaCharsetName; } /** * @param b */ public void setUnicodeStream(boolean unicodeStream) { fUnicodeStream = unicodeStream; } /** * Sets the uTF83ByteBOMfound. * * @param uTF83ByteBOMfound * The uTF83ByteBOMfound to set */ public void setUTF83ByteBOMUsed(boolean uTF83ByteBOMUsed) { fUTF83ByteBOMUsed = uTF83ByteBOMUsed; } public void setUnicodeBOM(byte[] bom) { fBOM = bom; } }