// CodedReaderCreator.java example
//
// Explorer
// webtools.sourceediting-master
/*******************************************************************************
 * Copyright (c) 2001, 2008 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Jens Lukowski/Innoopract - initial renaming/restructuring
 *     
 *******************************************************************************/
package org.eclipse.wst.sse.core.internal.encoding;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;

import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.IStatus;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.runtime.Status;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.IContentTypeManager;
import org.eclipse.core.runtime.jobs.Job;
import org.eclipse.wst.sse.core.internal.SSECoreMessages;
import org.eclipse.wst.sse.core.internal.encoding.util.Assert;
import org.eclipse.wst.sse.core.internal.encoding.util.BufferedLimitedStream;
import org.eclipse.wst.sse.core.internal.encoding.util.Logger;
import org.eclipse.wst.sse.core.internal.encoding.util.NullInputStream;
import org.eclipse.wst.sse.core.internal.encoding.util.UnicodeBOMEncodingDetector;
import org.eclipse.wst.sse.core.internal.exceptions.UnsupportedCharsetExceptionWithDetail;


/**
 * The purpose of this class is to centralize analysis of a file to determine
 * the most appropriate rules of decoding it. The intended use is to set the
 * input, then get the reader for that input which will have its encoding set
 * appropriately. Additionally, there is an EncodingMemento provided, which
 * will be required, in some cases, to later determine the most appropriate
 * form of encoded output.
 */
public class CodedReaderCreator extends CodedIO {


	// True once the client has handed in a stream through
	// set(String, InputStream); cleared again by resetAll().
	private boolean fClientSuppliedStream;


	// Encoding information for the current input. Filled in lazily by
	// getEncodingMemento() (and by content detection) and cleared by
	// resetAll().
	private EncodingMemento fEncodingMemento;

	// Rule consulted by forceDefault() and getCodedReader() to decide how
	// invalid charsets and conversion errors are treated.
	private EncodingRule fEncodingRule;

	// Name that accompanies a client-supplied stream; passed to the content
	// type manager together with the stream contents.
	private String fFilename;

	// Workspace file used as the input when one was given through set(IFile).
	private IFile fIFile;


	// Stream supplied by the client through set(String, InputStream).
	private InputStream fInputStream;
	
	// Charset name that getCodedReader() swaps for UTF-16LE when the memento
	// reports a little-endian UTF-16 byte order mark.
	private static final String CHARSET_UTF_16= "UTF-16"; //$NON-NLS-1$
	
	// Replacement charset name used in the case described above.
	private static final String CHARSET_UTF_16LE= "UTF-16LE"; //$NON-NLS-1$

	/**
	 * Creates an instance with no input and no encoding rule. One of the
	 * <code>set</code> methods has to be called before a reader is
	 * requested.
	 */
	public CodedReaderCreator() {

		super();
	}

	/**
	 * Creates an instance that reads from a workspace file using
	 * <code>EncodingRule.CONTENT_BASED</code>.
	 * 
	 * @param file
	 *            the file to read; must not be null
	 * @throws CoreException
	 *             declared by {@link #set(IFile)}
	 * @throws IOException
	 *             declared by {@link #set(IFile)}
	 */
	public CodedReaderCreator(IFile file) throws CoreException, IOException {

		this(file, EncodingRule.CONTENT_BASED);
	}

	/**
	 * Creates an instance that reads from a workspace file using the given
	 * encoding rule.
	 * 
	 * @param file
	 *            the file to read; must not be null
	 * @param encodingRule
	 *            the rule to apply when decoding
	 * @throws CoreException
	 *             declared by {@link #set(IFile)}
	 * @throws IOException
	 *             declared by {@link #set(IFile)}
	 */
	public CodedReaderCreator(IFile file, EncodingRule encodingRule) throws CoreException, IOException {

		this();
		// set(...) resets all state, so the rule has to be applied afterwards
		set(file);
		setEncodingRule(encodingRule);
	}

	/**
	 * Creates an instance that reads from a client-supplied stream using
	 * <code>EncodingRule.CONTENT_BASED</code>.
	 * 
	 * @param filename
	 *            name associated with the stream
	 * @param inputStream
	 *            the stream to decode
	 */
	public CodedReaderCreator(String filename, InputStream inputStream) {

		this(filename, inputStream, EncodingRule.CONTENT_BASED);
	}

	/**
	 * Creates an instance that reads from a client-supplied stream using the
	 * given encoding rule.
	 * 
	 * @param filename
	 *            name associated with the stream
	 * @param inputStream
	 *            the stream to decode
	 * @param encodingRule
	 *            the rule to apply when decoding
	 */
	public CodedReaderCreator(String filename, InputStream inputStream, EncodingRule encodingRule) {

		this();
		// set(...) resets all state, so the rule has to be applied afterwards
		set(filename, inputStream);
		setEncodingRule(encodingRule);
	}

	/**
	 * Asks the platform content type manager to describe the stream contents
	 * and turns the description into an encoding memento.
	 * <p>
	 * Detection only runs when no memento is stored yet and the stream was
	 * supplied by the client; an already stored memento is reused. The stream
	 * is reset before detection and again afterwards, even when detection
	 * fails.
	 * </p>
	 * 
	 * @param limitedStream
	 *            resettable stream to inspect
	 * @return the stored memento, or null when none could be determined
	 * @throws CoreException
	 *             declared for the memento creation and detection calls
	 * @throws IOException
	 *             if the stream cannot be reset or read
	 * @throws UnsupportedCharsetExceptionWithDetail
	 *             if the memento is not valid and defaults are not forced
	 */
	private EncodingMemento checkForEncodingInContents(InputStream limitedStream) throws CoreException, IOException {
		if (fEncodingMemento == null && fClientSuppliedStream) {
			try {
				limitedStream.reset();
				IContentDescription description = Platform.getContentTypeManager().getDescriptionFor(limitedStream, fFilename, IContentDescription.ALL);
				if (description != null) {
					fEncodingMemento = createMemento(description);
				}
			}
			finally {
				// always hand the stream back rewound
				limitedStream.reset();
			}
		}

		// whatever is stored now (possibly nothing) is the answer
		EncodingMemento detected = fEncodingMemento;
		if (detected != null && !detected.isValid() && !forceDefault()) {
			throw new UnsupportedCharsetExceptionWithDetail(detected);
		}
		return detected;
	}

	/**
	 * Runs the unicode BOM detector over the given stream.
	 * 
	 * @param resettableLimitedStream
	 *            stream handed to the detector
	 * @return the memento reported by the detector (the caller treats null
	 *         as "no BOM found")
	 */
	private EncodingMemento checkStreamForBOM(InputStream resettableLimitedStream) {
		UnicodeBOMEncodingDetector bomDetector = new UnicodeBOMEncodingDetector();
		bomDetector.set(resettableLimitedStream);
		return bomDetector.getEncodingMemento();
	}

	/**
	 * Determines the encoding memento for the workspace file held in
	 * <code>fIFile</code>.
	 * <p>
	 * The file's own content description is tried first. If that yields
	 * nothing and the file is accessible, the file contents are opened and
	 * described through the platform content type manager. The contents
	 * stream is always closed again, including when description fails.
	 * </p>
	 * 
	 * @return the memento created from the content description, or null when
	 *         no description could be obtained
	 * @throws CoreException
	 *             if the file contents cannot be opened
	 * @throws IOException
	 *             if the file contents cannot be read
	 */
	private EncodingMemento findMementoFromFileCase() throws CoreException, IOException {
		EncodingMemento result = null;
		IContentDescription contentDescription = null;
		try {
			// This method provides possible improved performance at the
			// cost of sometimes returning null
			if (fIFile.exists())
				contentDescription = fIFile.getContentDescription();
		}
		catch (CoreException e) {
			// Assume if core exception occurs, we can still try more
			// expensive discovery options.
			Logger.logException(e);
		}
		if (contentDescription == null && fIFile.isAccessible()) {
			InputStream contents = null;
			try {
				contents = fIFile.getContents();
				contentDescription = Platform.getContentTypeManager().getDescriptionFor(contents, fIFile.getName(), IContentDescription.ALL);
			}
			catch (CoreException e1) {
				// Assume if core exception occurs, we can't really do much
				// with determining encoding, etc.
				Logger.logException(e1);
				throw e1;
			}
			catch (IOException e2) {
				// We likely couldn't get the contents of the file, something
				// is really wrong
				Logger.logException(e2);
				throw e2;
			}
			finally {
				// close here rather than after the try: the catch blocks
				// rethrow, which previously skipped the close and leaked the
				// stream
				if (contents != null) {
					try {
						contents.close();
					}
					catch (IOException e3) {
						Logger.logException(e3);
					}
				}
			}
		}
		if (contentDescription != null) {
			result = createMemento(contentDescription);
		}

		return result;
	}

	/**
	 * Determines the encoding memento for a client-supplied stream.
	 * <p>
	 * The stream is first checked for a unicode byte order mark; if none is
	 * found the stream is reset and its contents are examined for a declared
	 * encoding. Should no stream be available, the file name alone (when
	 * present) is described against an empty stream. Resource settings,
	 * preferences and workbench defaults are not consulted here; see
	 * {@link #getEncodingMemento()} for those fallbacks.
	 * </p>
	 * 
	 * @return the memento found, or null when nothing could be determined
	 * @throws IOException
	 *             if the stream cannot be reset, read or released
	 * @throws CoreException
	 *             declared for the detection and memento creation calls
	 */
	private EncodingMemento findMementoFromStreamCase() throws CoreException, IOException {

		EncodingMemento memento = null;
		InputStream limited = null;
		try {
			limited = getLimitedStream(getResettableStream());
			if (limited != null) {
				// byte order mark wins; otherwise look inside the contents
				memento = checkStreamForBOM(limited);
				if (memento == null) {
					limited.reset();
					memento = checkForEncodingInContents(limited);
				}
			}
			else if (fFilename != null) {
				// no stream, but the name may still identify a content type
				IContentDescription description = Platform.getContentTypeManager().getDescriptionFor(new NullInputStream(), fFilename, IContentDescription.ALL);
				if (description != null) {
					memento = createMemento(description);
				}
			}
		}
		finally {
			if (limited != null) {
				handleStreamClose(limited);
			}
		}
		return memento;
	}

	/**
	 * Tells whether the current encoding rule is
	 * <code>EncodingRule.FORCE_DEFAULT</code>.
	 * 
	 * @return true only when a rule is set and it is the force-default rule
	 */
	private boolean forceDefault() {

		return fEncodingRule != null && fEncodingRule == EncodingRule.FORCE_DEFAULT;
	}

	/**
	 * Returns a buffered reader over the current input (workspace file or
	 * client-supplied stream) that decodes with the charset recorded in the
	 * encoding memento. A detected byte order mark is skipped first.
	 * <p>
	 * Malformed or unmappable input is replaced when the encoding rule is
	 * <code>EncodingRule.IGNORE_CONVERSION_ERROR</code> and reported
	 * otherwise. With <code>EncodingRule.FORCE_DEFAULT</code> the memento's
	 * appropriate default charset is used instead of the detected one.
	 * </p>
	 * <p>
	 * If creating the reader fails and the stream was opened by this class
	 * (that is, it was not supplied by the client), the stream is closed
	 * before the exception propagates.
	 * </p>
	 * 
	 * @return a buffered reader, already marked with
	 *         <code>CodedIO.MAX_BUF_SIZE</code>
	 * @throws CoreException
	 *             if the encoding memento or the input stream cannot be
	 *             obtained
	 * @throws IOException
	 *             if the stream cannot be reset, skipped or marked
	 * @throws UnsupportedCharsetExceptionWithDetail
	 *             if the memento is not valid and defaults are not forced
	 */
	public Reader getCodedReader() throws CoreException, IOException {

		// we make a local copy of encoding memento so stream won't be
		// accessed simultaneously.
		EncodingMemento encodingMemento = getEncodingMemento();
		Assert.isNotNull(encodingMemento, "Appears reader requested before file or stream set"); //$NON-NLS-1$
		InputStream streamToReturn = getResettableStream();
		boolean readerCreated = false;
		try {
			streamToReturn.reset();
			// Per the original note, the built-in converters do not skip the
			// byte order mark correctly, so step over it here to leave the
			// stream transparently ready for the client: 2 bytes for a
			// unicode stream, 3 bytes for the UTF-8 BOM.
			if (encodingMemento.isUnicodeStream()) {
				skipBOMBytes(streamToReturn, 2);
			}
			else if (encodingMemento.isUTF83ByteBOMUsed()) {
				skipBOMBytes(streamToReturn, 3);
			}
			String charsetName = encodingMemento.getJavaCharsetName();
			if (charsetName == null) {
				charsetName = encodingMemento.getDetectedCharsetName();
			}
			if (!encodingMemento.isValid() && !forceDefault()) {
				throw new UnsupportedCharsetExceptionWithDetail(encodingMemento);
			}

			if (fEncodingRule == EncodingRule.FORCE_DEFAULT) {
				charsetName = encodingMemento.getAppropriateDefault();
			}

			// [228366] For files that have a unicode BOM, and a charset name
			// of UTF-16, the charset decoder needs "UTF-16LE"
			if (CHARSET_UTF_16.equals(charsetName) && encodingMemento.getUnicodeBOM() == IContentDescription.BOM_UTF_16LE) {
				charsetName = CHARSET_UTF_16LE;
			}

			Charset charset = Charset.forName(charsetName);
			CharsetDecoder charsetDecoder = charset.newDecoder();
			if (fEncodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
				charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE);
				charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
			}
			else {
				charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
				charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
			}
			// more efficient to be buffered, and I know of no reason not to
			// return that directly.
			Reader result = new BufferedReader(new InputStreamReader(streamToReturn, charsetDecoder), CodedIO.MAX_BUF_SIZE);
			result.mark(CodedIO.MAX_BUF_SIZE);
			readerCreated = true;
			return result;
		}
		finally {
			// A stream we opened ourselves must not outlive a failed call;
			// a client-supplied stream stays under the client's control.
			if (!readerCreated && !fClientSuppliedStream) {
				try {
					streamToReturn.close();
				}
				catch (IOException ignored) {
					// the exception already propagating is the meaningful
					// one; a close failure must not replace it
				}
			}
		}
	}

	/**
	 * Skips up to <code>count</code> bytes, retrying when
	 * <code>InputStream.skip</code> skips fewer bytes than requested. Stops
	 * early only at end of stream.
	 */
	private static void skipBOMBytes(InputStream stream, int count) throws IOException {
		long remaining = count;
		while (remaining > 0) {
			long skipped = stream.skip(remaining);
			if (skipped > 0) {
				remaining -= skipped;
			}
			else if (stream.read() < 0) {
				// end of stream: nothing left to skip
				return;
			}
			else {
				// skip() made no progress but one byte could be read
				remaining--;
			}
		}
	}

	/**
	 * Returns the encoding memento for the current input, determining and
	 * storing it on first use.
	 * <p>
	 * Resolution order: detection from the client-supplied stream or the
	 * workspace file; then, if that gave no memento or no detected charset
	 * name, the resource charset; finally the default name rules.
	 * </p>
	 * 
	 * @return the memento, stored in <code>fEncodingMemento</code>
	 * @throws CoreException
	 *             declared for the detection and resource lookups
	 * @throws IOException
	 *             declared for the detection and resource lookups
	 */
	public EncodingMemento getEncodingMemento() throws CoreException, IOException {
		// figure out encoding memento from encoding strategy
		if (fEncodingMemento == null) {
			if (fClientSuppliedStream) {
				fEncodingMemento = findMementoFromStreamCase();
			}
			else if (fIFile != null) {
				fEncodingMemento = findMementoFromFileCase();
			}
		}

		// if the encoding strategy doesn't provide an answer, fall back to
		// the resource lookup (which reads fEncodingMemento itself, so the
		// field must already hold the detection result at this point)
		if (fEncodingMemento == null || fEncodingMemento.getDetectedCharsetName() == null) {
			fEncodingMemento = getEncodingMementoFromResourceAndPreference();
		}

		// use DefaultNameRules from NonContentBasedEncodingRules as the final
		// default
		if (fEncodingMemento == null) {
			fEncodingMemento = handleNotProvidedFromContentCase();
		}

		return fEncodingMemento;
	}

	/*
	 * Fallback used when no encoding was detected in the content.
	 * 
	 * Follows the Eclipse Platform's direction: when a workspace file is
	 * set, a memento is built from that file's charset. Without a file the
	 * currently stored memento (possibly null) is returned unchanged.
	 * 
	 * Note: This method appears in both CodedReaderCreator and
	 * CodedStreamCreator (with just a minor difference). They should be kept
	 * the same.
	 */
	private EncodingMemento getEncodingMementoFromResourceAndPreference() throws IOException, CoreException {
		if (fIFile == null) {
			return fEncodingMemento;
		}
		String fileCharset = fIFile.getCharset();
		return CodedIO.createEncodingMemento(fileCharset);
	}

	/**
	 * Wraps a stream in a <code>BufferedLimitedStream</code> bounded by
	 * <code>CodedIO.MAX_MARK_SIZE</code> and marks it at the current
	 * position. A null stream, or one that already is a
	 * <code>BufferedLimitedStream</code>, is returned untouched.
	 * <p>
	 * This specialized stream was introduced to overcome
	 * https://bugs.eclipse.org/bugs/show_bug.cgi?id=67211. See also
	 * https://bugs.eclipse.org/bugs/show_bug.cgi?id=68565
	 * </p>
	 * 
	 * @param original
	 *            the stream to wrap, may be null
	 * @return the limited stream, or null if <code>original</code> is null
	 */
	private InputStream getLimitedStream(InputStream original) {
		if (original == null || original instanceof BufferedLimitedStream) {
			return original;
		}
		InputStream limited = new BufferedLimitedStream(original, CodedIO.MAX_MARK_SIZE);
		limited.mark(CodedIO.MAX_MARK_SIZE);
		return limited;
	}

	/**
	 * Returns a stream over the current input that supports mark/reset and
	 * is marked at its start.
	 * <ul>
	 * <li>Workspace file: the contents are opened anew on every call and
	 * wrapped in a buffered stream; if opening fails the error is logged
	 * and an empty stream is used.</li>
	 * <li>Client stream with mark support: the stream itself is reset and
	 * returned.</li>
	 * <li>Client stream without mark support: the stream is wrapped in a
	 * buffered stream once, and the wrapper replaces
	 * <code>fInputStream</code>. Later calls reset and reuse that wrapper,
	 * so bytes already buffered during encoding detection are not lost.</li>
	 * <li>No input at all: an empty stream.</li>
	 * </ul>
	 * 
	 * @return a marked stream, never null
	 * @throws CoreException
	 *             declared; a failure to open the file is logged, not thrown
	 * @throws IOException
	 *             if a client stream with mark support cannot be reset
	 */
	private InputStream getResettableStream() throws CoreException, IOException {

		InputStream resettableStream = null;

		if (fIFile != null) {
			InputStream inputStream = null;
			try {
				// note we always get contents, even if out of synch
				inputStream = fIFile.getContents(true);
			}
			catch (CoreException e) {
				// SHOULD actually check for existence of fIStorage, but for
				// now will just assume core exception means it doesn't exist
				// on file system, yet. And we'll log, just in case it's a
				// notable error
				Logger.logException(e);
				inputStream = new NullInputStream();
			}
			resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE);
		}
		else {
			if (fInputStream != null) {
				if (fInputStream.markSupported()) {
					resettableStream = fInputStream;
					// NOTE(review): reset() on a stream that was never
					// marked may throw for some stream types; the original
					// deliberately lets that propagate
					resettableStream.reset();
				}
				else {
					// Wrap exactly once and remember the wrapper. Creating a
					// fresh wrapper per call would strand the bytes the
					// previous wrapper had already pulled from the raw
					// stream, so the reader would start mid-content.
					fInputStream = new BufferedInputStream(fInputStream, CodedIO.MAX_BUF_SIZE);
					resettableStream = fInputStream;
				}
			}
		}

		if (resettableStream == null) {
			resettableStream = new NullInputStream();
		}

		// mark this once, stream at "zero" position
		resettableStream.mark(MAX_MARK_SIZE);
		return resettableStream;
	}

	/**
	 * Last-resort fallback: builds a memento from the charset chosen by
	 * <code>NonContentBasedEncodingRules.useDefaultNameRules</code>.
	 * <p>
	 * No spec default is looked up at present, so the rules are always
	 * called with null. This logic should be moved to 'detection' if not
	 * already.
	 * </p>
	 * 
	 * @return a memento for the default charset
	 */
	private EncodingMemento handleNotProvidedFromContentCase() {

		// typed as String so the same useDefaultNameRules overload is chosen
		final String specDefault = null;
		final String defaultCharset = NonContentBasedEncodingRules.useDefaultNameRules(specDefault);
		Assert.isNotNull(defaultCharset, "post condition failed"); //$NON-NLS-1$
		return CodedIO.createEncodingMemento(defaultCharset);
	}

	/**
	 * Releases a stream once detection has finished with it: a stream
	 * supplied by the client is only reset, any other stream is closed.
	 * 
	 * @param resettableInputStream
	 *            the stream to release; null is ignored
	 * @throws IOException
	 *             if the reset or the close fails
	 */
	private void handleStreamClose(InputStream resettableInputStream) throws IOException {

		if (resettableInputStream == null) {
			return;
		}
		if (!fClientSuppliedStream) {
			resettableInputStream.close();
			return;
		}
		resettableInputStream.reset();
	}

	// Copies the content properties encoding onto the current file by
	// scheduling a system job that calls IFile.setCharset. A failure to set
	// the charset is logged; the job reports OK either way.
	// TODO May improve the UI later by setting an informational message
	// and/or disabling the content properties encoding field.
	// TODO: remake private else remove
	void migrateContentPropertiesEncoding(String encoding) throws CoreException {
		// captured as finals for use inside the anonymous job
		final String newCharset = encoding;
		final IFile target = fIFile;
		// TODO: externalize string later
		Job migrationJob = new Job(SSECoreMessages.Migrate_Charset) {
			protected IStatus run(IProgressMonitor monitor) {
				if (target == null) {
					return Status.OK_STATUS;
				}
				try {
					target.setCharset(newCharset, null);
				}
				catch (CoreException e) {
					Logger.logException(e);
				}
				return Status.OK_STATUS;
			}
		};
		migrationJob.setSystem(true);
		migrationJob.schedule();
	}

	/**
	 * Clears every piece of per-input state: the input (file, file name,
	 * stream and the client-supplied flag), the stored memento and the
	 * encoding rule.
	 */
	private void resetAll() {

		// input
		fIFile = null;
		fFilename = null;
		fInputStream = null;
		fClientSuppliedStream = false;
		// derived state and settings
		fEncodingMemento = null;
		fEncodingRule = null;
	}

	/**
	 * Makes a workspace file the input. All previous state, including the
	 * encoding rule, is cleared first.
	 * 
	 * @param iFile
	 *            the file to read; must not be null
	 * @throws CoreException
	 *             declared, not raised by this method's own statements
	 * @throws IOException
	 *             declared, not raised by this method's own statements
	 */
	public void set(IFile iFile) throws CoreException, IOException {
		Assert.isNotNull(iFile, "illegal argument"); //$NON-NLS-1$
		resetAll();
		this.fIFile = iFile;
	}

	/**
	 * Makes a client-supplied stream the input. All previous state,
	 * including the encoding rule, is cleared first.
	 * 
	 * @param filename
	 *            name associated with the stream
	 * @param inputStream
	 *            the stream to decode
	 */
	public void set(String filename, InputStream inputStream) {

		resetAll();
		this.fClientSuppliedStream = true;
		this.fInputStream = inputStream;
		this.fFilename = filename;
	}

	/**
	 * Sets the rule used when decoding. Call this after <code>set</code>,
	 * because <code>set</code> clears the rule.
	 * 
	 * @param encodingRule
	 *            the rule to apply, may be null
	 */
	public void setEncodingRule(EncodingRule encodingRule) {

		this.fEncodingRule = encodingRule;
	}
}