// AbstractDocumentLoader.java example
//
// Explorer
// webtools.sourceediting-master
/*******************************************************************************
 * Copyright (c) 2001, 2006 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Jens Lukowski/Innoopract - initial renaming/restructuring
 *     
 *******************************************************************************/
package org.eclipse.wst.sse.core.internal.document;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;

import org.eclipse.core.resources.IFile;
import org.eclipse.core.resources.ProjectScope;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.runtime.preferences.IScopeContext;
import org.eclipse.core.runtime.preferences.InstanceScope;
import org.eclipse.jface.text.Document;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IDocumentExtension3;
import org.eclipse.jface.text.IDocumentPartitioner;
import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
import org.eclipse.wst.sse.core.internal.encoding.CodedReaderCreator;
import org.eclipse.wst.sse.core.internal.encoding.ContentTypeEncodingPreferences;
import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
import org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail;
import org.eclipse.wst.sse.core.internal.provisional.document.IEncodedDocument;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredPartitioning;



/**
 * Base implementation of {@link IDocumentLoader}: reads a file or stream
 * through a {@link CodedReaderCreator} and creates a structured document
 * ({@link IEncodedDocument}) from its contents.
 * <p>
 * Subclasses supply the concrete document ({@link #newEncodedDocument()})
 * and the default partitioner ({@link #getDefaultDocumentPartitioner()}).
 * Instances keep per-load state (encoding memento, prepared reader) in
 * fields.
 */
public abstract class AbstractDocumentLoader implements IDocumentLoader {

	/** Lazily created by {@link #getCodedReaderCreator()}. */
	private CodedReaderCreator fCodedReaderCreator;
	protected IDocumentCharsetDetector fDocumentEncodingDetector;

	/** Encoding memento of the most recently loaded input. */
	protected EncodingMemento fEncodingMemento;
	/** Reader over the most recently loaded input. */
	protected Reader fFullPreparedReader;

	/**
	 * AbstractLoader constructor also initializes encoding converter/mapper
	 */
	public AbstractDocumentLoader() {
		super();
	}

	/**
	 * Returns a copy of the given text in which every line delimiter has
	 * been replaced by <code>lineDelimiterToUse</code>.
	 * 
	 * @param allTextBuffer
	 *            the text to convert; not modified
	 * @param lineDelimiterToUse
	 *            the delimiter to write between lines, or <code>null</code>
	 *            to use the "line.separator" system property
	 * @return a new buffer holding the converted text
	 */
	protected final StringBuffer convertLineDelimiters(StringBuffer allTextBuffer, String lineDelimiterToUse) {
		// TODO: avoid use of String instance
		String allText = allTextBuffer.toString();
		// a temporary document is used only for its line tracking
		IDocument tempDoc = new Document(allText);
		if (lineDelimiterToUse == null) {
			lineDelimiterToUse = System.getProperty("line.separator"); //$NON-NLS-1$
		}
		StringBuffer newText = new StringBuffer(allText.length());
		int lineCount = tempDoc.getNumberOfLines();
		for (int i = 0; i < lineCount; i++) {
			try {
				org.eclipse.jface.text.IRegion lineInfo = tempDoc.getLineInformation(i);
				int lineStartOffset = lineInfo.getOffset();
				int lineEndOffset = lineStartOffset + lineInfo.getLength();
				newText.append(allText.substring(lineStartOffset, lineEndOffset));
				// a delimiter is only written between lines, and only where
				// the original line actually had one
				if ((i < lineCount - 1) && (tempDoc.getLineDelimiter(i) != null)) {
					newText.append(lineDelimiterToUse);
				}
			}
			catch (org.eclipse.jface.text.BadLocationException exception) {
				// should fix up to either throw nothing, or the right thing,
				// but in the course of refactoring, this was easiest
				// "quick fix".
				throw new RuntimeException(exception);
			}
		}
		return newText;
	}

	/**
	 * This method must return a new instance of IEncodedDocument, that has
	 * been initialized with appropriate parser. For many loaders, the
	 * (default) parser used is known for any input. For others, the correct
	 * parser (and its initialization) is normally dependent on the content of
	 * the file. This no-argument method should assume "empty input" and would
	 * therefore return the default parser for the default contentType.
	 * <p>
	 * The returned document has a default encoding memento, the preferred
	 * line delimiter (when one is configured) and the default partitioner
	 * installed and connected.
	 */
	public IEncodedDocument createNewStructuredDocument() {
		IEncodedDocument structuredDocument = newEncodedDocument();
		// Make sure every structuredDocument has an Encoding Memento,
		// which is the default one for "empty" structuredDocuments
		String charset = ContentTypeEncodingPreferences.useDefaultNameRules(getDocumentEncodingDetector());
		String specDefaultCharset = getDocumentEncodingDetector().getSpecDefaultEncoding();
		structuredDocument.setEncodingMemento(CodedIO.createEncodingMemento(charset, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT, specDefaultCharset));

		String lineDelimiter = getPreferredNewLineDelimiter(null);
		if (lineDelimiter != null) {
			structuredDocument.setPreferredLineDelimiter(lineDelimiter);
		}

		IDocumentPartitioner defaultPartitioner = getDefaultDocumentPartitioner();
		if (structuredDocument instanceof IDocumentExtension3) {
			((IDocumentExtension3) structuredDocument).setDocumentPartitioner(IStructuredPartitioning.DEFAULT_STRUCTURED_PARTITIONING, defaultPartitioner);
		}
		else {
			structuredDocument.setDocumentPartitioner(defaultPartitioner);
		}
		defaultPartitioner.connect(structuredDocument);

		return structuredDocument;
	}

	/**
	 * This abstract version should handle most cases, but won't if
	 * contentType is sensitive to encoding, and/or embedded types.
	 * <p>
	 * Creates a new document, applies the line delimiter preferred for the
	 * file, and fills the document with the decoded file contents. The
	 * reader used for loading is closed before this method returns.
	 * 
	 * @param iFile
	 *            the file to load
	 * @return the loaded document
	 */
	public IEncodedDocument createNewStructuredDocument(IFile iFile) throws IOException, CoreException {
		IEncodedDocument structuredDocument = createNewStructuredDocument();

		String lineDelimiter = getPreferredNewLineDelimiter(iFile);
		if (lineDelimiter != null) {
			structuredDocument.setPreferredLineDelimiter(lineDelimiter);
		}

		try {
			CodedReaderCreator creator = getCodedReaderCreator();
			creator.set(iFile);
			fEncodingMemento = creator.getEncodingMemento();
			structuredDocument.setEncodingMemento(fEncodingMemento);
			// use the creator that was just configured for this file
			fFullPreparedReader = creator.getCodedReader();

			setDocumentContentsFromReader(structuredDocument, fFullPreparedReader);
		}
		finally {
			if (fFullPreparedReader != null) {
				fFullPreparedReader.close();
			}
		}
		return structuredDocument;
	}

	/**
	 * Same as
	 * {@link #createNewStructuredDocument(String, InputStream, EncodingRule)}
	 * using {@link EncodingRule#CONTENT_BASED}.
	 */
	public IEncodedDocument createNewStructuredDocument(String filename, InputStream inputStream) throws UnsupportedEncodingException, IOException {
		return createNewStructuredDocument(filename, inputStream, EncodingRule.CONTENT_BASED);
	}

	/**
	 * Creates a new document and fills it with the decoded contents of the
	 * given input. The reader used for loading is closed before this method
	 * returns.
	 * 
	 * @param filename
	 *            name associated with the input; may be <code>null</code>
	 *            only if <code>inputStream</code> is not
	 * @param inputStream
	 *            the input to read; may be <code>null</code> only if
	 *            <code>filename</code> is not
	 * @param encodingRule
	 *            the rule handed to the {@link CodedReaderCreator}
	 * @return the loaded document
	 * @throws IllegalArgumentException
	 *             if both <code>filename</code> and <code>inputStream</code>
	 *             are <code>null</code>
	 */
	public IEncodedDocument createNewStructuredDocument(String filename, InputStream inputStream, EncodingRule encodingRule) throws UnsupportedEncodingException, IOException {
		if (filename == null && inputStream == null) {
			throw new IllegalArgumentException("can not have both null filename and inputstream"); //$NON-NLS-1$
		}
		IEncodedDocument structuredDocument = createNewStructuredDocument();
		CodedReaderCreator codedReaderCreator = getCodedReaderCreator();
		try {
			codedReaderCreator.set(filename, inputStream);
			codedReaderCreator.setEncodingRule(encodingRule);
			fEncodingMemento = codedReaderCreator.getEncodingMemento();
			fFullPreparedReader = codedReaderCreator.getCodedReader();
			structuredDocument.setEncodingMemento(fEncodingMemento);
			setDocumentContentsFromReader(structuredDocument, fFullPreparedReader);
		}
		catch (CoreException e) {
			// impossible in this context
			throw new Error(e);
		}
		finally {
			if (fFullPreparedReader != null) {
				fFullPreparedReader.close();
			}
		}

		return structuredDocument;
	}

	/**
	 * Reads the given reader one character at a time until a read fails,
	 * and reports where that happened.
	 * 
	 * @param inputStream
	 *            the reader to scan, positioned where counting should start
	 * @return the computed position of the failing character, or -1 if the
	 *         end of input was reached without any read error
	 */
	private int getCharPostionOfFailure(BufferedReader inputStream) {
		int charPosition = 1;
		try {
			int charRead;
			do {
				charRead = inputStream.read();
				charPosition++;
			}
			while (charRead != -1);
		}
		catch (IOException e) {
			// this is expected, since we're expecting failure.
			// dmw, blindly modified to +1 to get unit tests to work, moving
			// from Java 1.3, to 1.4
			// not sure how/why this behavior would have changed. (Its as if
			// 'read' is reporting error one character early).
			return charPosition + 1;
		}
		return -1;
	}

	/**
	 * Returns the reader creator used by this loader, creating it on first
	 * use.
	 * 
	 * @return Returns the codedReaderCreator.
	 */
	protected CodedReaderCreator getCodedReaderCreator() {
		if (fCodedReaderCreator == null) {
			fCodedReaderCreator = new CodedReaderCreator();
		}
		return fCodedReaderCreator;
	}

	/**
	 * Creates the partitioner to be used with the
	 * IStructuredPartitioning.DEFAULT_STRUCTURED_PARTITIONING partitioning
	 * 
	 * @return IDocumentPartitioner
	 */
	public abstract IDocumentPartitioner getDefaultDocumentPartitioner();

	/**
	 * Returns the encodingMemento.
	 * 
	 * @return EncodingMemento
	 * @throws IllegalStateException
	 *             if no encoding memento has been set yet
	 */
	public EncodingMemento getEncodingMemento() {
		if (fEncodingMemento == null) {
			throw new IllegalStateException("Program Error: encodingMemento was accessed before it was set"); //$NON-NLS-1$
		}
		return fEncodingMemento;
	}

	/**
	 * Returns the current prepared reader, obtaining one from the
	 * {@link CodedReaderCreator} if none has been stored yet.
	 * 
	 * @return Returns the fullPreparedReader.
	 */
	protected Reader getFullPreparedReader() throws UnsupportedEncodingException, CoreException, IOException {
		if (fFullPreparedReader == null) {
			fFullPreparedReader = getCodedReaderCreator().getCodedReader();
		}
		return fFullPreparedReader;
	}

	/**
	 * Returns the default line delimiter preference for the given file. The
	 * project-scoped preference is used when the file has a project and that
	 * preference is set; otherwise the workspace (instance-scoped)
	 * preference is returned.
	 * 
	 * @param file
	 *            the file, may be <code>null</code>
	 * @return the default line delimiter, or <code>null</code> if no
	 *         preference is set
	 * @since 3.1
	 */
	private String getPlatformLineDelimiterPreference(IFile file) {
		IScopeContext[] scopeContext;
		if (file != null && file.getProject() != null) {
			// project preference
			scopeContext = new IScopeContext[]{new ProjectScope(file.getProject())};
			String lineDelimiter = Platform.getPreferencesService().getString(Platform.PI_RUNTIME, Platform.PREF_LINE_SEPARATOR, null, scopeContext);
			if (lineDelimiter != null) {
				return lineDelimiter;
			}
		}
		// workspace preference
		scopeContext = new IScopeContext[]{new InstanceScope()};
		return Platform.getPreferencesService().getString(Platform.PI_RUNTIME, Platform.PREF_LINE_SEPARATOR, null, scopeContext);
	}

	/**
	 * @deprecated use getPreferredNewLineDelimiter(IFile) instead
	 */
	protected String getPreferredNewLineDelimiter() {
		return getPreferredNewLineDelimiter(null);
	}

	/**
	 * If subclass doesn't implement, return platform default
	 * 
	 * @param file
	 *            the file the delimiter is wanted for, may be
	 *            <code>null</code>
	 * @return the preferred delimiter, or <code>null</code> if none is
	 *         configured
	 */
	protected String getPreferredNewLineDelimiter(IFile file) {
		return getPlatformLineDelimiterPreference(file);
	}

	/**
	 * A utility method, but depends on subclasses to implement the preferred
	 * end of line for a particular content type. Note: subclasses should not
	 * re-implement this method (there's no reason to, even though it's part
	 * of the interface). This method not only converts end-of-line
	 * characters, if needed, but sets the correct end-of-line delimiter in
	 * structuredDocument. Minor note: can't use this exact method in dumpers,
	 * since the decision to change or not is a little different, and since
	 * there we have to change text of structuredDocument if found to need
	 * conversion. (Whereas for loading, we assume we haven't yet set text in
	 * structuredDocument, but will be done by other method just a tiny bit
	 * later). Needs to be public to handle interface. It is in the interface
	 * just so ModelManagerImpl can use it in a special circumstance.
	 * 
	 * @param originalString
	 *            the text about to be loaded
	 * @param theFlatModel
	 *            the document whose preferred line delimiter is updated
	 * @return <code>originalString</code> itself when no conversion is
	 *         needed, otherwise a new buffer with converted delimiters
	 */
	public StringBuffer handleLineDelimiter(StringBuffer originalString, IEncodedDocument theFlatModel) {
		// TODO: need to handle line delimiters so Marker Positions are
		// updated

		// based on text, make a guess on what's being used as
		// line delimiter
		String probableLineDelimiter = TextUtilities.determineLineDelimiter(originalString, theFlatModel.getLegalLineDelimiters(), System.getProperty("line.separator")); //$NON-NLS-1$
		String preferredLineDelimiter = getPreferredNewLineDelimiter(null);

		if (preferredLineDelimiter == null) {
			// when preferredLineDelimiter is null, it means "leave alone"
			// so no conversion needed.
			// The document's delimiter should already be set, but if it is
			// missing or different we set it, so any subsequent editing
			// inserts what we're assuming. The null check matters: this
			// branch is reached exactly when no platform preference exists,
			// which is also when createNewStructuredDocument() sets nothing.
			String currentDelimiter = theFlatModel.getPreferredLineDelimiter();
			if (currentDelimiter == null || !currentDelimiter.equals(probableLineDelimiter)) {
				theFlatModel.setPreferredLineDelimiter(probableLineDelimiter);
			}
			return originalString;
		}

		StringBuffer convertedText;
		if (preferredLineDelimiter.equals(probableLineDelimiter)) {
			// they are already the same, no conversion needed
			convertedText = originalString;
		}
		else {
			// technically, wouldn't have to convert line delimiters
			// here at beginning, but when we save, if the preferred
			// line delimiter is "leave alone" then we do leave alone,
			// so best to be right from beginning.
			convertedText = convertLineDelimiters(originalString, preferredLineDelimiter);
		}
		theFlatModel.setPreferredLineDelimiter(preferredLineDelimiter);
		return convertedText;
	}

	/**
	 * Creates the concrete, empty document instance that this loader
	 * populates.
	 */
	protected abstract IEncodedDocument newEncodedDocument();

	/**
	 * Very mechanical method, just to read the characters, once the reader is
	 * correctly created.
	 * 
	 * @param reader
	 *            the reader to drain; not closed by this method
	 * @return all characters read
	 * @throws MalformedInputExceptionWithDetail
	 *             if decoding fails with a MalformedInputException or an
	 *             UnmappableCharacterException; carries the charset name from
	 *             the current encoding memento and the position reached
	 */
	private StringBuffer readInputStream(Reader reader) throws IOException {
		StringBuffer buffer = new StringBuffer();
		try {
			char tBuff[] = new char[CodedIO.MAX_BUF_SIZE];
			int numRead = 0;
			while (numRead != -1) {
				numRead = reader.read(tBuff, 0, tBuff.length);
				if (numRead > 0) {
					buffer.append(tBuff, 0, numRead);
				}
			}
		}
		catch (MalformedInputException e) {
			// buffer.length() is exactly the number of characters read
			// successfully before the failure; reads may return short
			// blocks, so a block count cannot be used for this.
			throw new MalformedInputExceptionWithDetail(fEncodingMemento.getJavaCharsetName(), buffer.length() + e.getInputLength());
		}
		catch (UnmappableCharacterException e) {
			throw new MalformedInputExceptionWithDetail(fEncodingMemento.getJavaCharsetName(), buffer.length() + e.getInputLength());
		}
		return buffer;
	}

	/**
	 * Replaces the contents of the given document with everything read from
	 * the given reader. The reader is not closed here, since this method did
	 * not open it.
	 * 
	 * @param encodedDocument
	 *            the document to fill
	 * @param inputStreamReader
	 *            the source of the new contents; must not be
	 *            <code>null</code>
	 * @throws IllegalArgumentException
	 *             if <code>inputStreamReader</code> is <code>null</code>
	 * @throws MalformedInputExceptionWithDetail
	 *             if reading fails with a MalformedInputException
	 */
	public void reload(IEncodedDocument encodedDocument, Reader inputStreamReader) throws IOException {
		if (inputStreamReader == null) {
			throw new IllegalArgumentException("stream reader can not be null"); //$NON-NLS-1$
		}
		final int READ_BUFFER_SIZE = 8192;
		final int MAX_BUFFERED_SIZE_FOR_RESET_MARK = 200000;
		// temp .... eventually we'll only read as needed
		BufferedReader bufferedReader = new BufferedReader(inputStreamReader, MAX_BUFFERED_SIZE_FOR_RESET_MARK);
		// mark the start, so the input can be re-read to locate a failure
		bufferedReader.mark(MAX_BUFFERED_SIZE_FOR_RESET_MARK);
		StringBuffer buffer = new StringBuffer();
		try {
			int numRead = 0;
			char tBuff[] = new char[READ_BUFFER_SIZE];
			while ((numRead = bufferedReader.read(tBuff, 0, tBuff.length)) != -1) {
				buffer.append(tBuff, 0, numRead);
			}
			// remember -- we didn't open stream ... so we don't close it
		}
		catch (MalformedInputException e) {
			EncodingMemento localEncodingMemento = getEncodingMemento();
			String encodingNameInError = localEncodingMemento.getJavaCharsetName();
			if (encodingNameInError == null) {
				encodingNameInError = localEncodingMemento.getDetectedCharsetName();
			}
			boolean couldReset = true;
			try {
				bufferedReader.reset();
			}
			catch (IOException resetException) {
				// the only error that can occur during reset is an
				// IOException due to already being past the reset mark. In
				// that case, we throw more generic message
				couldReset = false;
			}
			// -1 can be used by UI layer as a code that "position could not
			// be determined"
			int charPosition = -1;
			if (couldReset) {
				charPosition = getCharPostionOfFailure(bufferedReader);
			}
			// all of that just to throw more accurate error
			// note: we do the conversion to ianaName, instead of using the
			// local variable, because this is ultimately only for the user
			// error message (that is, the error occurred in context of
			// javaEncodingName no matter what ianaEncodingName is)
			throw new MalformedInputExceptionWithDetail(encodingNameInError, CodedIO.getAppropriateJavaCharset(encodingNameInError), charPosition, !couldReset, MAX_BUFFERED_SIZE_FOR_RESET_MARK);
		}
		encodedDocument.set(buffer.toString());
	}

	/**
	 * Reads the reader to its end and sets the result as the document's
	 * text. The reader is not closed here.
	 * 
	 * @param structuredDocument
	 *            the document to fill
	 * @param reader
	 *            the source of the text
	 */
	protected void setDocumentContentsFromReader(IEncodedDocument structuredDocument, Reader reader) throws IOException {
		StringBuffer allText = readInputStream(reader);
		structuredDocument.set(allText.toString());
	}
}