// ContentDescriberForXML.java example
// (from the webtools.sourceediting-master source tree)
/*******************************************************************************
 * Copyright (c) 2001, 2006 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Jens Lukowski/Innoopract - initial renaming/restructuring
 *     
 *******************************************************************************/
package org.eclipse.wst.xml.core.internal.contenttype;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;

import org.eclipse.core.runtime.QualifiedName;
import org.eclipse.core.runtime.content.IContentDescriber;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.ITextContentDescriber;
import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


/**
 * Content describer for XML resources.
 * <p>
 * Each describe request creates a fresh {@link XMLResourceEncodingDetector},
 * hands it the contents, and then (a) optionally copies encoding related
 * findings into the supplied {@link IContentDescription} and (b) reports
 * whether the contents are considered valid for this content type.
 * <p>
 * Validity always starts out as {@link IContentDescriber#INVALID}; see
 * {@link #determineValidity(IResourceCharsetDetector, int)} for the
 * conditions under which it is upgraded.
 */
public class ContentDescriberForXML implements ITextContentDescriber {
	/** Every property this describer may set on a description. */
	private static final QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};

	/**
	 * <code>restrictedMode</code> is used just for testing/experiments.
	 * 
	 * If in restrictedMode, our "custom" contentType is seen as valid only in
	 * cases that the platform's standard one does not cover.
	 */
	private static final boolean restrictedMode = true;

	/**
	 * Creates the detector used for a single describe request.
	 * 
	 * @return a new {@link XMLResourceEncodingDetector}
	 */
	private IResourceCharsetDetector getDetector() {
		return new XMLResourceEncodingDetector();
	}

	/**
	 * Describes byte oriented contents.
	 * <p>
	 * For this special case, the contents are always assumed invalid unless
	 * our special circumstances are met.
	 * 
	 * @param contents
	 *            the stream to examine; it is <code>reset()</code> before
	 *            being handed to the detector
	 * @param description
	 *            the description to fill in, or <code>null</code> when the
	 *            call is purely a request for validity
	 * @return {@link IContentDescriber#VALID} or
	 *         {@link IContentDescriber#INVALID}
	 * @throws IOException
	 *             propagated from resetting or examining the contents
	 * @see org.eclipse.core.runtime.content.IContentDescriber#describe(java.io.InputStream,
	 *      org.eclipse.core.runtime.content.IContentDescription)
	 */
	public int describe(InputStream contents, IContentDescription description) throws IOException {
		if (description == null) {
			// purely a request for validity
			return determineValidity(IContentDescriber.INVALID, contents);
		}
		return calculateSupportedOptions(IContentDescriber.INVALID, contents, description);
	}

	/**
	 * Describes character oriented contents.
	 * <p>
	 * For this special case, the contents are always assumed invalid unless
	 * our special circumstances are met.
	 * 
	 * @param contents
	 *            the reader to examine
	 * @param description
	 *            the description to fill in, or <code>null</code> when the
	 *            call is purely a request for validity
	 * @return {@link IContentDescriber#VALID} or
	 *         {@link IContentDescriber#INVALID}
	 * @throws IOException
	 *             propagated from resetting or examining the contents
	 * @see org.eclipse.core.runtime.content.ITextContentDescriber#describe(java.io.Reader,
	 *      org.eclipse.core.runtime.content.IContentDescription)
	 */
	public int describe(Reader contents, IContentDescription description) throws IOException {
		if (description == null) {
			// purely a request for validity
			return determineValidity(IContentDescriber.INVALID, contents);
		}
		return calculateSupportedOptions(IContentDescriber.INVALID, contents, description);
	}

	/**
	 * @return the options this describer is able to fill in
	 * @see org.eclipse.core.runtime.content.IContentDescriber#getSupportedOptions()
	 */
	public QualifiedName[] getSupportedOptions() {
		return SUPPORTED_OPTIONS;
	}

	/**
	 * Fills in the description from a stream, provided at least one of the
	 * supported options was requested.
	 * 
	 * @param result
	 *            the validity to return when nothing upgrades it
	 * @param contents
	 *            the stream to examine; only reset and read when the
	 *            description is relevant
	 * @param description
	 *            the description to fill in
	 * @return <code>result</code>, or the validity determined from the
	 *         detector when the description is relevant
	 * @throws IOException
	 *             propagated from resetting or examining the contents
	 */
	protected int calculateSupportedOptions(int result, InputStream contents, IContentDescription description) throws IOException {
		if (!isRelevent(description)) {
			return result;
		}
		IResourceCharsetDetector detector = getDetector();
		contents.reset();
		detector.set(contents);
		return handleCalculations(result, description, detector);
	}

	/**
	 * Validity-only check for a stream: no description is touched.
	 * 
	 * @param result
	 *            the validity to return when nothing upgrades it
	 * @param contents
	 *            the stream to examine; it is reset first
	 * @return the determined validity
	 * @throws IOException
	 *             propagated from resetting the contents
	 */
	private int determineValidity(int result, InputStream contents) throws IOException {
		IResourceCharsetDetector detector = getDetector();
		contents.reset();
		detector.set(contents);
		return determineValidity(detector, result);
	}

	/**
	 * Validity-only check for a reader: no description is touched.
	 * 
	 * @param result
	 *            the validity to return when nothing upgrades it
	 * @param contents
	 *            the reader to examine; it is reset first
	 * @return the determined validity
	 * @throws IOException
	 *             propagated from resetting the contents
	 */
	private int determineValidity(int result, Reader contents) throws IOException {
		IResourceCharsetDetector detector = getDetector();
		contents.reset();
		detector.set(contents);
		return determineValidity(detector, result);
	}

	/**
	 * Fills in the description from a reader, provided at least one of the
	 * supported options was requested.
	 * 
	 * @param result
	 *            the validity to return when nothing upgrades it
	 * @param contents
	 *            the reader to examine
	 * @param description
	 *            the description to fill in
	 * @return <code>result</code>, or the validity determined from the
	 *         detector when the description is relevant
	 * @throws IOException
	 *             propagated from examining the contents
	 */
	protected int calculateSupportedOptions(int result, Reader contents, IContentDescription description) throws IOException {
		if (!isRelevent(description)) {
			return result;
		}
		IResourceCharsetDetector detector = getDetector();
		// NOTE(review): unlike the three sibling methods, the reader is not
		// reset() before being handed to the detector. Left as is, because
		// reset() can throw for readers that were never marked -- confirm
		// whether the asymmetry is intentional.
		detector.set(contents);
		return handleCalculations(result, description, detector);
	}

	/**
	 * Records the charset found in the contents, if any.
	 * <p>
	 * Once we detected a charset, we set the property even though it may be
	 * the same as the java charset, because there are clients that rely on
	 * this property to determine if the charset is actually detected in the
	 * file or not.
	 * 
	 * @param description
	 *            the description to update
	 * @param detectedCharset
	 *            the charset detected in the contents, or <code>null</code>
	 *            if none was detected (nothing is recorded then)
	 */
	private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset) {
		if (detectedCharset != null) {
			description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
		}
	}

	/**
	 * Tells whether the description asks for anything this describer can
	 * provide.
	 * 
	 * @param description
	 *            the description to inspect, may be <code>null</code>
	 * @return <code>true</code> if at least one of the supported options is
	 *         requested; <code>false</code> otherwise, including for a
	 *         <code>null</code> description
	 */
	private boolean isRelevent(IContentDescription description) {
		if (description == null) {
			return false;
		}
		return description.isRequested(IContentDescription.BYTE_ORDER_MARK)
				|| description.isRequested(IContentDescription.CHARSET)
				|| description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)
				|| description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)
				|| description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET);
	}

	/**
	 * Copies the detector's encoding findings into the description and then
	 * determines validity.
	 * 
	 * @param result
	 *            the validity to return when nothing upgrades it
	 * @param description
	 *            the description to fill in; skipped when <code>null</code>
	 * @param detector
	 *            the detector that has already been given the contents
	 * @return the determined validity
	 * @throws IOException
	 *             propagated from obtaining the encoding memento
	 */
	private int handleCalculations(int result, IContentDescription description, IResourceCharsetDetector detector) throws IOException {
		// we always expect XMLResourceEncodingDetector, but guard the cast
		// the same way determineValidity does.
		if (detector instanceof XMLResourceEncodingDetector) {
			EncodingMemento encodingMemento = ((XMLResourceEncodingDetector) detector).getEncodingMemento();
			if (description != null) {
				recordByteOrderMark(description, encodingMemento);
				recordUnsupportedCharset(description, encodingMemento);
				recordCharsets(description, encodingMemento, detector);
			}
		}
		return determineValidity(detector, result);
	}

	/**
	 * Sets the BYTE_ORDER_MARK property when a BOM was detected and it is not
	 * equal to the one already in the description.
	 */
	private void recordByteOrderMark(IContentDescription description, EncodingMemento encodingMemento) {
		// TODO: I need to verify to see if this BOM work is always done
		// by text type.
		Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
		if (detectedByteOrderMark == null) {
			return;
		}
		Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
		// not sure why would ever be different, so if is different, may
		// need to "push" up into base.
		if (!detectedByteOrderMark.equals(existingByteOrderMark)) {
			description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
		}
	}

	/**
	 * Sets UNSUPPORTED_CHARSET and APPROPRIATE_DEFAULT when the memento
	 * reports itself as not valid.
	 */
	private void recordUnsupportedCharset(IContentDescription description, EncodingMemento encodingMemento) {
		if (encodingMemento.isValid()) {
			return;
		}
		// note: after setting here, its the mere presence of
		// IContentDescriptionExtended.UNSUPPORTED_CHARSET
		// in the resource's description that can be used to determine
		// if invalid in those cases, the "detected" property contains
		// an "appropriate default" to use.
		description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
		description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
	}

	/**
	 * Sets DETECTED_CHARSET (whenever a charset was detected) and CHARSET
	 * (only when it differs from both the value already in the description
	 * and the spec default).
	 */
	private void recordCharsets(IContentDescription description, EncodingMemento encodingMemento, IResourceCharsetDetector detector) {
		Object detectedCharset = encodingMemento.getDetectedCharsetName();
		Object javaCharset = encodingMemento.getJavaCharsetName();

		// the detected charset is always recorded when present, whether or
		// not it matches the java charset
		handleDetectedSpecialCase(description, detectedCharset);

		if (javaCharset == null) {
			return;
		}
		Object existingCharset = description.getProperty(IContentDescription.CHARSET);
		if (javaCharset.equals(existingCharset)) {
			// the description already carries this charset
			return;
		}
		// we may need to add what we found, but only need to add if
		// different from the default. If there is no spec default, we
		// assume we always need to add.
		// TODO: the "no spec default" case is probably dead in current
		// code, should re-examine for removal.
		Object defaultCharset = detector.getSpecDefaultEncoding();
		if (defaultCharset == null || !defaultCharset.equals(javaCharset)) {
			description.setProperty(IContentDescription.CHARSET, javaCharset);
		}
	}

	/**
	 * Decides validity from what the detector found.
	 * <p>
	 * The result is upgraded to {@link IContentDescriber#VALID} only when
	 * the detector is an {@link XMLResourceEncodingDetector} that reports a
	 * detected declaration and, in restricted mode, also reports initial
	 * white space. If there is no initial whitespace, the platform's
	 * default content type will do.
	 * 
	 * @param detector
	 *            the detector that has already been given the contents
	 * @param returnResult
	 *            the validity to return when the conditions are not met
	 * @return {@link IContentDescriber#VALID} or <code>returnResult</code>
	 */
	private int determineValidity(IResourceCharsetDetector detector, int returnResult) {
		// we always expect XMLResourceEncodingDetector, but just to make safe
		// cast.
		if (!(detector instanceof XMLResourceEncodingDetector)) {
			return returnResult;
		}
		XMLResourceEncodingDetector xmlResourceDetector = (XMLResourceEncodingDetector) detector;
		if (!xmlResourceDetector.isDeclDetected()) {
			return returnResult;
		}
		if (restrictedMode && !xmlResourceDetector.hasInitialWhiteSpace()) {
			return returnResult;
		}
		return IContentDescriber.VALID;
	}

}