/******************************************************************************* * Copyright (c) 2004, 2006 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/ package org.eclipse.wst.html.core.internal.contenttype; import java.io.IOException; import java.io.InputStream; import java.io.Reader; import org.eclipse.core.runtime.QualifiedName; import org.eclipse.core.runtime.content.IContentDescriber; import org.eclipse.core.runtime.content.IContentDescription; import org.eclipse.core.runtime.content.ITextContentDescriber; import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento; import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended; import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector; /** * * ContentDescriberForHTML * * A few design principles to remember with content describers: * <ul> * <li>Remember not to store values/data in the descriptions array of properties, * especially not large objects! and even no value that is already the default value, * since those description properties are cached per session, so can add up in memory. * <li>Remember that a ContentDescriber instance becomes a "root object" in the * ContentDescriberManager (that is, always in memory, never GC'd), so it should * not have any instance or state data since it would always become stale and * "hold on" to objects unneccessarily. * </ul> */ public final class ContentDescriberForHTML implements ITextContentDescriber { final private static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT}; public int describe(InputStream contents, IContentDescription description) throws IOException { int result = IContentDescriber.INDETERMINATE; if (description == null) { result = computeValidity(contents); } else { calculateSupportedOptions(contents, description); // assummming we should return same 'validity' value we did // when called before. (technically, could be a performance issue // in future, so might want to check if any 'ol value would // be ok here. result = computeValidity(contents); } return result; } public int describe(Reader contents, IContentDescription description) throws IOException { int result = IContentDescriber.INDETERMINATE; if (description == null) { result = computeValidity(contents); } else { calculateSupportedOptions(contents, description); // assummming we should return same 'validity' value we did // when called before. (technically, could be a performance issue // in future, so might want to check if hard coded 'valid' would // be ok here. result = computeValidity(contents); } return result; } public QualifiedName[] getSupportedOptions() { return SUPPORTED_OPTIONS; } private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException { if (isRelevent(description)) { IResourceCharsetDetector detector = getDetector(); detector.set(contents); handleCalculations(description, detector); } } /** * @param contents * @param description * @throws IOException */ private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException { if (isRelevent(description)) { IResourceCharsetDetector detector = getDetector(); detector.set(contents); handleCalculations(description, detector); } } private int computeValidity(InputStream inputStream) { // currently no contents specific check for valid HTML contents // (this may change once we add XHTML content type) return IContentDescriber.INDETERMINATE; } private int computeValidity(Reader reader) { // currently no contents specific check for valid HTML contents // (this may change once we add XHTML content type) return IContentDescriber.INDETERMINATE; } private IResourceCharsetDetector getDetector() { return new HTMLResourceEncodingDetector(); } /** * @param description * @param detector * @throws IOException */ private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException { EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento(); // TODO: I need to verify to see if this BOM work is always done // by text type. Object detectedByteOrderMark = encodingMemento.getUnicodeBOM(); if (detectedByteOrderMark != null) { Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK); // not sure why would ever be different, so if is different, may // need to "push" up into base. if (!detectedByteOrderMark.equals(existingByteOrderMark)) description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark); } if (!encodingMemento.isValid()) { /* * note: after setting here, its the mere presence of * IContentDescriptionExtended.UNSUPPORTED_CHARSET in the * resource's description that can be used to determine if invalid * in those cases, the "detected" property contains an * "appropriate default" to use. */ description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding()); description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault()); } Object detectedCharset = encodingMemento.getDetectedCharsetName(); Object javaCharset = encodingMemento.getJavaCharsetName(); // we always include detected, if its different than java handleDetectedSpecialCase(description, detectedCharset, javaCharset); if (javaCharset != null) { Object existingCharset = description.getProperty(IContentDescription.CHARSET); if (javaCharset.equals(existingCharset)) { handleDetectedSpecialCase(description, detectedCharset, javaCharset); } else { // we may need to add what we found, but only need to add // if different from default.the Object defaultCharset = detector.getSpecDefaultEncoding(); if (defaultCharset != null) { if (!defaultCharset.equals(javaCharset)) { description.setProperty(IContentDescription.CHARSET, javaCharset); } } else { // assuming if there is no spec default, we always need to // add, I'm assuming description.setProperty(IContentDescription.CHARSET, javaCharset); } } } } private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset, Object javaCharset) { // since equal, we don't need to add, but if our detected version is // different than // javaCharset, then we should add it. This will happen, for example, // if there's // differences in case, or differences due to override properties if (detectedCharset != null) { // if (!detectedCharset.equals(javaCharset)) { // description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, // detectedCharset); // } // Once we detected a charset, we should set the property even // though it's the same as javaCharset // because there are clients that rely on this property to // determine if the charset is actually detected in file or not. description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset); } } /** * @param description * @return */ private boolean isRelevent(IContentDescription description) { boolean result = false; if (description == null) result = false; else if (description.isRequested(IContentDescription.BYTE_ORDER_MARK)) result = true; else if (description.isRequested(IContentDescription.CHARSET)) result = true; else if (description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)) result = true; else if (description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)) result = true; else if (description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET)) result = true; // else if // (description.isRequested(IContentDescriptionExtended.ENCODING_MEMENTO)) // result = true; return result; } }