CSSDocumentCharsetDetector.java example

Explorer
webtools.sourceediting-master
/*******************************************************************************
 * Copyright (c) 2004, 2006 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
package org.eclipse.wst.css.core.internal.encoding;

import java.util.Iterator;

import org.eclipse.jface.text.IDocument;
import org.eclipse.wst.css.core.internal.contenttype.CSSResourceEncodingDetector;
import org.eclipse.wst.css.core.internal.parserz.CSSRegionContexts;
import org.eclipse.wst.sse.core.internal.document.DocumentReader;
import org.eclipse.wst.sse.core.internal.document.IDocumentCharsetDetector;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocument;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegionList;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionList;
import org.eclipse.wst.sse.core.utils.StringUtils;


public class CSSDocumentCharsetDetector extends CSSResourceEncodingDetector implements IDocumentCharsetDetector {

	public String getEncodingName(IStructuredDocument structuredDocument) {
		String result = null;
		// if the document is empty, then there will be no nodes,
		// so no need to continue.
		IStructuredDocumentRegionList nodes = structuredDocument.getRegionList();
		if (nodes.getLength() > 0) {
			IStructuredDocumentRegion node = null;
			// skip any initial whitespace
			// Note that @charset "encodingname";
			// must appear at very beginning of document,
			// to be valid.
			// May have to test with "damaged" files (e.g.
			// beginning EOLs, etc., to verify this works
			// as expected.
			for (int i = 0; i < nodes.getLength(); i++) {
				node = nodes.item(i);
				if (getType(node) != CSSRegionContexts.CSS_S) {
					break;
				}
			}
			Iterator regions = node.getRegions().iterator();
			ITextRegion region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
			if (region != null) {
				ITextRegion valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_STRING, regions);
				if (valueRegion == null) {
					// if didn't find the region, its probably due to ill
					// formed input, such as
					// @charset "ISO-8859-6;
					// so we'll try again for "unknown" region.
					// If that fails, we'll give up?
					regions = node.getRegions().iterator();
					region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
					if (region != null) {
						valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_UNKNOWN, regions);
						if (valueRegion != null) {
							result = node.getText(valueRegion);
						}
					}
				}
				else {
					result = node.getText(valueRegion);
				}
				result = StringUtils.stripNonLetterDigits(result);
			}
		}
		return result;
	}

	public String getEncodingName(IDocument document) {
		String enc = null;
		if (document instanceof IStructuredDocument) {
			enc = getEncodingName((IStructuredDocument) document);
		}
		else {
			// TODO Important: need to implement some "raw" parser for
			// IDocument level
		}
		return enc;
	}

	private String getType(IStructuredDocumentRegion node) {
		if (node == null)
			return null;
		ITextRegionList regions = node.getRegions();
		if (regions == null || regions.size() == 0)
			return null;
		ITextRegion region = regions.get(0);
		String result = region.getType();
		return result;
	}

	private ITextRegion getNextRegionOfType(String type, Iterator regions) {
		if (type == null)
			return null;
		if (regions == null)
			return null;
		ITextRegion result = null;
		while (regions.hasNext()) {
			ITextRegion region = (ITextRegion) regions.next();
			if (region.getType() == type) {
				result = region;
				break;
			}
		}
		return result;
	}

	/**
	 * 
	 */

	public void set(IDocument document) {
		set(new DocumentReader(document, 0));

	}

}