HTMLHeadTokenizerTester.java example

Explorer
webtools.sourceediting-master
/*******************************************************************************
 * Copyright (c) 2004, 2010 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
package org.eclipse.wst.html.tests.encoding.html;

import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.regex.Pattern;

import junit.framework.TestCase;

import org.eclipse.core.resources.IFile;
import org.eclipse.wst.html.core.internal.contenttype.HTMLHeadTokenizer;
import org.eclipse.wst.html.core.internal.contenttype.HTMLHeadTokenizerConstants;
import org.eclipse.wst.html.core.internal.contenttype.HeadParserToken;
import org.eclipse.wst.html.tests.encoding.HTMLEncodingTestsPlugin;
import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants;
import org.eclipse.wst.xml.tests.encoding.ZippedTest;

public class HTMLHeadTokenizerTester extends TestCase {
	boolean DEBUG = false;
	private String fCharset;

	private String fContentTypeValue;
	private final String fileDir = "html/";
	private final String fileHome = "testfiles/";
	private final String fileLocation = this.fileHome + this.fileDir;
	private String fPageEncodingValue = null;
	private String fXMLDecEncodingName;

	private void doTestFile(String filename, String expectedName) throws IOException {
		doTestFile(filename, expectedName, null);
	}

	private void doTestFile(String filename, String expectedName, String finalTokenType) {
		try {
			doTestFile(HTMLEncodingTestsPlugin.getTestReader(filename), expectedName, finalTokenType);
		}
		catch (IOException e) {
			System.out.println("Error opening file \"" + filename +"\"");
		}
	}

	private void doTestFile(Reader fileReader, String expectedName, String finalTokenType) throws IOException {
		HTMLHeadTokenizer tokenizer = null;
		tokenizer = new HTMLHeadTokenizer(fileReader);

		HeadParserToken resultToken = null;
		HeadParserToken token = parseHeader(tokenizer);
		String resultValue = getAppropriateEncoding();
		fileReader.close();
		if (finalTokenType != null) {
			assertTrue("did not end as expected. found:  " + token.getType(), finalTokenType.equals(token.getType()));
		}
		else {
			if (expectedName == null) {
				assertTrue("expected no encoding, but found: " + resultValue, resultToken == null);
			}
			else {
				// TODO: need to work on case issues
				assertTrue("expected " + expectedName + " but found " + resultValue, expectedName.equals(resultValue.toUpperCase()));
			}
		}

	}

	// public void testMalformedNoEncoding() {
	// String filename = fileLocation + "MalformedNoEncoding.jsp";
	// doTestFile(filename);
	// }
	// public void testMalformedNoEncodingXSL() {
	// String filename = fileLocation + "MalformedNoEncodingXSL.jsp";
	// doTestFile(filename);
	// }
	// public void testNoEncoding() {
	// String filename = fileLocation + "NoEncoding.jsp";
	// doTestFile(filename);
	// }
	// public void testNormalNonDefault() {
	// String filename = fileLocation + "NormalNonDefault.jsp";
	// doTestFile(filename);
	// }
	// public void testNormalPageCaseNonDefault() {
	// String filename = fileLocation + "NormalPageCaseNonDefault.jsp";
	// doTestFile(filename);
	// }
	// public void testdefect223365() {
	// String filename = fileLocation + "SelColBeanRow12ResultsForm.jsp";
	// doTestFile(filename);
	// }
	/**
	 * returns encoding according to priority: 1. XML Declaration 2. page
	 * directive pageEncoding name 3. page directive contentType charset name
	 */
	private String getAppropriateEncoding() {
		String result = null;
		if (this.fXMLDecEncodingName != null) {
			result = this.fXMLDecEncodingName;
		}
		else if (this.fPageEncodingValue != null) {
			result = this.fPageEncodingValue;
		}
		else if (this.fCharset != null) {
			result = this.fCharset;
		}
		return result;
	}

	private boolean isLegalString(String tokenType) {
		boolean result = false;
		if (tokenType == null) {
			result = false;
		}
		else {
			result = tokenType.equals(EncodingParserConstants.StringValue) || tokenType.equals(EncodingParserConstants.UnDelimitedStringValue) || tokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) || tokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
		}
		return result;
	}

	private void parseContentTypeValue(String contentType) {
		Pattern pattern = Pattern.compile(";\\s*charset\\s*=\\s*");
		String[] parts = pattern.split(contentType);
		if (parts.length > 0) {
			// if only one item, it can still be charset instead of
			// contentType
			if (parts.length == 1) {
				if (parts[0].length() > 6) {
					String checkForCharset = parts[0].substring(0, 7);
					if (checkForCharset.equalsIgnoreCase("charset")) {
						int eqpos = parts[0].indexOf('=');
						eqpos = eqpos + 1;
						if (eqpos < parts[0].length()) {
							this.fCharset = parts[0].substring(eqpos);
							this.fCharset = this.fCharset.trim();
						}
					}
				}
			}
			else {
				// fContentType = parts[0];
			}

		}
		if (parts.length > 1) {
			this.fCharset = parts[1];
		}
	}

	/**
	 * Give's priority to encoding value, if found else, looks for contentType
	 * value;
	 */
	private HeadParserToken parseHeader(HTMLHeadTokenizer tokenizer) throws IOException {
		this.fPageEncodingValue = null;
		this.fCharset = null;
		/*
		 * if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) { if
		 * (tokenizer.hasMoreTokens()) { ITextHeadRegion valueToken =
		 * tokenizer.getNextToken(); String valueTokenType =
		 * valueToken.getType(); if (isLegal(valueTokenType)) { resultValue =
		 * valueToken.getText(); if (DEBUG) { System.out.println("XML Head
		 * Tokenizer Found Encoding: " + resultValue); } } } }
		 */
		HeadParserToken token = null;
		HeadParserToken finalToken = null;
		do {
			token = tokenizer.getNextToken();
			if (this.DEBUG) {
				System.out.println(token);
			}
			String tokenType = token.getType();
			if(canHandleAsUnicodeStream(tokenType)) {
				
			}
			if (tokenType == HTMLHeadTokenizerConstants.MetaTagContentType) {
				if (tokenizer.hasMoreTokens()) {
					HeadParserToken valueToken = tokenizer.getNextToken();
					if (this.DEBUG) {
						System.out.println(valueToken);
					}
					String valueTokenType = valueToken.getType();
					if (isLegalString(valueTokenType)) {
						this.fContentTypeValue = valueToken.getText();

					}
				}
			}

		}
		while (tokenizer.hasMoreTokens());
		if (this.fContentTypeValue != null) {
			parseContentTypeValue(this.fContentTypeValue);
		}
		finalToken = token;
		return finalToken;

	}
	
	private boolean canHandleAsUnicodeStream(String tokenType) {
		boolean canHandleAsUnicode = false;
		if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
			canHandleAsUnicode = true;
			this.fCharset = "UTF-8"; //$NON-NLS-1$
		}
		else if (tokenType == EncodingParserConstants.UTF16BE || tokenType == EncodingParserConstants.UTF16LE) {
			canHandleAsUnicode = true;
			this.fCharset = "UTF-16"; //$NON-NLS-1$
		}
		return canHandleAsUnicode;
	}

	public void testBestCase() throws IOException {
		String filename = this.fileLocation + "NormalNonDefault.html";
		doTestFile(filename, "UTF-8");

	}

	// public void testIllFormed() {
	// String filename = fileLocation + "testIllFormed.jsp";
	// doTestFile(filename);
	// }
	// public void testIllFormed2() {
	// String filename = fileLocation + "testIllFormed2.jsp";
	// doTestFile(filename);
	// }
	// public void testIllformedNormalNonDefault() {
	// String filename = fileLocation + "IllformedNormalNonDefault.jsp";
	// doTestFile(filename);
	// }
	public void testEmptyFile() throws IOException {
		String filename = this.fileLocation + "EmptyFile.html";
		doTestFile(filename, null);
	}

	public void testIllFormedNormalNonDefault() throws IOException {
		String filename = this.fileLocation + "IllformedNormalNonDefault.html";
		doTestFile(filename, "UTF-8");
	}

	public void testLargeCase() throws IOException {
		String filename = this.fileLocation + "LargeNonDefault.html";
		doTestFile(filename, "ISO-8859-1");

	}

	public void testLargeNoEncoding() throws IOException {
		String filename = this.fileLocation + "LargeNoEncoding.html";
		doTestFile(filename, null, EncodingParserConstants.MAX_CHARS_REACHED);

	}

	public void testMultiNonDefault() throws IOException {
		String filename = this.fileLocation + "MultiNonDefault.html";
		doTestFile(filename, "ISO-8859-6");
	}

	public void testNoEncoding() throws IOException {
		String filename = this.fileLocation + "NoEncoding.html";
		doTestFile(filename, null);
	}

	public void testnoquotes() throws IOException {
		String filename = this.fileLocation + "noquotes.html";
		doTestFile(filename, "UTF-8");

	}
	
	public void testnoquotesUTF16le() throws IOException {
		String filename = this.fileLocation + "noquotesUTF16le.html";
		doTestFile(filename, "UTF-16LE");
	}
	
	public void testUTF16le() throws IOException {
		String filename = this.fileLocation + "utf16le.html";
		doTestFile(filename, "UTF-16LE");
	}

	public void testUTF16be() throws IOException {
		String filename = this.fileLocation + "utf16be.html";
		doTestFile(filename, "UTF-16BE");
	}
	
	/*
		sun.io.MalformedInputException
		at sun.io.ByteToCharUTF8.convert(ByteToCharUTF8.java:262)
		at sun.nio.cs.StreamDecoder$ConverterSD.convertInto(StreamDecoder.java:314)
		at sun.nio.cs.StreamDecoder$ConverterSD.implRead(StreamDecoder.java:364)
		at sun.nio.cs.StreamDecoder.read(StreamDecoder.java:250)
		at java.io.InputStreamReader.read(InputStreamReader.java:212)
		at org.eclipse.wst.html.core.internal.contenttype.HTMLHeadTokenizer.yy_advance(HTMLHeadTokenizer.java:340)
		at org.eclipse.wst.html.core.internal.contenttype.HTMLHeadTokenizer.primGetNextToken(HTMLHeadTokenizer.java:477)
		at org.eclipse.wst.html.core.internal.contenttype.HTMLHeadTokenizer.getNextToken(HTMLHeadTokenizer.java:232)
		at org.eclipse.wst.html.tests.encoding.html.HTMLHeadTokenizerTester.parseHeader(HTMLHeadTokenizerTester.java:175)
		at org.eclipse.wst.html.tests.encoding.html.HTMLHeadTokenizerTester.doTestFile(HTMLHeadTokenizerTester.java:57)
		at org.eclipse.wst.html.tests.encoding.html.HTMLHeadTokenizerTester.doTestFile(HTMLHeadTokenizerTester.java:37)
		at org.eclipse.wst.html.tests.encoding.html.HTMLHeadTokenizerTester.testUTF16BOM(HTMLHeadTokenizerTester.java:293)
	*/

	public void testUTF16BOM() throws Exception {
		String filename = this.fileLocation + "utf16BOM.html";
		ZippedTest test = new ZippedTest();
		test.setUp();
		IFile file = test.getFile(filename);
		assertNotNull(file);
		Reader fileReader = new FileReader(file.getLocationURI().getPath());
		doTestFile(fileReader, "UTF-16", null);
		test.shutDown();
	}
}