/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.core.util;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link SimpleHtmlParser}. Each test feeds an HTML document to
 * the parser and checks the pieces it extracts: body tag, charset name, body
 * content, doctype, XHTML namespace declaration, validity flag and own-CSS flag.
 *
 * Initial date: 24.03.2014<br>
 * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
 *
 */
public class SimpleHtmlParserTest {

	/**
	 * Encoding used to decode the HTML fixtures. Decoding with an explicit
	 * encoding keeps the tests independent of the platform default charset.
	 * NOTE(review): assumes the fixtures are ASCII/UTF-8 compatible - confirm
	 * if a fixture with other raw bytes is added.
	 */
	private static final String FIXTURE_ENCODING = "UTF-8";

	/**
	 * Parses the fixture {@code simple_1.html} (an XHTML 1.0 Transitional page,
	 * per the expected doctype) and checks every value exposed by the parser.
	 *
	 * @throws IOException if the fixture cannot be read
	 */
	@Test
	public void parse_minimalTinyMCEHTMLPage() throws IOException {
		String html = readFixture("simple_1.html");
		SimpleHtmlParser parser = new SimpleHtmlParser(html);

		Assert.assertEquals("<body>", parser.getBodyTag());
		Assert.assertEquals("utf-8", parser.getCharsetName());
		Assert.assertEquals("Hello", parser.getHtmlContent().trim());
		Assert.assertEquals("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">", parser.getHtmlDocType());
		Assert.assertEquals("<html xmlns=\"http://www.w3.org/1999/xhtml\">", parser.getXhtmlNamespaces().trim());
		Assert.assertTrue(parser.isValidHtml());
		Assert.assertFalse(parser.hasOwnCss());
	}

	/**
	 * Parses the fixture {@code simple_2.html} (an HTML 4.01 Transitional page,
	 * per the expected doctype) and checks that no XHTML namespace is reported.
	 *
	 * @throws IOException if the fixture cannot be read
	 */
	@Test
	public void parse_externHtmlEditor() throws IOException {
		String html = readFixture("simple_2.html");
		SimpleHtmlParser parser = new SimpleHtmlParser(html);

		Assert.assertEquals("<body>", parser.getBodyTag());
		Assert.assertEquals("iso-2022-jp", parser.getCharsetName());
		Assert.assertEquals("Generated", parser.getHtmlContent().trim());
		Assert.assertEquals("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n\t\"http://www.w3.org/TR/html4/loose.dtd\">", parser.getHtmlDocType());
		Assert.assertNull(parser.getXhtmlNamespaces());
		Assert.assertTrue(parser.isValidHtml());
		Assert.assertFalse(parser.hasOwnCss());
	}

	/**
	 * Parses a bare document without doctype, charset or namespace and checks
	 * that the corresponding getters return {@code null} while the document is
	 * still reported as valid with an empty body.
	 */
	@Test
	public void parse_ErroHandling() {
		String html = "<html><body></body></html>";
		SimpleHtmlParser parser = new SimpleHtmlParser(html);

		Assert.assertEquals("<body>", parser.getBodyTag());
		Assert.assertNull(parser.getCharsetName());
		Assert.assertEquals("", parser.getHtmlContent().trim());
		Assert.assertNull(parser.getHtmlDocType());
		Assert.assertNull(parser.getXhtmlNamespaces());
		Assert.assertTrue(parser.isValidHtml());
		Assert.assertFalse(parser.hasOwnCss());
	}

	/**
	 * Reads a classpath resource located next to this test class into a string.
	 *
	 * @param name resource name, resolved relative to this class
	 * @return the resource content decoded with {@link #FIXTURE_ENCODING}
	 * @throws IOException if reading or closing the stream fails
	 */
	private static String readFixture(String name) throws IOException {
		// try-with-resources closes the stream even when an assertion or read fails
		try (InputStream inHtml = SimpleHtmlParserTest.class.getResourceAsStream(name)) {
			// getResourceAsStream returns null for an unknown resource; fail with a clear message
			Assert.assertNotNull("Missing test resource: " + name, inHtml);
			return IOUtils.toString(inHtml, FIXTURE_ENCODING);
		}
	}
}