/**
 * <a href="http://www.openolat.org">
 * OpenOLAT - Online Learning and Training</a><br>
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License"); <br>
 * you may not use this file except in compliance with the License.<br>
 * You may obtain a copy of the License at the
 * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
 * <p>
 * Unless required by applicable law or agreed to in writing,<br>
 * software distributed under the License is distributed on an "AS IS" BASIS, <br>
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
 * See the License for the specific language governing permissions and <br>
 * limitations under the License.
 * <p>
 * Initial code contributed and copyrighted by<br>
 * frentix GmbH, http://www.frentix.com
 * <p>
 */
package org.olat.core.util;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.Test;

/**
 * Tests for {@link SimpleHtmlParser}. Each test feeds an HTML document to the
 * parser and checks the values it exposes: body tag, charset name, HTML
 * content, doctype, XHTML namespaces, validity flag and own-CSS flag.
 * <p>
 * The fixtures <code>simple_1.html</code> and <code>simple_2.html</code> are
 * loaded from the classpath relative to this class.
 *
 * Initial date: 24.03.2014<br>
 * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
 *
 */
public class SimpleHtmlParserTest {

	/**
	 * Parses the fixture <code>simple_1.html</code> and expects an XHTML 1.0
	 * Transitional doctype, the XHTML namespace on the html tag, the charset
	 * "utf-8" and the content "Hello".
	 *
	 * @throws IOException if the fixture cannot be read
	 */
	@Test
	public void parse_minimalTinyMCEHTMLPage() throws IOException {
		String html = readResource("simple_1.html");
		SimpleHtmlParser parser = new SimpleHtmlParser(html);

		Assert.assertEquals("<body>", parser.getBodyTag());
		Assert.assertEquals("utf-8", parser.getCharsetName());
		Assert.assertEquals("Hello", parser.getHtmlContent().trim());
		Assert.assertEquals("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">", parser.getHtmlDocType());
		Assert.assertEquals("<html xmlns=\"http://www.w3.org/1999/xhtml\">", parser.getXhtmlNamespaces().trim());
		Assert.assertTrue(parser.isValidHtml());
		Assert.assertFalse(parser.hasOwnCss());
	}

	/**
	 * Parses the fixture <code>simple_2.html</code> and expects an HTML 4.01
	 * Transitional doctype, no XHTML namespaces, the charset "iso-2022-jp"
	 * and the content "Generated".
	 *
	 * @throws IOException if the fixture cannot be read
	 */
	@Test
	public void parse_externHtmlEditor() throws IOException {
		String html = readResource("simple_2.html");
		SimpleHtmlParser parser = new SimpleHtmlParser(html);

		Assert.assertEquals("<body>", parser.getBodyTag());
		Assert.assertEquals("iso-2022-jp", parser.getCharsetName());
		Assert.assertEquals("Generated", parser.getHtmlContent().trim());
		Assert.assertEquals("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n\t\"http://www.w3.org/TR/html4/loose.dtd\">", parser.getHtmlDocType());
		Assert.assertNull(parser.getXhtmlNamespaces());
		Assert.assertTrue(parser.isValidHtml());
		Assert.assertFalse(parser.hasOwnCss());
	}

	/**
	 * Parses an inline document without doctype, head or charset declaration
	 * and expects the parser to report null for the missing parts while still
	 * flagging the document as valid HTML with an empty content.
	 *
	 * @throws IOException declared for signature stability; nothing in this
	 *         test performs I/O
	 */
	@Test
	public void parse_ErroHandling() throws IOException {
		String html = "<html><body></body></html>";
		SimpleHtmlParser parser = new SimpleHtmlParser(html);

		Assert.assertEquals("<body>", parser.getBodyTag());
		Assert.assertNull(parser.getCharsetName());
		Assert.assertEquals("", parser.getHtmlContent().trim());
		Assert.assertNull(parser.getHtmlDocType());
		Assert.assertNull(parser.getXhtmlNamespaces());
		Assert.assertTrue(parser.isValidHtml());
		Assert.assertFalse(parser.hasOwnCss());
	}

	/**
	 * Reads a classpath resource located next to this test class into a string.
	 * The stream is always closed, and a missing resource fails the test with a
	 * message naming the resource instead of a bare NullPointerException.
	 *
	 * @param resourceName name of the resource, resolved relative to this class
	 * @return the complete content of the resource
	 * @throws IOException if reading the resource fails
	 */
	private static String readResource(String resourceName) throws IOException {
		try (InputStream inHtml = SimpleHtmlParserTest.class.getResourceAsStream(resourceName)) {
			Assert.assertNotNull("Test resource not found: " + resourceName, inHtml);
			// Decode with a fixed charset so the result does not depend on the
			// platform default.
			// NOTE(review): assumes both fixtures decode correctly as UTF-8;
			// simple_2.html declares iso-2022-jp in its markup - confirm.
			return IOUtils.toString(inHtml, StandardCharsets.UTF_8);
		}
	}
}