/*
 * Encog(tm) Core v3.4 - Java Version
 * http://www.heatonresearch.com/encog/
 * https://github.com/encog/encog-java-core
 * Copyright 2008-2016 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * For more information on Heaton Research copyrights, licenses
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */
package org.encog.bot.html;

import java.io.ByteArrayInputStream;
import java.nio.charset.Charset;

import junit.framework.TestCase;

import org.encog.parse.tags.Tag;
import org.encog.parse.tags.read.ReadHTML;

/**
 * Unit tests for {@link ReadHTML}.
 *
 * Throughout these tests, a return value of {@code 0} from
 * {@link ReadHTML#read()} is the expected signal that a tag (or comment) was
 * encountered instead of a plain character; the tag itself is then inspected
 * through {@link ReadHTML#getTag()}.
 */
public class TestParseHTML extends TestCase {

	/**
	 * Charset used to turn the HTML fixtures into bytes. It is fixed so the
	 * bytes handed to the parser do not depend on the platform default.
	 */
	private static final Charset FIXTURE_CHARSET = Charset.forName("UTF-8");

	/**
	 * Build a parser that reads the supplied HTML from an in-memory stream.
	 *
	 * @param html
	 *            The HTML fixture to parse.
	 * @return A parser positioned at the start of the fixture.
	 */
	private static ReadHTML parserFor(final String html) {
		return new ReadHTML(new ByteArrayInputStream(
				html.getBytes(FIXTURE_CHARSET)));
	}

	/**
	 * Text interleaved with a begin and an end tag that carry no attributes.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testAttributeLess() throws Throwable {
		final ReadHTML parse = parserFor("12<b>12</b>1");

		assertEquals('1', parse.read());
		assertEquals('2', parse.read());

		// Opening <b> tag.
		assertEquals(0, parse.read());
		final String beginName = parse.getTag().getName();
		assertTrue("unexpected tag name: " + beginName,
				"b".equalsIgnoreCase(beginName));
		assertSame(Tag.Type.BEGIN, parse.getTag().getType());

		assertEquals('1', parse.read());
		assertEquals('2', parse.read());

		// Closing </b> tag.
		assertEquals(0, parse.read());
		final Tag tag = parse.getTag();
		final String endName = tag.getName();
		assertTrue("unexpected tag name: " + endName,
				"b".equalsIgnoreCase(endName));
		assertSame(Tag.Type.END, tag.getType());
		assertEquals("</b>", tag.toString());

		assertEquals('1', parse.read());
	}

	/**
	 * A tag whose attribute values are enclosed in double quotes.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testAttributes() throws Throwable {
		final ReadHTML parse = parserFor(
				"<img src=\"picture.gif\" alt=\"A Picture\">");

		assertEquals(0, parse.read());
		final Tag tag = parse.getTag();
		assertNotNull(tag);
		assertEquals("img", tag.getName());
		// NOTE(review): a check that tag.toString() equals the input HTML was
		// previously commented out at this point; the reason is not recorded,
		// so confirm the intended toString() format before reinstating it.
		assertEquals("picture.gif", tag.getAttributeValue("src"));
		assertEquals("A Picture", tag.getAttributeValue("alt"));
	}

	/**
	 * A tag whose attribute values are not enclosed in any delimiter.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testAttributesNoDELIM() throws Throwable {
		final ReadHTML parse = parserFor("<img src=picture.gif alt=APicture>");

		assertEquals(0, parse.read());
		final Tag tag = parse.getTag();
		assertNotNull(tag);
		assertEquals("img", tag.getName());
		assertEquals("picture.gif", tag.getAttributeValue("src"));
		assertEquals("APicture", tag.getAttributeValue("alt"));
	}

	/**
	 * A self-closing tag is expected to be reported twice: first as a begin
	 * tag and then, after {@code readToTag()}, as an end tag of the same name.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testBoth() throws Throwable {
		final String htmlName = "br";
		final ReadHTML parse = parserFor("<br/>");

		assertEquals(0, parse.read());
		Tag tag = parse.getTag();
		assertNotNull(tag);
		assertSame(Tag.Type.BEGIN, tag.getType());
		assertEquals(htmlName, tag.getName());

		parse.readToTag();
		tag = parse.getTag();
		assertNotNull(tag);
		assertSame(Tag.Type.END, tag.getType());
		assertEquals(htmlName, tag.getName());
	}

	/**
	 * A self-closing tag that also has attributes. Only the initial tag
	 * marker is verified.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testBothWithAttributes() throws Throwable {
		final ReadHTML parse = parserFor(
				"<img src=\"picture.gif\" alt=\"A Picture\"/>");

		assertEquals(0, parse.read());
	}

	/**
	 * An HTML comment between two characters is expected to surface as a
	 * single tag marker.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testComment() throws Throwable {
		final ReadHTML parse = parserFor("a<!-- Hello -->b");

		assertEquals('a', parse.read());
		assertEquals(0, parse.read());
		assertEquals('b', parse.read());
	}

	/**
	 * The content of a script element is expected to be returned character
	 * by character between the markers for its begin and end tags.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testScript() throws Throwable {
		final ReadHTML parse = parserFor("a<script>12</script>b");

		assertEquals('a', parse.read());
		assertEquals(0, parse.read());
		assertEquals('1', parse.read());
		assertEquals('2', parse.read());
		assertEquals(0, parse.read());
		assertEquals('b', parse.read());
	}

	/**
	 * A {@code <} inside a script element is expected to be returned as a
	 * literal character, while a tag after the script is still recognized.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testScript2() throws Throwable {
		final ReadHTML parse = parserFor("a<script>1<2</script>b<br>");

		assertEquals('a', parse.read());
		assertEquals(0, parse.read());
		assertEquals('1', parse.read());
		assertEquals('<', parse.read());
		assertEquals('2', parse.read());
		assertEquals(0, parse.read());
		assertEquals('b', parse.read());
		assertEquals(0, parse.read());
	}

	/**
	 * After advancing to a tag, the parser's own {@code toString()} is
	 * expected to contain that tag's attribute value.
	 */
	public void testToString() {
		final ReadHTML parse = parserFor(
				"a<img src=\"picture.gif\" alt=\"A Picture\">b");

		parse.readToTag();
		final String description = parse.toString();
		assertTrue("parser description lacks the attribute value: "
				+ description, description.contains("A Picture"));
	}

	/**
	 * Reads a self-closing tag and verifies only that a tag marker is
	 * returned.
	 *
	 * NOTE(review): despite the name, no {@code toString()} call is made
	 * here; confirm whether an assertion on the tag text was intended.
	 */
	public void testTagToString() {
		final ReadHTML parse = parserFor("<br/>");

		assertEquals(0, parse.read());
	}

	/**
	 * An empty pair of angle brackets is expected to be returned as two
	 * literal characters rather than as a tag.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testSpecialCharacter() throws Throwable {
		final ReadHTML parse = parserFor("<>A");

		assertEquals('<', parse.read());
		assertEquals('>', parse.read());
		assertEquals('A', parse.read());
	}

	/**
	 * A DOCTYPE declaration with a quoted value is expected to be reported
	 * as a tag whose {@code toString()} reproduces the input exactly.
	 *
	 * @throws Throwable
	 *             If the parser fails unexpectedly.
	 */
	public void testSimpleAttribute() throws Throwable {
		final String html = "<!DOCTYPE \"test\">";
		final ReadHTML parse = parserFor(html);

		assertEquals(0, parse.read());
		final Tag tag = parse.getTag();
		assertEquals(html, tag.toString());
	}
}