package com.occamlab.te.parsers; import static org.junit.Assert.*; import java.io.File; import java.io.PrintWriter; import java.io.StringWriter; import java.net.URL; import java.net.URLConnection; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.bootstrap.DOMImplementationRegistry; import org.w3c.dom.ls.DOMImplementationLS; import org.w3c.dom.ls.LSSerializer; import com.occamlab.te.Engine; import com.occamlab.te.RuntimeOptions; import com.occamlab.te.TEClassLoader; import com.occamlab.te.TECore; import com.occamlab.te.index.Index; public class HttpParserTest { private static final String DOCBOOK_NS = "http://docbook.org/ns/docbook"; private static DocumentBuilder docBuilder; private static TECore teCore; private StringWriter strWriter; private PrintWriter logger; @BeforeClass public static void setUpClass() throws Exception { File indexFile = new File(HttpParserTest.class.getResource( "/conf/index-parsers.xml").toURI()); Index index = new Index(indexFile); RuntimeOptions opts = new RuntimeOptions(); Engine engine = new Engine(); engine.setClassLoader("default", new TEClassLoader(new File(System.getProperty("user.home")))); teCore = new TECore(engine, index, opts); DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); docBuilder = dbf.newDocumentBuilder(); } @Before public void initWriter() { this.strWriter = new StringWriter(); this.logger = new PrintWriter(strWriter); } @Test public void parseXmlEntityWithNullInstruction() throws Throwable { URL url = this.getClass().getResource("/article.xml"); URLConnection urlConn = url.openConnection(); Document rsp = HTTPParser.parse(urlConn, null, logger, teCore); assertNotNull("Expected article element.", rsp.getElementsByTagNameNS(DOCBOOK_NS, "article").item(0)); assertTrue("Expected empty log.", strWriter.getBuffer().length() == 0); } @Test public void parseTextEntityWithNullInstruction() throws Throwable { URL url = this.getClass().getResource("/jabberwocky.txt"); URLConnection urlConn = url.openConnection(); Document result = HTTPParser.parse(urlConn, null, logger, teCore); Element content = (Element) result.getElementsByTagName("content") .item(0); assertNotNull("Expected content element.", content); assertTrue("Expected text content starting with 'Twas brillig", content .getTextContent().startsWith("'Twas brillig")); } @Test public void parseTextEntityWithImageParser() throws Throwable { URL url = this.getClass().getResource("/jabberwocky.txt"); URLConnection urlConn = url.openConnection(); Document imgParser = docBuilder.parse(getClass().getResourceAsStream( "/conf/HttpParser+ImageParser.xml")); Document result = HTTPParser.parse(urlConn, imgParser.getDocumentElement(), logger, teCore); Element content = (Element) result.getElementsByTagName("content") .item(0); assertTrue("content element should have no child nodes", !content.hasChildNodes()); } static String writeNodeToString(Node node) throws ClassNotFoundException, InstantiationException, IllegalAccessException, ClassCastException { DOMImplementationRegistry registry = DOMImplementationRegistry .newInstance(); DOMImplementationLS impl = (DOMImplementationLS) registry .getDOMImplementation("LS"); LSSerializer serializer = impl.createLSSerializer(); return serializer.writeToString(node); } }