/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.protocol.http; import static org.junit.Assert.assertEquals; import java.net.URL; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.ProtocolOutput; import org.junit.After; import org.junit.Test; import org.mortbay.jetty.Server; import org.mortbay.jetty.nio.SelectChannelConnector; import org.mortbay.jetty.servlet.Context; import org.mortbay.jetty.servlet.ServletHolder; /** * Test cases for protocol-http */ public class TestProtocolHttp { private static final String RES_DIR = System.getProperty("test.data", "."); private Http http; private Server server; private Context root; private Configuration conf; private int port; public void setUp(boolean redirection) throws Exception { conf = new Configuration(); conf.addResource("nutch-default.xml"); conf.addResource("nutch-site-test.xml"); http = new Http(); http.setConf(conf); server = new Server(); if (redirection) { root = new Context(server, "/redirection", Context.SESSIONS); root.setAttribute("newContextURL", "/redirect"); } else { root = new Context(server, "/", Context.SESSIONS); } ServletHolder sh = new ServletHolder( org.apache.jasper.servlet.JspServlet.class); root.addServlet(sh, "*.jsp"); root.setResourceBase(RES_DIR); } @After public void tearDown() throws Exception { server.stop(); } @Test public void testStatusCode() throws Exception { startServer(47504, false); fetchPage("/basic-http.jsp", 200); fetchPage("/redirect301.jsp", 301); fetchPage("/redirect302.jsp", 302); fetchPage("/nonexists.html", 404); fetchPage("/brokenpage.jsp", 500); } @Test public void testRedirectionJetty() throws Exception { // Redirection via Jetty startServer(47503, true); fetchPage("/redirection", 302); } /** * Starts the Jetty server at a specified port and redirection parameter. * * @param portno * Port number. * @param redirection * whether redirection */ private void startServer(int portno, boolean redirection) throws Exception { port = portno; setUp(redirection); SelectChannelConnector connector = new SelectChannelConnector(); connector.setHost("127.0.0.1"); connector.setPort(port); server.addConnector(connector); server.start(); } /** * Fetches the specified <code>page</code> from the local Jetty server and * checks whether the HTTP response status code matches with the expected * code. Also use jsp pages for redirection. * * @param page * Page to be fetched. * @param expectedCode * HTTP response status code expected while fetching the page. */ private void fetchPage(String page, int expectedCode) throws Exception { URL url = new URL("http", "127.0.0.1", port, page); CrawlDatum crawlDatum = new CrawlDatum(); Response response = http.getResponse(url, crawlDatum, true); ProtocolOutput out = http.getProtocolOutput(new Text(url.toString()), crawlDatum); Content content = out.getContent(); assertEquals("HTTP Status Code for " + url, expectedCode, response.getCode()); if (page.compareTo("/nonexists.html") != 0 && page.compareTo("/brokenpage.jsp") != 0 && page.compareTo("/redirection") != 0) { assertEquals("ContentType " + url, "text/html", content.getContentType()); } } }