/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.parse;
// JUnit imports
import junit.framework.TestCase;
// Nutch and Hadoop imports
import org.apache.nutch.plugin.Extension;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.NutchConfiguration;
/**
* Unit tests for the parse plugin selection performed by {@link ParserFactory}.
*
* @author Sebastien Le Callonnec
* @version 1.0
*/
public class TestParserFactory extends TestCase {

  /** URL handed to every <code>getParsers</code> lookup in this test. */
  private static final String TEST_URL = "http://foo.com";

  private Configuration conf;
  private ParserFactory parserFactory;

  public TestParserFactory(String name) {
    super(name);
  }

  /**
   * Inits the Test Case with the test parse-plugin file: every plugin is
   * included and <code>parse.plugin.file</code> points at
   * <code>parse-plugin-test.xml</code>.
   */
  protected void setUp() throws Exception {
    conf = NutchConfiguration.create();
    conf.set("plugin.includes", ".*");
    conf.set("parse.plugin.file",
        "org/apache/nutch/parse/parse-plugin-test.xml");
    parserFactory = new ParserFactory(conf);
  }

  /** Unit test for <code>getExtensions(String)</code> method. */
  public void testGetExtensions() throws Exception {
    assertFirstExtensionPlugin("text/html", "parse-html");
    // A content type carrying a charset parameter must select the same plugin.
    assertFirstExtensionPlugin("text/html; charset=ISO-8859-1", "parse-html");
    // A content type no plugin is expected to know about; the test expects
    // parse-text to be chosen for it.
    assertFirstExtensionPlugin("foo/bar", "parse-text");
  }

  /** Unit test to check <code>getParsers</code> method */
  public void testGetParsers() throws Exception {
    assertSingleParser("text/html",
        "org.apache.nutch.parse.html.HtmlParser");
    assertSingleParser("text/html; charset=ISO-8859-1",
        "org.apache.nutch.parse.html.HtmlParser");
    assertSingleParser("application/x-javascript",
        "org.apache.nutch.parse.js.JSParseFilter");
    assertSingleParser("text/plain",
        "org.apache.nutch.parse.text.TextParser");

    // The parser returned for "text/plain" and the one returned for "*" must
    // be one and the same object. assertSame checks identity directly;
    // comparing hashCode() values could pass for two distinct objects.
    Parser parser1 = parserFactory.getParsers("text/plain", TEST_URL)[0];
    Parser parser2 = parserFactory.getParsers("*", TEST_URL)[0];
    assertSame("Different instances!", parser1, parser2);

    //test and make sure that the rss parser is loaded even though its plugin.xml
    //doesn't claim to support text/rss, only application/rss+xml
    assertSingleParser("text/rss",
        "org.apache.nutch.parse.rss.RSSParser");
  }

  /**
   * Asserts that the first extension returned by
   * <code>getExtensions(contentType)</code> belongs to the given plugin.
   *
   * @param contentType content type to look up
   * @param expectedPluginId plugin id the first extension's descriptor must report
   * @throws Exception if the lookup itself fails
   */
  private void assertFirstExtensionPlugin(String contentType,
      String expectedPluginId) throws Exception {
    Extension first =
        (Extension) parserFactory.getExtensions(contentType).get(0);
    assertEquals("Unexpected plugin for content type " + contentType,
        expectedPluginId, first.getDescriptor().getPluginId());
  }

  /**
   * Asserts that <code>getParsers(contentType, TEST_URL)</code> yields exactly
   * one parser and that its class has the given fully qualified name.
   *
   * @param contentType content type to look up
   * @param expectedClassName fully qualified class name of the expected parser
   * @throws Exception if the lookup itself fails
   */
  private void assertSingleParser(String contentType,
      String expectedClassName) throws Exception {
    Parser[] found = parserFactory.getParsers(contentType, TEST_URL);
    assertNotNull("No parser array returned for " + contentType, found);
    assertEquals("Unexpected number of parsers for " + contentType,
        1, found.length);
    assertEquals("Unexpected parser class for " + contentType,
        expectedClassName, found[0].getClass().getName());
  }
}