/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.analysis;
import org.apache.nutch.searcher.Query;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.NutchConfiguration;
import junit.framework.TestCase;
/**
* JUnit tests for query parser
*
*/
public class TestQueryParser extends TestCase {
private static Configuration conf = NutchConfiguration.create();
public void assertQueryEquals(String query, String result) throws Exception {
try {
Query q = NutchAnalysis.parseQuery(query, conf);
String s = q.toString();
if (!s.equals(result)) {
fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result
+ "/");
}
} catch (Exception e) {
throw new Exception("error: While parsing query:" + query, e);
}
}
/**
* Test query parser
*
* @throws Exception
*/
public void testParseQuery() throws Exception {
//simple tests
assertQueryEquals("x", "x");
assertQueryEquals("X", "x");
assertQueryEquals("+x", "x");
assertQueryEquals("-x", "-x");
assertQueryEquals("x y", "x y");
assertQueryEquals(" x y ", "x y");
assertQueryEquals("test +", "test");
// missing fourth double quote
assertQueryEquals("\" abc def \" \" def ghi ", "\"abc def\" \"def ghi\"");
//empty query
assertQueryEquals("\"", "");
//fields
assertQueryEquals("field:x -another:y", "field:x -another:y");
assertQueryEquals("the:x", "the:x");
//ACRONYM
assertQueryEquals("w.s.o.p.", "wsop");
//STOPWORD
assertQueryEquals("the", "");
assertQueryEquals("field:the -y", "field:the -y");
assertQueryEquals("\"the y\"", "\"the y\"");
assertQueryEquals("+the -y", "the -y");
//PHRASE
assertQueryEquals("\"hello world\"", "\"hello world\"");
assertQueryEquals("\"phrase a.b.c. phrase\"", "\"phrase abc phrase\"");
assertQueryEquals("\"the end\"", "\"the end\"");
assertQueryEquals("term\"the end\"", "term \"the end\"");
//unbalanced
assertQueryEquals("term\"the end", "term \"the end\"");
//SIGRAM
assertQueryEquals("\u3040\u3041\u3042", "\u3040 \u3041 \u3042");
//COMPOUND
assertQueryEquals("term some.email@adress.here",
"term \"some email adress here\"");
}
}