/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import monty.solr.util.MontySolrQueryTestCase;
import monty.solr.util.MontySolrSetup;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.adsabs.solr.AdsConfig.F;
import org.junit.BeforeClass;
/**
* Test for the normalized_text_ascii type
*
*/
public class TestAdsabsTypeNormalizedTextAscii extends MontySolrQueryTestCase {

  /**
   * Starts a Solr core from the ADS example collection's schema.xml and
   * solrconfig.xml, using the stock Solr example directory as Solr home.
   *
   * @throws Exception if the resources cannot be exposed or the core fails to start
   */
  @BeforeClass
  public static void beforeClass() throws Exception {
    // Both directories are referenced several times below; build each path once.
    final String adsabsCollection = MontySolrSetup.getMontySolrHome()
        + "/contrib/examples/adsabs/server/solr/collection1";
    final String solrExampleHome = MontySolrSetup.getSolrHome() + "/example/solr";

    makeResourcesVisible(Thread.currentThread().getContextClassLoader(),
        new String[] {adsabsCollection, solrExampleHome + "/collection1"});

    System.setProperty("solr.allow.unsafe.resourceloading", "true");

    schemaString = adsabsCollection + "/schema.xml";
    configString = adsabsCollection + "/solrconfig.xml";
    initCore(configString, schemaString, solrExampleHome);
  }

  /**
   * Indexes ten documents and then runs the same set of checks against every
   * field listed in {@code F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS}.
   *
   * <p>The id assertions below rely on the documents receiving ids 0..9 in
   * the order in which they are added here.
   *
   * @throws Exception if indexing or any query fails
   */
  public void test() throws Exception {
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "Bílá kobyla skočila přes čtyřista"));         // id 0
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "třicet-tři stříbrných střech"));              // id 1
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "A ještě TřistaTřicetTři stříbrných stovek")); // id 2
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "Cutri, R"));                                  // id 3
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "Cutri,R"));                                   // id 4
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "Cutri,.R"));                                  // id 5
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "one-jets"));                                  // id 6
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "jets-two"));                                  // id 7
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "three-jets-four"));                           // id 8
    assertU(addDocs(F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS, "five jets"));                                 // id 9
    assertU(commit("waitSearcher", "true"));

    assertQ(req("q", "*:*"), "//*[@numFound='10']");

    // Handy when debugging a failure:
    //dumpDoc(null, F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS);

    for (String f: F.TYPE_NORMALIZED_TEXT_ASCII_FIELDS) {
      checkAsciiNormalization(f);
      checkHyphensAndPhrases(f);
      checkTokensAreNotSplit(f);
      checkJetsAndWildcards(f);
    }
  }

  /**
   * Accented, unaccented and lower-cased spellings must all parse to the same
   * lower-case ASCII term and find document 0.
   */
  private void checkAsciiNormalization(String f) throws Exception {
    assertQueryEquals(req("q", f + ":Bílá", "qt", "aqp"), f+":bila", TermQuery.class);
    assertQueryEquals(req("q", f + ":Bila", "qt", "aqp"), f+":bila", TermQuery.class);
    assertQueryEquals(req("q", f + ":bila", "qt", "aqp"), f+":bila", TermQuery.class);

    assertQ(req("q", f + ":Bílá"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='0']");
    assertQ(req("q", f + ":Bila"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='0']");
    assertQ(req("q", f + ":bila"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='0']");
  }

  /**
   * Whitespace tokenization and phrases: a hyphenated word is collapsed into
   * one token, so searching for its two halves separately (as a boolean
   * conjunction or as a phrase) must find nothing.
   */
  private void checkHyphensAndPhrases(String f) throws Exception {
    // the hyphen is dropped and both halves are joined into a single term
    assertQueryEquals(req("q", f + ":třicet-tři", "qt", "aqp"), f+":tricettri", TermQuery.class);
    assertQ(req("q", f + ":třicet-tři"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='1']");

    // the halves on their own are not in the index
    assertQueryEquals(req("q", f + ":(třicet tři)", "qt", "aqp"), String.format("+%s:tricet +%s:tri", f, f), BooleanQuery.class);
    assertQ(req("q", f + ":(třicet tři)"), "//*[@numFound='0']");
    assertQueryEquals(req("q", f + ":\"třicet tři\"", "qt", "aqp"), String.format("%s:\"tricet tri\"", f), PhraseQuery.class);
    assertQ(req("q", f + ":\"třicet tři\""), "//*[@numFound='0']");

    // a phrase made of two real, adjacent tokens does match
    assertQueryEquals(req("q", f + ":\"stříbrných střech\"", "qt", "aqp"), String.format("%s:\"stribrnych strech\"", f), PhraseQuery.class);
    assertQ(req("q", f + ":\"stříbrných střech\""), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='1']");
  }

  /**
   * No WDF: a mixed-case compound stays one token (found regardless of case
   * or accents), and a trailing comma stays attached to its word.
   */
  private void checkTokensAreNotSplit(String f) throws Exception {
    assertQ(req("q", f + ":TřistaTřicetTři"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='2']");
    assertQ(req("q", f + ":třistatřicettři"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='2']");
    assertQ(req("q", f + ":TristaTricetTri"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='2']");

    assertQ(req("q", f + ":\"cutri,\""), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='3']");

    // TODO: I could easily activate this behaviour if we allow ANY field inside AqpDEFOPMarkPlainNodes
    //assertQ(req("q", f + ":cutri,r"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='4']");
    //assertQ(req("q", f + ":cutri,.r"), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='5']");
  }

  /**
   * Exact and wildcard lookups around the word "jets", which occurs glued to
   * other words in docs 6-8 and as a word of its own only in doc 9.
   */
  private void checkJetsAndWildcards(String f) throws Exception {
    assertQ(req("q", f + ":\"five jets\""), "//*[@numFound='1']");
    assertQ(req("q", f + ":\"fivejets\""), "//*[@numFound='0']");

    // with or without the hyphen, the query ends up as the same single token
    assertQ(req("q", f + ":\"onejets\""), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='6']");
    assertQ(req("q", f + ":\"one-jets\""), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='6']");

    // find only 'jets' (where the word stood alone)
    assertQ(req("q", f + ":\"jets\""), "//*[@numFound='1']", "//doc[1]/str[@name='id'][.='9']");

    // wildcards also match the standalone 'jets' token of doc 9
    assertQ(req("q", f + ":\"*jets\""), "//*[@numFound='2']", "//doc[1]/str[@name='id'][.='6']");
    assertQ(req("q", f + ":\"jets*\""), "//*[@numFound='2']", "//doc[1]/str[@name='id'][.='7']");
    assertQ(req("q", f + ":\"*jets*\""), "//*[@numFound='4']"); // docs 6, 7, 8 and 9
  }

  /**
   * Entry point for JUnit 3 style runners: wraps this class in a
   * {@code JUnit4TestAdapter}.
   */
  public static junit.framework.Test suite() {
    return new junit.framework.JUnit4TestAdapter(TestAdsabsTypeNormalizedTextAscii.class);
  }
}