/*
* Copyright (C) 2003-2008 eXo Platform SAS.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License
* as published by the Free Software Foundation; either version 3
* of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, see<http://www.gnu.org/licenses/>.
*/
package org.exoplatform.services.jcr.impl.core.query;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.exoplatform.services.jcr.impl.core.NodeImpl;
import org.exoplatform.services.jcr.impl.core.RepositoryImpl;
import org.exoplatform.services.jcr.impl.core.query.lucene.FieldNames;
import org.exoplatform.services.jcr.impl.core.query.lucene.IndexingConfigurationImpl;
import org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex;
import org.exoplatform.services.jcr.impl.core.query.lucene.Util;
import org.exoplatform.services.log.ExoLogger;
import org.exoplatform.services.log.Log;
import java.io.IOException;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
/**
* Created by The eXo Platform SAS.
*
* @author <a href="mailto:Sergey.Kabashnyuk@gmail.com">Sergey Kabashnyuk</a>
* @version $Id: TestIndexingConfig.java 12051 2008-03-18 13:47:22Z serg $
*/
public class TestIndexingConfig extends BaseQueryTest
{
   /**
    * Prefix this test puts in front of a property name, both when registering a
    * per-property analyzer and when building the term queries.
    */
   private static final String FULLTEXT_PREFIX = "FULL:";

   private final String workspaceName = "ws2";

   private final String repositoryName = "db1tck";

   /** Plain English sentence containing the word "the" in two different cases. */
   public final String testString1 = "The quick brown fox jumped over the lazy dogs";

   /** Text with punctuation, an ampersand and an e-mail address. */
   public final String testString2 = "XY&Z Corporation - xyz@example.com";

   /** Name of the property that setUp() binds to a SimpleAnalyzer. */
   public final String simple = "simpleAnalyzer";

   /** Name of the property that setUp() binds to a WhitespaceAnalyzer. */
   public final String whitespace = "whitespaceAnalyzer";

   /** Name of the property that setUp() binds to a StopAnalyzer. */
   public final String stop = "stopAnalyzer";

   /**
    * Name of a property with no analyzer registered by this test, so whatever the
    * index uses by default applies (the original author notes it might be a
    * standard analyzer).
    */
   public final String def = "defaultAnalyzer";

   /** Parent of every node created by a test; created in setUp() and removed in tearDown(). */
   Node testRoot = null;

   private SearchManager searchManager;

   private SearchIndex searchIndex;

   private Session testSession;

   /**
    * Logs in to the test workspace, registers the per-property analyzers on its
    * search index and creates the node under which the tests add their content.
    */
   @Override
   public void setUp() throws Exception
   {
      super.setUp();

      RepositoryImpl db1tckRepo = (RepositoryImpl)repositoryService.getRepository(repositoryName);
      assertNotNull(db1tckRepo);
      testSession = db1tckRepo.login(credentials, workspaceName);

      searchManager = (SearchManager)db1tckRepo.getWorkspaceContainer(workspaceName).getComponent(SearchManager.class);
      assertNotNull(searchManager);
      searchIndex = (SearchIndex)(searchManager.getHandler());
      assertNotNull(searchIndex);

      IndexingConfigurationImpl indexingConfigurationImpl = (IndexingConfigurationImpl)searchIndex.getIndexingConfig();
      assertNotNull(indexingConfigurationImpl);
      indexingConfigurationImpl.addPropertyAnalyzer(FULLTEXT_PREFIX + simple, new SimpleAnalyzer(Version.LUCENE_36));
      indexingConfigurationImpl.addPropertyAnalyzer(FULLTEXT_PREFIX + whitespace, new WhitespaceAnalyzer(
         Version.LUCENE_36));
      indexingConfigurationImpl.addPropertyAnalyzer(FULLTEXT_PREFIX + stop, new StopAnalyzer(Version.LUCENE_36));

      testRoot = testSession.getRootNode().addNode("testrootAnalyzers");
      // The node was added through testSession, so it has to be saved through
      // testSession; saving the inherited fixture root does not persist it.
      testSession.save();
   }

   /**
    * Removes the content created by the test and releases the session. The parent
    * tearDown always runs, even if the cleanup itself fails.
    */
   @Override
   public void tearDown() throws Exception
   {
      try
      {
         // testRoot is null when setUp() failed before creating it
         if (testRoot != null)
         {
            testRoot.remove();
            testSession.save();
         }
      }
      finally
      {
         try
         {
            if (testSession != null)
            {
               testSession.logout();
            }
         }
         finally
         {
            super.tearDown();
         }
      }
   }

   /**
    * Checks the terms indexed for a property bound to SimpleAnalyzer: the test
    * expects lower-cased terms split on every non-letter.
    */
   public void testSimplePropertyAnalyzer() throws Exception
   {
      NodeImpl testNode1 = createAnalyzedNodes(simple);

      // node1 must be findable in the index by its identifier
      ScoreDoc doc = this.getDocument(testNode1.getInternalIdentifier(), false);
      assertNotNull(doc);

      assertIndexedTerms(simple,
      // node1: [the] [quick] [brown] [fox] [jumped] [over] [the] [lazy] [dogs]
         new String[]{"the", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs"},
         // node2: [xy] [z] [corporation] [xyz] [example] [com]
         new String[]{"xy", "z", "corporation", "xyz", "example", "com"});
   }

   /**
    * Checks the terms indexed for a property bound to WhitespaceAnalyzer: the test
    * expects the text split on whitespace only, with case and punctuation kept.
    */
   public void testWhitespacePropertyAnalyzer() throws Exception
   {
      createAnalyzedNodes(whitespace);

      assertIndexedTerms(whitespace,
      // node1: [The] [quick] [brown] [fox] [jumped] [over] [the] [lazy] [dogs]
         new String[]{"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dogs"},
         // node2: [XY&Z] [Corporation] [-] [xyz@example.com]
         new String[]{"XY&Z", "Corporation", "-", "xyz@example.com"});
   }

   /**
    * Checks the terms indexed for a property bound to StopAnalyzer. Only the
    * non-stop-word terms are queried, so "the" is left out of the node1 query.
    */
   public void testStopPropertyAnalyzer() throws Exception
   {
      createAnalyzedNodes(stop);

      assertIndexedTerms(stop,
      // node1: [quick] [brown] [fox] [jumped] [over] [lazy] [dogs]
         new String[]{"quick", "brown", "fox", "jumped", "over", "lazy", "dogs"},
         // node2: [xy] [z] [corporation] [xyz] [example] [com]
         new String[]{"xy", "z", "corporation", "xyz", "example", "com"});
   }

   /**
    * Checks the terms indexed for a property that has no analyzer registered by
    * this test. According to the original author's note the default is a
    * StandardAnalyzer configured with an empty stop word set.
    */
   public void testDefaultPropertyAnalyzer() throws Exception
   {
      createAnalyzedNodes(def);

      assertIndexedTerms(def,
      // node1: [the] [quick] [brown] [fox] [jumped] [over] [lazy] [dogs];
      // "the" is expected because the stop word set is empty
         new String[]{"the", "quick", "brown", "fox", "jumped", "over", "lazy", "dogs"},
         // node2: a stock Lucene StandardAnalyzer would give
         // [xy&z] [corporation] [xyz@example] [com]; with the configuration used
         // here the expected terms are [corporation] [xy&z] [xyz@example.com]
         new String[]{"xy&z", "corporation", "xyz@example.com"});
   }

   /**
    * Looks up the index entry of a node by its identifier.
    *
    * @param nodeIdentifer internal identifier of the node, matched against the UUID field
    * @param includeSystemIndex currently ignored; kept so existing callers keep compiling
    * @return the single matching entry, or <code>null</code> if nothing matches
    * @throws IOException if the index cannot be read
    * @throws RepositoryException if more than one entry matches the identifier
    */
   protected ScoreDoc getDocument(String nodeIdentifer, boolean includeSystemIndex) throws IOException,
      RepositoryException
   {
      IndexReader reader = ((SearchIndex)searchManager.getHandler()).getIndexReader();
      try
      {
         IndexSearcher is = new IndexSearcher(reader);
         try
         {
            TermQuery query = new TermQuery(new Term(FieldNames.UUID, nodeIdentifer));
            TopDocs topDocs = is.search(query, null, Integer.MAX_VALUE);
            if (topDocs.totalHits > 1)
            {
               throw new RepositoryException("Results more then one");
            }
            return topDocs.totalHits == 1 ? topDocs.scoreDocs[0] : null;
         }
         finally
         {
            is.close();
         }
      }
      finally
      {
         // released even when the search itself fails
         Util.closeOrRelease(reader);
      }
   }

   /**
    * Adds "node1" and "node2" under the test root, stores testString1 and
    * testString2 respectively in the given property, and saves the session.
    *
    * @param propertyName name of the property to set on both nodes
    * @return the node named "node1"
    */
   private NodeImpl createAnalyzedNodes(String propertyName) throws RepositoryException
   {
      NodeImpl testNode1 = (NodeImpl)testRoot.addNode("node1");
      testNode1.setProperty(propertyName, testString1);
      Node testNode2 = testRoot.addNode("node2");
      testNode2.setProperty(propertyName, testString2);
      testSession.save();
      return testNode1;
   }

   /**
    * Asserts that each of the two term sets, used as an all-terms-required query
    * on the property's field, matches exactly one index entry. Both queries share
    * one reader and one searcher, which are released even if an assertion fails.
    *
    * @param propertyName property whose field is queried
    * @param node1Terms terms expected from testString1
    * @param node2Terms terms expected from testString2
    */
   private void assertIndexedTerms(String propertyName, String[] node1Terms, String[] node2Terms) throws Exception
   {
      String field = FULLTEXT_PREFIX + propertyName;
      IndexReader ir = searchIndex.getIndexReader();
      try
      {
         IndexSearcher is = new IndexSearcher(ir);
         try
         {
            TopDocs search = is.search(allTermsQuery(field, node1Terms), null, Integer.MAX_VALUE);
            assertEquals("Entries matching all terms of testString1 in " + field, 1, search.totalHits);

            search = is.search(allTermsQuery(field, node2Terms), null, Integer.MAX_VALUE);
            assertEquals("Entries matching all terms of testString2 in " + field, 1, search.totalHits);
         }
         finally
         {
            is.close();
         }
      }
      finally
      {
         Util.closeOrRelease(ir);
      }
   }

   /**
    * Builds a query that requires every given term to be present in the field.
    *
    * @param field full name of the index field
    * @param terms exact terms, each added as a MUST clause
    * @return the combined query
    */
   private BooleanQuery allTermsQuery(String field, String[] terms)
   {
      BooleanQuery compl = new BooleanQuery();
      for (String text : terms)
      {
         compl.add(new TermQuery(new Term(field, text)), Occur.MUST);
      }
      return compl;
   }
}