/*
* This library is part of OpenCms -
* the Open Source Content Management System
*
* Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software GmbH, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.search;
import org.opencms.main.CmsLog;
import org.opencms.search.fields.CmsSearchField;
import org.opencms.util.CmsStringUtil;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
/**
 * Collects category information during a search process.<p>
 *
 * For every hit passed to {@link #collect(int)} the stored document is loaded from the
 * index searcher, its category field is read, and a per-category counter is increased.
 * Documents without a category (or documents that could not be read) are counted
 * under {@link #UNKNOWN_CATEGORY}.<p>
 *
 * <b>Please note:</b> The calculation of the category count slows down the search time by an order
 * of magnitude. Make sure that you only use this feature if it's really required!
 * Be especially careful if your search result list can become large (&gt; 1000 documents), since in this case
 * overall system performance will certainly be impacted considerably when calculating the categories.<p>
 *
 * @since 6.0.0
 */
public class CmsSearchCategoryCollector extends Collector {

    /**
     * Class with an increasing counter to avoid multiple look ups and
     * object creations when dealing with the category count.<p>
     */
    private static class CmsCategoryCount {

        /** The category count. */
        int m_count;

        /**
         * Creates a new instance with an initial count of 1.<p>
         */
        CmsCategoryCount() {

            m_count = 1;
        }

        /**
         * Increases the count by one.<p>
         */
        void inc() {

            m_count++;
        }

        /**
         * Creates an Integer for this count.<p>
         *
         * @return an Integer for this count
         */
        Integer toInteger() {

            return Integer.valueOf(m_count);
        }
    }

    /** Category used in case the document belongs to no category. */
    public static final String UNKNOWN_CATEGORY = "unknown";

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsSearchCategoryCollector.class);

    /** The internal map of the categories found. */
    private final Map<String, CmsCategoryCount> m_categories;

    /** The document number offset of the index reader currently being collected. */
    private int m_docBase;

    /** The index searcher used. */
    private final IndexSearcher m_searcher;

    /**
     * Creates a new category search collector instance.<p>
     *
     * @param searcher the index searcher used
     */
    public CmsSearchCategoryCollector(IndexSearcher searcher) {

        super();
        m_searcher = searcher;
        m_categories = new HashMap<String, CmsCategoryCount>();
    }

    /**
     * Convenience method to format a map of categories in a nice 2 column list, for example
     * for display of debugging output.<p>
     *
     * The first line contains the total number of categories, followed by one line
     * per category with the category name padded to 30 characters and the count.<p>
     *
     * @param categories the map to format
     * @return the formatted category map
     */
    public static final String formatCategoryMap(Map<String, Integer> categories) {

        // the buffer is local to this call, so no synchronized StringBuffer is required
        StringBuilder result = new StringBuilder(256);
        result.append("Total categories: ");
        result.append(categories.size());
        result.append('\n');
        for (Map.Entry<String, Integer> entry : categories.entrySet()) {
            result.append(CmsStringUtil.padRight(entry.getKey(), 30));
            result.append(entry.getValue().intValue());
            result.append('\n');
        }
        return result.toString();
    }

    /**
     * @see org.apache.lucene.search.Collector#acceptsDocsOutOfOrder()
     */
    @Override
    public boolean acceptsDocsOutOfOrder() {

        // we just count hits and these don't need to be ordered
        return true;
    }

    /**
     * Counts the category of the given hit.<p>
     *
     * The document number passed in by Lucene is relative to the index reader announced in the
     * last call to {@link #setNextReader(IndexReader, int)}, so it is re-based with the stored
     * document base before the document is loaded from the (top level) index searcher.<p>
     *
     * @param id the document number of the hit, relative to the current index reader
     *
     * @see org.apache.lucene.search.Collector#collect(int)
     */
    @Override
    public void collect(int id) {

        // translate the reader relative id to the id the top level searcher expects
        int docId = m_docBase + id;
        String category = null;
        try {
            Document doc = m_searcher.doc(docId);
            category = doc.get(CmsSearchField.FIELD_CATEGORY);
        } catch (IOException e) {
            // category will be null, the hit is counted as "unknown" below
            if (LOG.isDebugEnabled()) {
                LOG.debug(
                    Messages.get().getBundle().key(Messages.LOG_READ_CATEGORY_FAILED_1, Integer.valueOf(docId)),
                    e);
            }
        }
        if (category == null) {
            category = UNKNOWN_CATEGORY;
        }
        CmsCategoryCount count = m_categories.get(category);
        if (count != null) {
            count.inc();
        } else {
            // first hit for this category, a new counter starts with 1
            m_categories.put(category, new CmsCategoryCount());
        }
    }

    /**
     * Returns the category count result, the returned map
     * contains Strings (category names) mapped to an Integer (the count).<p>
     *
     * The result is a new map sorted by category name, changes to it do not
     * affect the internal state of this collector.<p>
     *
     * @return the category count result
     */
    public Map<String, Integer> getCategoryCountResult() {

        Map<String, Integer> result = new TreeMap<String, Integer>();
        for (Map.Entry<String, CmsCategoryCount> entry : m_categories.entrySet()) {
            result.put(entry.getKey(), entry.getValue().toInteger());
        }
        return result;
    }

    /**
     * Remembers the document base of the next index reader, which is required to
     * resolve the reader relative document numbers passed to {@link #collect(int)}.<p>
     *
     * @param reader the next index reader (not used)
     * @param docBase the document number offset of the given reader
     *
     * @see org.apache.lucene.search.Collector#setNextReader(org.apache.lucene.index.IndexReader, int)
     */
    @Override
    public void setNextReader(IndexReader reader, int docBase) {

        m_docBase = docBase;
    }

    /**
     * @see org.apache.lucene.search.Collector#setScorer(org.apache.lucene.search.Scorer)
     */
    @Override
    public void setScorer(Scorer arg0) {

        // ignored, we don't need a scorer
    }

    /**
     * Returns the current category count formatted with {@link #formatCategoryMap(Map)}.<p>
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {

        return formatCategoryMap(getCategoryCountResult());
    }
}