/* * This library is part of OpenCms - * the Open Source Content Management System * * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * For further information about Alkacon Software GmbH, please see the * company website: http://www.alkacon.com * * For further information about OpenCms, please see the * project website: http://www.opencms.org * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.opencms.search; import org.opencms.main.CmsLog; import org.opencms.search.fields.CmsSearchField; import org.opencms.util.CmsStringUtil; import java.io.IOException; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Scorer; /** * Collects category information during a search process.<p> * * <b>Please note:</b> The calculation of the category count slows down the search time by an order * of magnitude. Make sure that you only use this feature if it's really required! * Be especially careful if your search result list can become large (> 1000 documents), since in this case * overall system performance will certainly be impacted considerably when calculating the categories.<p> * * @since 6.0.0 */ public class CmsSearchCategoryCollector extends Collector { /** * Class with an increasing counter to avoid multiple look ups and * object creations when dealing with the category count.<p> */ private static class CmsCategroyCount { /** The category count. */ int m_count; /** * Creates a new instance with a initial count of 1.<p> */ CmsCategroyCount() { m_count = 1; } /** * Increases the count by one.<p> */ void inc() { m_count++; } /** * Creates an Integer for this count.<p> * * @return an Integer for this count */ Integer toInteger() { return new Integer(m_count); } } /** Category used in case the document belongs to no category. */ public static final String UNKNOWN_CATEGORY = "unknown"; /** The log object for this class. */ private static final Log LOG = CmsLog.getLog(CmsSearchCategoryCollector.class); /** The internal map of the categories found. */ private Map<String, CmsCategroyCount> m_categories; /** The index searcher used. */ private IndexSearcher m_searcher; /** * Creates a new category search collector instance.<p> * * @param searcher the index searcher used */ public CmsSearchCategoryCollector(IndexSearcher searcher) { super(); m_searcher = searcher; m_categories = new HashMap<String, CmsCategroyCount>(); } /** * Convenience method to format a map of categories in a nice 2 column list, for example * for display of debugging output.<p> * * @param categories the map to format * @return the formatted category map */ public static final String formatCategoryMap(Map<String, Integer> categories) { StringBuffer result = new StringBuffer(256); result.append("Total categories: "); result.append(categories.size()); result.append('\n'); Iterator<Map.Entry<String, Integer>> i = categories.entrySet().iterator(); while (i.hasNext()) { Map.Entry<String, Integer> entry = i.next(); result.append(CmsStringUtil.padRight(entry.getKey(), 30)); result.append(entry.getValue().intValue()); result.append('\n'); } return result.toString(); } /** * @see org.apache.lucene.search.Collector#acceptsDocsOutOfOrder() */ @Override public boolean acceptsDocsOutOfOrder() { // we just count hits and these don't need to be ordered return true; } /** * @see org.apache.lucene.search.Collector#collect(int) */ @Override public void collect(int id) { String category = null; try { Document doc = m_searcher.doc(id); category = doc.get(CmsSearchField.FIELD_CATEGORY); } catch (IOException e) { // category will be null if (LOG.isDebugEnabled()) { LOG.debug(Messages.get().getBundle().key(Messages.LOG_READ_CATEGORY_FAILED_1, new Integer(id)), e); } } if (category == null) { category = UNKNOWN_CATEGORY; } CmsCategroyCount count = m_categories.get(category); if (count != null) { count.inc(); } else { count = new CmsCategroyCount(); m_categories.put(category, count); } } /** * Returns the category count result, the returned map * contains Strings (category names) mapped to an Integer (the count).<p> * * @return the category count result */ public Map<String, Integer> getCategoryCountResult() { Map<String, Integer> result = new TreeMap<String, Integer>(); Iterator<Map.Entry<String, CmsCategroyCount>> i = m_categories.entrySet().iterator(); while (i.hasNext()) { Map.Entry<String, CmsCategroyCount> entry = i.next(); result.put(entry.getKey(), entry.getValue().toInteger()); } return result; } /** * @see org.apache.lucene.search.Collector#setNextReader(org.apache.lucene.index.IndexReader, int) */ @Override public void setNextReader(IndexReader reader, int docBase) { // ignored, we just count hits } /** * @see org.apache.lucene.search.Collector#setScorer(org.apache.lucene.search.Scorer) */ @Override public void setScorer(Scorer arg0) { // ignored, we don't need a scorer } /** * @see java.lang.Object#toString() */ @Override public String toString() { return formatCategoryMap(getCategoryCountResult()); } }