/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.webapp.controller; import java.io.IOException; import javax.servlet.ServletContext; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.nutch.clustering.ClusteringPresearchExtension; import org.apache.nutch.clustering.Clusters; import org.apache.nutch.clustering.HitsCluster; import org.apache.nutch.clustering.OnlineClusterer; import org.apache.nutch.clustering.OnlineClustererFactory; import org.apache.nutch.plugin.PluginRuntimeException; import org.apache.nutch.searcher.HitDetails; import org.apache.nutch.searcher.Summary; import org.apache.nutch.webapp.common.ServiceLocator; import org.apache.nutch.webapp.common.Startable; import org.apache.struts.tiles.ComponentContext; public class ClusteringController extends NutchController implements Startable { public static final String REQ_ATTR_CLUSTERS = "clusters"; static OnlineClusterer clusterer = null; public void nutchPerform(ComponentContext tileContext, HttpServletRequest request, HttpServletResponse response, ServletContext servletContext) throws ServletException, IOException { ServiceLocator locator = getServiceLocator(request); if (ClusteringPresearchExtension.isClusteringActive(locator)) { // display top N clusters and top Q documents inside them. int N = locator.getConfiguration().getInt( "extension.clustering.cluster-count", 10); int Q = locator.getConfiguration().getInt( "extension.clustering.cluster-top-documents-count", 3); int maxLabels = 2; HitDetails[] details = locator.getSearch().getDetails(); Summary[] summaries = locator.getSearch().getSummaries(); HitsCluster[] clusters = null; if (clusterer != null) { final long clusteringStart = System.currentTimeMillis(); try { clusters = clusterer.clusterHits(details, Summary .toStrings(summaries)); final long clusteringDuration = System.currentTimeMillis() - clusteringStart; LOG.info("Clustering took: " + clusteringDuration + " milliseconds."); } catch (Exception e) { LOG.info("Could not do clustering???" + e); return; } } // set new limit if fever than N results N = Math.min(N, clusters.length); // set to request Clusters clusterResult = new Clusters(clusters, N, Q, maxLabels); request.setAttribute(REQ_ATTR_CLUSTERS, clusterResult); } } public void start(ServletContext servletContext) { ServiceLocator locator = getServiceLocator(servletContext); try { clusterer = new OnlineClustererFactory(locator.getConfiguration()) .getOnlineClusterer(); } catch (PluginRuntimeException e) { LOG.info("Could not initialize Clusterer, is the plugin enabled?"); return; } } }