/** * Copyright 2008 - 2009 Pro-Netics S.P.A. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package it.pronetics.madstore.crawler.impl.grid; import it.pronetics.madstore.crawler.Pipeline; import it.pronetics.madstore.crawler.model.Page; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.List; import org.gridgain.grid.GridException; import org.gridgain.grid.GridJob; import org.gridgain.grid.GridJobAdapter; import org.gridgain.grid.GridJobResult; import org.gridgain.grid.GridTaskSplitAdapter; import org.gridgain.grid.resources.GridSpringResource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Grid task which splits pipeline-based processing of several pages into several distributed jobs, * each one processing a single page. * * @author Christian Mongillo * @author Sergio Bossa */ public class ProcessorTask extends GridTaskSplitAdapter<Collection<Page>, Collection<ProcessorTaskResult>> { private static final transient Logger LOG = LoggerFactory.getLogger(ProcessorTask.class); @GridSpringResource(resourceName = "pipeline") private transient Pipeline pipeline; public ProcessorTask(Pipeline pipeline) { this.pipeline = pipeline; } @Override protected Collection<? extends GridJob> split(int gridSize, Collection<Page> pages) throws GridException { Collection<GridJob> jobs = new ArrayList<GridJob>(pages.size()); for (final Page page : pages) { jobs.add(new GridJobAdapter() { public Serializable execute() throws GridException { try { Page processedPage = pipeline.execute(page); if (processedPage != null) { ProcessorTaskResult result = new ProcessorTaskResult(processedPage); return result; } else { return null; } } catch (Exception ex) { LOG.error(ex.getMessage(), ex); return null; } } }); } return jobs; } public Collection<ProcessorTaskResult> reduce(List<GridJobResult> jobResults) throws GridException { Collection<ProcessorTaskResult> processorResults = new ArrayList<ProcessorTaskResult>(jobResults.size()); for (GridJobResult jobResult : jobResults) { processorResults.add(jobResult.<ProcessorTaskResult>getData()); } return processorResults; } }