/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.util; import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.mapreduce.Job; import org.apache.nutch.metadata.Nutch; public abstract class NutchTool extends Configured { protected HashMap<String, Object> results = new HashMap<>(); protected Map<String, Object> status = Collections .synchronizedMap(new HashMap<String, Object>()); protected Job currentJob; protected int numJobs; protected int currentJobNum; /** * Runs the tool, using a map of arguments. May return results, or null. */ public abstract Map<String, Object> run(Map<String, Object> args, String crawlId) throws Exception; public NutchTool(Configuration conf){ super(conf); } public NutchTool(){ super(null); } /** Returns relative progress of the tool, a float in range [0,1]. */ public float getProgress() { float res = 0; if (currentJob != null) { try { res = (currentJob.mapProgress() + currentJob.reduceProgress()) / 2.0f; } catch (IOException e) { e.printStackTrace(); res = 0; } catch (IllegalStateException ile) { ile.printStackTrace(); res = 0; } } // take into account multiple jobs if (numJobs > 1) { res = (currentJobNum + res) / (float) numJobs; } status.put(Nutch.STAT_PROGRESS, res); return res; } /** Returns current status of the running tool. */ public Map<String, Object> getStatus() { return status; } /** * Stop the job with the possibility to resume. Subclasses should override * this, since by default it calls {@link #killJob()}. * * @return true if succeeded, false otherwise */ public boolean stopJob() throws Exception { return killJob(); } /** * Kill the job immediately. Clients should assume that any results that the * job produced so far are in inconsistent state or missing. * * @return true if succeeded, false otherwise. * @throws Exception */ public boolean killJob() throws Exception { if (currentJob != null && !currentJob.isComplete()) { try { currentJob.killJob(); return true; } catch (Exception e) { e.printStackTrace(); return false; } } return false; } }