/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.crawler.framework;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang.StringUtils;
import org.archive.crawler.reporting.AlertThreadGroup;
import org.archive.util.ArchiveUtils;
import org.archive.util.Histotable;
import org.archive.util.Reporter;
/**
* A collection of ToeThreads. The class manages the ToeThreads currently
* running. It offers methods for increasing and decreasing their
* number, keeping track of their state and (not necessarily safely)
* killing hung threads.
*
* @author Gordon Mohr
* @author Kristinn Sigurdsson
*
* @see org.archive.crawler.framework.ToeThread
*/
public class ToePool extends ThreadGroup implements Reporter {
    /**
     * Priority given to every newly started worker thread: one step below
     * {@link Thread#NORM_PRIORITY}, so workers run slightly lower than usual.
     *
     * NOTE(review): this field is public and non-final, so any code can
     * reassign it; left as-is to keep the existing interface.
     */
    public static int DEFAULT_TOE_PRIORITY = Thread.NORM_PRIORITY - 1;

    /**
     * Spare slots added on top of {@link #activeCount()} when sizing the
     * array used to enumerate the group's threads.
     */
    private static final int ENUMERATION_HEADROOM = 10;

    /** Controller of the current crawl; set to null by {@link #cleanup()}. */
    protected CrawlController controller;

    /** Serial number handed to the next ToeThread that gets created. */
    protected int nextSerialNumber = 1;

    /** Most recent size requested through {@link #setSize(int)}. */
    protected int targetSize = 0;

    /**
     * Constructor. Creates an (initially empty) daemon pool of ToeThreads
     * named "ToeThreads".
     *
     * @param atg The parent thread group of this pool.
     * @param c A reference to the CrawlController for the current crawl.
     */
    public ToePool(AlertThreadGroup atg, CrawlController c) {
        super(atg, "ToeThreads");
        this.controller = c;
        setDaemon(true);
    }

    /**
     * Interrupts every thread currently enumerated in this group and drops
     * the reference to the controller.
     */
    public void cleanup() {
        // force all Toes waiting on queues, etc to proceed
        for (Thread toe : getToes()) {
            if (toe != null) {
                toe.interrupt();
            }
        }
        // see HER-2036
        this.controller = null;
    }

    /**
     * @return The number of ToeThreads that are not available (Approximation).
     */
    public int getActiveToeCount() {
        int count = 0;
        for (Thread candidate : getToes()) {
            if ((candidate instanceof ToeThread)
                    && ((ToeThread) candidate).isActive()) {
                count++;
            }
        }
        return count;
    }

    /**
     * @return The number of ToeThreads. This may include killed ToeThreads
     * that were not replaced.
     */
    public int getToeCount() {
        int count = 0;
        for (Thread candidate : getToes()) {
            if (candidate instanceof ToeThread) {
                count++;
            }
        }
        return count;
    }

    /**
     * Takes a snapshot of the threads in this group.
     *
     * <p>{@link #activeCount()} is only an estimate and
     * {@link #enumerate(Thread[])} silently ignores threads that do not fit
     * into the supplied array. An array that comes back completely full may
     * therefore be truncated, in which case the snapshot is retaken with a
     * larger array.
     *
     * @return An array holding the enumerated threads; unused trailing slots
     *         are null, so callers must tolerate null entries.
     */
    private Thread[] getToes() {
        int capacity = activeCount() + ENUMERATION_HEADROOM;
        while (true) {
            Thread[] toes = new Thread[capacity];
            int copied = this.enumerate(toes);
            if (copied < toes.length) {
                // at least one slot left over: nothing was dropped
                return toes;
            }
            capacity = Math.max(activeCount() + ENUMERATION_HEADROOM,
                    capacity * 2);
        }
    }

    /**
     * Change the number of ToeThreads. Missing threads are started; surplus
     * threads are asked to retire.
     *
     * @param newsize The new number of ToeThreads.
     */
    public void setSize(int newsize) {
        targetSize = newsize;
        int difference = newsize - getToeCount();
        if (difference > 0) {
            // must create threads
            for (int i = 0; i < difference; i++) {
                startNewThread();
            }
            return;
        }
        // must retire extra threads: the first 'newsize' toes encountered
        // are spared, every later one is retired
        int toesToSpare = newsize;
        for (Thread candidate : getToes()) {
            if (!(candidate instanceof ToeThread)) {
                continue;
            }
            if (toesToSpare > 0) {
                toesToSpare--;
                continue; // this toe is spared
            }
            ((ToeThread) candidate).retire();
        }
    }

    /**
     * Kills specified thread. Killed thread can be optionally replaced with a
     * new thread.
     *
     * <p><b>WARNING:</b> This operation should be used with great care. It may
     * destabilize the crawler.
     *
     * @param threadNumber Serial number of the thread to kill
     * @param replace If true then a new thread will be created to take the
     *                killed threads place. Otherwise the total number of
     *                threads will decrease by one. No replacement is started
     *                when no thread carries the given serial number.
     */
    public void killThread(int threadNumber, boolean replace) {
        boolean killed = false;
        for (Thread candidate : getToes()) {
            if (!(candidate instanceof ToeThread)) {
                continue;
            }
            ToeThread toe = (ToeThread) candidate;
            if (toe.getSerialNumber() == threadNumber) {
                toe.kill();
                killed = true;
            }
        }
        // Only replace a toe that was actually killed; otherwise the pool
        // would grow by one for an unknown serial number.
        if (replace && killed) {
            startNewThread();
        }
    }

    /**
     * Creates and starts one ToeThread with the next serial number and the
     * default toe priority.
     */
    private synchronized void startNewThread() {
        ToeThread newThread = new ToeThread(this, nextSerialNumber++);
        newThread.setPriority(DEFAULT_TOE_PRIORITY);
        newThread.start();
    }

    /**
     * @return Instance of CrawlController, or null once {@link #cleanup()}
     *         has been called.
     */
    public CrawlController getController() {
        return controller;
    }

    //
    // Reporter implementation
    //

    /**
     * Writes a header (date, job name, toe counts) followed by the full
     * report of every ToeThread in the pool.
     *
     * @param writer Destination of the report.
     */
    @Override
    public void reportTo(PrintWriter writer) {
        writer.print("Toe threads report - "
                + ArchiveUtils.get12DigitDate() + "\n");
        // cleanup() nulls the controller; read it once and guard so a late
        // report request does not fail with a NullPointerException
        CrawlController current = this.controller;
        if (current != null) {
            writer.print(" Job being crawled: "
                    + current.getMetadata().getJobName() + "\n");
        } else {
            writer.print(" Job being crawled: (unavailable)\n");
        }
        writer.print(" Number of toe threads in pool: " + getToeCount() + " ("
                + getActiveToeCount() + " active)\n\n");
        for (Thread candidate : getToes()) {
            if (candidate instanceof ToeThread) {
                ((ToeThread) candidate).reportTo(writer);
            }
        }
    }

    /**
     * Writes a one-line summary of the toe counts, then a legend line and
     * one short report line per ToeThread.
     *
     * @param writer Destination of the report.
     */
    public void compactReportTo(PrintWriter writer) {
        writer.print(getToeCount() + " threads (" + getActiveToeCount()
                + " active)\n");
        boolean legendWritten = false;
        // TODO: sort by activity: those with curi the longest at front
        for (Thread candidate : getToes()) {
            if (!(candidate instanceof ToeThread)) {
                continue;
            }
            ToeThread tt = (ToeThread) candidate;
            if (!legendWritten) {
                // the legend is emitted once, ahead of the first toe line
                writer.println(tt.shortReportLegend());
                legendWritten = true;
            }
            tt.shortReportLineTo(writer);
        }
    }

    /**
     * Builds a summary of the pool from a single snapshot of its threads.
     *
     * @return An insertion-ordered map with the keys "toeCount" (number of
     *         ToeThreads), "steps" and "processors" (lists of
     *         "count name" strings in the order returned by
     *         {@code getSortedByCounts()}).
     */
    @Override
    public Map<String, Object> shortReportMap() {
        Histotable<Object> steps = new Histotable<Object>();
        Histotable<Object> processors = new Histotable<Object>();
        // count and tally in one pass so the count matches the histograms
        int toeCount = 0;
        for (Thread candidate : getToes()) {
            if (!(candidate instanceof ToeThread)) {
                continue;
            }
            ToeThread tt = (ToeThread) candidate;
            toeCount++;
            steps.tally(tt.getStep().toString());
            String currentProcessorName = tt.getCurrentProcessorName();
            if (StringUtils.isEmpty(currentProcessorName)) {
                currentProcessorName = "noActiveProcessor";
            }
            processors.tally(currentProcessorName);
        }

        Map<String, Object> data = new LinkedHashMap<String, Object>();
        data.put("toeCount", toeCount);

        LinkedList<String> stepLines = new LinkedList<String>();
        for (Entry<?, Long> step : steps.getSortedByCounts()) {
            stepLines.add(step.getValue() + " " + step.getKey());
        }
        data.put("steps", stepLines);

        LinkedList<String> processorLines = new LinkedList<String>();
        for (Entry<?, Long> proc : processors.getSortedByCounts()) {
            processorLines.add(proc.getValue() + " " + proc.getKey());
        }
        data.put("processors", processorLines);
        return data;
    }

    /**
     * Writes a single-line summary: the toe count, the first two step
     * tallies (followed by ", etc..." when there are more), and all
     * processor tallies. Nothing beyond the count is written when there are
     * no step tallies.
     *
     * @param w Destination of the line.
     */
    @Override
    public void shortReportLineTo(PrintWriter w) {
        Map<String, Object> map = shortReportMap();
        w.print(map.get("toeCount"));
        w.print(" threads: ");

        // safe: shortReportMap() stores LinkedList<String> under "steps"
        @SuppressWarnings("unchecked")
        LinkedList<String> sortedSteps = (LinkedList<String>) map.get("steps");
        Iterator<String> stepIter = sortedSteps.iterator();
        if (!stepIter.hasNext()) {
            return;
        }
        w.print(stepIter.next());
        if (stepIter.hasNext()) {
            w.print(", ");
            w.print(stepIter.next());
            if (stepIter.hasNext()) {
                w.print(", etc...");
            }
        }
        w.print("; ");

        // safe: shortReportMap() stores LinkedList<String> under "processors"
        @SuppressWarnings("unchecked")
        LinkedList<String> sortedProcesses =
                (LinkedList<String>) map.get("processors");
        Iterator<String> procIter = sortedProcesses.iterator();
        if (procIter.hasNext()) {
            w.print(procIter.next());
            while (procIter.hasNext()) {
                w.print(", ");
                w.print(procIter.next());
            }
        }
    }

    /**
     * @return The legend describing the line written by
     *         {@link #shortReportLineTo(PrintWriter)}.
     */
    @Override
    public String shortReportLegend() {
        return "total: mostCommonStateTotal secondMostCommonStateTotal";
    }

    /**
     * Blocks, polling once per second, until no thread enumerated from this
     * group reports itself as not alive.
     *
     * NOTE(review): the name suggests waiting for the threads to finish,
     * while the loop condition checks for aliveness; the original condition
     * is kept unchanged - confirm the intended semantics with callers.
     *
     * @throws IllegalStateException if the calling thread is interrupted
     *         while sleeping; the interrupt status is restored first.
     */
    public void waitForAll() {
        try {
            while (!isAllAlive(getToes())) {
                Thread.sleep(1000);
            }
        } catch (InterruptedException e) {
            // keep the interruption visible to code further up the stack
            Thread.currentThread().interrupt();
            throw new IllegalStateException(e);
        }
    }

    /**
     * @param threads Threads to inspect; null entries are skipped.
     * @return false as soon as one non-null thread is not alive, otherwise
     *         true (including for an empty or all-null array).
     */
    private static boolean isAllAlive(Thread[] threads) {
        for (Thread t : threads) {
            if ((t != null) && (!t.isAlive())) {
                return false;
            }
        }
        return true;
    }
}