package com.github.sefler1987.javaworker.worker.linear;
import java.util.HashSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import com.github.sefler1987.javaworker.worker.WorkerTask;
/**
* 给定一个PageURL, 挖掘这个目标URL上的所有URL, 以及更进一层的挖掘...
*/
public class PageURLMiningTask extends WorkerTask<HashSet<String>> {
private static final int NO_PRIORITY = 0;
//一个URL会挖掘出好多相关的任务.比如提供一个网页,则这个网页会有其他的连接地址
//这些地址都是需要挖掘的. 因此由一个targetURL构成的任务, 它有一堆等待挖掘的URLs
private HashSet<String> minedURLs = new HashSet<String>();
//挖掘目标URL
private String targetURL;
public PageURLMiningTask(String targetURL) {
super(NO_PRIORITY);
this.targetURL = targetURL;
}
@Override
public boolean cancel(boolean mayInterruptIfRunning) {
throw new UnsupportedOperationException("Not implemented yet");
}
@Override
public boolean isCancelled() {
throw new UnsupportedOperationException("Not implemented yet");
}
@Override
public synchronized HashSet<String> get() throws InterruptedException, ExecutionException {
if (!isDone()) {
wait();
}
return minedURLs;
}
@Override
public synchronized HashSet<String> get(long timeout, TimeUnit unit) throws InterruptedException,
ExecutionException, TimeoutException {
if (!isDone()) {
wait(unit.toMillis(timeout));
}
return minedURLs;
}
public HashSet<String> getMinedURLs() {
return minedURLs;
}
//当找到一个新的URL时, 要将其加入到待挖掘的URLs中
public void addMinedURL(String url) {
minedURLs.add(url);
}
public String getTargetURL() {
return targetURL;
}
public void setTargetURL(String targetURL) {
this.targetURL = targetURL;
}
}