package us.codecraft.webmagic;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
/**
* <div class="zh">
* Request对象封装了待抓取的url信息。<br/>
* 在PageProcessor中,Request对象可以通过{@link us.codecraft.webmagic.Page#getRequest()} 获取。<br/>
* <br/>
* Request对象包含一个extra属性,可以写入一些必须的上下文,这个特性在某些场合会有用。<br/>
* <pre>
* Example:
* 抓取<a href="${link}">${linktext}</a>时,希望提取链接link,并保存linktext的信息。
* 在上一个页面:
* public void process(Page page){
* Request request = new Request(link,linktext);
* page.addTargetRequest(request)
* }
* 在下一个页面:
* public void process(Page page){
* String linktext = (String)page.getRequest().getExtra()[0];
* }
* </pre>
* </div>
*
* @author code4crafter@gmail.com <br>
* Date: 13-4-21
* Time: 上午11:37
*/
public class Request implements Serializable {
private static final long serialVersionUID = 2062192774891352043L;
private String url;
/**
* 额外参数,可以保存一些需要的上下文信息
*/
private Map<String, Object> extras;
private double priority;
public Request() {
}
/**
* 构建一个request对象
*
* @param url 必须参数,待抓取的url
*/
public Request(String url) {
this.url = url;
}
public double getPriority() {
return priority;
}
/**
* 设置优先级,用于URL队列排序<br>
* 需扩展Scheduler<br>
* 目前还没有对应支持优先级的Scheduler实现 =。= <br>
* @param priority 优先级,越大则越靠前
* @return this
*/
public Request setPriority(double priority) {
this.priority = priority;
return this;
}
public Object getExtra(String key) {
if (extras == null) {
return null;
}
return extras.get(key);
}
public Request putExtra(String key, Object value) {
if (extras == null) {
extras = new HashMap<String, Object>();
}
extras.put(key, value);
return this;
}
/**
* 获取待抓取的url
*
* @return url 待抓取的url
*/
public String getUrl() {
return url;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Request request = (Request) o;
if (!url.equals(request.url)) return false;
return true;
}
public Map<String, Object> getExtras() {
return extras;
}
@Override
public int hashCode() {
return url.hashCode();
}
public void setExtras(Map<String, Object> extras) {
this.extras = extras;
}
public void setUrl(String url) {
this.url = url;
}
@Override
public String toString() {
return "Request{" +
"url='" + url + '\'' +
", extras=" + extras +
", priority=" + priority +
'}';
}
}