/** * Copyright (c)2010-2011 Enterprise Website Content Management System(EWCMS), All rights reserved. * EWCMS PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. * http://www.ewcms.com */ package com.ewcms.plugin.crawler.model; import java.io.Serializable; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import javax.persistence.CascadeType; import javax.persistence.Column; import javax.persistence.Entity; import javax.persistence.EnumType; import javax.persistence.Enumerated; import javax.persistence.FetchType; import javax.persistence.GeneratedValue; import javax.persistence.GenerationType; import javax.persistence.Id; import javax.persistence.JoinColumn; import javax.persistence.OneToMany; import javax.persistence.OrderBy; import javax.persistence.SequenceGenerator; import javax.persistence.Table; import org.codehaus.jackson.annotate.JsonIgnore; /** * * 采集器信息 * * <ul> * <li>id:编号</li> * <li>name:名称</li> * <li>description:描述</li> * <li>status:状态(true:启用,false:停用)</li> * <li>maxCount:内容页最大采集数</li> * <li>depth:采集深度</li> * <li>threadCount:采集线程数</li> * <li>timeOutWait:超时等待时间</li> * <li>dateFormat:发布日期格式</li> * <li>downloadFile:下载内容中的文件</li> * <li>removeHref:移除内容中的链接</li> * <li>removeHtmlTag:移除内容中的HTML标签</li> * <li>isLocal:是否本地(true:只采集标题和URL)</li> * <li>channelId:采集到此频道</li> * <li>baseURI:域名地址</li> * <li>domains:URL层级对象集合</li> * <li>matchBlocks:匹配块对象集合</li> * <li>filterBlocks:过滤块对象集合</li> * <li>htmlType:页面类型</li> * <li>proxy:代理服务器</li> * <li>proxyHost:服务器地址</li> * <li>proxyPort:端口</li> * <li>proxyUserName:用户名</li> * <li>proxyPassWord:密码</li> * <li>encoding:页面编码格式</li> * <li>titleExternal:外部标题(true:自定义内容标题,false:使用html内容中的title标题)</li> * <li>titleRegex:标题表达式</li> * <li>type:采集类型</li> * <li>isImage:图片资源</li> * <li>isFlash:Flash资源</li> * <li>isVideo:视频资源</li> * <li>isAnnex:附件资源<li> * <li>annexType:附件类型</li> * <li>isKey:关键字</li> * <li>keys:关键词</li> * </ul> * * @author wuzhijun * */ @Entity @Table(name = "plugin_crawler_gather") @SequenceGenerator(name = "seq_plugin_crawler_gather", sequenceName = "seq_plugin_crawler_gather_id", allocationSize = 1) public class Gather implements Serializable { private static final long serialVersionUID = -6421132072889992004L; /** * 采集类型枚举 * @author wuzhijun */ public enum Type { CONTENT("内容"), RESOURCE("资源"); private String description; private Type(String description){ this.description = description; } public String getDescription(){ return description; } } @Id @GeneratedValue(generator = "seq_plugin_crawler_gather", strategy = GenerationType.SEQUENCE) @Column(name = "id") private Long id; @Column(name = "name", nullable = false, length = 100) private String name; @Column(name = "description", columnDefinition = "text") private String description; @Column(name = "status") private Boolean status; @Column(name = "max_page") private Long maxPage; @Column(name = "depth") private Integer depth; @Column(name = "threadcount") private Integer threadCount; @Column(name = "timeoutwait") private Integer timeOutWait; @Column(name = "downloadFile") private Boolean downloadFile; @Column(name = "removeHref") private Boolean removeHref; @Column(name = "removeHtmlTag") private Boolean removeHtmlTag; @Column(name = "islocal") private Boolean isLocal; @Column(name = "channel_id") private Integer channelId; @Column(name = "base_uri") private String baseURI; @OneToMany(cascade = CascadeType.ALL, fetch = FetchType.EAGER, targetEntity = Domain.class, orphanRemoval = true) @JoinColumn(name = "gather_id") @OrderBy(value = "level") private Set<Domain> domains = new HashSet<Domain>(); @OneToMany(cascade = CascadeType.ALL, fetch = FetchType.LAZY, targetEntity = MatchBlock.class, orphanRemoval = true) @JoinColumn(name = "gather_id") private List<MatchBlock> matchBlocks = new ArrayList<MatchBlock>(); @OneToMany(cascade = CascadeType.ALL, fetch = FetchType.LAZY, targetEntity = FilterBlock.class, orphanRemoval = true) @JoinColumn(name = "gather_id") private List<FilterBlock> filterBlocks = new ArrayList<FilterBlock>(); @Column(name = "html_type") private String htmlType; @Column(name = "proxy") private Boolean proxy; @Column(name = "proxy_host") private String proxyHost; @Column(name = "proxy_port") private Integer proxyPort; @Column(name = "proxy_username") private String proxyUserName; @Column(name = "proxy_password") private String proxyPassWord; @Column(name = "encoding") private String encoding; @Column(name = "title_external") private Boolean titleExternal; @Column(name = "title_regex") private String titleRegex; @Enumerated(EnumType.STRING) @Column(name = "type") private Type type; @Column(name = "isImage") private Boolean isImage; @Column(name = "isFlash") private Boolean isFlash; @Column(name = "isVideo") private Boolean isVideo; @Column(name = "isAnnex") private Boolean isAnnex; @Column(name = "annexType") private String annexType; @Column(name = "iskey") private Boolean isKey; @Column(name = "keys", columnDefinition = "text") private String keys; public Gather(){ maxPage = -1L; depth = -1; threadCount = 30; timeOutWait = 10; htmlType = "html"; proxy = false; downloadFile = false; removeHref = false; removeHtmlTag = false; encoding = "UTF-8"; titleExternal = false; type = Type.CONTENT; isImage = false; isFlash = false; isVideo = false; isAnnex = false; isLocal = false; } public Long getId() { return id; } public void setId(Long id) { this.id = id; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getDescription() { return description; } public void setDescription(String description) { this.description = description; } public Boolean getStatus() { return status; } public void setStatus(Boolean status) { this.status = status; } public Long getMaxPage() { return maxPage; } public void setMaxPage(Long maxPage) { this.maxPage = maxPage; } public Integer getDepth() { return depth; } public void setDepth(Integer depth) { this.depth = depth; } public Integer getThreadCount() { return threadCount; } public void setThreadCount(Integer threadCount) { this.threadCount = threadCount; } public Integer getTimeOutWait() { return timeOutWait; } public void setTimeOutWait(Integer timeOutWait) { this.timeOutWait = timeOutWait; } public Boolean getDownloadFile() { return downloadFile; } public void setDownloadFile(Boolean downloadFile) { this.downloadFile = downloadFile; } public Boolean getRemoveHref() { return removeHref; } public void setRemoveHref(Boolean removeHref) { this.removeHref = removeHref; } public Boolean getRemoveHtmlTag() { return removeHtmlTag; } public void setRemoveHtmlTag(Boolean removeHtmlTag) { this.removeHtmlTag = removeHtmlTag; } public Boolean getIsLocal() { return isLocal; } public void setIsLocal(Boolean isLocal) { this.isLocal = isLocal; } public Integer getChannelId() { return channelId; } public void setChannelId(Integer channelId) { this.channelId = channelId; } public String getBaseURI() { return baseURI; } public void setBaseURI(String baseURI) { this.baseURI = baseURI; } @JsonIgnore public Set<Domain> getDomains() { return domains; } public void setDomains(Set<Domain> domains) { this.domains = domains; } @JsonIgnore public List<MatchBlock> getMatchBlocks() { return matchBlocks; } public void setMatchBlocks(List<MatchBlock> matchBlocks) { this.matchBlocks = matchBlocks; } @JsonIgnore public List<FilterBlock> getFilterBlocks() { return filterBlocks; } public void setFilterBlocks(List<FilterBlock> filterBlocks) { this.filterBlocks = filterBlocks; } public String getHtmlType() { return htmlType; } public void setHtmlType(String htmlType) { this.htmlType = htmlType; } public Boolean getProxy() { return proxy; } public void setProxy(Boolean proxy) { this.proxy = proxy; } public String getProxyHost() { return proxyHost; } public void setProxyHost(String proxyHost) { this.proxyHost = proxyHost; } public Integer getProxyPort() { return proxyPort; } public void setProxyPort(Integer proxyPort) { this.proxyPort = proxyPort; } public String getProxyUserName() { return proxyUserName; } public void setProxyUserName(String proxyUserName) { this.proxyUserName = proxyUserName; } public String getProxyPassWord() { return proxyPassWord; } public void setProxyPassWord(String proxyPassWord) { this.proxyPassWord = proxyPassWord; } public String getEncoding() { return encoding; } public void setEncoding(String encoding) { this.encoding = encoding; } public Boolean getTitleExternal() { return titleExternal; } public void setTitleExternal(Boolean titleExternal) { this.titleExternal = titleExternal; } public String getTitleRegex() { return titleRegex; } public void setTitleRegex(String titleRegex) { this.titleRegex = titleRegex; } public Type getType() { return type; } public void setType(Type type) { this.type = type; } public Boolean getIsImage() { return isImage; } public void setIsImage(Boolean isImage) { this.isImage = isImage; } public Boolean getIsFlash() { return isFlash; } public void setIsFlash(Boolean isFlash) { this.isFlash = isFlash; } public Boolean getIsVideo() { return isVideo; } public void setIsVideo(Boolean isVideo) { this.isVideo = isVideo; } public Boolean getIsAnnex() { return isAnnex; } public void setIsAnnex(Boolean isAnnex) { this.isAnnex = isAnnex; } public String getAnnexType() { return annexType; } public void setAnnexType(String annexType) { this.annexType = annexType; } public Boolean getIsKey() { return isKey; } public void setIsKey(Boolean isKey) { this.isKey = isKey; } public String getKeys() { return keys; } public void setKeys(String keys) { this.keys = keys; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((id == null) ? 0 : id.hashCode()); return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; Gather other = (Gather) obj; if (id == null) { if (other.id != null) return false; } else if (!id.equals(other.id)) return false; return true; } }