package com.fpcms.common.webcrawler.htmlparser;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.commons.lang.StringUtils;
/**
 * Mutable bean carrying the data held for one HTML page: title, keywords,
 * description, content, an {@link Anchor}, a source language and tags.
 *
 * <p>The title, keywords, description and content values are stored trimmed
 * (null-safe, via {@code StringUtils.trim}); {@code sourceLang} and
 * {@code tags} are stored exactly as given.
 */
public class HtmlPage {

    private String title;
    private String keywords;
    private String description;
    private String content;
    // NOTE(review): presumably the link this page was reached through — confirm with callers.
    private Anchor anchor;
    private String sourceLang;
    private String tags;

    /** Creates an empty page; every property starts out {@code null}. */
    public HtmlPage() {
    }

    /**
     * Creates a page with the given title and content.
     *
     * <p>Both values are trimmed so that a page built through this constructor
     * holds the same values as one populated through {@link #setTitle(String)}
     * and {@link #setContent(String)}.
     *
     * @param title   page title, may be {@code null}
     * @param content page content, may be {@code null}
     */
    public HtmlPage(String title, String content) {
        this.title = StringUtils.trim(title);
        this.content = StringUtils.trim(content);
    }

    /** @return the anchor attached to this page, or {@code null} if none was set */
    public Anchor getAnchor() {
        return anchor;
    }

    /** @param anchor the anchor to attach to this page, may be {@code null} */
    public void setAnchor(Anchor anchor) {
        this.anchor = anchor;
    }

    /** @return the trimmed title, or {@code null} if none was set */
    public String getTitle() {
        return title;
    }

    /** @param title the title; stored trimmed, {@code null} is kept as {@code null} */
    public void setTitle(String title) {
        this.title = StringUtils.trim(title);
    }

    /** @return the trimmed keywords, or {@code null} if none were set */
    public String getKeywords() {
        return keywords;
    }

    /** @param keywords the keywords; stored trimmed, {@code null} is kept as {@code null} */
    public void setKeywords(String keywords) {
        this.keywords = StringUtils.trim(keywords);
    }

    /** @return the trimmed description, or {@code null} if none was set */
    public String getDescription() {
        return description;
    }

    /** @param description the description; stored trimmed, {@code null} is kept as {@code null} */
    public void setDescription(String description) {
        this.description = StringUtils.trim(description);
    }

    /** @return the trimmed content, or {@code null} if none was set */
    public String getContent() {
        return content;
    }

    /** @param content the content; stored trimmed, {@code null} is kept as {@code null} */
    public void setContent(String content) {
        this.content = StringUtils.trim(content);
    }

    /** @return the source language exactly as it was set, or {@code null} */
    public String getSourceLang() {
        return sourceLang;
    }

    /** @param sourceLang the source language; stored without trimming */
    public void setSourceLang(String sourceLang) {
        this.sourceLang = sourceLang;
    }

    /** @return the tags exactly as they were set, or {@code null} */
    public String getTags() {
        return tags;
    }

    /** @param tags the tags; stored without trimming */
    public void setTags(String tags) {
        this.tags = tags;
    }

    /**
     * A hyperlink described by its {@code href}, link text and {@code title}
     * attribute, plus static helpers for turning hrefs into full URLs.
     *
     * <p>Two anchors are equal when their {@code href} values are equal; the
     * text and title do not take part in {@link #equals(Object)} or
     * {@link #hashCode()}.
     */
    public static class Anchor {

        private static final String HTTP_PREFIX = "http://";
        private static final String HTTPS_PREFIX = "https://";

        private String href;
        private String text;
        private String title;

        /** Creates an anchor with all properties {@code null}. */
        public Anchor() {
        }

        /**
         * Creates an anchor with only an href.
         *
         * @param href link target; stored trimmed so that equality agrees with
         *             anchors populated through {@link #setHref(String)}
         */
        public Anchor(String href) {
            this.href = StringUtils.trim(href);
        }

        /**
         * Creates a fully populated anchor. All three values are trimmed, the
         * same way the corresponding setters do.
         *
         * @param href  link target, may be {@code null}
         * @param text  link text, may be {@code null}
         * @param title value for the {@code title} attribute, may be {@code null}
         */
        public Anchor(String href, String text, String title) {
            this.href = StringUtils.trim(href);
            this.text = StringUtils.trim(text);
            this.title = StringUtils.trim(title);
        }

        /** @return the trimmed href, or {@code null} if none was set */
        public String getHref() {
            return href;
        }

        /** @param href the link target; stored trimmed, {@code null} is kept as {@code null} */
        public void setHref(String href) {
            this.href = StringUtils.trim(href);
        }

        /** @return the trimmed link text, or {@code null} if none was set */
        public String getText() {
            return text;
        }

        /** @param text the link text; stored trimmed, {@code null} is kept as {@code null} */
        public void setText(String text) {
            this.text = StringUtils.trim(text);
        }

        /** @return the trimmed title attribute value, or {@code null} if none was set */
        public String getTitle() {
            return title;
        }

        /** @param title the title attribute value; stored trimmed, {@code null} is kept as {@code null} */
        public void setTitle(String title) {
            this.title = StringUtils.trim(title);
        }

        /** Hash code derived from {@code href} only, matching {@link #equals(Object)}. */
        @Override
        public int hashCode() {
            return 31 + (href == null ? 0 : href.hashCode());
        }

        /**
         * Compares anchors by {@code href} only; the other object must be of
         * exactly the same class.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            Anchor other = (Anchor) obj;
            return href == null ? other.href == null : href.equals(other.href);
        }

        /**
         * Renders this anchor as an {@code <a>} element. The {@code title}
         * attribute is emitted only when the title is not blank, and the href
         * is used as the link text when the text is {@code null}.
         *
         * <p>NOTE(review): href, title and text are concatenated without any
         * HTML escaping, so values containing quotes or markup produce broken
         * or unsafe HTML. Escape them at the call site if the result is
         * rendered from untrusted input.
         */
        @Override
        public String toString() {
            String titleAttr = StringUtils.isBlank(title) ? "" : " title='" + title + "'";
            String linkText = StringUtils.defaultString(text, href);
            return "<a href='" + href + "'" + titleAttr + ">" + linkText + "</a>";
        }

        /**
         * Resolves {@code href} against {@code baseUrl} and cuts the result at
         * the first {@code ';'} (which also drops everything after it, such as
         * a query string following a path parameter).
         *
         * <ul>
         * <li>An href starting with {@code http://} or {@code https://} (any
         *     letter case) is returned as is.</li>
         * <li>A protocol-relative href ({@code //host/path}) takes the scheme
         *     of {@code baseUrl}.</li>
         * <li>A root-relative href ({@code /path}) is appended to the root of
         *     {@code baseUrl}, see {@link #getRootBaseUrl(String)}.</li>
         * <li>Any other href is appended to {@code baseUrl}, separated by
         *     exactly one {@code '/'}. Note that {@code baseUrl} is treated as
         *     a directory here; a trailing file name is not stripped.</li>
         * </ul>
         *
         * <p>When {@code baseUrl} is blank, protocol-relative and root-relative
         * hrefs are returned unchanged because there is nothing to resolve
         * them against.
         *
         * @param baseUrl URL the href was found on
         * @param href    link target to resolve, must not be {@code null}
         * @return the resolved URL without any {@code ';'} suffix
         * @throws NullPointerException if {@code href} is {@code null}, or if
         *         {@code baseUrl} is {@code null} and {@code href} is a plain
         *         relative path
         * @throws RuntimeException if {@code baseUrl} has to be parsed and is
         *         not a well-formed URL
         */
        public static String toFullUrl(String baseUrl, String href) {
            String resolved = toFullUrl0(baseUrl, href);
            return deleteUrlParameter(resolved, ";");
        }

        /**
         * Returns {@code url} up to, but not including, the first occurrence
         * of {@code separator}; the whole string when the separator is absent;
         * {@code null} when {@code url} is {@code null}.
         */
        private static String deleteUrlParameter(String url, String separator) {
            if (url == null) {
                return null;
            }
            int cut = url.indexOf(separator);
            return cut >= 0 ? url.substring(0, cut) : url;
        }

        /**
         * Removes the query string from a URL by cutting it at the first
         * {@code '?'}.
         *
         * @param fullHref URL to strip, may be {@code null}
         * @return the URL without its query string, or {@code null} if
         *         {@code fullHref} is {@code null}
         */
        public static String removeQueryString(String fullHref) {
            return deleteUrlParameter(fullHref, "?");
        }

        /** Resolution step of {@link #toFullUrl(String, String)}, before the {@code ';'} cut. */
        private static String toFullUrl0(String baseUrl, String href) {
            if (isAbsoluteHttpUrl(href)) {
                return href;
            }
            // "//host/path" must be checked before the "/" case below, which it would
            // otherwise match and end up glued behind the base URL's own host.
            if (href.startsWith("//")) {
                URL base = parseBaseUrl(baseUrl);
                return base == null ? href : base.getProtocol() + ":" + href;
            }
            if (href.startsWith("/")) {
                String root = getRootBaseUrl(baseUrl);
                // A blank base URL has no root; avoid concatenating the string "null".
                return root == null ? href : root + href;
            }
            if (baseUrl.endsWith("/")) {
                return baseUrl + href;
            }
            return baseUrl + "/" + href;
        }

        /** Tells whether {@code href} starts with an http or https scheme, ignoring letter case. */
        private static boolean isAbsoluteHttpUrl(String href) {
            return href.regionMatches(true, 0, HTTP_PREFIX, 0, HTTP_PREFIX.length())
                    || href.regionMatches(true, 0, HTTPS_PREFIX, 0, HTTPS_PREFIX.length());
        }

        /**
         * Parses {@code baseUrl}, returning {@code null} when it is blank.
         *
         * @throws RuntimeException wrapping the {@link MalformedURLException}
         *         when {@code baseUrl} is not a well-formed URL
         */
        private static URL parseBaseUrl(String baseUrl) {
            if (StringUtils.isBlank(baseUrl)) {
                return null;
            }
            try {
                return new URL(baseUrl);
            } catch (MalformedURLException e) {
                throw new RuntimeException("MalformedURLException,url:" + baseUrl, e);
            }
        }

        /**
         * Extracts the root of a URL: {@code protocol://host}, followed by
         * {@code :port} when the URL names a port explicitly. Path, query and
         * fragment are dropped.
         *
         * @param baseUrl URL to take the root from
         * @return the root, or {@code null} when {@code baseUrl} is blank
         * @throws RuntimeException wrapping the {@link MalformedURLException}
         *         when {@code baseUrl} is not a well-formed URL
         */
        public static String getRootBaseUrl(String baseUrl) {
            URL url = parseBaseUrl(baseUrl);
            if (url == null) {
                return null;
            }
            StringBuilder root = new StringBuilder();
            root.append(url.getProtocol()).append("://").append(url.getHost());
            // getHost() never includes the port; without this, links found on
            // e.g. http://host:8080/ would be resolved against the default port.
            int port = url.getPort();
            if (port != -1) {
                root.append(':').append(port);
            }
            return root.toString();
        }
    }
}