package feed.parser;
import java.text.ParseException;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlTransient;
import javax.xml.bind.annotation.XmlType;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.caudexorigo.text.HtmlStripper;
import org.caudexorigo.time.RFC822;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
/**
 * One entry (item) of a parsed feed.
 *
 * <p>
 * Most textual values are kept in a string map keyed by attribute name and
 * are exposed through typed accessors. The accessors are annotated for both
 * JAXB and Jackson serialisation. Two entries are considered equal when
 * their resolved links (see {@link #getLink()}) are equal.
 *
 * <p>
 * Instances are mutable and perform no synchronisation of their own.
 */
@JsonIgnoreProperties(ignoreUnknown = true)
@JsonPropertyOrder({ "link", "pubDate", "title", "body", "enclosure", "author", "categories", "guid" })
@XmlType(propOrder = { "link", "rssPubDate", "title", "body", "enclosure", "author", "categories", "guid" })
public class FeedEntry
{
    /** Attribute keys read by the accessors of this class. */
    private static final String KEY_AUTHOR = "author";
    private static final String KEY_BODY = "body";
    private static final String KEY_CONTENT_ENCODED = "content:encoded";
    private static final String KEY_LINK = "link";
    private static final String KEY_ORIG_LINK = "origlink";
    private static final String KEY_PUB_DATE = "pubdate";
    private static final String KEY_TITLE = "title";

    /**
     * Finds the {@code src} value of the first {@code <img>} tag in a body.
     * The match is confined to a single tag ({@code [^>]}), is
     * case-insensitive, tolerates line breaks between attributes, accepts
     * single or double quotes, and requires whitespace before {@code src} so
     * that attributes such as {@code data-src} are not picked up. The URL is
     * capture group 2 (group 1 is the opening quote).
     */
    private static final Pattern IMG_SRC_PATTERN = Pattern.compile("<img(?=\\s)[^>]*?\\ssrc\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    /** Raw attribute values, keyed by attribute name. */
    private final Map<String, String> attr;
    /** Explicitly assigned guid, or {@code null} when none was set. */
    private Guid guid;
    /** Explicitly assigned enclosure, or {@code null} when none was set. */
    private Enclosure enclosure;
    /** Trimmed, non-blank categories of this entry. */
    private Set<String> categorySet;
    /** Whether {@link #cleanBody} holds a value computed from the current raw body. */
    private boolean cleanBodyInitialized = false;
    /** Cached HTML-stripped body; only meaningful while {@link #cleanBodyInitialized} is true. */
    private String cleanBody = null;
    /** When true, {@link #getBody()} returns the HTML-stripped body. */
    private boolean stripHtml;

    /**
     * Creates an entry that returns its body unmodified.
     */
    public FeedEntry()
    {
        this(false);
    }

    /**
     * Creates an entry.
     *
     * @param stripHtml
     *            {@code true} to have {@link #getBody()} return the body with
     *            HTML removed and to let {@link #getEnclosure()} derive an
     *            enclosure from an image in the body
     */
    public FeedEntry(boolean stripHtml)
    {
        this.stripHtml = stripHtml;
        attr = new HashMap<String, String>();
        categorySet = new HashSet<String>();
    }

    /**
     * Adds a category, trimmed. Blank or {@code null} values are ignored.
     *
     * @param category
     *            the category to add
     */
    public void addCategory(String category)
    {
        if (StringUtils.isNotBlank(category))
        {
            categorySet.add(category.trim());
        }
    }

    /**
     * Compares entries by their resolved link only.
     *
     * @param obj
     *            the object to compare with
     * @return {@code true} when {@code obj} is a {@code FeedEntry} of the same
     *         class whose {@link #getLink()} equals this entry's (both
     *         {@code null} counts as equal)
     */
    @Override
    public boolean equals(Object obj)
    {
        if (this == obj)
        {
            return true;
        }
        if ((obj == null) || (getClass() != obj.getClass()))
        {
            return false;
        }

        String mine = getLink();
        String theirs = ((FeedEntry) obj).getLink();
        return (mine == null) ? (theirs == null) : mine.equals(theirs);
    }

    /**
     * @return the trimmed author, or {@code null} when absent or blank
     */
    @XmlElement(name = "author")
    public String getAuthor()
    {
        return StringUtils.trimToNull(attr.get(KEY_AUTHOR));
    }

    /**
     * @return the HTML-stripped body when HTML stripping is enabled, the raw
     *         body otherwise; {@code null} when there is no body
     */
    @XmlElement(name = "description")
    public String getBody()
    {
        return stripHtml ? getCleanBody() : getRawBody();
    }

    /**
     * @return a new set holding this entry's categories; changes to the
     *         returned set do not affect the entry
     */
    @XmlElement(name = "category")
    public Set<String> getCategories()
    {
        return new HashSet<String>(categorySet);
    }

    /**
     * Returns the raw body with HTML entities unescaped and markup stripped.
     * The result is computed once and cached until the body changes.
     *
     * @return the stripped body, or {@code null} when there is no body or
     *         nothing remains after stripping
     */
    private String getCleanBody()
    {
        if (!cleanBodyInitialized)
        {
            String raw = getRawBody();
            // Do not hand null to the stripper; a missing body is simply null.
            cleanBody = (raw == null) ? null : StringUtils.trimToNull(HtmlStripper.strip(StringEscapeUtils.unescapeHtml4(raw)));
            cleanBodyInitialized = true;
        }
        return cleanBody;
    }

    /**
     * Returns the enclosure of this entry.
     *
     * <p>
     * When no enclosure was set explicitly and HTML stripping is enabled, an
     * enclosure is derived from the first {@code <img>} tag of the raw body,
     * provided {@code ImageMimeTable} reports a content type for its URL. The
     * derived enclosure is not stored.
     *
     * @return the explicit or derived enclosure, or {@code null} when neither
     *         is available
     */
    @XmlElement(name = "enclosure")
    public Enclosure getEnclosure()
    {
        if ((enclosure != null) || !stripHtml)
        {
            return enclosure;
        }

        String raw = getRawBody();
        if (StringUtils.isNotBlank(raw))
        {
            Matcher matcher = IMG_SRC_PATTERN.matcher(raw);
            if (matcher.find())
            {
                String imageUrl = matcher.group(2);
                String imageType = ImageMimeTable.getContentType(imageUrl);
                if (StringUtils.isNotBlank(imageType))
                {
                    return new Enclosure(1, imageType, imageUrl);
                }
            }
        }
        return enclosure;
    }

    /**
     * @return the explicitly set guid, or otherwise a new guid built from
     *         {@link #getLink()} with the permalink flag set to {@code true}
     */
    @XmlElement(name = "guid")
    public Guid getGuid()
    {
        if (guid == null)
        {
            return new Guid(getLink(), true);
        }
        return guid;
    }

    /**
     * Resolves the link of this entry. The first non-blank candidate wins, in
     * this order: the {@code origlink} attribute, the {@code link} attribute,
     * the value of the guid when the guid is flagged as a permalink.
     *
     * @return the trimmed link, or {@code null} when no candidate is available
     */
    @XmlElement(name = "link")
    public String getLink()
    {
        return resolveLink();
    }

    /**
     * @return the publication date parsed from the {@code pubdate} attribute
     *         by {@code DateParser}, or {@code null} when the attribute is
     *         absent or blank
     */
    @XmlTransient
    public Date getPubDate()
    {
        String raw = attr.get(KEY_PUB_DATE);
        if (StringUtils.isNotBlank(raw))
        {
            return DateParser.parse(raw);
        }
        return null;
    }

    /**
     * @return the {@code content:encoded} attribute when non-blank, otherwise
     *         the {@code body} attribute when non-blank, otherwise
     *         {@code null}; the value is returned unmodified
     */
    @JsonIgnore
    @XmlTransient
    public String getRawBody()
    {
        String encoded = attr.get(KEY_CONTENT_ENCODED);
        if (StringUtils.isNotBlank(encoded))
        {
            return encoded;
        }

        String body = attr.get(KEY_BODY);
        if (StringUtils.isNotBlank(body))
        {
            return body;
        }
        return null;
    }

    /**
     * @return the publication date formatted by {@code RFC822}, or
     *         {@code null} when there is no publication date or it cannot be
     *         formatted
     */
    @JsonIgnore
    @XmlElement(name = "pubDate")
    public String getRssPubDate()
    {
        // Resolve once: every call to getPubDate() re-parses the stored text.
        Date pubDate = getPubDate();
        if (pubDate == null)
        {
            return null;
        }

        try
        {
            return RFC822.format(pubDate);
        }
        catch (ParseException ignored)
        {
            // Best effort: an unformattable date is reported as absent.
            return null;
        }
    }

    /**
     * @return the title with HTML entities unescaped and markup stripped, or
     *         {@code null} when absent or blank after stripping
     */
    @XmlElement(name = "title")
    public String getTitle()
    {
        String raw = attr.get(KEY_TITLE);
        if (raw == null)
        {
            // Do not hand null to the stripper; a missing title is simply null.
            return null;
        }
        return StringUtils.trimToNull(HtmlStripper.strip(StringEscapeUtils.unescapeHtml4(raw)));
    }

    /**
     * @return a hash derived from {@link #getLink()} only, consistent with
     *         {@link #equals(Object)}
     */
    @Override
    public int hashCode()
    {
        String link = getLink();
        return 31 + ((link == null) ? 0 : link.hashCode());
    }

    /**
     * Picks the link from the available candidates; see {@link #getLink()}
     * for the order.
     */
    private String resolveLink()
    {
        String origLink = attr.get(KEY_ORIG_LINK);
        if (StringUtils.isNotBlank(origLink))
        {
            return origLink.trim();
        }

        String link = attr.get(KEY_LINK);
        if (StringUtils.isNotBlank(link))
        {
            return link.trim();
        }

        if ((guid != null) && guid.isPermaLink() && StringUtils.isNotBlank(guid.getValue()))
        {
            return guid.getValue().trim();
        }
        return null;
    }

    /**
     * Drops the cached stripped body so that it is recomputed from the
     * current raw body on the next read.
     */
    private void invalidateCleanBody()
    {
        cleanBodyInitialized = false;
        cleanBody = null;
    }

    /**
     * Stores an attribute value. The call is ignored when the key or the
     * value is blank or {@code null}.
     *
     * @param key
     *            the attribute name, stored as given
     * @param value
     *            the attribute value, stored as given
     */
    public void setAttribute(String key, String value)
    {
        if (StringUtils.isNotBlank(key) && StringUtils.isNotBlank(value))
        {
            attr.put(key, value);
            // The key may be one of the body attributes; never serve a stale stripped body.
            invalidateCleanBody();
        }
    }

    /**
     * Sets the author. Blank or {@code null} values are ignored.
     *
     * @param author
     *            the author
     */
    protected void setAuthor(String author)
    {
        if (StringUtils.isNotBlank(author))
        {
            attr.put(KEY_AUTHOR, author);
        }
    }

    /**
     * Sets the {@code body} attribute. Blank or {@code null} values are
     * ignored.
     *
     * @param body
     *            the raw body
     */
    protected void setBody(String body)
    {
        if (StringUtils.isNotBlank(body))
        {
            attr.put(KEY_BODY, body);
            invalidateCleanBody();
        }
    }

    /**
     * Replaces the categories of this entry with a copy of the given set, so
     * later changes to {@code cat} do not affect the entry and an
     * unmodifiable argument does not break {@link #addCategory(String)}. A
     * {@code null} argument is ignored.
     *
     * @param cat
     *            the new categories
     */
    public void setCategories(Set<String> cat)
    {
        if (cat != null)
        {
            categorySet = new HashSet<String>(cat);
        }
    }

    /**
     * Sets the explicit enclosure; {@code null} clears it.
     *
     * @param enclosure
     *            the enclosure
     */
    public void setEnclosure(Enclosure enclosure)
    {
        this.enclosure = enclosure;
    }

    /**
     * Sets the explicit guid. A {@code null} argument is ignored.
     *
     * @param guid
     *            the guid
     */
    public void setGuid(Guid guid)
    {
        if (guid != null)
        {
            this.guid = guid;
        }
    }

    /**
     * Sets the {@code link} attribute. Blank or {@code null} values are
     * ignored.
     *
     * @param link
     *            the link
     */
    public void setLink(String link)
    {
        if (StringUtils.isNotBlank(link))
        {
            attr.put(KEY_LINK, link);
        }
    }

    /**
     * Stores the publication date as text formatted by {@code RFC822}. A
     * {@code null} argument is ignored.
     *
     * @param pubdate
     *            the publication date
     * @throws RuntimeException
     *             wrapping the {@code ParseException} when the date cannot be
     *             formatted
     */
    public void setPubDate(Date pubdate)
    {
        if (pubdate != null)
        {
            try
            {
                attr.put(KEY_PUB_DATE, RFC822.format(pubdate));
            }
            catch (ParseException e)
            {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Sets the {@code title} attribute. Blank or {@code null} values are
     * ignored.
     *
     * @param title
     *            the raw title
     */
    public void setTitle(String title)
    {
        if (StringUtils.isNotBlank(title))
        {
            attr.put(KEY_TITLE, title);
        }
    }

    /**
     * Switches HTML stripping on or off. The misspelled method name is kept
     * as is for compatibility with existing callers.
     *
     * @param stripHtml
     *            {@code true} to strip HTML from the body
     */
    protected void setStripHhtml(boolean stripHtml)
    {
        this.stripHtml = stripHtml;
    }

    /**
     * @return a one-line description of this entry, prefixed with a line
     *         break and a tab
     */
    @Override
    public String toString()
    {
        return String.format("\n\tFeedEntry [author=%s, guid=%s, link=%s, categories=%s, pubDate=%s, title=%s, body=%s, enclosure=%s]", getAuthor(), getGuid(), getLink(), getCategories(), getPubDate(), getTitle(), getBody(), getEnclosure());
    }
}