package it.sauronsoftware.feed4j;
import it.sauronsoftware.feed4j.bean.FeedEnclosure;
import it.sauronsoftware.feed4j.bean.Feed;
import it.sauronsoftware.feed4j.bean.FeedHeader;
import it.sauronsoftware.feed4j.bean.FeedImage;
import it.sauronsoftware.feed4j.bean.FeedItem;
import it.sauronsoftware.feed4j.bean.RawElement;
import it.sauronsoftware.feed4j.bean.RawNode;
import it.sauronsoftware.feed4j.html.HTMLFragmentHelper;
import it.sauronsoftware.feed4j.html.HTMLOptimizer;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.QName;
/**
 * RSS 2.0 feed parser.
 *
 * <p>
 * The parser first copies the XML tree into the raw-element model (through
 * {@code populateRawElement}) and then walks the copied children to fill in
 * the typed properties of the header, the items, the enclosures and the
 * channel image. Values that cannot be parsed (malformed URLs, dates or
 * numbers) are skipped on purpose: the corresponding typed property is simply
 * left untouched.
 * </p>
 *
 * @author Carlo Pelliccia
 */
class TypeRSS_2_0 extends TypeAbstract {

    /**
     * This method parses a dom4j Document representation assuming it is RSS 2.0
     * feed.
     *
     * <p>
     * Only children of the "channel" element that live in the same namespace
     * as the document root are interpreted. Raw "item" and "image" elements
     * are removed from the header once they have been handled.
     * </p>
     *
     * @param source
     *            The source URL for the feed.
     * @param document
     *            The dom4j Document representation of the XML representing the
     *            feed.
     * @return The Feed object representing the feed parsed contents.
     */
    public static Feed feed(URL source, Document document) {
        Element root = document.getRootElement();
        Namespace ns = root.getNamespace();
        String nsuri = ns.getURI();

        Feed feed = new Feed();
        FeedHeader header = new FeedHeader();
        header.setURL(source);

        Element channel = root.element(new QName("channel", ns));
        if (channel != null) {
            // Raw population first, then typed values are derived from the
            // raw children.
            populateRawElement(header, channel);
            for (int i = 0; i < header.getNodeCount(); i++) {
                RawNode node = header.getNode(i);
                if (!(node instanceof RawElement)) {
                    continue;
                }
                RawElement element = (RawElement) node;
                if (!element.getNamespaceURI().equals(nsuri)) {
                    // Foreign namespace: kept as raw data only.
                    continue;
                }
                String ename = element.getName();
                String evalue = element.getValue();
                if (evalue != null) {
                    // Textual element.
                    applyChannelText(header, ename, evalue);
                } else if (ename.equals("item")) {
                    FeedItem item = handleItem(source, element);
                    if (item != null) {
                        feed.addItem(item);
                    }
                } else if (ename.equals("image")) {
                    header.setImage(handleImage(element));
                }
            }
        }

        // Items and images have been handled above: drop their raw
        // counterparts from the header.
        RawElement[] rawItems = header.getElements(nsuri, "item");
        for (int i = 0; i < rawItems.length; i++) {
            header.removeNode(rawItems[i]);
        }
        RawElement[] rawImages = header.getElements(nsuri, "image");
        for (int i = 0; i < rawImages.length; i++) {
            header.removeNode(rawImages[i]);
        }

        feed.setHeader(header);
        return feed;
    }

    /**
     * Copies the value of a textual "channel" child into the matching typed
     * property of the header. Unknown element names are ignored, and so are
     * links, dates and language codes that cannot be parsed or validated.
     */
    private static void applyChannelText(FeedHeader header, String ename,
            String evalue) {
        if (ename.equals("title")) {
            header.setTitle(evalue);
        } else if (ename.equals("description")) {
            header.setDescription(evalue);
        } else if (ename.equals("link")) {
            URL link = urlOrNull(evalue);
            if (link != null) {
                header.setLink(link);
            }
        } else if (ename.equals("pubDate")) {
            // NOTE(review): if Constants.RFC_822_DATE_FORMAT is a shared
            // java.text.DateFormat instance it is not safe for concurrent
            // use - confirm against Constants.
            try {
                header.setPubDate(Constants.RFC_822_DATE_FORMAT.parse(evalue));
            } catch (ParseException ignored) {
                // Unparsable date: the publication date is left unset.
            }
        } else if (ename.equals("language")) {
            if (isValidLanguageCode(evalue)) {
                header.setLanguage(evalue);
            }
        }
    }

    /**
     * Item parser.
     *
     * <p>
     * Builds a FeedItem from a raw "item" element. An item is considered valid
     * only when it ends up with both a title and a link; otherwise
     * {@code null} is returned. The link is taken from the "link" element or
     * from a "guid" element flagged {@code isPermaLink="true"}. A "guid"
     * without the {@code isPermaLink} attribute is, as stated by the RSS 2.0
     * specification, a permalink by default: it is used as the link only when
     * the item would otherwise have none.
     * </p>
     *
     * @param source
     *            The source URL of the feed, used to build the item GUID.
     * @param rawItem
     *            The raw "item" element.
     * @return The parsed item, or {@code null} if title or link are missing.
     */
    private static FeedItem handleItem(URL source, RawElement rawItem) {
        String nsuri = rawItem.getNamespaceURI();
        // Build the return value.
        FeedItem item = new FeedItem();
        populateRawElement(item, rawItem);

        // Link candidate coming from a <guid> with no isPermaLink attribute.
        URL implicitPermaLink = null;

        for (int i = 0; i < item.getNodeCount(); i++) {
            RawNode node = item.getNode(i);
            if (!(node instanceof RawElement)) {
                continue;
            }
            RawElement element = (RawElement) node;
            String ensuri = element.getNamespaceURI();
            String ename = element.getName();
            String evalue = element.getValue();

            if (evalue == null) {
                // Non-textual element: only enclosures are of interest.
                if (ename.equals("enclosure") && ensuri.equals(nsuri)) {
                    FeedEnclosure enclosure = handleEnclosure(element);
                    if (enclosure != null) {
                        item.addEnclosure(enclosure);
                    }
                }
                continue;
            }
            if (!ensuri.equals(nsuri)) {
                // Textual element outside the RSS namespace.
                continue;
            }

            if (ename.equals("title")) {
                item.setTitle(evalue);
            } else if (ename.equals("link")) {
                URL link = urlOrNull(evalue);
                if (link != null) {
                    item.setLink(link);
                }
            } else if (ename.equals("description")) {
                String html = HTMLOptimizer.optimize(evalue);
                if (html.length() > 0) {
                    item.setDescriptionAsHTML(html);
                    item.setDescriptionAsText(HTMLFragmentHelper
                            .fromHTMLtoTextPlain(html));
                }
            } else if (ename.equals("comments")) {
                URL comments = urlOrNull(evalue);
                if (comments != null) {
                    item.setComments(comments);
                }
            } else if (ename.equals("guid")) {
                String isPermaLink = element.getAttributeValue(ensuri,
                        "isPermaLink");
                if (isPermaLink == null) {
                    // Attribute omitted: permalink by default (RSS 2.0).
                    // Remembered as a fallback only, so that an explicit
                    // <link> is never replaced by it.
                    URL candidate = urlOrNull(evalue);
                    if (candidate != null) {
                        implicitPermaLink = candidate;
                    }
                } else if (isPermaLink.equals("true")) {
                    URL permaLink = urlOrNull(evalue);
                    if (permaLink != null) {
                        item.setLink(permaLink);
                    }
                }
                item.setGUID(evalue);
            }
        }

        if (item.getLink() == null && implicitPermaLink != null) {
            item.setLink(implicitPermaLink);
        }

        // Valid?
        if (item.getTitle() == null || item.getLink() == null) {
            return null;
        }

        // A GUID for the item: derived from the feed source and from the RSS
        // guid (or from the link when the item declares no guid).
        String rssGuid = item.getGUID();
        if (rssGuid == null) {
            rssGuid = item.getLink().toExternalForm();
        }
        // NOTE(review): java.net.URL.hashCode() is documented as a blocking
        // operation (it may resolve the host name). It is kept here so that
        // generated GUIDs stay the same as before.
        item.setGUID(buildGUID(source.hashCode(), rssGuid.hashCode()));

        // Remove every "enclosure" element from the raw ones, since they have
        // been handled.
        RawElement[] enclosures = item.getElements(nsuri, "enclosure");
        for (int i = 0; i < enclosures.length; i++) {
            item.removeNode(enclosures[i]);
        }
        return item;
    }

    /**
     * Attachments handler.
     *
     * <p>
     * Reads the "url", "type" and "length" attributes of a raw "enclosure"
     * element. The length is set only when it parses to a value greater than
     * zero.
     * </p>
     *
     * @param rawEnclosure
     *            The raw "enclosure" element.
     * @return The parsed enclosure, or {@code null} when it has no valid URL
     *         or no MIME type.
     */
    private static FeedEnclosure handleEnclosure(RawElement rawEnclosure) {
        String nsuri = rawEnclosure.getNamespaceURI();
        FeedEnclosure enclosure = new FeedEnclosure();
        populateRawElement(enclosure, rawEnclosure);

        URL url = urlOrNull(enclosure.getAttributeValue(nsuri, "url"));
        if (url != null) {
            enclosure.setURL(url);
        }

        String mimeType = enclosure.getAttributeValue(nsuri, "type");
        if (mimeType != null) {
            enclosure.setMimeType(mimeType);
        }

        String rawLength = enclosure.getAttributeValue(nsuri, "length");
        if (rawLength != null) {
            long length = -1;
            try {
                length = Long.parseLong(rawLength);
            } catch (NumberFormatException ignored) {
                // Not a number: the length is left unset.
            }
            if (length > 0) {
                enclosure.setLength(length);
            }
        }

        // Solid?
        if (enclosure.getURL() == null || enclosure.getMimeType() == null) {
            return null;
        }
        return enclosure;
    }

    /**
     * Channel image handler.
     *
     * <p>
     * Reads the "url", "description", "width" and "height" children of a raw
     * "image" element. Width and height are set only when they parse to a
     * value greater than zero.
     * </p>
     *
     * @param rawImage
     *            The raw "image" element.
     * @return The parsed image, or {@code null} when it has no valid URL.
     */
    private static FeedImage handleImage(RawElement rawImage) {
        String nsuri = rawImage.getNamespaceURI();
        FeedImage image = new FeedImage();
        populateRawElement(image, rawImage);

        URL url = urlOrNull(image.getElementValue(nsuri, "url"));
        if (url != null) {
            image.setURL(url);
        }

        String description = image.getElementValue(nsuri, "description");
        if (description != null) {
            image.setDescription(description);
        }

        int width = positiveIntOrZero(image.getElementValue(nsuri, "width"));
        if (width > 0) {
            image.setWidth(width);
        }

        int height = positiveIntOrZero(image.getElementValue(nsuri, "height"));
        if (height > 0) {
            image.setHeight(height);
        }

        // Solid?
        if (image.getURL() == null) {
            return null;
        }
        return image;
    }

    /**
     * Parses a URL, returning {@code null} instead of failing when the text is
     * {@code null} or not a well-formed URL.
     */
    private static URL urlOrNull(String spec) {
        if (spec == null) {
            return null;
        }
        try {
            return new URL(spec);
        } catch (MalformedURLException ignored) {
            // Best-effort parsing: a malformed URL is treated as absent.
            return null;
        }
    }

    /**
     * Parses a strictly positive integer, returning 0 when the text is
     * {@code null}, not a number, or not greater than zero.
     */
    private static int positiveIntOrZero(String text) {
        if (text == null) {
            return 0;
        }
        try {
            int parsed = Integer.parseInt(text);
            return parsed > 0 ? parsed : 0;
        } catch (NumberFormatException ignored) {
            // Best-effort parsing: a non-numeric value is treated as absent.
            return 0;
        }
    }
}