package it.sauronsoftware.feed4j;
import it.sauronsoftware.feed4j.bean.Feed;
import it.sauronsoftware.feed4j.bean.FeedHeader;
import it.sauronsoftware.feed4j.bean.FeedImage;
import it.sauronsoftware.feed4j.bean.FeedItem;
import it.sauronsoftware.feed4j.bean.RawElement;
import it.sauronsoftware.feed4j.bean.RawNode;
import it.sauronsoftware.feed4j.html.HTMLFragmentHelper;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import java.util.Iterator;
import org.dom4j.Document;
import org.dom4j.Element;
/**
* RSS 1.0 feed parser.
*
* @author Carlo Pelliccia
*/
class TypeRSS_1_0 extends TypeAbstract {
/**
* This method parses a dom4j Document representation assuming it is RSS 1.0
* feed.
*
* @param source
* The source URL for the feed.
* @param document
* The dom4j Document representation of the XML representing the
* feed.
* @return The Feed object representing the feed parsed contents.
*/
public static Feed feed(URL source, Document document) {
// Root element.
Element root = document.getRootElement();
// Return value.
Feed feed = new Feed();
// Start from the header.
FeedHeader header = new FeedHeader();
header.setURL(source);
// Search for the "channel" and the "item" elements.
Element channel = null;
for (Iterator i = root.elementIterator(); i.hasNext();) {
Element aux = (Element) i.next();
String nsuri = aux.getNamespaceURI();
if (nsuri.equals(Constants.RSS_1_0_NS_URI)) {
String name = aux.getName();
if (name.equals("item")) {
FeedItem item = handleItem(source, aux);
if (item != null) {
feed.addItem(item);
}
} else if (channel == null && name.equals("channel")) {
channel = aux;
}
}
}
// Channel?
if (channel != null) {
// From channel to header, raw.
populateRawElement(header, channel);
// Parse every raw element and build non-raw ones.
for (int i = 0; i < header.getNodeCount(); i++) {
RawNode node = header.getNode(i);
if (node instanceof RawElement) {
RawElement element = (RawElement) node;
String ensuri = element.getNamespaceURI();
String ename = element.getName();
String evalue = element.getValue();
if (evalue != null) {
// Textual element.
if (ensuri.equals(Constants.RSS_1_0_NS_URI)) {
if (ename.equals("title")) {
header.setTitle(evalue);
} else if (ename.equals("description")) {
header.setDescription(evalue);
} else if (ename.equals("link")) {
try {
header.setLink(new URL(evalue));
} catch (MalformedURLException e) {
;
}
}
} else if (ensuri.equals(Constants.DC_NS_URI)) {
if (evalue != null) {
if (ename.equals("date")) {
try {
header
.setPubDate(Constants.ISO_8601_DATE_FORMAT
.parse(evalue));
} catch (ParseException e) {
;
}
} else if (ename.equals("language")) {
if (isValidLanguageCode(evalue)) {
header.setLanguage(evalue);
}
}
}
}
} else {
if (ensuri.equals(Constants.RSS_1_0_NS_URI)) {
if (ename.equals("image")) {
FeedImage image = handleImage(element);
if (image != null) {
header.setImage(image);
}
}
}
}
}
}
}
// Remove from the header raw elements the handled image.
RawElement[] rawimages = header.getElements(Constants.RSS_1_0_NS_URI,
"image");
for (int i = 0; i < rawimages.length; i++) {
header.removeNode(rawimages[i]);
}
// Add the header
feed.setHeader(header);
// Well done!
return feed;
}
/**
* Items handler.
*/
private static FeedItem handleItem(URL source, Element el) {
FeedItem item = new FeedItem();
// Raw population.
populateRawElement(item, el);
// About -> GUID
String rssGuid = item.getAttributeValue(Constants.RDF_NS_URI, "about");
// Non-raw population.
for (int i = 0; i < item.getNodeCount(); i++) {
RawNode node = item.getNode(i);
if (node instanceof RawElement) {
RawElement element = (RawElement) node;
String ensuri = element.getNamespaceURI();
String ename = element.getName();
String evalue = element.getValue();
if (evalue != null) {
// Textual element
if (ensuri.equals(Constants.RSS_1_0_NS_URI)) {
if (ename.equals("title")) {
item.setTitle(evalue);
} else if (ename.equals("link")) {
try {
item.setLink(new URL(evalue));
} catch (MalformedURLException e) {
;
}
} else if (ename.equals("description")) {
item.setDescriptionAsText(evalue);
item.setDescriptionAsHTML(HTMLFragmentHelper
.fromTextPlainToHTML(evalue));
}
} else if (ensuri.equals(Constants.DC_NS_URI)) {
if (evalue != null) {
if (ename.equals("date")) {
try {
item
.setPubDate(Constants.ISO_8601_DATE_FORMAT
.parse(evalue));
} catch (ParseException e) {
;
}
}
}
}
}
}
}
// Valid?
if (item.getTitle() == null || item.getLink() == null) {
// No!
return null;
}
// GUID for the item.
if (rssGuid == null) {
rssGuid = item.getLink().toExternalForm();
}
item.setGUID(buildGUID(source.hashCode(), rssGuid.hashCode()));
// Well done!
return item;
}
/**
* Channel image handler.
*/
private static FeedImage handleImage(RawElement rawImage) {
FeedImage image = new FeedImage();
// Raw population.
populateRawElement(image, rawImage);
// Non-raw population.
String value = image.getElementValue(Constants.RSS_1_0_NS_URI, "url");
if (value != null) {
try {
image.setURL(new URL(value));
} catch (MalformedURLException e) {
;
}
}
value = image.getElementValue(Constants.RSS_1_0_NS_URI, "title");
if (value != null) {
image.setDescription(value);
}
// Valid?
if (image.getURL() == null) {
return null;
}
// Well done!
return image;
}
}