package it.sauronsoftware.feed4j; import it.sauronsoftware.feed4j.bean.FeedEnclosure; import it.sauronsoftware.feed4j.bean.Feed; import it.sauronsoftware.feed4j.bean.FeedHeader; import it.sauronsoftware.feed4j.bean.FeedImage; import it.sauronsoftware.feed4j.bean.FeedItem; import it.sauronsoftware.feed4j.bean.RawElement; import it.sauronsoftware.feed4j.bean.RawNode; import it.sauronsoftware.feed4j.html.HTMLFragmentHelper; import it.sauronsoftware.feed4j.html.HTMLOptimizer; import java.net.MalformedURLException; import java.net.URL; import java.text.ParseException; import org.dom4j.Document; import org.dom4j.Element; /** * Atom 1.0 feed parser. * * @author Carlo Pelliccia */ class TypeAtom_1_0 extends TypeAbstract { /** * This method parses a dom4j Document representation assuming it is an Atom * 1.0 feed. * * @param source * The source URL for the feed. * @param document * The dom4j Document representation of the XML representing the * feed. * @return The Feed object representing the feed parsed contents. */ public static Feed feed(URL source, Document document) { // Get the root element. Element root = document.getRootElement(); // Root element namespace URI. String nsuri = root.getNamespaceURI(); // Create the return value. Feed feed = new Feed(); // Build the Header object. FeedHeader header = new FeedHeader(); header.setURL(source); // Populate the header with raw elements. populateRawElement(header, root); // The feed language. String language = header.getAttributeValue(Constants.XML_NAMESPACE, "lang"); if (language != null && isValidLanguageCode(language)) { header.setLanguage(language); } // Other interesting informations... for (int i = 0; i < header.getNodeCount(); i++) { RawNode node = header.getNode(i); if (node instanceof RawElement) { RawElement element = (RawElement) node; String ensuri = element.getNamespaceURI(); String ename = element.getName(); if (ensuri.equals(nsuri)) { if (ename.equals("title")) { String title = getValueAsTextPlain(element); if (title != null) { header.setTitle(title); } } else if (ename.equals("link")) { URL link = handleLink(element); if (link != null) { header.setLink(link); } } else if (ename.equals("logo")) { String value = element.getValue(); if (value != null) { try { URL url = new URL(value); FeedImage image = new FeedImage(); image.setURL(url); header.setImage(image); } catch (MalformedURLException e) { ; } } } else if (ename.equals("modified")) { String modified = element.getValue(); if (modified != null) { try { header .setPubDate(Constants.ISO_8601_DATE_FORMAT .parse(modified)); } catch (ParseException e) { ; } } } else if (ename.equals("entry")) { FeedItem item = handleEntry(source, element); if (item != null) { feed.addItem(item); } } } } } // Removes from the header every "entry" raw element. RawElement[] rawentries = header.getElements(nsuri, "entry"); for (int i = 0; i < rawentries.length; i++) { header.removeNode(rawentries[i]); } // Add the header. feed.setHeader(header); // Weel done! return feed; } /** * From an Atom entry to a FeedItem object. */ private static FeedItem handleEntry(URL source, RawElement entryElement) { // Atom "entry" element namespace URI. String nsuri = entryElement.getNamespaceURI(); // Return value. FeedItem item = new FeedItem(); // Raw population. populateRawElement(item, entryElement); // Explore every node of the entry. String summary = null; String content = null; String id = null; for (int i = 0; i < item.getNodeCount(); i++) { RawNode node = item.getNode(i); if (node instanceof RawElement) { RawElement element = (RawElement) node; String ensuri = element.getNamespaceURI(); String ename = element.getName(); if (ensuri.equals(nsuri)) { if (ename.equals("title")) { String title = getValueAsTextPlain(element); if (title != null) { item.setTitle(title); } } else if (ename.equals("link")) { URL link = handleLink(element); if (link != null) { item.setLink(link); } else { FeedEnclosure enclosure = handleEnclosure(element); if (enclosure != null) { item.addEnclosure(enclosure); } } } else if (ename.equals("id")) { String aux = element.getValue(); if (aux != null) { id = aux; } } else if (ename.equals("summary")) { String aux = getValueAsHTML(element); if (aux != null) { summary = aux; } } else if (ename.equals("content")) { String aux = getValueAsHTML(element); if (aux != null) { content = aux; } } else if (ename.equals("issued")) { String modified = element.getValue(); if (modified != null) { try { item.setPubDate(Constants.ISO_8601_DATE_FORMAT .parse(modified)); } catch (ParseException e) { ; } } } else if (ename.equals("modified")) { String modified = element.getValue(); if (modified != null) { try { item.setPubDate(Constants.ISO_8601_DATE_FORMAT .parse(modified)); } catch (ParseException e) { ; } } } } } } // Is this item solid? if (item.getTitle() == null || item.getLink() == null) { // No, return null. return null; } // Work on the item description. if (summary == null) { summary = ""; } if (content == null) { content = ""; } String description = (content.length() > summary.length()) ? content : summary; if (description.length() > 0) { item.setDescriptionAsHTML(description); item.setDescriptionAsText(HTMLFragmentHelper .fromHTMLtoTextPlain(description)); } // GUID generation. if (id == null) { id = item.getLink().toExternalForm(); } item.setGUID(buildGUID(source.hashCode(), id.hashCode())); // Well done! return item; } /** * Atom link analyzer. */ private static URL handleLink(RawElement linkElement) { String nsuri = linkElement.getNamespaceURI(); // 1. Attribute rel="alternate" required. String rel = linkElement.getAttributeValue(nsuri, "rel"); if (rel != null && !rel.equalsIgnoreCase("alternate")) { return null; } // 2. Attribute href="..." required. String href = linkElement.getAttributeValue(nsuri, "href"); if (href == null || href.length() == 0) { return null; } // 3. Valid URL?. try { return new URL(href); } catch (MalformedURLException e) { return null; } } /** * Atom attachments analyzer. */ private static FeedEnclosure handleEnclosure(RawElement linkElement) { String nsuri = linkElement.getNamespaceURI(); // 1. Attribute rel="alternate" required. String rel = linkElement.getAttributeValue(nsuri, "rel"); if (rel == null || !rel.equalsIgnoreCase("enclosure")) { return null; } // 2. Attribute href="..." required. String href = linkElement.getAttributeValue(nsuri, "href"); if (href == null || href.length() == 0) { return null; } // 3. Valid URL?. URL url; try { url = new URL(href); } catch (MalformedURLException e) { return null; } // 4. MIME type? String type = linkElement.getAttributeValue(nsuri, "type"); if (type == null || (type = type.trim()).length() == 0) { return null; } // 5. File size? long length = -1; String lengthStr = linkElement.getAttributeValue(nsuri, "length"); if (lengthStr != null) { try { length = Long.parseLong(lengthStr); } catch (NumberFormatException e) { ; } } // 6. Title? String title = linkElement.getAttributeValue(nsuri, "title"); // Assembla e restituisce l'oggetto. FeedEnclosure enclosure = new FeedEnclosure(); enclosure.setURL(url); enclosure.setMimeType(type); if (length > 0) { enclosure.setLength(length); } if (title != null) { enclosure.setTitle(title); } return enclosure; } /** * Gets an element value as HTML. The element must contain a "type" Atom * attribute. */ private static String getValueAsHTML(RawElement element) { String type = element.getAttributeValue(element.getNamespaceURI(), "type"); if (type == null || type.length() == 0) { type = "text"; } if (type.equals("text")) { String aux = element.getValue(); if (aux != null) { aux = aux.trim(); if (aux.length() > 0) { return HTMLFragmentHelper.fromTextPlainToHTML(aux); } else { return null; } } else { return null; } } else if (type.equals("html")) { String aux = element.getValue(); if (aux != null) { aux = aux.trim(); if (aux.length() > 0) { aux = HTMLOptimizer.optimize(aux); if (aux.length() > 0) { return aux; } else { return null; } } else { return null; } } else { return null; } } else if (type.equals("xhtml")) { String aux = HTMLFragmentHelper.fromXHTMLtoHTML(element); if (aux.length() > 0) { aux = HTMLOptimizer.optimize(aux); if (aux.length() > 0) { return aux; } else { return null; } } else { return null; } } else { return null; } } /** * Gets an element value as plain text. The element must contain a "type" * Atom attribute. */ private static String getValueAsTextPlain(RawElement element) { String type = element.getAttributeValue(element.getNamespaceURI(), "type"); if (type == null || type.length() == 0) { type = "text"; } if (type.equals("text")) { String aux = element.getValue(); if (aux != null) { aux = aux.trim(); if (aux.length() > 0) { return aux; } else { return null; } } else { return null; } } else if (type.equals("html")) { String value = element.getValue(); if (value != null && value.length() > 0) { String aux = HTMLFragmentHelper.fromHTMLtoTextPlain(value); if (aux.length() > 0) { return aux; } else { return null; } } else { return null; } } else if (type.equals("xhtml")) { String aux = HTMLFragmentHelper.fromXHTMLtoTextPlain(element); if (aux.length() > 0) { return aux; } else { return null; } } else { return null; } } }