/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * * University Of Edinburgh (EDINA) * Scotland * * * File Name : RSSv2FeedDetector.java * Author : gwaller * Approver : Gareth Waller * * Notes : * * *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * HISTORY * ------- * * $LastChangedRevision$ * $LastChangedDate$ * $LastChangedBy$ */ package uk.ac.jorum.packager.detector; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.params.HttpMethodParams; import org.apache.log4j.Logger; import org.dspace.content.Bitstream; import org.dspace.content.BitstreamFormat; import org.dspace.content.Bundle; import org.dspace.content.packager.PackageIngester; import org.dspace.core.Constants; import org.dspace.core.Context; import org.jdom.Attribute; import org.jdom.Document; import org.jdom.Element; import org.jdom.Namespace; import org.jdom.input.SAXBuilder; import org.jdom.output.Format; import org.jdom.output.XMLOutputter; import uk.ac.jorum.dspace.utils.BundleUtils; import uk.ac.jorum.packager.RSSv2FeedIngester; import uk.ac.jorum.submit.step.PackageDetectorStep; import uk.ac.jorum.utils.ExceptionLogger; /** * @author gwaller * */ public class RSSv2FeedDetector extends BasePackageDetector { private static Logger log = Logger.getLogger(RSSv2FeedDetector.class); public RSSv2FeedDetector(Bitstream b){ this.setBitstream(b); } public RSSv2FeedDetector(){ super(); } /* (non-Javadoc) * @see uk.ac.jorum.packager.detector.BasePackageDetector#ingesterClass() */ @Override public Class<? extends PackageIngester> ingesterClass() { return RSSv2FeedIngester.class; } public static Document getRssDocument(Bitstream rssUrlBitstream){ try{ return getRssDocument(rssUrlBitstream.getbContext(), rssUrlBitstream.retrieve(), rssUrlBitstream.getBundles()[0]); }catch (Exception e){ ExceptionLogger.logException(log, e); } return null; } public static Document getRssDocument(Context context, InputStream is, Bundle feedBundle){ GetMethod method = null; BufferedReader in = null; try{ // Download the feed // url is the bitstream contents in = new BufferedReader(new InputStreamReader(is)); String url = in.readLine(); // need to replace the feed prefix with the HTTP protocol ie replace feed:// with http:// url = url.replaceAll(PackageDetectorStep.FEED_PREFIX, "http://"); HttpClient client = new HttpClient(); // Create a method instance. method = new GetMethod(url); // Provide custom retry handler is necessary method.getParams() .setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); // Execute the method. int statusCode = client.executeMethod(method); if (statusCode != HttpStatus.SC_OK) { throw new Exception("Method failed: " + method.getStatusLine()); } // Read the response body. byte[] responseBody = method.getResponseBody(); // Store the response in a bitstream so that the ingester can use it later if (feedBundle != null){ BitstreamFormat bs_format = BitstreamFormat.findByShortDescription(context, "Text"); BundleUtils.setBitstreamFromBytes(feedBundle, Constants.FEED_BUNDLE_CONTENTS_NAME, bs_format, responseBody, false); } // Now see if we have a RSS v2.0 document SAXBuilder builder = new SAXBuilder(false); Document xmlDoc = builder.build(new ByteArrayInputStream(responseBody)); //XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); //log.debug("Got RSS DOCUMENT:"); //log.debug(outputPretty.outputString(xmlDoc)); return xmlDoc; } catch (Exception e) { ExceptionLogger.logException(log, e); } finally { // Release the connection. method.releaseConnection(); try{in.close();} catch (Exception e){} } return null; } /* (non-Javadoc) * @see uk.ac.jorum.packager.detector.BasePackageDetector#isValidPackage() */ @Override public boolean isValidPackage() { boolean result = false; boolean isUrl = false; try{ // Download the feed // url is the bitstream contents // Check that the bitstream belongs in the FEED_BUNDLE - if it isn't then we may be looking at raw content ie not a link to a feed! Bundle[] bundles = this.getBitstream().getBundles(); for (Bundle b:bundles){ if (b.getName().equals(Constants.FEED_BUNDLE)){ isUrl = true; break; } } if (isUrl){ Document xmlDoc = getRssDocument(this.getBitstream()); Element root = xmlDoc.getRootElement(); Attribute version = root.getAttribute("version"); if (root.getName().equals("rss") && version != null && version.getValue().equals("2.0")){ result = true; } } } catch (Exception e) { ExceptionLogger.logException(log, e); } return result; } }