/* * SitemapsOrgGenerator.java * * Version: $Revision: 3733 $ * * Date: $Date: 2009-04-24 03:52:11 +0000 (Fri, 24 Apr 2009) $ * * Copyright (c) 2002-2009, The DSpace Foundation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the DSpace Foundation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ package org.dspace.app.sitemap; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; /** * Class for generating <a href="http://sitemaps.org/">Sitemaps</a> to improve * search engine coverage of the DSpace site and limit the server load caused by * crawlers. * * @author Robert Tansley * @author Stuart Lewis */ public class SitemapsOrgGenerator extends AbstractGenerator { /** Stem of URLs sitemaps will eventually appear at */ private String indexURLStem; /** Tail of URLs sitemaps will eventually appear at */ private String indexURLTail; /** The correct date format */ private DateFormat w3dtfFormat = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss'Z'"); /** * Construct a sitemaps.org protocol sitemap generator, writing files to the * given directory, and with the sitemaps eventually exposed at starting * with the given URL stem and tail. * * @param outputDirIn * Directory to write sitemap files to * @param urlStem * start of URL that sitemap files will appear at, e.g. * {@code http://dspace.myu.edu/sitemap?sitemap=} * @param urlTail * end of URL that sitemap files will appear at, e.g. * {@code .html} or {@code null} */ public SitemapsOrgGenerator(File outputDirIn, String urlStem, String urlTail) { super(outputDirIn); indexURLStem = urlStem; indexURLTail = (urlTail == null ? "" : urlTail); } public String getFilename(int number) { return "sitemap" + number + ".xml.gz"; } public String getLeadingBoilerPlate() { return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"; } public int getMaxSize() { // 10 Mb return 10485760; } public int getMaxURLs() { return 50000; } public String getTrailingBoilerPlate() { return "</urlset>"; } public String getURLText(String url, Date lastMod) { StringBuffer urlText = new StringBuffer(); urlText.append("<url><loc>").append(url).append("</loc>"); if (lastMod != null) { urlText.append("<lastmod>").append(w3dtfFormat.format(lastMod)) .append("</lastmod>"); } urlText.append("</url>\n"); return urlText.toString(); } public boolean useCompression() { return true; } public String getIndexFilename() { return "sitemap_index.xml.gz"; } public void writeIndex(PrintStream output, int sitemapCount) throws IOException { String now = w3dtfFormat.format(new Date()); output.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); output .println("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"); for (int i = 0; i < sitemapCount; i++) { output.print("<sitemap><loc>" + indexURLStem + i + indexURLTail + "</loc>"); output.print("<lastmod>" + now + "</lastmod></sitemap>\n"); } output.println("</sitemapindex>"); } }