/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.sitemap;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Date;
/**
* Class for generating HTML "sitemaps" which contain links to various pages in
* a DSpace site. This should improve search engine coverage of the DSpace site
* and limit the server load caused by crawlers.
*
* @author Robert Tansley
* @author Stuart Lewis
*/
public class HTMLSitemapGenerator extends AbstractGenerator
{
    /** Stem of URLs sitemaps will eventually appear at */
    private final String indexURLStem;

    /** Tail of URLs sitemaps will eventually appear at; never {@code null} */
    private final String indexURLTail;

    /**
     * Construct an HTML sitemap generator that writes files to the given
     * directory. The generated sitemaps are expected to be exposed at URLs
     * built from the given stem, the sitemap number, and the given tail.
     *
     * @param outputDirIn
     *            Directory to write sitemap files to
     * @param urlStem
     *            start of URL that sitemap files will appear at, e.g.
     *            {@code http://dspace.myu.edu/sitemap?sitemap=}
     * @param urlTail
     *            end of URL that sitemap files will appear at, e.g.
     *            {@code .html} or {@code null} (treated as the empty string)
     */
    public HTMLSitemapGenerator(File outputDirIn, String urlStem, String urlTail)
    {
        super(outputDirIn);
        indexURLStem = urlStem;
        indexURLTail = (urlTail == null ? "" : urlTail);
    }

    /**
     * Build the file name of the sitemap file with the given number.
     *
     * @param number
     *            number of the sitemap file
     * @return {@code "sitemap" + number + ".html"}
     */
    public String getFilename(int number)
    {
        return "sitemap" + number + ".html";
    }

    /**
     * Get the text that opens every generated HTML page: the HTML 4.01
     * Strict doctype, the document head, and the opening of the list that
     * holds the links.
     *
     * @return the leading boilerplate
     */
    public String getLeadingBoilerPlate()
    {
        return "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\n"
                + "<html><head><title>URL List</title></head><body><ul>";
    }

    /**
     * Get the size limit for a single sitemap file.
     *
     * @return 51200 (50 * 1024); presumably a byte count - confirm against
     *         the superclass contract
     */
    public int getMaxSize()
    {
        // 50k
        return 51200;
    }

    /**
     * Get the limit on the number of URLs in a single sitemap file.
     *
     * @return 1000
     */
    public int getMaxURLs()
    {
        return 1000;
    }

    /**
     * Get the text that closes every generated HTML page; it closes the
     * elements opened by {@link #getLeadingBoilerPlate()}.
     *
     * @return the trailing boilerplate
     */
    public String getTrailingBoilerPlate()
    {
        return "</ul></body></html>\n";
    }

    /**
     * Render one URL as an HTML list item containing a link whose target and
     * visible text are both the URL. The URL is HTML-escaped so that
     * characters such as {@code &} or {@code "} cannot produce invalid markup
     * or break out of the {@code href} attribute.
     *
     * @param url
     *            the URL to link to
     * @param lastMod
     *            last modification date; not used by the HTML format
     * @return the list item markup, terminated by a newline
     */
    public String getURLText(String url, Date lastMod)
    {
        // String.valueOf keeps the previous rendering of a null URL ("null")
        // instead of failing inside the escaping helper.
        String safeUrl = escapeForHtml(String.valueOf(url));

        StringBuilder urlText = new StringBuilder();
        urlText.append("<li><a href=\"").append(safeUrl).append("\">")
                .append(safeUrl).append("</a></li>\n");
        return urlText.toString();
    }

    /**
     * Indicate whether generated files should be compressed.
     *
     * @return always {@code false}
     */
    public boolean useCompression()
    {
        return false;
    }

    /**
     * Get the file name of the sitemap index.
     *
     * @return {@code "sitemap_index.html"}
     */
    public String getIndexFilename()
    {
        return "sitemap_index.html";
    }

    /**
     * Write the sitemap index page: one list item per sitemap, numbered from
     * 0 to {@code sitemapCount - 1}, each linking to the URL formed from the
     * configured stem, the sitemap number, and the configured tail. The link
     * target is HTML-escaped before being written.
     *
     * @param output
     *            stream to write the index to
     * @param sitemapCount
     *            number of sitemaps to list
     * @throws IOException
     *             declared by the method signature; this implementation does
     *             not throw it itself
     */
    public void writeIndex(PrintStream output, int sitemapCount)
            throws IOException
    {
        output.println(getLeadingBoilerPlate());

        for (int i = 0; i < sitemapCount; i++)
        {
            String href = escapeForHtml(indexURLStem + i + indexURLTail);
            output.print("<li><a href=\"" + href + "\">sitemap " + i
                    + "</a></li>\n");
        }

        output.println(getTrailingBoilerPlate());
    }

    /**
     * Escape the characters that are significant in HTML text and in
     * double- or single-quoted attribute values.
     *
     * @param text
     *            text to escape; must not be {@code null}
     * @return the text with {@code &}, {@code <}, {@code >}, {@code "} and
     *         {@code '} replaced by character references
     */
    private static String escapeForHtml(String text)
    {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++)
        {
            char c = text.charAt(i);
            switch (c)
            {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }
}