/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.sitemap;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.Item;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.CommunityService;
import org.dspace.content.service.ItemService;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
/**
 * Command-line utility for generating HTML and Sitemaps.org protocol Sitemaps,
 * and for pinging search engines with the URL of the generated sitemap.
 *
 * @author Robert Tansley
 * @author Stuart Lewis
 */
public class GenerateSitemaps
{
    /** Logger */
    private static final Logger log = Logger.getLogger(GenerateSitemaps.class);

    private static final CommunityService communityService = ContentServiceFactory.getInstance().getCommunityService();
    private static final CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService();
    private static final ItemService itemService = ContentServiceFactory.getInstance().getItemService();
    private static final ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService();

    /**
     * Command-line entry point. Recognised options:
     * <ul>
     * <li>{@code -h} / {@code --help}: print usage and exit</li>
     * <li>{@code -s} / {@code --no_sitemaps}: do not generate the
     * sitemaps.org protocol sitemap</li>
     * <li>{@code -b} / {@code --no_htmlmap}: do not generate the basic HTML
     * sitemap</li>
     * <li>{@code -a} / {@code --ping_all}: ping the configured search
     * engines</li>
     * <li>{@code -p} / {@code --ping} &lt;url&gt;: ping the given search
     * engine URL</li>
     * </ul>
     * The process always terminates through {@link System#exit(int)}: status
     * 0 on success or help, 1 on a usage error or a malformed {@code -p} URL.
     *
     * @param args
     *            command-line arguments
     * @throws Exception
     *             if sitemap generation or pinging fails
     */
    public static void main(String[] args) throws Exception
    {
        final String usage = GenerateSitemaps.class.getCanonicalName();

        CommandLineParser parser = new PosixParser();
        HelpFormatter hf = new HelpFormatter();

        Options options = new Options();
        options.addOption("h", "help", false, "help");
        options.addOption("s", "no_sitemaps", false,
                "do not generate sitemaps.org protocol sitemap");
        options.addOption("b", "no_htmlmap", false,
                "do not generate a basic HTML sitemap");
        options.addOption("a", "ping_all", false,
                "ping configured search engines");
        options.addOption("p", "ping", true,
                "ping specified search engine URL");

        CommandLine line = null;
        try
        {
            line = parser.parse(options, args);
        }
        catch (ParseException pe)
        {
            hf.printHelp(usage, options);
            System.exit(1);
        }

        if (line.hasOption('h'))
        {
            hf.printHelp(usage, options);
            System.exit(0);
        }

        // No positional arguments are accepted
        if (line.getArgs().length != 0)
        {
            hf.printHelp(usage, options);
            System.exit(1);
        }

        /*
         * Sanity check -- if both sitemap types are suppressed and no ping
         * (-a or -p) was requested, there is nothing to do: print usage.
         */
        boolean nothingToGenerate = line.hasOption('b') && line.hasOption('s');
        boolean nothingToPing = !line.hasOption('a') && !line.hasOption('p');
        if (nothingToGenerate && nothingToPing)
        {
            System.err
                    .println("Nothing to do (no sitemap to generate, no search engines to ping)");
            hf.printHelp(usage, options);
            System.exit(1);
        }

        // Note the negation (CLI options indicate NOT to generate a sitemap)
        if (!line.hasOption('b') || !line.hasOption('s'))
        {
            generateSitemaps(!line.hasOption('b'), !line.hasOption('s'));
        }

        if (line.hasOption('a'))
        {
            pingConfiguredSearchEngines();
        }

        if (line.hasOption('p'))
        {
            try
            {
                pingSearchEngine(line.getOptionValue('p'));
            }
            catch (MalformedURLException me)
            {
                System.err
                        .println("Bad search engine URL (include all except sitemap URL)");
                System.exit(1);
            }
        }

        System.exit(0);
    }

    /**
     * Generate sitemap.org protocol and/or basic HTML sitemaps.
     * <p>
     * Every community, collection and item returned by the content services
     * is added as {@code <dspace.url>/handle/<handle>}; items additionally
     * carry their last-modified date. Output is written to the directory
     * named by the {@code sitemap.dir} configuration property.
     *
     * @param makeHTMLMap
     *            if {@code true}, generate an HTML sitemap.
     * @param makeSitemapOrg
     *            if {@code true}, generate a sitemap.org sitemap.
     * @throws SQLException
     *             if a database error occurs.
     * @throws IOException
     *             if an IO error occurs.
     */
    public static void generateSitemaps(boolean makeHTMLMap,
            boolean makeSitemapOrg) throws SQLException, IOException
    {
        String baseURL = configurationService.getProperty("dspace.url");
        String sitemapStem = baseURL + "/sitemap";
        String htmlMapStem = baseURL + "/htmlmap";
        String handleURLStem = baseURL + "/handle/";

        File outputDir = new File(configurationService.getProperty("sitemap.dir"));
        if (!outputDir.exists() && !outputDir.mkdir())
        {
            log.error("Unable to create output directory");
        }

        // A generator stays null when its sitemap type was not requested
        AbstractGenerator html = null;
        AbstractGenerator sitemapsOrg = null;

        if (makeHTMLMap)
        {
            html = new HTMLSitemapGenerator(outputDir, htmlMapStem + "?map=",
                    null);
        }

        if (makeSitemapOrg)
        {
            sitemapsOrg = new SitemapsOrgGenerator(outputDir, sitemapStem
                    + "?map=", null);
        }

        Context c = new Context();
        try
        {
            List<Community> comms = communityService.findAll(c);
            for (Community comm : comms)
            {
                addToSitemaps(html, sitemapsOrg,
                        handleURLStem + comm.getHandle(), null);
            }

            List<Collection> colls = collectionService.findAll(c);
            for (Collection coll : colls)
            {
                addToSitemaps(html, sitemapsOrg,
                        handleURLStem + coll.getHandle(), null);
            }

            Iterator<Item> allItems = itemService.findAll(c);
            int itemCount = 0;
            while (allItems.hasNext())
            {
                Item i = allItems.next();
                addToSitemaps(html, sitemapsOrg,
                        handleURLStem + i.getHandle(), i.getLastModified());
                itemCount++;
            }

            String counts = ",communities=" + comms.size()
                    + ",collections=" + colls.size()
                    + ",items=" + itemCount;

            if (html != null)
            {
                int files = html.finish();
                log.info(LogManager.getHeader(c, "write_sitemap",
                        "type=html,num_files=" + files + counts));
            }

            if (sitemapsOrg != null)
            {
                int files = sitemapsOrg.finish();
                log.info(LogManager.getHeader(c, "write_sitemap",
                        "type=sitemaps.org,num_files=" + files + counts));
            }
        }
        finally
        {
            // Read-only use of the context: always discard it, including
            // when a database or IO error interrupts generation.
            c.abort();
        }
    }

    /**
     * Add one URL to each generator that is in use.
     *
     * @param html
     *            HTML sitemap generator, or {@code null} if not generating one
     * @param sitemapsOrg
     *            sitemaps.org generator, or {@code null} if not generating one
     * @param url
     *            URL to add
     * @param lastMod
     *            last-modified date for the URL, or {@code null} if none is
     *            supplied
     * @throws IOException
     *             if an IO error occurs.
     */
    private static void addToSitemaps(AbstractGenerator html,
            AbstractGenerator sitemapsOrg, String url, Date lastMod)
            throws IOException
    {
        if (html != null)
        {
            html.addURL(url, lastMod);
        }
        if (sitemapsOrg != null)
        {
            sitemapsOrg.addURL(url, lastMod);
        }
    }

    /**
     * Ping all search engines listed in the {@code sitemap.engineurls}
     * configuration property. A malformed entry is logged as a warning and
     * skipped; the remaining entries are still pinged.
     *
     * @throws UnsupportedEncodingException
     *             theoretically should never happen
     */
    public static void pingConfiguredSearchEngines()
            throws UnsupportedEncodingException
    {
        String[] engineURLs = configurationService
                .getArrayProperty("sitemap.engineurls");

        if (ArrayUtils.isEmpty(engineURLs))
        {
            log.warn("No search engine URLs configured to ping");
            return;
        }

        for (String engineURL : engineURLs)
        {
            try
            {
                pingSearchEngine(engineURL);
            }
            catch (MalformedURLException me)
            {
                log.warn("Bad search engine URL in configuration: "
                        + engineURL);
            }
        }
    }

    /**
     * Ping the given search engine with the URL of this site's sitemap.
     * <p>
     * The request URL is {@code engineURL} immediately followed by the
     * URL-encoded value of {@code <dspace.url>/sitemap}. I/O failures during
     * the request are logged as warnings and are not propagated.
     *
     * @param engineURL
     *            Search engine ping URL including protocol and everything
     *            up to (but not including) the sitemap URL, typically ending
     *            in a query parameter such as {@code ...?sitemap=}
     * @throws MalformedURLException
     *             if the passed in URL is malformed
     * @throws UnsupportedEncodingException
     *             theoretically should never happen
     */
    public static void pingSearchEngine(String engineURL)
            throws MalformedURLException, UnsupportedEncodingException
    {
        // Set up HTTP proxy
        String proxyHost = configurationService.getProperty("http.proxy.host");
        String proxyPort = configurationService.getProperty("http.proxy.port");
        if (StringUtils.isNotBlank(proxyHost)
                && StringUtils.isNotBlank(proxyPort))
        {
            System.setProperty("proxySet", "true");
            System.setProperty("proxyHost", proxyHost);
            // Must be setProperty: getProperty(key, default) only reads and
            // would leave the proxy port unset.
            System.setProperty("proxyPort", proxyPort);
        }

        String sitemapURL = configurationService.getProperty("dspace.url")
                + "/sitemap";

        URL url = new URL(engineURL + URLEncoder.encode(sitemapURL, "UTF-8"));

        try
        {
            HttpURLConnection connection = (HttpURLConnection) url
                    .openConnection();

            BufferedReader in = new BufferedReader(new InputStreamReader(
                    connection.getInputStream()));
            StringBuilder resp = new StringBuilder();
            try
            {
                String inputLine;
                while ((inputLine = in.readLine()) != null)
                {
                    resp.append(inputLine).append("\n");
                }
            }
            finally
            {
                // Release the stream even if reading the response fails
                in.close();
            }

            if (connection.getResponseCode() == 200)
            {
                log.info("Pinged " + url.toString() + " successfully");
            }
            else
            {
                log.warn("Error response pinging " + url.toString() + ":\n"
                        + resp);
            }
        }
        catch (IOException e)
        {
            log.warn("Error pinging " + url.toString(), e);
        }
    }
}