/*
* Copyright 2009 Bernhard Grünewaldt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.codehaus.mojo.sitemapxml;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import java.util.zip.GZIPOutputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
import org.apache.xerces.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
* SitemapMojo will produce a sitemap.xml and a zipped version sitemap.xml.gz file following the standard
* http://www.sitemaps.org/protocol.php.
* <p>
* The parameters are:<br>
* <b>input</b> - The path to the site.xml file. Example: /home/foo/bar/site.xml<br>
* <b>domainbase</b> - The base for all relative urls from site.xml. Example: http://www.foo.bar/, will be the prefix
* for all relative URLs.<br>
* <b>target</b> - The target dir to put the sitemap files. Should be the base dir of the site. Example:
* /home/foo/bar/target/site/.<br>
* <p>
* Run From Eclipse with m2eclipse plugin <br>
* <b>GOAL</b>: <code>
* org.codehaus.mojo:sitemapxml-maven-plugin:generate
* </code>
* <p>
* <b>pom.xml</b>: <br>
* {@code <plugin> <groupId>org.codehaus.mojo</groupId> <artifactId>sitemapxml-maven-plugin</artifactId> <configuration>
* <input>${basedir}/src/site/site.xml</input> <domainbase>http://www.foo.bar/</domainbase>
* <target>${project.reporting.outputDirectory}</target> </configuration> </plugin> }
* <p>
* Required is only domainbase. The other parameters have default values. <br>
* Before running the plugin you should install it locally with "mvn install".
*
* Generates a sitemap.xml and a sitemap.xml.gz file inside of <b>target</b> dir.
*
* @author Bernhard Grünewaldt
* @goal generate
*
*/
public class SitemapMojo
    extends AbstractMojo
{
    /** Name of the sitemap file written into the target directory. */
    private static final String SITEMAP_FILENAME = "sitemap.xml";

    /** Value of the xmlns attribute set on the generated urlset root element. */
    private static final String SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9";

    /** Size in bytes of the copy buffer used while gzipping the sitemap. */
    private static final int GZIP_BUFFER_SIZE = 8192;

    /**
     * Filename Parameter for site.xml
     *
     * @parameter expression="${basedir}/src/site/site.xml"
     * @required
     */
    private File input;

    /**
     * Directory the sitemap.xml and sitemap.xml.gz files are written to.
     *
     * @parameter expression="${project.reporting.outputDirectory}"
     * @required
     */
    private File target;

    /**
     * Base domain for relative urls from site.xml. Should be "http://www.foo.bar/". Will be the prefix for all
     * relative urls; exactly one slash is placed between this prefix and the relative url.
     *
     * @parameter
     * @required
     */
    private String domainbase;

    /**
     * Generates sitemap.xml and sitemap.xml.gz inside the target directory.
     * <p>
     * Every <code>item</code> element of the input site.xml that carries a relative <code>href</code> becomes one
     * <code>url</code> entry. Items with an absolute (http, https or ftp) href are skipped, as are items without an
     * href and urls that were already emitted.
     *
     * @throws MojoExecutionException if a parameter is missing, the input file does not exist or cannot be parsed,
     *             the target directory cannot be created, or writing either output file fails
     * @throws MojoFailureException declared by the Mojo contract; not thrown by this implementation
     */
    public void execute()
        throws MojoExecutionException, MojoFailureException
    {
        if ( input == null || !input.exists() )
        {
            getLog().error( "Xml file does not exist: " + input );
            throw new MojoExecutionException( "goal failed, input xml file does not exist" );
        }
        if ( target == null )
        {
            throw new MojoExecutionException( "goal failed, no target directory configured" );
        }
        if ( domainbase == null )
        {
            throw new MojoExecutionException( "goal failed, no domainbase configured" );
        }

        // The last isDirectory() check covers the case where the directory appeared between the first check
        // and the mkdirs() call.
        if ( !target.isDirectory() && !target.mkdirs() && !target.isDirectory() )
        {
            throw new MojoExecutionException( "goal failed, could not create target directory: " + target );
        }

        // Keep the output path in a local so the configured target directory is left untouched and the mojo
        // can be executed more than once.
        File sitemapFile = new File( target.getAbsoluteFile(), SITEMAP_FILENAME );

        Document siteDescriptor = parseSiteDescriptor();
        Document sitemap = buildSitemap( siteDescriptor );
        writeSitemap( sitemap, sitemapFile );

        // now gzip the file
        gzipFile( sitemapFile );
    }

    /**
     * Parses the configured input file into a DOM document.
     *
     * @return the parsed site.xml document
     * @throws MojoExecutionException if the file cannot be read or is not well-formed XML
     */
    private Document parseSiteDescriptor()
        throws MojoExecutionException
    {
        try
        {
            DOMParser parser = new DOMParser();
            parser.parse( input.toString() );
            return parser.getDocument();
        }
        catch ( MalformedURLException e )
        {
            throw new MojoExecutionException( "Invalid file URL to site.xml", e );
        }
        catch ( IOException e )
        {
            throw new MojoExecutionException( "goal failed", e );
        }
        catch ( SAXException e )
        {
            throw new MojoExecutionException( "goal failed", e );
        }
    }

    /**
     * Builds the sitemap document from the <code>item</code> elements of the site descriptor.
     *
     * @param siteDescriptor parsed site.xml document
     * @return a new document whose root is a <code>urlset</code> element with one <code>url</code> child
     *         (containing <code>loc</code> and <code>lastmod</code>) per distinct relative href
     * @throws MojoExecutionException if no DocumentBuilder can be created
     */
    private Document buildSitemap( Document siteDescriptor )
        throws MojoExecutionException
    {
        DocumentBuilder builder;
        try
        {
            builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        }
        catch ( ParserConfigurationException e )
        {
            throw new MojoExecutionException( "ParserConfigurationException", e );
        }

        Document sitemap = builder.newDocument();
        Element urlset = sitemap.createElement( "urlset" );
        urlset.setAttribute( "xmlns", SITEMAP_NAMESPACE );

        // Every entry gets the generation date as lastmod, so format it once instead of per item.
        String today = new SimpleDateFormat( "yyyy-MM-dd" ).format( new Date() );

        // Urls already written to the sitemap; used to suppress duplicates.
        Set seenUrls = new HashSet();

        NodeList items = siteDescriptor.getElementsByTagName( "item" );
        for ( int i = 0; i < items.getLength(); i++ )
        {
            Element item = (Element) items.item( i );
            if ( !item.hasAttribute( "href" ) )
            {
                getLog().warn( "Skipping item without href attribute in " + input );
                continue;
            }

            String href = item.getAttribute( "href" );
            if ( isAbsoluteUrl( href ) )
            {
                // Do not add external URLs to the sitemap. Only this item is skipped; the remaining items
                // must still be processed.
                continue;
            }

            String pageUrl = joinUrl( domainbase, href );
            if ( !seenUrls.add( pageUrl ) )
            {
                // Same url was already emitted by an earlier item.
                continue;
            }

            Element loc = sitemap.createElement( "loc" );
            loc.appendChild( sitemap.createTextNode( pageUrl ) );

            Element lastmod = sitemap.createElement( "lastmod" );
            lastmod.appendChild( sitemap.createTextNode( today ) );

            // The optional changefreq and priority elements are not emitted.
            Element url = sitemap.createElement( "url" );
            url.appendChild( loc );
            url.appendChild( lastmod );
            urlset.appendChild( url );
        }

        sitemap.appendChild( urlset );
        return sitemap;
    }

    /**
     * Serializes the sitemap document to the given file using an identity transformer.
     *
     * @param sitemap document to write
     * @param sitemapFile destination file
     * @throws MojoExecutionException if the transformer cannot be created or the transformation fails
     */
    private void writeSitemap( Document sitemap, File sitemapFile )
        throws MojoExecutionException
    {
        try
        {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.transform( new DOMSource( sitemap ), new StreamResult( sitemapFile ) );
        }
        catch ( TransformerException e )
        {
            // Also covers TransformerConfigurationException, which is a subclass.
            throw new MojoExecutionException( "goal failed", e );
        }
    }

    /**
     * Tells whether the href already names a full http, https or ftp url and therefore must not be prefixed
     * with the domainbase.
     *
     * @param href href attribute value of an item
     * @return true if the href starts with "https://", "http://" or "ftp://"
     */
    private static boolean isAbsoluteUrl( String href )
    {
        return href.startsWith( "https://" ) || href.startsWith( "http://" ) || href.startsWith( "ftp://" );
    }

    /**
     * Prefixes a relative href with the base url, making sure exactly one slash separates the two parts.
     *
     * @param base base url, with or without trailing slash
     * @param href relative url, with or without leading slash
     * @return the combined url
     */
    private static String joinUrl( String base, String href )
    {
        boolean baseEndsWithSlash = base.endsWith( "/" );
        boolean hrefStartsWithSlash = href.startsWith( "/" );
        if ( baseEndsWithSlash && hrefStartsWithSlash )
        {
            return base + href.substring( 1 );
        }
        if ( !baseEndsWithSlash && !hrefStartsWithSlash )
        {
            return base + "/" + href;
        }
        return base + href;
    }

    /**
     * This method generates a gzipped file of the source file. <br>
     * The gzipped file is named sourceFilename.gz.<br>
     * It will be generated in the same directory.
     * <p>
     * All streams are closed even when reading or writing fails.
     *
     * @param source File that should get gzipped
     * @throws MojoExecutionException if the source cannot be read or the gzip file cannot be written
     */
    private void gzipFile( File source )
        throws MojoExecutionException
    {
        File gzipFile = new File( source.getAbsolutePath() + ".gz" );
        try
        {
            FileInputStream in = new FileInputStream( source );
            try
            {
                FileOutputStream fileOut = new FileOutputStream( gzipFile );
                try
                {
                    GZIPOutputStream gzipOut = new GZIPOutputStream( fileOut );
                    try
                    {
                        byte[] buffer = new byte[GZIP_BUFFER_SIZE];
                        int length;
                        while ( ( length = in.read( buffer ) ) != -1 )
                        {
                            gzipOut.write( buffer, 0, length );
                        }
                    }
                    finally
                    {
                        // Writes the gzip trailer and closes the underlying file stream.
                        gzipOut.close();
                    }
                }
                finally
                {
                    // Needed when the GZIPOutputStream constructor itself failed; harmless otherwise.
                    fileOut.close();
                }
            }
            finally
            {
                in.close();
            }
        }
        catch ( IOException e )
        {
            throw new MojoExecutionException( "goal failed", e );
        }
    }
}