/* * This is eMonocot, a global online biodiversity information resource. * * Copyright © 2011–2015 The Board of Trustees of the Royal Botanic Gardens, Kew and The University of Oxford * * eMonocot is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * eMonocot is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * The complete text of the GNU Affero General Public License is in the source repository as the file * ‘COPYING’. It is also available from <http://www.gnu.org/licenses/>. */ package org.emonocot.job.sitemap; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.List; import org.apache.commons.io.FileUtils; import org.emonocot.model.marshall.xml.StaxEventItemWriter; import org.joda.time.ReadableInstant; import org.joda.time.format.ISODateTimeFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.ChunkListener; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.StepExecution; import org.springframework.batch.core.StepExecutionListener; import org.springframework.batch.item.ExecutionContext; import org.springframework.core.io.FileSystemResource; import org.springframework.core.io.Resource; /** * @author jk00kg * */ public class SitemapFilesListener implements StepExecutionListener, ChunkListener { private static Logger logger = LoggerFactory.getLogger(SitemapFilesListener.class); /** * The maximum size allowed by the sitemap protocol is 10MB */ private static long MAX_SITEMAP_LENGTH = 9 * 1024 * 1024; /** * The maximum number of url elements in a single sitemap file */ private static int MAX_URL_COUNT = 9000; private StepExecution currentStep; private List<Url> sitemapNames = new ArrayList<Url>(); private String portalBaseUrl; private String sitemapSpoolDir; private StaxEventItemWriter staxWriter; /** * Initialized in beforeStep */ private int fileCount; /** * Initialized in beforeStep */ private FileSystemResource currentFile; private int chunkOfFile = 0; private int commitSize = 1000; /** * Used to hold a set of URLs written as the last step of the job */ private ExecutionContext jobExContext; private String sitemapDir; /** * @param portalBaseUrl the portalBaseUrl to set */ public void setPortalBaseUrl(String portalBaseUrl) { this.portalBaseUrl = portalBaseUrl; } public void setSitemapDir(String sitemapDir) { this.sitemapDir = sitemapDir; } /** * @return the sitemapNames */ public List<Url> getSitemapNames() { return sitemapNames; } /** * @param sitemapSpoolDir the sitemapSpoolDir to set */ public void setSitemapSpoolDir(String sitemapSpoolDir) { this.sitemapSpoolDir = sitemapSpoolDir; } /** * @param staxWriter the staxWriter to set */ public void setStaxWriter(StaxEventItemWriter staxWriter) { this.staxWriter = staxWriter; } /* (non-Javadoc) * @see org.springframework.batch.core.StepExecutionListener#beforeStep(org.springframework.batch.core.StepExecution) */ public void beforeStep(StepExecution stepExecution) { currentStep = stepExecution; logger.debug("currentStep set to: " + currentStep.getStepName()); if (jobExContext == null){ jobExContext = currentStep.getJobExecution().getExecutionContext(); jobExContext.put("sitemaps.url", sitemapNames); } fileCount = 0; currentFile = new FileSystemResource(sitemapSpoolDir + "/"+ currentStep.getStepName() + fileCount + ".xml"); //Set here because it can change at the end of a chunk staxWriter.setResource((Resource) currentFile); } /* (non-Javadoc) * @see org.springframework.batch.core.StepExecutionListener#afterStep(org.springframework.batch.core.StepExecution) */ public ExitStatus afterStep(StepExecution stepExecution) { logger.debug("After Step " + currentStep.getStepName()); try { Url u = new Url(); u.setLastmod(ISODateTimeFormat.dateTime().print((ReadableInstant) null)); u.setLoc(new URL(portalBaseUrl +"/" + sitemapDir + "/" + currentFile.getFilename())); sitemapNames.add(u); } catch (MalformedURLException e) { logger.error("Unable create Url for sitemap", e); } //reset counts to nulls to support beforeStep() currentStep = null; currentFile = null; chunkOfFile = 0; commitSize = 0; return stepExecution.getExitStatus(); } public void beforeChunk() { //Check sizes (MB & count) & if over limit if (FileUtils.sizeOf(currentFile.getFile()) >= MAX_SITEMAP_LENGTH || (chunkOfFile * commitSize) >= MAX_URL_COUNT){ logger.debug("Creating a new file"); try { Url u = new Url(); u.setLastmod(ISODateTimeFormat.dateTime().print((ReadableInstant) null)); u.setLoc(new URL(portalBaseUrl + "/sitemap/" + currentFile.getFilename())); sitemapNames.add(u); } catch (MalformedURLException e) { logger.error("Unable create Url for sitemap", e); } //close & open writer with new name staxWriter.close(); currentFile = new FileSystemResource(sitemapSpoolDir + "/"+ currentStep.getStepName() + ++fileCount + ".xml"); logger.debug("Open:" + currentFile.isOpen()); logger.debug("Writable:" + currentFile.isWritable()); staxWriter.setResource((Resource) currentFile); staxWriter.open(currentStep.getExecutionContext()); chunkOfFile = 0; } } public void afterChunk() { logger.debug("End Chunk " + currentStep.getCommitCount()); chunkOfFile++; if (currentStep.getCommitCount() == 1) { commitSize = currentStep.getReadCount() + currentStep.getReadSkipCount(); logger.debug("Set commitSize to " + commitSize); } } }