/**********************************************************************************
* $URL:https://source.sakaiproject.org/svn/osp/trunk/presentation/api-impl/src/java/org/theospi/portfolio/presentation/export/PresentationExport.java $
* $Id:PresentationExport.java 9134 2006-05-08 20:28:42Z chmaurer@iupui.edu $
***********************************************************************************
*
* Copyright (c) 2005, 2006, 2008, 2009 The Sakai Foundation
*
* Licensed under the Educational Community License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.opensource.org/licenses/ECL-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**********************************************************************************/
package org.theospi.portfolio.presentation.export;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.zip.Adler32;
import java.util.zip.CheckedOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import websphinx.Access;
import websphinx.Crawler;
import websphinx.DownloadParameters;
import websphinx.Link;
import websphinx.LinkEvent;
import websphinx.LinkListener;
import websphinx.Page;
public class PresentationExport extends Crawler implements LinkListener {
protected final transient Log logger = LogFactory.getLog(getClass());
private PortfolioMirror mirror = null;
private String hostName = null;
private String webappName = null;
private String tempDirectory = null;
public static final int BUFFER = 1024 * 10;
private ArrayList errorLinks = new ArrayList();
private static SessionAccess access = new SessionAccess();
public PresentationExport(String url, String tempDirectory) throws IOException {
this.tempDirectory = tempDirectory;
Access.setAccess(access);
URL urlObj = new URL(url);
this.hostName = urlObj.getHost();
String path = urlObj.getPath();
StringTokenizer tok = new StringTokenizer(path, "/", false);
webappName = tok.nextToken();
if (!tok.hasMoreTokens()) {
webappName = "";
}
else {
webappName = "/" + webappName;
}
mirror = new PortfolioMirror(tempDirectory, webappName);
this.setRootHrefs(url);
this.setLinkType(Crawler.ALL_LINKS);
this.setSynchronous(true);
this.setDomain(Crawler.WEB);
this.addLinkListener(this);
DownloadParameters dp = getDownloadParameters();
dp = dp.changeMaxThreads(1);
setDownloadParameters(dp.changeMaxPageSize(2000));
}
public void createZip(OutputStream out) throws IOException {
File directory = new File(tempDirectory + webappName);
CheckedOutputStream checksum = null;
ZipOutputStream zos = null;
try{
checksum = new CheckedOutputStream(out, new Adler32());
zos = new ZipOutputStream(new BufferedOutputStream(checksum));
recurseDirectory("", directory, zos);
zos.finish();
zos.flush();
}
finally {
if (zos != null) {
try {
zos.close();
} catch (IOException e) {
}
}
if (checksum != null) {
try {
checksum.close();
} catch (IOException e) {
}
}
}
}
/**
* places a directory into the zip stream
* @param parentPath
* @param directory
* @param zos
* @throws IOException
*/
protected void recurseDirectory(String parentPath, File directory, ZipOutputStream zos) throws IOException {
// get all files... go through those
File[] files = directory.listFiles(new DirectoryFileFilter(false));
if(files == null)
throw new NullPointerException("recursing through a directory which is not a directory: " + parentPath + " ---- " + directory);
addFiles(zos, parentPath, files);
// get all directories... go through those...
File[] directories = directory.listFiles(new DirectoryFileFilter(true));
for (int i=0;i<directories.length;i++) {
recurseDirectory(parentPath + directories[i].getName() + "/",
directories[i], zos);
}
}
protected void addFiles(ZipOutputStream out, String parentPrefix,
File [] files) throws IOException {
BufferedInputStream origin = null;
byte data[] = new byte[BUFFER];
for (int i=0;i<files.length;i++) {
String fileName = URLDecoder.decode( parentPrefix + files[i].getName() );
logger.debug("Adding " + fileName);
InputStream in = null;
try {
in = new FileInputStream(files[i]);
if (in == null)
throw new NullPointerException();
origin = new BufferedInputStream(in, BUFFER);
if (fileName == null)
throw new NullPointerException();
ZipEntry entry = new ZipEntry(fileName);
out.putNextEntry(entry);
int count;
while ((count = origin.read(data, 0, BUFFER)) != -1) {
out.write(data, 0, count);
}
out.closeEntry();
} finally {
try {
if (origin != null) {
origin.close();
}
} catch (Exception e) {
logger.warn("Error cleaning up resource: ", e);
}
try {
in.close();
} catch (Exception e) {
logger.warn("Error cleaning up resource: ", e);
}
}
}
}
/**
* Start crawling. Returns either when the crawl is done, or
* when pause() or stop() is called. Because this method implements the
* java.lang.Runnable interface, a crawler can be run in the
* background thread.
*/
public void run() {
super.run();
// process error links
for (Iterator i=errorLinks.iterator();i.hasNext();) {
Link link = (Link)i.next();
visit(link.getPage());
}
}
public synchronized void visit(Page page) {
try {
mirror.writePage(page);
mirror.rewrite();
} catch (IOException e) {
logger.info("Error visiting link. Most likely broken link.", e);
}
logger.debug("visiting page");
super.visit(page);
}
public synchronized boolean shouldVisit(Link link) {
if (link.getMethod() == Link.POST) {
return false;
}
if (!link.getHost().equalsIgnoreCase(hostName)) {
return false;
}
// TODO maybe if (link.getURL().getFile().startsWith(webappName + "/showPublicPortfolio.do")) {
// return false;
//}
return true;
}
public void deleteTemp() {
File temp = new File(tempDirectory);
deleteContent(temp);
temp.delete();
}
protected void deleteContent(File directory) {
File[] files = directory.listFiles(new DirectoryFileFilter(false));
if (files != null) {
for (int i=0;i<files.length;i++) {
files[i].delete();
}
}
// get all directories... go through those...
File[] directories = directory.listFiles(new DirectoryFileFilter(true));
if (directories != null) {
for (int i=0;i<directories.length;i++) {
deleteContent(directories[i]);
directories[i].delete();
}
}
}
/**
* Notify that an event occured on a link.
*/
public void crawled(LinkEvent event) {
if (event.getID() == LinkEvent.ERROR) {
// switch to stream page link
if (!(event.getLink().getPage() instanceof StreamedPage)) {
logger.debug("loading file through streamed page.");
Link newLink = new Link(event.getLink().getURL());
newLink.setPage(new StreamedPage(event.getLink()));
addErrorLink(newLink);
}
else {
logger.error("Link error " + event.getLink().getURL().toExternalForm(),
event.getException());
}
}
else if (event.getID() == LinkEvent.QUEUED) {
if (event.getLink().getPage() instanceof StreamedPage) {
event.getLink().setStatus(LinkEvent.DOWNLOADED);
}
}
}
protected synchronized void addErrorLink(Link newLink) {
errorLinks.add(newLink);
}
/**
* Implements the FileFilter. it accepts the switch of whether to accept files or directories
*
*/
private static class DirectoryFileFilter implements FileFilter {
private boolean directories = false;
public DirectoryFileFilter(boolean directories) {
this.directories = directories;
}
/**
* Tests whether or not the specified abstract pathname should be
* included in a pathname list.
*
* @param pathname The abstract pathname to be tested
* @return <code>true</code> if and only if <code>pathname</code>
* should be included
*/
public boolean accept(File pathname) {
if (directories) {
return pathname.isDirectory();
}
else {
return pathname.isFile();
}
}
}
}